diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 57e46ac69568b1efb67942dca3895632509fc32e..0000000000000000000000000000000000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: License Check - -# Controls when the workflow will run -on: - pull_request: - branches: [ kernel5.4/master ] - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - check: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 - - # Setup the python environment - - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - # Check the license of files - - name: Check the license - run: python ./package/default/license_check.py diff --git a/Documentation/Changes b/Documentation/Changes deleted file mode 120000 index 7564ae1682bae84b10e025026dcd080e34dc98ce..0000000000000000000000000000000000000000 --- a/Documentation/Changes +++ /dev/null @@ -1 +0,0 @@ -process/changes.rst \ No newline at end of file diff --git a/Documentation/Changes b/Documentation/Changes new file mode 100644 index 0000000000000000000000000000000000000000..7564ae1682bae84b10e025026dcd080e34dc98ce --- /dev/null +++ b/Documentation/Changes @@ -0,0 +1 @@ +process/changes.rst \ No newline at end of file diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S deleted file mode 100644 index 4aed3f9ab88aef714feb192af32262aae7ba62c7..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1024x768.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 65000 /* kHz */ -#define XPIX 1024 -#define YPIX 768 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 320 -#define YBLANK 38 -#define XOFFSET 8 -#define XPULSE 144 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux XGA" -#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */ -#define HSYNC_POL 0 -#define VSYNC_POL 0 - -#include "edid.S" diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S deleted file mode 100644 index b26dd424cad7cc77d6c1cc79af1348409390a382..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1280x1024.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 108000 /* kHz */ -#define XPIX 1280 -#define YPIX 1024 -#define XY_RATIO XY_RATIO_5_4 -#define XBLANK 408 -#define YBLANK 42 -#define XOFFSET 48 -#define XPULSE 112 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S deleted file mode 100644 index 0d091b282768f2bd7ac4399dbe3bd4d0814a3cb6..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1600x1200.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor - - Copyright (C) 2013 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 162000 /* kHz */ -#define XPIX 1600 -#define YPIX 1200 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 560 -#define YBLANK 50 -#define XOFFSET 64 -#define XPULSE 192 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux UXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S deleted file mode 100644 index 7dfed9a33eab6f5f5c858bd34788d63b380ab5a3..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1680x1050.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 146250 /* kHz */ -#define XPIX 1680 -#define YPIX 1050 -#define XY_RATIO XY_RATIO_16_10 -#define XBLANK 560 -#define YBLANK 39 -#define XOFFSET 104 -#define XPULSE 176 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux WSXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S deleted file mode 100644 index d6ffbba28e95bf1d7829887134d2ae06f0369ba6..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1920x1080.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 148500 /* kHz */ -#define XPIX 1920 -#define YPIX 1080 -#define XY_RATIO XY_RATIO_16_9 -#define XBLANK 280 -#define YBLANK 45 -#define XOFFSET 88 -#define XPULSE 44 -#define YOFFSET 4 -#define YPULSE 5 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux FHD" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S deleted file mode 100644 index a5616588de086453bb814f3a1322920ac6c42511..0000000000000000000000000000000000000000 --- a/Documentation/EDID/800x600.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - 800x600.S: EDID data set for standard 800x600 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - Copyright (C) 2014 Linaro Limited - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 40000 /* kHz */ -#define XPIX 800 -#define YPIX 600 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 256 -#define YBLANK 28 -#define XOFFSET 40 -#define XPULSE 128 -#define YOFFSET 1 -#define YPULSE 4 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SVGA" -#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S deleted file mode 100644 index c3d13815526dbd64b1b6b2d54c9afd31974b7c8a..0000000000000000000000000000000000000000 --- a/Documentation/EDID/edid.S +++ /dev/null @@ -1,274 +0,0 @@ -/* - edid.S: EDID data template - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
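   As a quick sanity check on the parameter sets above (worked
   arithmetic): the vertical refresh a mode yields is
   CLOCK kHz * 1000 / ((XPIX + XBLANK) * (YPIX + YBLANK)).
   For the 1024x768 set: 65,000,000 / (1344 * 806) = 60.0 Hz,
   which matches its VFREQ of 60.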
-*/ - - -/* Manufacturer */ -#define MFG_LNX1 'L' -#define MFG_LNX2 'N' -#define MFG_LNX3 'X' -#define SERIAL 0 -#define YEAR 2012 -#define WEEK 5 - -/* EDID 1.3 standard definitions */ -#define XY_RATIO_16_10 0b00 -#define XY_RATIO_4_3 0b01 -#define XY_RATIO_5_4 0b10 -#define XY_RATIO_16_9 0b11 - -/* Provide defaults for the timing bits */ -#ifndef ESTABLISHED_TIMING1_BITS -#define ESTABLISHED_TIMING1_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING2_BITS -#define ESTABLISHED_TIMING2_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING3_BITS -#define ESTABLISHED_TIMING3_BITS 0x00 -#endif - -#define mfgname2id(v1,v2,v3) \ - ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f)) -#define swap16(v1) ((v1>>8)+((v1&0xff)<<8)) -#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f)) -#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f)) -#define msbs4(v1,v2,v3,v4) \ - ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\ - (((v3>>4)&0x03)<<2)+((v4>>4)&0x03)) -#define pixdpi2mm(pix,dpi) ((pix*25)/dpi) -#define xsize pixdpi2mm(XPIX,DPI) -#define ysize pixdpi2mm(YPIX,DPI) - - .data - -/* Fixed header pattern */ -header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00 - -mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3)) - -prod_code: .hword 0 - -/* Serial number. 32 bits, little endian. */ -serial_number: .long SERIAL - -/* Week of manufacture */ -week: .byte WEEK - -/* Year of manufacture, less 1990. (1990-2245) - If week=255, it is the model year instead */ -year: .byte YEAR-1990 - -version: .byte VERSION /* EDID version, usually 1 (for 1.3) */ -revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */ - -/* If Bit 7=1 Digital input. If set, the following bit definitions apply: - Bits 6-1 Reserved, must be 0 - Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB, - 1 pixel per clock, up to 8 bits per color, MSB aligned, - If Bit 7=0 Analog input. If clear, the following bit definitions apply: - Bits 6-5 Video white and sync levels, relative to blank - 00=+0.7/-0.3 V; 01=+0.714/-0.286 V; - 10=+1.0/-0.4 V; 11=+0.7/0 V - Bit 4 Blank-to-black setup (pedestal) expected - Bit 3 Separate sync supported - Bit 2 Composite sync (on HSync) supported - Bit 1 Sync on green supported - Bit 0 VSync pulse must be serrated when composite or - sync-on-green is used. */ -video_parms: .byte 0x6d - -/* Maximum horizontal image size, in centimetres - (max 292 cm/115 in at 16:9 aspect ratio) */ -max_hor_size: .byte xsize/10 - -/* Maximum vertical image size, in centimetres. - If either byte is 0, undefined (e.g. projector) */ -max_vert_size: .byte ysize/10 - -/* Display gamma, minus 1, times 100 (range 1.00-3.55) */ -gamma: .byte 120 - -/* Bit 7 DPMS standby supported - Bit 6 DPMS suspend supported - Bit 5 DPMS active-off supported - Bits 4-3 Display type: 00=monochrome; 01=RGB colour; - 10=non-RGB multicolour; 11=undefined - Bit 2 Standard sRGB colour space. Bytes 25-34 must contain - sRGB standard values. - Bit 1 Preferred timing mode specified in descriptor block 1. - Bit 0 GTF supported with default parameter values. */ -dsp_features: .byte 0xea - -/* Chromaticity coordinates. */ -/* Red and green least-significant bits - Bits 7-6 Red x value least-significant 2 bits - Bits 5-4 Red y value least-significant 2 bits - Bits 3-2 Green x value least-significant 2 bits - Bits 1-0 Green y value least-significant 2 bits */ -red_green_lsb: .byte 0x5e - -/* Blue and white least-significant 2 bits */ -blue_white_lsb: .byte 0xc0 - -/* Red x value most significant 8 bits.
0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */ -red_x_msb: .byte 0xa4 - -/* Red y value most significant 8 bits */ -red_y_msb: .byte 0x59 - -/* Green x and y value most significant 8 bits */ -green_x_y_msb: .byte 0x4a,0x98 - -/* Blue x and y value most significant 8 bits */ -blue_x_y_msb: .byte 0x25,0x20 - -/* Default white point x and y value most significant 8 bits */ -white_x_y_msb: .byte 0x50,0x54 - -/* Established timings */ -/* Bit 7 720x400 @ 70 Hz - Bit 6 720x400 @ 88 Hz - Bit 5 640x480 @ 60 Hz - Bit 4 640x480 @ 67 Hz - Bit 3 640x480 @ 72 Hz - Bit 2 640x480 @ 75 Hz - Bit 1 800x600 @ 56 Hz - Bit 0 800x600 @ 60 Hz */ -estbl_timing1: .byte ESTABLISHED_TIMING1_BITS - -/* Bit 7 800x600 @ 72 Hz - Bit 6 800x600 @ 75 Hz - Bit 5 832x624 @ 75 Hz - Bit 4 1024x768 @ 87 Hz, interlaced (1024x768) - Bit 3 1024x768 @ 60 Hz - Bit 2 1024x768 @ 72 Hz - Bit 1 1024x768 @ 75 Hz - Bit 0 1280x1024 @ 75 Hz */ -estbl_timing2: .byte ESTABLISHED_TIMING2_BITS - -/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II) - Bits 6-0 Other manufacturer-specific display modes */ -estbl_timing3: .byte ESTABLISHED_TIMING3_BITS - -/* Standard timing */ -/* X resolution, less 31, divided by 8 (256-2288 pixels) */ -std_xres: .byte (XPIX/8)-31 -/* Y resolution, X:Y pixel ratio - Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9. - Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */ -std_vres: .byte (XY_RATIO<<6)+VFREQ-60 - .fill 7,2,0x0101 /* Unused */ - -descriptor1: -/* Pixel clock in 10 kHz units. (0-655.35 MHz, little-endian) */ -clock: .hword CLOCK/10 - -/* Horizontal active pixels 8 lsbits (0-4095) */ -x_act_lsb: .byte XPIX&0xff -/* Horizontal blanking pixels 8 lsbits (0-4095) - End of active to start of next active. */ -x_blk_lsb: .byte XBLANK&0xff -/* Bits 7-4 Horizontal active pixels 4 msbits - Bits 3-0 Horizontal blanking pixels 4 msbits */ -x_msbs: .byte msbs2(XPIX,XBLANK) - -/* Vertical active lines 8 lsbits (0-4095) */ -y_act_lsb: .byte YPIX&0xff -/* Vertical blanking lines 8 lsbits (0-4095) */ -y_blk_lsb: .byte YBLANK&0xff -/* Bits 7-4 Vertical active lines 4 msbits - Bits 3-0 Vertical blanking lines 4 msbits */ -y_msbs: .byte msbs2(YPIX,YBLANK) - -/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */ -x_snc_off_lsb: .byte XOFFSET&0xff -/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */ -x_snc_pls_lsb: .byte XPULSE&0xff -/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63) - Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */ -y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE) -/* Bits 7-6 Horizontal sync offset pixels 2 msbits - Bits 5-4 Horizontal sync pulse width pixels 2 msbits - Bits 3-2 Vertical sync offset lines 2 msbits - Bits 1-0 Vertical sync pulse width lines 2 msbits */ -xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE) - -/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -x_dsp_size: .byte xsize&0xff - -/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -y_dsp_size: .byte ysize&0xff - -/* Bits 7-4 Horizontal display size, mm, 4 msbits - Bits 3-0 Vertical display size, mm, 4 msbits */ -dsp_size_mbsb: .byte msbs2(xsize,ysize) - -/* Horizontal border pixels (each side; total is twice this) */ -x_border: .byte 0 -/* Vertical border lines (each side; total is twice this) */ -y_border: .byte 0 - -/* Bit 7 Interlaced - Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0: - Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar, - sync=1 during left; 11=4-way interleaved stereo - Bit 0=1 2-way interleaved
stereo: 01=Right image on even lines; - 10=Left image on even lines; 11=side-by-side - Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite; - 10=Digital composite (on HSync); 11=Digital separate - Bit 2 If digital separate: Vertical sync polarity (1=positive) - Other types: VSync serrated (HSync during VSync) - Bit 1 If analog sync: Sync on all 3 RGB lines (else green only) - Digital: HSync polarity (1=positive) - Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */ -features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1) - -descriptor2: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xff /* Descriptor is monitor serial number (text) */ - .byte 0 /* Must be zero */ -start1: .ascii "Linux #0" -end1: .byte 0x0a /* End marker */ - .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */ -descriptor3: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfd /* Descriptor is monitor range limits */ - .byte 0 /* Must be zero */ -start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */ - .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */ - .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up - to 10 MHz multiple (10-2550 MHz) */ - .byte 0 /* No extended timing information type */ -end2: .byte 0x0a /* End marker */ - .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */ -descriptor4: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfc /* Descriptor is text */ - .byte 0 /* Must be zero */ -start3: .ascii TIMING_NAME -end3: .byte 0x0a /* End marker */ - .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */ -extensions: .byte 0 /* Number of extensions to follow */ -checksum: .byte CRC /* Sum of all bytes must be 0 */ diff --git a/Documentation/admin-guide/cifs/winucase_convert.pl b/Documentation/admin-guide/cifs/winucase_convert.pl old mode 100755 new mode 100644 diff --git a/Documentation/arm/samsung/clksrc-change-registers.awk b/Documentation/arm/samsung/clksrc-change-registers.awk old mode 100755 new mode 100644 diff --git a/Documentation/devicetree/bindings/sound/rt1308.txt b/Documentation/devicetree/bindings/sound/rt1308.txt old mode 100755 new mode 100644 diff --git a/Documentation/features/list-arch.sh b/Documentation/features/list-arch.sh old mode 100755 new mode 100644 diff --git a/Documentation/features/scripts/features-refresh.sh b/Documentation/features/scripts/features-refresh.sh old mode 100755 new mode 100644 diff --git a/Documentation/sound/cards/multisound.sh b/Documentation/sound/cards/multisound.sh old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py old mode 100755 new mode 100644 diff --git a/Documentation/target/target-export-device b/Documentation/target/target-export-device old mode 100755 new mode 100644 diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py old mode 100755 new mode 100644 diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S deleted file mode 100644 index 
06a7c95fe9ad05878678bc707ef63f02d1773867..0000000000000000000000000000000000000000 --- a/arch/alpha/boot/head.S +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/boot/head.S - * - * initial bootloader stuff.. - */ - -#include - - .set noreorder - .globl __start - .ent __start -__start: - br $29,2f -2: ldgp $29,0($29) - jsr $26,start_kernel - call_pal PAL_halt - .end __start - - .align 5 - .globl wrent - .ent wrent -wrent: - .prologue 0 - call_pal PAL_wrent - ret ($26) - .end wrent - - .align 5 - .globl wrkgp - .ent wrkgp -wrkgp: - .prologue 0 - call_pal PAL_wrkgp - ret ($26) - .end wrkgp - - .align 5 - .globl switch_to_osf_pal - .ent switch_to_osf_pal -switch_to_osf_pal: - subq $30,128,$30 - .frame $30,128,$26 - stq $26,0($30) - stq $1,8($30) - stq $2,16($30) - stq $3,24($30) - stq $4,32($30) - stq $5,40($30) - stq $6,48($30) - stq $7,56($30) - stq $8,64($30) - stq $9,72($30) - stq $10,80($30) - stq $11,88($30) - stq $12,96($30) - stq $13,104($30) - stq $14,112($30) - stq $15,120($30) - .prologue 0 - - stq $30,0($17) /* save KSP in PCB */ - - bis $30,$30,$20 /* a4 = KSP */ - br $17,1f - - ldq $26,0($30) - ldq $1,8($30) - ldq $2,16($30) - ldq $3,24($30) - ldq $4,32($30) - ldq $5,40($30) - ldq $6,48($30) - ldq $7,56($30) - ldq $8,64($30) - ldq $9,72($30) - ldq $10,80($30) - ldq $11,88($30) - ldq $12,96($30) - ldq $13,104($30) - ldq $14,112($30) - ldq $15,120($30) - addq $30,128,$30 - ret ($26) -1: call_pal PAL_swppal - .end switch_to_osf_pal - - .align 3 - .globl tbi - .ent tbi -tbi: - .prologue 0 - call_pal PAL_tbi - ret ($26) - .end tbi - - .align 3 - .globl halt - .ent halt -halt: - .prologue 0 - call_pal PAL_halt - .end halt - -/* $16 - new stack page */ - .align 3 - .globl move_stack - .ent move_stack -move_stack: - .prologue 0 - lda $0, 0x1fff($31) - and $0, $30, $1 /* Stack offset */ - or $1, $16, $16 /* New stack pointer */ - mov $30, $1 - mov $16, $2 -1: ldq $3, 0($1) /* Move the stack */ - addq $1, 8, $1 - stq $3, 0($2) - and $0, $1, $4 - addq $2, 8, $2 - bne $4, 1b - mov $16, $30 - ret ($26) - .end move_stack diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S deleted file mode 100644 index 2e09248f8324258305dee38e88d1bec65fda9f00..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/entry.S +++ /dev/null @@ -1,852 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/entry.S - * - * Kernel entry-points. - */ - -#include -#include -#include -#include -#include - - .text - .set noat - .cfi_sections .debug_frame - -/* Stack offsets. */ -#define SP_OFF 184 -#define SWITCH_STACK_SIZE 320 - -.macro CFI_START_OSF_FRAME func - .align 4 - .globl \func - .type \func,@function -\func: - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 48 - .cfi_rel_offset 64, 8 - .cfi_rel_offset $gp, 16 - .cfi_rel_offset $16, 24 - .cfi_rel_offset $17, 32 - .cfi_rel_offset $18, 40 -.endm - -.macro CFI_END_OSF_FRAME func - .cfi_endproc - .size \func, . - \func -.endm - -/* - * This defines the normal kernel pt-regs layout. - * - * regs 9-15 preserved by C code - * regs 16-18 saved by PAL-code - * regs 29-30 saved and set up by PAL-code - * JRP - Save regs 16-18 in a special area of the stack, so that - * the palcode-provided values are available to the signal handler. 
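 * In C terms, the frame SAVE_ALL builds below looks roughly like this
 * (a sketch inferred from the store offsets; "frame_sketch" is an
 * illustrative name, not a kernel definition):
 *
 *	struct frame_sketch {
 *		unsigned long r0, r1, r2, r3, r4,	   offsets   0.. 32
 *			      r5, r6, r7, r8;		   offsets  40.. 64
 *		unsigned long r19_to_r27[9];		   offsets  72..136
 *		unsigned long r28, hae;			   offsets 144, 152
 *		unsigned long r16, r17, r18;		   offsets 160..176
 *	};				     struct size == SP_OFF == 184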
- */ - -.macro SAVE_ALL - subq $sp, SP_OFF, $sp - .cfi_adjust_cfa_offset SP_OFF - stq $0, 0($sp) - stq $1, 8($sp) - stq $2, 16($sp) - stq $3, 24($sp) - stq $4, 32($sp) - stq $28, 144($sp) - .cfi_rel_offset $0, 0 - .cfi_rel_offset $1, 8 - .cfi_rel_offset $2, 16 - .cfi_rel_offset $3, 24 - .cfi_rel_offset $4, 32 - .cfi_rel_offset $28, 144 - lda $2, alpha_mv - stq $5, 40($sp) - stq $6, 48($sp) - stq $7, 56($sp) - stq $8, 64($sp) - stq $19, 72($sp) - stq $20, 80($sp) - stq $21, 88($sp) - ldq $2, HAE_CACHE($2) - stq $22, 96($sp) - stq $23, 104($sp) - stq $24, 112($sp) - stq $25, 120($sp) - stq $26, 128($sp) - stq $27, 136($sp) - stq $2, 152($sp) - stq $16, 160($sp) - stq $17, 168($sp) - stq $18, 176($sp) - .cfi_rel_offset $5, 40 - .cfi_rel_offset $6, 48 - .cfi_rel_offset $7, 56 - .cfi_rel_offset $8, 64 - .cfi_rel_offset $19, 72 - .cfi_rel_offset $20, 80 - .cfi_rel_offset $21, 88 - .cfi_rel_offset $22, 96 - .cfi_rel_offset $23, 104 - .cfi_rel_offset $24, 112 - .cfi_rel_offset $25, 120 - .cfi_rel_offset $26, 128 - .cfi_rel_offset $27, 136 -.endm - -.macro RESTORE_ALL - lda $19, alpha_mv - ldq $0, 0($sp) - ldq $1, 8($sp) - ldq $2, 16($sp) - ldq $3, 24($sp) - ldq $21, 152($sp) - ldq $20, HAE_CACHE($19) - ldq $4, 32($sp) - ldq $5, 40($sp) - ldq $6, 48($sp) - ldq $7, 56($sp) - subq $20, $21, $20 - ldq $8, 64($sp) - beq $20, 99f - ldq $20, HAE_REG($19) - stq $21, HAE_CACHE($19) - stq $21, 0($20) -99: ldq $19, 72($sp) - ldq $20, 80($sp) - ldq $21, 88($sp) - ldq $22, 96($sp) - ldq $23, 104($sp) - ldq $24, 112($sp) - ldq $25, 120($sp) - ldq $26, 128($sp) - ldq $27, 136($sp) - ldq $28, 144($sp) - addq $sp, SP_OFF, $sp - .cfi_restore $0 - .cfi_restore $1 - .cfi_restore $2 - .cfi_restore $3 - .cfi_restore $4 - .cfi_restore $5 - .cfi_restore $6 - .cfi_restore $7 - .cfi_restore $8 - .cfi_restore $19 - .cfi_restore $20 - .cfi_restore $21 - .cfi_restore $22 - .cfi_restore $23 - .cfi_restore $24 - .cfi_restore $25 - .cfi_restore $26 - .cfi_restore $27 - .cfi_restore $28 - .cfi_adjust_cfa_offset -SP_OFF -.endm - -.macro DO_SWITCH_STACK - bsr $1, do_switch_stack - .cfi_adjust_cfa_offset SWITCH_STACK_SIZE - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - /* We don't really care about the FP registers for debugging. */ -.endm - -.macro UNDO_SWITCH_STACK - bsr $1, undo_switch_stack - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE -.endm - -/* - * Non-syscall kernel entry points. - */ - -CFI_START_OSF_FRAME entInt - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $19 - jsr $31, do_entInt -CFI_END_OSF_FRAME entInt - -CFI_START_OSF_FRAME entArith - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $18 - jsr $31, do_entArith -CFI_END_OSF_FRAME entArith - -CFI_START_OSF_FRAME entMM - SAVE_ALL -/* save $9 - $15 so the inline exception code can manipulate them. 
*/ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - addq $sp, 56, $19 -/* handle the fault */ - lda $8, 0x3fff - bic $sp, $8, $8 - jsr $26, do_page_fault -/* reload the registers after the exception code played. */ - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - addq $sp, 56, $sp - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -56 -/* finish up the syscall as normal. */ - br ret_from_sys_call -CFI_END_OSF_FRAME entMM - -CFI_START_OSF_FRAME entIF - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $17 - jsr $31, do_entIF -CFI_END_OSF_FRAME entIF - -CFI_START_OSF_FRAME entUna - lda $sp, -256($sp) - .cfi_adjust_cfa_offset 256 - stq $0, 0($sp) - .cfi_rel_offset $0, 0 - .cfi_remember_state - ldq $0, 256($sp) /* get PS */ - stq $1, 8($sp) - stq $2, 16($sp) - stq $3, 24($sp) - and $0, 8, $0 /* user mode? */ - stq $4, 32($sp) - bne $0, entUnaUser /* yup -> do user-level unaligned fault */ - stq $5, 40($sp) - stq $6, 48($sp) - stq $7, 56($sp) - stq $8, 64($sp) - stq $9, 72($sp) - stq $10, 80($sp) - stq $11, 88($sp) - stq $12, 96($sp) - stq $13, 104($sp) - stq $14, 112($sp) - stq $15, 120($sp) - /* 16-18 PAL-saved */ - stq $19, 152($sp) - stq $20, 160($sp) - stq $21, 168($sp) - stq $22, 176($sp) - stq $23, 184($sp) - stq $24, 192($sp) - stq $25, 200($sp) - stq $26, 208($sp) - stq $27, 216($sp) - stq $28, 224($sp) - mov $sp, $19 - stq $gp, 232($sp) - .cfi_rel_offset $1, 1*8 - .cfi_rel_offset $2, 2*8 - .cfi_rel_offset $3, 3*8 - .cfi_rel_offset $4, 4*8 - .cfi_rel_offset $5, 5*8 - .cfi_rel_offset $6, 6*8 - .cfi_rel_offset $7, 7*8 - .cfi_rel_offset $8, 8*8 - .cfi_rel_offset $9, 9*8 - .cfi_rel_offset $10, 10*8 - .cfi_rel_offset $11, 11*8 - .cfi_rel_offset $12, 12*8 - .cfi_rel_offset $13, 13*8 - .cfi_rel_offset $14, 14*8 - .cfi_rel_offset $15, 15*8 - .cfi_rel_offset $19, 19*8 - .cfi_rel_offset $20, 20*8 - .cfi_rel_offset $21, 21*8 - .cfi_rel_offset $22, 22*8 - .cfi_rel_offset $23, 23*8 - .cfi_rel_offset $24, 24*8 - .cfi_rel_offset $25, 25*8 - .cfi_rel_offset $26, 26*8 - .cfi_rel_offset $27, 27*8 - .cfi_rel_offset $28, 28*8 - .cfi_rel_offset $29, 29*8 - lda $8, 0x3fff - stq $31, 248($sp) - bic $sp, $8, $8 - jsr $26, do_entUna - ldq $0, 0($sp) - ldq $1, 8($sp) - ldq $2, 16($sp) - ldq $3, 24($sp) - ldq $4, 32($sp) - ldq $5, 40($sp) - ldq $6, 48($sp) - ldq $7, 56($sp) - ldq $8, 64($sp) - ldq $9, 72($sp) - ldq $10, 80($sp) - ldq $11, 88($sp) - ldq $12, 96($sp) - ldq $13, 104($sp) - ldq $14, 112($sp) - ldq $15, 120($sp) - /* 16-18 PAL-saved */ - ldq $19, 152($sp) - ldq $20, 160($sp) - ldq $21, 168($sp) - ldq $22, 176($sp) - ldq $23, 184($sp) - ldq $24, 192($sp) - ldq $25, 200($sp) - ldq $26, 208($sp) - ldq $27, 216($sp) - ldq $28, 224($sp) - ldq $gp, 232($sp) - lda $sp, 256($sp) - .cfi_restore $1 - .cfi_restore $2 - .cfi_restore $3 - .cfi_restore $4 - .cfi_restore $5 - .cfi_restore $6 - .cfi_restore $7 - .cfi_restore $8 - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_restore $19 - .cfi_restore 
$20 - .cfi_restore $21 - .cfi_restore $22 - .cfi_restore $23 - .cfi_restore $24 - .cfi_restore $25 - .cfi_restore $26 - .cfi_restore $27 - .cfi_restore $28 - .cfi_restore $29 - .cfi_adjust_cfa_offset -256 - call_pal PAL_rti - - .align 4 -entUnaUser: - .cfi_restore_state - ldq $0, 0($sp) /* restore original $0 */ - lda $sp, 256($sp) /* pop entUna's stack frame */ - .cfi_restore $0 - .cfi_adjust_cfa_offset -256 - SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - lda $8, 0x3fff - addq $sp, 56, $19 - bic $sp, $8, $8 - jsr $26, do_entUnaUser - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - lda $sp, 56($sp) - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -56 - br ret_from_sys_call -CFI_END_OSF_FRAME entUna - -CFI_START_OSF_FRAME entDbg - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $16 - jsr $31, do_entDbg -CFI_END_OSF_FRAME entDbg - -/* - * The system call entry point is special. Most importantly, it looks - * like a function call to userspace as far as clobbered registers. We - * do preserve the argument registers (for syscall restarts) and $26 - * (for leaf syscall functions). - * - * So much for theory. We don't take advantage of this yet. - * - * Note that a0-a2 are not saved by PALcode as with the other entry points. - */ - - .align 4 - .globl entSys - .type entSys, @function - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 48 - .cfi_rel_offset 64, 8 - .cfi_rel_offset $gp, 16 -entSys: - SAVE_ALL - lda $8, 0x3fff - bic $sp, $8, $8 - lda $4, NR_SYSCALLS($31) - stq $16, SP_OFF+24($sp) - lda $5, sys_call_table - lda $27, sys_ni_syscall - cmpult $0, $4, $4 - ldl $3, TI_FLAGS($8) - stq $17, SP_OFF+32($sp) - s8addq $0, $5, $5 - stq $18, SP_OFF+40($sp) - .cfi_rel_offset $16, SP_OFF+24 - .cfi_rel_offset $17, SP_OFF+32 - .cfi_rel_offset $18, SP_OFF+40 -#ifdef CONFIG_AUDITSYSCALL - lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT - and $3, $6, $3 -#endif - bne $3, strace - beq $4, 1f - ldq $27, 0($5) -1: jsr $26, ($27), sys_ni_syscall - ldgp $gp, 0($26) - blt $0, $syscall_error /* the call failed */ - stq $0, 0($sp) - stq $31, 72($sp) /* a3=0 => no error */ - - .align 4 - .globl ret_from_sys_call -ret_from_sys_call: - cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ - ldq $0, SP_OFF($sp) - and $0, 8, $0 - beq $0, ret_to_kernel -ret_to_user: - /* Make sure need_resched and sigpending don't change between - sampling and the rti. */ - lda $16, 7 - call_pal PAL_swpipl - ldl $17, TI_FLAGS($8) - and $17, _TIF_WORK_MASK, $2 - bne $2, work_pending -restore_all: - .cfi_remember_state - RESTORE_ALL - call_pal PAL_rti - -ret_to_kernel: - .cfi_restore_state - lda $16, 7 - call_pal PAL_swpipl - br restore_all - - .align 3 -$syscall_error: - /* - * Some system calls (e.g., ptrace) can return arbitrary - * values which might normally be mistaken as error numbers. - * Those functions must zero $0 (v0) directly in the stack - * frame to indicate that a negative return value wasn't an - * error number.. 
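 *
 * In C terms, the protocol implemented here and consumed by userspace is
 * roughly (an illustrative sketch of the v0/a3 convention, not kernel
 * code):
 *
 *	if (a3 != 0)		   failure: v0 holds the positive errno
 *		errno = v0;
 *	else
 *		result = v0;	   success: v0 is the plain return value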
- */ - ldq $18, 0($sp) /* old syscall nr (zero if success) */ - beq $18, $ret_success - - ldq $19, 72($sp) /* .. and this a3 */ - subq $31, $0, $0 /* with error in v0 */ - addq $31, 1, $1 /* set a3 for errno return */ - stq $0, 0($sp) - mov $31, $26 /* tell "ret_from_sys_call" we can restart */ - stq $1, 72($sp) /* a3 for return */ - br ret_from_sys_call - -$ret_success: - stq $0, 0($sp) - stq $31, 72($sp) /* a3=0 => no error */ - br ret_from_sys_call - -/* - * Do all cleanup when returning from all interrupts and system calls. - * - * Arguments: - * $8: current. - * $17: TI_FLAGS. - * $18: The old syscall number, or zero if this is not a return - * from a syscall that errored and is possibly restartable. - * $19: The old a3 value - */ - - .align 4 - .type work_pending, @function -work_pending: - and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 - bne $2, $work_notifysig - -$work_resched: - /* - * We can get here only if we returned from syscall without SIGPENDING - * or got through work_notifysig already. Either case means no syscall - * restarts for us, so let $18 and $19 burn. - */ - jsr $26, schedule - mov 0, $18 - br ret_to_user - -$work_notifysig: - mov $sp, $16 - DO_SWITCH_STACK - jsr $26, do_work_pending - UNDO_SWITCH_STACK - br restore_all - -/* - * PTRACE syscall handler - */ - - .align 4 - .type strace, @function -strace: - /* set up signal stack, call syscall_trace */ - DO_SWITCH_STACK - jsr $26, syscall_trace_enter /* returns the syscall number */ - UNDO_SWITCH_STACK - - /* get the arguments back.. */ - ldq $16, SP_OFF+24($sp) - ldq $17, SP_OFF+32($sp) - ldq $18, SP_OFF+40($sp) - ldq $19, 72($sp) - ldq $20, 80($sp) - ldq $21, 88($sp) - - /* get the system call pointer.. */ - lda $1, NR_SYSCALLS($31) - lda $2, sys_call_table - lda $27, sys_ni_syscall - cmpult $0, $1, $1 - s8addq $0, $2, $2 - beq $1, 1f - ldq $27, 0($2) -1: jsr $26, ($27), sys_gettimeofday -ret_from_straced: - ldgp $gp, 0($26) - - /* check return.. */ - blt $0, $strace_error /* the call failed */ - stq $31, 72($sp) /* a3=0 => no error */ -$strace_success: - stq $0, 0($sp) /* save return value */ - - DO_SWITCH_STACK - jsr $26, syscall_trace_leave - UNDO_SWITCH_STACK - br $31, ret_from_sys_call - - .align 3 -$strace_error: - ldq $18, 0($sp) /* old syscall nr (zero if success) */ - beq $18, $strace_success - ldq $19, 72($sp) /* .. and this a3 */ - - subq $31, $0, $0 /* with error in v0 */ - addq $31, 1, $1 /* set a3 for errno return */ - stq $0, 0($sp) - stq $1, 72($sp) /* a3 for return */ - - DO_SWITCH_STACK - mov $18, $9 /* save old syscall number */ - mov $19, $10 /* save old a3 */ - jsr $26, syscall_trace_leave - mov $9, $18 - mov $10, $19 - UNDO_SWITCH_STACK - - mov $31, $26 /* tell "ret_from_sys_call" we can restart */ - br ret_from_sys_call -CFI_END_OSF_FRAME entSys - -/* - * Save and restore the switch stack -- aka the balance of the user context. 
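 *
 * For reference, the 320-byte switch stack built by do_switch_stack
 * (summarised from the stores below):
 *
 *	  0.. 48   $9-$15    callee-saved integer registers
 *	     56    $26       return address
 *	 64..304   $f0-$f30  floating-point registers
 *	    312    fpcr      saved in the slot of $f31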
- */ - - .align 4 - .type do_switch_stack, @function - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 0 - .cfi_register 64, $1 -do_switch_stack: - lda $sp, -SWITCH_STACK_SIZE($sp) - .cfi_adjust_cfa_offset SWITCH_STACK_SIZE - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - stq $26, 56($sp) - stt $f0, 64($sp) - stt $f1, 72($sp) - stt $f2, 80($sp) - stt $f3, 88($sp) - stt $f4, 96($sp) - stt $f5, 104($sp) - stt $f6, 112($sp) - stt $f7, 120($sp) - stt $f8, 128($sp) - stt $f9, 136($sp) - stt $f10, 144($sp) - stt $f11, 152($sp) - stt $f12, 160($sp) - stt $f13, 168($sp) - stt $f14, 176($sp) - stt $f15, 184($sp) - stt $f16, 192($sp) - stt $f17, 200($sp) - stt $f18, 208($sp) - stt $f19, 216($sp) - stt $f20, 224($sp) - stt $f21, 232($sp) - stt $f22, 240($sp) - stt $f23, 248($sp) - stt $f24, 256($sp) - stt $f25, 264($sp) - stt $f26, 272($sp) - stt $f27, 280($sp) - mf_fpcr $f0 # get fpcr - stt $f28, 288($sp) - stt $f29, 296($sp) - stt $f30, 304($sp) - stt $f0, 312($sp) # save fpcr in slot of $f31 - ldt $f0, 64($sp) # don't let "do_switch_stack" change fp state. - ret $31, ($1), 1 - .cfi_endproc - .size do_switch_stack, .-do_switch_stack - - .align 4 - .type undo_switch_stack, @function - .cfi_startproc simple - .cfi_def_cfa $sp, 0 - .cfi_register 64, $1 -undo_switch_stack: - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - ldq $26, 56($sp) - ldt $f30, 312($sp) # get saved fpcr - ldt $f0, 64($sp) - ldt $f1, 72($sp) - ldt $f2, 80($sp) - ldt $f3, 88($sp) - mt_fpcr $f30 # install saved fpcr - ldt $f4, 96($sp) - ldt $f5, 104($sp) - ldt $f6, 112($sp) - ldt $f7, 120($sp) - ldt $f8, 128($sp) - ldt $f9, 136($sp) - ldt $f10, 144($sp) - ldt $f11, 152($sp) - ldt $f12, 160($sp) - ldt $f13, 168($sp) - ldt $f14, 176($sp) - ldt $f15, 184($sp) - ldt $f16, 192($sp) - ldt $f17, 200($sp) - ldt $f18, 208($sp) - ldt $f19, 216($sp) - ldt $f20, 224($sp) - ldt $f21, 232($sp) - ldt $f22, 240($sp) - ldt $f23, 248($sp) - ldt $f24, 256($sp) - ldt $f25, 264($sp) - ldt $f26, 272($sp) - ldt $f27, 280($sp) - ldt $f28, 288($sp) - ldt $f29, 296($sp) - ldt $f30, 304($sp) - lda $sp, SWITCH_STACK_SIZE($sp) - ret $31, ($1), 1 - .cfi_endproc - .size undo_switch_stack, .-undo_switch_stack - -/* - * The meat of the context switch code. - */ - - .align 4 - .globl alpha_switch_to - .type alpha_switch_to, @function - .cfi_startproc -alpha_switch_to: - DO_SWITCH_STACK - call_pal PAL_swpctx - lda $8, 0x3fff - UNDO_SWITCH_STACK - bic $sp, $8, $8 - mov $17, $0 - ret - .cfi_endproc - .size alpha_switch_to, .-alpha_switch_to - -/* - * New processes begin life here. - */ - - .globl ret_from_fork - .align 4 - .ent ret_from_fork -ret_from_fork: - lda $26, ret_from_sys_call - mov $17, $16 - jmp $31, schedule_tail -.end ret_from_fork - -/* - * ... and new kernel threads - here - */ - .align 4 - .globl ret_from_kernel_thread - .ent ret_from_kernel_thread -ret_from_kernel_thread: - mov $17, $16 - jsr $26, schedule_tail - mov $9, $27 - mov $10, $16 - jsr $26, ($9) - br $31, ret_to_user -.end ret_from_kernel_thread - - -/* - * Special system calls. Most of these are special in that they - * have to play switch_stack games.
- */ - -.macro fork_like name - .align 4 - .globl alpha_\name - .ent alpha_\name -alpha_\name: - .prologue 0 - bsr $1, do_switch_stack - jsr $26, sys_\name - ldq $26, 56($sp) - lda $sp, SWITCH_STACK_SIZE($sp) - ret -.end alpha_\name -.endm - -fork_like fork -fork_like vfork -fork_like clone - -.macro sigreturn_like name - .align 4 - .globl sys_\name - .ent sys_\name -sys_\name: - .prologue 0 - lda $9, ret_from_straced - cmpult $26, $9, $9 - lda $sp, -SWITCH_STACK_SIZE($sp) - jsr $26, do_\name - bne $9, 1f - jsr $26, syscall_trace_leave -1: br $1, undo_switch_stack - br ret_from_sys_call -.end sys_\name -.endm - -sigreturn_like sigreturn -sigreturn_like rt_sigreturn - - .align 4 - .globl alpha_syscall_zero - .ent alpha_syscall_zero -alpha_syscall_zero: - .prologue 0 - /* Special because it needs to do something opposite to - force_successful_syscall_return(). We use the saved - syscall number for that, zero meaning "not an error". - That works nicely, but for real syscall 0 we need to - make sure that this logic doesn't get confused. - Store a non-zero value there; the -ENOSYS we need in a register - for our return value will do just fine. - */ - lda $0, -ENOSYS - unop - stq $0, 0($sp) - ret -.end alpha_syscall_zero diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S deleted file mode 100644 index bb48a8ae4e79fd9e16a173d7fb1f9f349e2a9845..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/head.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/head.S - * - * initial boot stuff.. At this point, the bootloader has already - * switched into OSF/1 PAL-code, and loaded us at the correct address - * (START_ADDR). So there isn't much left for us to do: just set up - * the kernel global pointer and jump to the kernel entry-point. - */ - -#include -#include -#include -#include - -__HEAD -.globl _stext - .set noreorder - .globl __start - .ent __start -_stext: -__start: - .prologue 0 - br $27,1f -1: ldgp $29,0($27) - /* We need to get current_task_info loaded up... */ - lda $8,init_thread_union - /* ... and find our stack ... */ - lda $30,0x4000 - SIZEOF_PT_REGS($8) - /* ... and then we can start the kernel. */ - jsr $26,start_kernel - call_pal PAL_halt - .end __start - -#ifdef CONFIG_SMP - .align 3 - .globl __smp_callin - .ent __smp_callin - /* On entry here from SRM console, the HWPCB of the per-cpu - slot for this processor has been loaded. We've arranged - for the UNIQUE value for this process to contain the PCBB - of the target idle task. */ -__smp_callin: - .prologue 1 - ldgp $29,0($27) # First order of business, load the GP. - - call_pal PAL_rduniq # Grab the target PCBB. - mov $0,$16 # Install it. - call_pal PAL_swpctx - - lda $8,0x3fff # Find "current". - bic $30,$8,$8 - - jsr $26,smp_callin - call_pal PAL_halt - .end __smp_callin -#endif /* CONFIG_SMP */ - - # - # The following two functions are needed for supporting SRM PALcode - # on the PC164 (at least), since that PALcode manages the interrupt - # masking, and we cannot duplicate the effort without causing problems - # - - .align 3 - .globl cserve_ena - .ent cserve_ena -cserve_ena: - .prologue 0 - bis $16,$16,$17 - lda $16,52($31) - call_pal PAL_cserve - ret ($26) - .end cserve_ena - - .align 3 - .globl cserve_dis - .ent cserve_dis -cserve_dis: - .prologue 0 - bis $16,$16,$17 - lda $16,53($31) - call_pal PAL_cserve - ret ($26) - .end cserve_dis - - # - # It is handy, on occasion, to make halt actually just loop.
- # Putting it here means we dont have to recompile the whole - # kernel. - # - - .align 3 - .globl halt - .ent halt -halt: - .prologue 0 - call_pal PAL_halt - .end halt diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S deleted file mode 100644 index 9704f22ed5e3765d3324b262ee4c6e93d02020d8..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/systbls.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/systbls.S - * - * The system call table. - */ - -#include - -#define __SYSCALL(nr, entry, nargs) .quad entry - .data - .align 3 - .globl sys_call_table -sys_call_table: -#include -#undef __SYSCALL diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S deleted file mode 100644 index c4b5ceceab52f6a4cea05221cd0f06cb30de79bc..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/vmlinux.lds.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include - -OUTPUT_FORMAT("elf64-alpha") -OUTPUT_ARCH(alpha) -ENTRY(__start) -PHDRS { kernel PT_LOAD; note PT_NOTE; } -jiffies = jiffies_64; -SECTIONS -{ -#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS - . = 0xfffffc0000310000; -#else - . = 0xfffffc0001010000; -#endif - - _text = .; /* Text and read-only data */ - .text : { - HEAD_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - *(.fixup) - *(.gnu.warning) - } :kernel - swapper_pg_dir = SWAPPER_PGD; - _etext = .; /* End of text section */ - - NOTES :kernel :note - .dummy : { - *(.dummy) - } :kernel - - RODATA - EXCEPTION_TABLE(16) - - /* Will be freed after init */ - __init_begin = ALIGN(PAGE_SIZE); - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) - PERCPU_SECTION(L1_CACHE_BYTES) - /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page - needed for the THREAD_SIZE aligned init_task gets freed after init */ - . = ALIGN(THREAD_SIZE); - __init_end = .; - /* Freed after init ends here */ - - _sdata = .; /* Start of rw data section */ - _data = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - - .got : { - *(.got) - } - .sdata : { - *(.sdata) - } - _edata = .; /* End of data section */ - - BSS_SECTION(0, 0, 0) - _end = .; - - .mdebug 0 : { - *(.mdebug) - } - .note 0 : { - *(.note) - } - - STABS_DEBUG - DWARF_DEBUG - - DISCARDS -} diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S deleted file mode 100644 index b13c4a231f1b7fd088d2338585beb28fd5de8faa..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/callback_srm.S +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/callback_srm.S - */ - -#include -#include - -.text -#define HWRPB_CRB_OFFSET 0xc0 - -#if defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) -.align 4 -srm_dispatch: -#if defined(CONFIG_ALPHA_GENERIC) - ldl $4,alpha_using_srm - beq $4,nosrm -#endif - ldq $0,hwrpb # gp is set up by CALLBACK macro. - ldl $25,0($25) # Pick up the wrapper data. - mov $20,$21 # Shift arguments right. - mov $19,$20 - ldq $1,HWRPB_CRB_OFFSET($0) - mov $18,$19 - mov $17,$18 - mov $16,$17 - addq $0,$1,$2 # CRB address - ldq $27,0($2) # DISPATCH procedure descriptor (VMS call std) - extwl $25,0,$16 # SRM callback function code - ldq $3,8($27) # call address - extwl $25,2,$25 # argument information (VMS calling std) - jmp ($3) # Return directly to caller of wrapper. 
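# A sketch of the wrapper protocol srm_dispatch depends on (inferred from
# the CALLBACK macro below): each callback_NAME expands to
#	br $25, srm_dispatch
#	.word CODE, ARG_CNT
# so on entry $25 points at those two halfwords; extwl $25,0 recovers the
# SRM function code and extwl $25,2 the VMS-style argument count.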
- -.align 4 -.globl srm_fixup -.ent srm_fixup -srm_fixup: - ldgp $29,0($27) -#if defined(CONFIG_ALPHA_GENERIC) - ldl $4,alpha_using_srm - beq $4,nosrm -#endif - ldq $0,hwrpb - ldq $1,HWRPB_CRB_OFFSET($0) - addq $0,$1,$2 # CRB address - ldq $27,16($2) # VA of FIXUP procedure descriptor - ldq $3,8($27) # call address - lda $25,2($31) # two integer arguments - jmp ($3) # Return directly to caller of srm_fixup. -.end srm_fixup - -#if defined(CONFIG_ALPHA_GENERIC) -.align 3 -nosrm: - lda $0,-1($31) - ret -#endif - -#define CALLBACK(NAME, CODE, ARG_CNT) \ -.align 4; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ -ldgp $29,0($27); br $25,srm_dispatch; .word CODE, ARG_CNT; .end callback_##NAME - -#else /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ - -#define CALLBACK(NAME, CODE, ARG_CNT) \ -.align 3; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ -lda $0,-1($31); ret; .end callback_##NAME - -.align 3 -.globl srm_fixup -.ent srm_fixup -srm_fixup: - lda $0,-1($31) - ret -.end srm_fixup -#endif /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ - -CALLBACK(puts, CCB_PUTS, 4) -CALLBACK(open, CCB_OPEN, 3) -CALLBACK(close, CCB_CLOSE, 2) -CALLBACK(read, CCB_READ, 5) -CALLBACK(open_console, CCB_OPEN_CONSOLE, 1) -CALLBACK(close_console, CCB_CLOSE_CONSOLE, 1) -CALLBACK(getenv, CCB_GET_ENV, 4) -CALLBACK(setenv, CCB_SET_ENV, 4) -CALLBACK(getc, CCB_GETC, 2) -CALLBACK(reset_term, CCB_RESET_TERM, 2) -CALLBACK(term_int, CCB_SET_TERM_INT, 3) -CALLBACK(term_ctl, CCB_SET_TERM_CTL, 3) -CALLBACK(process_keycode, CCB_PROCESS_KEYCODE, 3) -CALLBACK(ioctl, CCB_IOCTL, 6) -CALLBACK(write, CCB_WRITE, 5) -CALLBACK(reset_env, CCB_RESET_ENV, 4) -CALLBACK(save_env, CCB_SAVE_ENV, 1) -CALLBACK(pswitch, CCB_PSWITCH, 3) -CALLBACK(bios_emul, CCB_BIOS_EMUL, 5) - -EXPORT_SYMBOL(callback_getenv) -EXPORT_SYMBOL(callback_setenv) -EXPORT_SYMBOL(callback_save_env) - -.data -__alpha_using_srm: # For use by bootpheader - .long 7 # value is not 1 for link debugging - .weak alpha_using_srm; alpha_using_srm = __alpha_using_srm -__callback_init_done: # For use by bootpheader - .long 7 # value is not 1 for link debugging - .weak callback_init_done; callback_init_done = __callback_init_done - diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S deleted file mode 100644 index ce02de7b049347b99c7eed9ee7c7deceb41af895..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/clear_page.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/clear_page.S - * - * Zero an entire page. - */ -#include - .text - .align 4 - .global clear_page - .ent clear_page -clear_page: - .prologue 0 - - lda $0,128 - nop - unop - nop - -1: stq $31,0($16) - stq $31,8($16) - stq $31,16($16) - stq $31,24($16) - - stq $31,32($16) - stq $31,40($16) - stq $31,48($16) - subq $0,1,$0 - - stq $31,56($16) - addq $16,64,$16 - unop - bne $0,1b - - ret - nop - unop - nop - - .end clear_page - EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S deleted file mode 100644 index db6c6ca45896c8d3b643cf61df2bf935e8d5a8b7..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/clear_user.S +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/clear_user.S - * Contributed by Richard Henderson - * - * Zero user space, handling exceptions as we go. 
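 *
 * The contract, in C terms (a sketch of the interface as this code
 * implements it):
 *
 *	long __clear_user(void __user *to, long len);
 *
 * zeroes len bytes at to and returns the number of bytes that could
 * not be zeroed ($0 here), 0 on complete success.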
- * - * We have to make sure that $0 is always up-to-date and contains the - * right "bytes left to zero" value (and that it is updated only _after_ - * a successful copy). There is also some rather minor exception setup - * stuff. - */ -#include - -/* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exception-99b($31); \ - .previous - - .set noat - .set noreorder - .align 4 - - .globl __clear_user - .ent __clear_user - .frame $30, 0, $26 - .prologue 0 - -$loop: - and $1, 3, $4 # e0 : - beq $4, 1f # .. e1 : - -0: EX( stq_u $31, 0($16) ) # e0 : zero one word - subq $0, 8, $0 # .. e1 : - subq $4, 1, $4 # e0 : - addq $16, 8, $16 # .. e1 : - bne $4, 0b # e1 : - unop # : - -1: bic $1, 3, $1 # e0 : - beq $1, $tail # .. e1 : - -2: EX( stq_u $31, 0($16) ) # e0 : zero four words - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 8($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 16($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 24($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - subq $1, 4, $1 # e0 : - addq $16, 32, $16 # .. e1 : - bne $1, 2b # e1 : - -$tail: - bne $2, 1f # e1 : is there a tail to do? - ret $31, ($26), 1 # .. e1 : - -1: EX( ldq_u $5, 0($16) ) # e0 : - clr $0 # .. e1 : - nop # e1 : - mskqh $5, $0, $5 # e0 : - EX( stq_u $5, 0($16) ) # e0 : - ret $31, ($26), 1 # .. e1 : - -__clear_user: - and $17, $17, $0 - and $16, 7, $4 # e0 : find dest misalignment - beq $0, $zerolength # .. e1 : - addq $0, $4, $1 # e0 : bias counter - and $1, 7, $2 # e1 : number of bytes in tail - srl $1, 3, $1 # e0 : - beq $4, $loop # .. e1 : - - EX( ldq_u $5, 0($16) ) # e0 : load dst word to mask back in - beq $1, $oneword # .. e1 : sub-word store? - - mskql $5, $16, $5 # e0 : take care of misaligned head - addq $16, 8, $16 # .. e1 : - EX( stq_u $5, -8($16) ) # e0 : - addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment - subq $1, 1, $1 # e0 : - subq $0, 8, $0 # .. e1 : - br $loop # e1 : - unop # : - -$oneword: - mskql $5, $16, $4 # e0 : - mskqh $5, $2, $5 # e0 : - or $5, $4, $5 # e1 : - EX( stq_u $5, 0($16) ) # e0 : - clr $0 # .. e1 : - -$zerolength: -$exception: - ret $31, ($26), 1 # .. e1 : - - .end __clear_user - EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S deleted file mode 100644 index 5439a30c77d069ac50c7c97ab333e92d5b378ae1..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/copy_page.S +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/copy_page.S - * - * Copy an entire page. 
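 *
 * Equivalent C, for reference (a sketch: the unrolled loop below moves
 * the same 8 KB page as 1024 quadword copies, eight per iteration over
 * 128 iterations):
 *
 *	unsigned long *d = to, *s = from;
 *	for (int i = 0; i < 1024; i++)
 *		d[i] = s[i];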
- */ -#include - .text - .align 4 - .global copy_page - .ent copy_page -copy_page: - .prologue 0 - - lda $18,128 - nop - unop - nop - -1: ldq $0,0($17) - ldq $1,8($17) - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - ldq $6,48($17) - ldq $7,56($17) - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - addq $17,64,$17 - - stq $2,16($16) - stq $3,24($16) - stq $4,32($16) - stq $5,40($16) - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 1b - - ret - nop - unop - nop - - .end copy_page - EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S deleted file mode 100644 index 32ab0344b1853cf2032e76bd8cdbdceaf4454b03..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/copy_user.S +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/copy_user.S - * - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $0 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - */ - -#include - -/* Allow an exception for an insn; exit if we get one. */ -#define EXI(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitin-99b($31); \ - .previous - -#define EXO(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitout-99b($31); \ - .previous - - .set noat - .align 4 - .globl __copy_user - .ent __copy_user -__copy_user: - .prologue 0 - mov $18,$0 - and $16,7,$3 - beq $0,$35 - beq $3,$36 - subq $3,8,$3 - .align 4 -$37: - EXI( ldq_u $1,0($17) ) - EXO( ldq_u $2,0($16) ) - extbl $1,$17,$1 - mskbl $2,$16,$2 - insbl $1,$16,$1 - addq $3,1,$3 - bis $1,$2,$1 - EXO( stq_u $1,0($16) ) - subq $0,1,$0 - addq $16,1,$16 - addq $17,1,$17 - beq $0,$41 - bne $3,$37 -$36: - and $17,7,$1 - bic $0,7,$4 - beq $1,$43 - beq $4,$48 - EXI( ldq_u $3,0($17) ) - .align 4 -$50: - EXI( ldq_u $2,8($17) ) - subq $4,8,$4 - extql $3,$17,$3 - extqh $2,$17,$1 - bis $3,$1,$1 - EXO( stq $1,0($16) ) - addq $17,8,$17 - subq $0,8,$0 - addq $16,8,$16 - bis $2,$2,$3 - bne $4,$50 -$48: - beq $0,$41 - .align 4 -$57: - EXI( ldq_u $1,0($17) ) - EXO( ldq_u $2,0($16) ) - extbl $1,$17,$1 - mskbl $2,$16,$2 - insbl $1,$16,$1 - bis $1,$2,$1 - EXO( stq_u $1,0($16) ) - subq $0,1,$0 - addq $16,1,$16 - addq $17,1,$17 - bne $0,$57 - br $31,$41 - .align 4 -$43: - beq $4,$65 - .align 4 -$66: - EXI( ldq $1,0($17) ) - subq $4,8,$4 - EXO( stq $1,0($16) ) - addq $17,8,$17 - subq $0,8,$0 - addq $16,8,$16 - bne $4,$66 -$65: - beq $0,$41 - EXI( ldq $2,0($17) ) - EXO( ldq $1,0($16) ) - mskql $2,$0,$2 - mskqh $1,$0,$1 - bis $2,$1,$2 - EXO( stq $2,0($16) ) - bis $31,$31,$0 -$41: -$35: -$exitin: -$exitout: - ret $31,($26),1 - - .end __copy_user -EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S deleted file mode 100644 index c7b213ab01abbc35dc32da97c600351b7b1513cc..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/csum_ipv6_magic.S +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/csum_ipv6_magic.S - * Contributed by Richard Henderson - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - 
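 *
 * Reference behaviour in C (a sketch of the one's-complement fold this
 * hand-scheduled code implements; len and proto are byte-swapped to
 * network order first):
 *
 *	sum = csum + len + proto
 *	    + all sixteen 16-bit words of *saddr and *daddr;
 *	while (sum >> 16)
 *		sum = (sum & 0xffff) + (sum >> 16);
 *	return ~sum & 0xffff;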
* Misalignment handling (which costs 16 instructions / 8 cycles) - * added by Ivan Kokshaysky - */ - -#include - .globl csum_ipv6_magic - .align 4 - .ent csum_ipv6_magic - .frame $30,0,$26,0 -csum_ipv6_magic: - .prologue 0 - - ldq_u $0,0($16) # e0 : load src & dst addr words - zapnot $20,15,$20 # .. e1 : zero extend incoming csum - extqh $18,1,$4 # e0 : byte swap len & proto while we wait - ldq_u $21,7($16) # .. e1 : handle misalignment - - extbl $18,1,$5 # e0 : - ldq_u $1,8($16) # .. e1 : - extbl $18,2,$6 # e0 : - ldq_u $22,15($16) # .. e1 : - - extbl $18,3,$18 # e0 : - ldq_u $2,0($17) # .. e1 : - sra $4,32,$4 # e0 : - ldq_u $23,7($17) # .. e1 : - - extql $0,$16,$0 # e0 : - ldq_u $3,8($17) # .. e1 : - extqh $21,$16,$21 # e0 : - ldq_u $24,15($17) # .. e1 : - - sll $5,16,$5 # e0 : - or $0,$21,$0 # .. e1 : 1st src word complete - extql $1,$16,$1 # e0 : - addq $20,$0,$20 # .. e1 : begin summing the words - - extqh $22,$16,$22 # e0 : - cmpult $20,$0,$0 # .. e1 : - sll $6,8,$6 # e0 : - or $1,$22,$1 # .. e1 : 2nd src word complete - - extql $2,$17,$2 # e0 : - or $4,$18,$18 # .. e1 : - extqh $23,$17,$23 # e0 : - or $5,$6,$5 # .. e1 : - - extql $3,$17,$3 # e0 : - or $2,$23,$2 # .. e1 : 1st dst word complete - extqh $24,$17,$24 # e0 : - or $18,$5,$18 # .. e1 : len complete - - extwh $19,7,$7 # e0 : - or $3,$24,$3 # .. e1 : 2nd dst word complete - extbl $19,1,$19 # e0 : - addq $20,$1,$20 # .. e1 : - - or $19,$7,$19 # e0 : - cmpult $20,$1,$1 # .. e1 : - sll $19,48,$19 # e0 : - nop # .. e0 : - - sra $19,32,$19 # e0 : proto complete - addq $20,$2,$20 # .. e1 : - cmpult $20,$2,$2 # e0 : - addq $20,$3,$20 # .. e1 : - - cmpult $20,$3,$3 # e0 : - addq $20,$18,$20 # .. e1 : - cmpult $20,$18,$18 # e0 : - addq $20,$19,$20 # .. e1 : - - cmpult $20,$19,$19 # e0 : - addq $0,$1,$0 # .. e1 : merge the carries back into the csum - addq $2,$3,$2 # e0 : - addq $18,$19,$18 # .. e1 : - - addq $0,$2,$0 # e0 : - addq $20,$18,$20 # .. e1 : - addq $0,$20,$0 # e0 : - unop # : - - extwl $0,2,$2 # e0 : begin folding the 64-bit value - zapnot $0,3,$3 # .. e1 : - extwl $0,4,$1 # e0 : - addq $2,$3,$3 # .. e1 : - - extwl $0,6,$0 # e0 : - addq $3,$1,$3 # .. e1 : - addq $0,$3,$0 # e0 : - unop # : - - extwl $0,2,$1 # e0 : fold 18-bit value - zapnot $0,3,$0 # .. e1 : - addq $0,$1,$0 # e0 : - unop # : - - extwl $0,2,$1 # e0 : fold 17-bit value - zapnot $0,3,$0 # .. e1 : - addq $0,$1,$0 # e0 : - not $0,$0 # .. e1 : and complement. - - zapnot $0,3,$0 # e0 : - ret # .. e1 : - - .end csum_ipv6_magic - EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/dbg_current.S b/arch/alpha/lib/dbg_current.S deleted file mode 100644 index be66121312774918505fc1cd3fd9b85463799e30..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/dbg_current.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/dbg_current.S - * Contributed by Richard Henderson (rth@cygnus.com) - * - * Trap if we find current not correct. 
- */
-
-#include <asm/pal.h>
-
- .text
- .set noat
-
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- lda $0, -0x4000($30)
- cmpult $8, $30, $1
- cmpule $0, $30, $2
- and $1, $2, $3
- bne $3, 1f
-
- call_pal PAL_bugchk
-
-1: ret $31, ($28), 1
-
- .end _mcount
diff --git a/arch/alpha/lib/dbg_stackcheck.S b/arch/alpha/lib/dbg_stackcheck.S
deleted file mode 100644
index b3b6fc94f7f321439f60ba0bcb853eb0af9636a0..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/dbg_stackcheck.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/dbg_stackcheck.S
- * Contributed by Richard Henderson (rth@tamu.edu)
- *
- * Verify that we have not overflowed the stack. Oops if we have.
- */
-
-#include <asm/asm-offsets.h>
-
- .text
- .set noat
-
- .align 3
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- lda $0, TASK_SIZE($8)
- cmpult $30, $0, $0
- bne $0, 1f
- ret ($28)
-1: stq $31, -8($31) # oops me, damn it.
- br 1b
-
- .end _mcount
diff --git a/arch/alpha/lib/dbg_stackkill.S b/arch/alpha/lib/dbg_stackkill.S
deleted file mode 100644
index 6d9197e52a425af943920556f8572b21e5b548cc..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/dbg_stackkill.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/dbg_stackkill.S
- * Contributed by Richard Henderson (rth@cygnus.com)
- *
- * Clobber the balance of the kernel stack, hoping to catch
- * uninitialized local variables in the act.
- */
-
-#include <asm/asm-offsets.h>
-
- .text
- .set noat
-
- .align 5
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- ldi $0, 0xdeadbeef
- lda $2, -STACK_SIZE
- sll $0, 32, $1
- and $30, $2, $2
- or $0, $1, $0
- lda $2, TASK_SIZE($2)
- cmpult $2, $30, $1
- beq $1, 2f
-1: stq $0, 0($2)
- addq $2, 8, $2
- cmpult $2, $30, $1
- bne $1, 1b
-2: ret ($28)
-
- .end _mcount
diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S
deleted file mode 100644
index 2b60eb45e50b68993b6225c8c7f23cacedd00651..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/divide.S
+++ /dev/null
@@ -1,199 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/divide.S
- *
- * (C) 1995 Linus Torvalds
- *
- * Alpha division..
- */
-
-/*
- * The alpha chip doesn't provide hardware division, so we have to do it
- * by hand. The compiler expects the functions
- *
- * __divqu: 64-bit unsigned long divide
- * __remqu: 64-bit unsigned long remainder
- * __divqs/__remqs: signed 64-bit
- * __divlu/__remlu: unsigned 32-bit
- * __divls/__remls: signed 32-bit
- *
- * These are not normal C functions: instead of the normal
- * calling sequence, these expect their arguments in registers
- * $24 and $25, and return the result in $27. Register $28 may
- * be clobbered (assembly temporary), anything else must be saved.
- *
- * In short: painful.
- *
- * This is a rather simple bit-at-a-time algorithm: it's very good
- * at dividing random 64-bit numbers, but the more usual case where
- * the divisor is small is handled better by the DEC algorithm
- * using lookup tables. This uses much less memory, though, and is
- * nicer on the cache.. Besides, I don't know the copyright status
- * of the DEC code.
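
(Aside: the shift-and-subtract scheme just described is easier to follow in C. The sketch below uses a hypothetical name, divqu_sketch, and an ordinary C calling convention rather than the $24/$25/$27 register protocol the comment documents.)

    #include <stdint.h>

    static uint64_t divqu_sketch(uint64_t dividend, uint64_t divisor,
                                 uint64_t *remainder)
    {
        uint64_t quotient = 0, mask = 1;

        if (divisor == 0) {              /* the asm simply returns on divide-by-zero */
            *remainder = dividend;
            return 0;
        }
        /* "Go left": shift the divisor and the quotient-bit mask up until
           the divisor tops the dividend (or would lose its high bit). */
        while (divisor < dividend && !(divisor >> 63)) {
            divisor <<= 1;
            mask <<= 1;
        }
        /* "Go right again": subtract wherever the shifted divisor fits. */
        while (mask) {
            if (divisor <= dividend) {
                dividend -= divisor;
                quotient |= mask;
            }
            divisor >>= 1;
            mask >>= 1;
        }
        *remainder = dividend;           /* the modulus ends up in 'dividend' */
        return quotient;
    }

The asm keeps both results live through the loop and selects which one to return with the DIV/MOD macros; the C version simply returns both.
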
- */ - -/* - * My temporaries: - * $0 - current bit - * $1 - shifted divisor - * $2 - modulus/quotient - * - * $23 - return address - * $24 - dividend - * $25 - divisor - * - * $27 - quotient/modulus - * $28 - compare status - */ - -#include -#define halt .long 0 - -/* - * Select function type and registers - */ -#define mask $0 -#define divisor $1 -#define compare $28 -#define tmp1 $3 -#define tmp2 $4 - -#ifdef DIV -#define DIV_ONLY(x,y...) x,##y -#define MOD_ONLY(x,y...) -#define func(x) __div##x -#define modulus $2 -#define quotient $27 -#define GETSIGN(x) xor $24,$25,x -#define STACK 48 -#else -#define DIV_ONLY(x,y...) -#define MOD_ONLY(x,y...) x,##y -#define func(x) __rem##x -#define modulus $27 -#define quotient $2 -#define GETSIGN(x) bis $24,$24,x -#define STACK 32 -#endif - -/* - * For 32-bit operations, we need to extend to 64-bit - */ -#ifdef INTSIZE -#define ufunction func(lu) -#define sfunction func(l) -#define LONGIFY(x) zapnot x,15,x -#define SLONGIFY(x) addl x,0,x -#else -#define ufunction func(qu) -#define sfunction func(q) -#define LONGIFY(x) -#define SLONGIFY(x) -#endif - -.set noat -.align 3 -.globl ufunction -.ent ufunction -ufunction: - subq $30,STACK,$30 - .frame $30,STACK,$23 - .prologue 0 - -7: stq $1, 0($30) - bis $25,$25,divisor - stq $2, 8($30) - bis $24,$24,modulus - stq $0,16($30) - bis $31,$31,quotient - LONGIFY(divisor) - stq tmp1,24($30) - LONGIFY(modulus) - bis $31,1,mask - DIV_ONLY(stq tmp2,32($30)) - beq divisor, 9f /* div by zero */ - -#ifdef INTSIZE - /* - * shift divisor left, using 3-bit shifts for - * 32-bit divides as we can't overflow. Three-bit - * shifts will result in looping three times less - * here, but can result in two loops more later. - * Thus using a large shift isn't worth it (and - * s8add pairs better than a sll..) - */ -1: cmpult divisor,modulus,compare - s8addq divisor,$31,divisor - s8addq mask,$31,mask - bne compare,1b -#else -1: cmpult divisor,modulus,compare - blt divisor, 2f - addq divisor,divisor,divisor - addq mask,mask,mask - bne compare,1b - unop -#endif - - /* ok, start to go right again.. */ -2: DIV_ONLY(addq quotient,mask,tmp2) - srl mask,1,mask - cmpule divisor,modulus,compare - subq modulus,divisor,tmp1 - DIV_ONLY(cmovne compare,tmp2,quotient) - srl divisor,1,divisor - cmovne compare,tmp1,modulus - bne mask,2b - -9: ldq $1, 0($30) - ldq $2, 8($30) - ldq $0,16($30) - ldq tmp1,24($30) - DIV_ONLY(ldq tmp2,32($30)) - addq $30,STACK,$30 - ret $31,($23),1 - .end ufunction -EXPORT_SYMBOL(ufunction) - -/* - * Uhh.. Ugly signed division. I'd rather not have it at all, but - * it's needed in some circumstances. There are different ways to - * handle this, really. This does: - * -a / b = a / -b = -(a / b) - * -a % b = -(a % b) - * a % -b = a % b - * which is probably not the best solution, but at least should - * have the property that (x/y)*y + (x%y) = x. 
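
(Aside: in C, the sign fix-up the code wraps around the unsigned routine looks roughly like this — a sketch using divqu_sketch from the previous aside, with illustrative names rather than the real __divqs/__remqs symbols.)

    #include <stdint.h>

    /* -a / b = a / -b = -(a / b);  -a % b = -(a % b);  a % -b = a % b */
    static int64_t divqs_sketch(int64_t a, int64_t b)
    {
        uint64_t r;
        uint64_t q = divqu_sketch(a < 0 ? -(uint64_t)a : (uint64_t)a,
                                  b < 0 ? -(uint64_t)b : (uint64_t)b, &r);
        /* GETSIGN: the quotient is negative iff the signs differ (xor $24,$25) */
        return ((a ^ b) < 0) ? -(int64_t)q : (int64_t)q;
    }

    static int64_t remqs_sketch(int64_t a, int64_t b)
    {
        uint64_t r;
        divqu_sketch(a < 0 ? -(uint64_t)a : (uint64_t)a,
                     b < 0 ? -(uint64_t)b : (uint64_t)b, &r);
        /* the remainder takes the sign of the dividend (bis $24,$24) */
        return (a < 0) ? -(int64_t)r : (int64_t)r;
    }

For example, -7/2 gives -3 with remainder -1, and (-3)*2 + (-1) = -7, satisfying the property claimed above.
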
- */ -.align 3 -.globl sfunction -.ent sfunction -sfunction: - subq $30,STACK,$30 - .frame $30,STACK,$23 - .prologue 0 - bis $24,$25,$28 - SLONGIFY($28) - bge $28,7b - stq $24,0($30) - subq $31,$24,$28 - stq $25,8($30) - cmovlt $24,$28,$24 /* abs($24) */ - stq $23,16($30) - subq $31,$25,$28 - stq tmp1,24($30) - cmovlt $25,$28,$25 /* abs($25) */ - unop - bsr $23,ufunction - ldq $24,0($30) - ldq $25,8($30) - GETSIGN($28) - subq $31,$27,tmp1 - SLONGIFY($28) - ldq $23,16($30) - cmovlt $28,tmp1,$27 - ldq tmp1,24($30) - addq $30,STACK,$30 - ret $31,($23),1 - .end sfunction -EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S deleted file mode 100644 index 325864c81586daa94d1b04534ed54016fadad6d3..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-clear_page.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-clear_page.S - * - * Zero an entire page. - */ -#include - .text - .align 4 - .global clear_page - .ent clear_page -clear_page: - .prologue 0 - - lda $0,128 - lda $1,125 - addq $16,64,$2 - addq $16,128,$3 - - addq $16,192,$17 - wh64 ($16) - wh64 ($2) - wh64 ($3) - -1: wh64 ($17) - stq $31,0($16) - subq $0,1,$0 - subq $1,1,$1 - - stq $31,8($16) - stq $31,16($16) - addq $17,64,$2 - nop - - stq $31,24($16) - stq $31,32($16) - cmovgt $1,$2,$17 - nop - - stq $31,40($16) - stq $31,48($16) - nop - nop - - stq $31,56($16) - addq $16,64,$16 - nop - bne $0,1b - - ret - nop - nop - nop - - .end clear_page - EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S deleted file mode 100644 index 7e644f83cdf2905807b15b6cd788770c1be732d4..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-clear_user.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-clear_user.S - * 21264 version contributed by Rick Gorton - * - * Zero user space, handling exceptions as we go. - * - * We have to make sure that $0 is always up-to-date and contains the - * right "bytes left to zero" value (and that it is updated only _after_ - * a successful copy). There is also some rather minor exception setup - * stuff. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Determining actual stalls (other than slotting) doesn't appear to be easy to do. - * From perusing the source code context where this routine is called, it is - * a fair assumption that significant fractions of entire pages are zeroed, so - * it's going to be worth the effort to hand-unroll a big loop, and use wh64. - * ASSUMPTION: - * The believed purpose of only updating $0 after a store is that a signal - * may come along during the execution of this chunk of code, and we don't - * want to leave a hole (and we also want to avoid repeating lots of work) - */ - -#include -/* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) 
\ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exception-99b($31); \ - .previous - - .set noat - .set noreorder - .align 4 - - .globl __clear_user - .ent __clear_user - .frame $30, 0, $26 - .prologue 0 - - # Pipeline info : Slotting & Comments -__clear_user: - and $17, $17, $0 - and $16, 7, $4 # .. E .. .. : find dest head misalignment - beq $0, $zerolength # U .. .. .. : U L U L - - addq $0, $4, $1 # .. .. .. E : bias counter - and $1, 7, $2 # .. .. E .. : number of misaligned bytes in tail -# Note - we never actually use $2, so this is a moot computation -# and we can rewrite this later... - srl $1, 3, $1 # .. E .. .. : number of quadwords to clear - beq $4, $headalign # U .. .. .. : U L U L - -/* - * Head is not aligned. Write (8 - $4) bytes to head of destination - * This means $16 is known to be misaligned - */ - EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in - beq $1, $onebyte # .. .. U .. : sub-word store? - mskql $5, $16, $5 # .. U .. .. : take care of misaligned head - addq $16, 8, $16 # E .. .. .. : L U U L - - EX( stq_u $5, -8($16) ) # .. .. .. L : - subq $1, 1, $1 # .. .. E .. : - addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment - subq $0, 8, $0 # E .. .. .. : U L U L - - .align 4 -/* - * (The .align directive ought to be a moot point) - * values upon initial entry to the loop - * $1 is number of quadwords to clear (zero is a valid value) - * $2 is number of trailing bytes (0..7) ($2 never used...) - * $16 is known to be aligned 0mod8 - */ -$headalign: - subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop - and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop - subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) - blt $4, $trailquad # U .. .. .. : U L U L - -/* - * We know that we're going to do at least 16 quads, which means we are - * going to be able to use the large block clear loop at least once. - * Figure out how many quads we need to clear before we are 0mod64 aligned - * so we can use the wh64 instruction. - */ - - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 - -$alignmod64: - EX( stq_u $31, 0($16) ) # .. .. .. L - addq $3, 8, $3 # .. .. E .. - subq $0, 8, $0 # .. E .. .. - nop # E .. .. .. : U L U L - - nop # .. .. .. E - subq $1, 1, $1 # .. .. E .. - addq $16, 8, $16 # .. E .. .. - blt $3, $alignmod64 # U .. .. .. : U L U L - -$bigalign: -/* - * $0 is the number of bytes left - * $1 is the number of quads left - * $16 is aligned 0mod64 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * We are _not_ going to update $0 after every single store. That - * would be silly, because there will be cross-cluster dependencies - * no matter how the code is scheduled. By doing it in slightly - * staggered fashion, we can still do this loop in 5 fetches - * The worse case will be doing two extra quads in some future execution, - * in the event of an interrupted clear. - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - nop # E : - nop # E : - nop # E : - bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest - /* This might actually help for the current trip... */ - -$do_wh64: - wh64 ($3) # .. .. .. 
L1 : memory subsystem hint - subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? - EX( stq_u $31, 0($16) ) # .. L .. .. - subq $0, 8, $0 # E .. .. .. : U L U L - - addq $16, 128, $3 # E : Target address of wh64 - EX( stq_u $31, 8($16) ) # L : - EX( stq_u $31, 16($16) ) # L : - subq $0, 16, $0 # E : U L L U - - nop # E : - EX( stq_u $31, 24($16) ) # L : - EX( stq_u $31, 32($16) ) # L : - subq $0, 168, $5 # E : U L L U : two trips through the loop left? - /* 168 = 192 - 24, since we've already completed some stores */ - - subq $0, 16, $0 # E : - EX( stq_u $31, 40($16) ) # L : - EX( stq_u $31, 48($16) ) # L : - cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle - - subq $1, 8, $1 # E : - subq $0, 16, $0 # E : - EX( stq_u $31, 56($16) ) # L : - nop # E : U L U L - - nop # E : - subq $0, 8, $0 # E : - addq $16, 64, $16 # E : - bge $4, $do_wh64 # U : U L U L - -$trailquad: - # zero to 16 quadwords left to store, plus any trailing bytes - # $1 is the number of quadwords left to go. - # - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go - -$onequad: - EX( stq_u $31, 0($16) ) # .. .. .. L - subq $1, 1, $1 # .. .. E .. - subq $0, 8, $0 # .. E .. .. - nop # E .. .. .. : U L U L - - nop # .. .. .. E - nop # .. .. E .. - addq $16, 8, $16 # .. E .. .. - bgt $1, $onequad # U .. .. .. : U L U L - - # We have an unknown number of bytes left to go. -$trailbytes: - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $0, $zerolength # U .. .. .. : U L U L - - # $0 contains the number of bytes left to copy (0..31) - # so we will use $0 as the loop counter - # We know for a fact that $0 > 0 zero due to previous context -$onebyte: - EX( stb $31, 0($16) ) # .. .. .. L - subq $0, 1, $0 # .. .. E .. : - addq $16, 1, $16 # .. E .. .. : - bgt $0, $onebyte # U .. .. .. : U L U L - -$zerolength: -$exception: # Destination for exception recovery(?) - nop # .. .. .. E : - nop # .. .. E .. : - nop # .. E .. .. : - ret $31, ($26), 1 # L0 .. .. .. : L U L U - .end __clear_user - EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S deleted file mode 100644 index fd7212c8dcf1848533ed8dc07c2f19c6a7aa673e..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-copy_page.S +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-copy_page.S - * - * Copy an entire page. - */ - -/* The following comparison of this routine vs the normal copy_page.S - was written by an unnamed ev6 hardware designer and forwarded to me - via Steven Hobbs . - - First Problem: STQ overflows. - ----------------------------- - - It would be nice if EV6 handled every resource overflow efficiently, - but for some it doesn't. Including store queue overflows. It causes - a trap and a restart of the pipe. - - To get around this we sometimes use (to borrow a term from a VSSAD - researcher) "aeration". The idea is to slow the rate at which the - processor receives valid instructions by inserting nops in the fetch - path. In doing so, you can prevent the overflow and actually make - the code run faster. You can, of course, take advantage of the fact - that the processor can fetch at most 4 aligned instructions per cycle. - - I inserted enough nops to force it to take 10 cycles to fetch the - loop code. 
In theory, EV6 should be able to execute this loop in - 9 cycles but I was not able to get it to run that fast -- the initial - conditions were such that I could not reach this optimum rate on - (chaotic) EV6. I wrote the code such that everything would issue - in order. - - Second Problem: Dcache index matches. - ------------------------------------- - - If you are going to use this routine on random aligned pages, there - is a 25% chance that the pages will be at the same dcache indices. - This results in many nasty memory traps without care. - - The solution is to schedule the prefetches to avoid the memory - conflicts. I schedule the wh64 prefetches farther ahead of the - read prefetches to avoid this problem. - - Third Problem: Needs more prefetching. - -------------------------------------- - - In order to improve the code I added deeper prefetching to take the - most advantage of EV6's bandwidth. - - I also prefetched the read stream. Note that adding the read prefetch - forced me to add another cycle to the inner-most kernel - up to 11 - from the original 8 cycles per iteration. We could improve performance - further by unrolling the loop and doing multiple prefetches per cycle. - - I think that the code below will be very robust and fast code for the - purposes of copying aligned pages. It is slower when both source and - destination pages are in the dcache, but it is my guess that this is - less important than the dcache miss case. */ - -#include - .text - .align 4 - .global copy_page - .ent copy_page -copy_page: - .prologue 0 - - /* Prefetch 5 read cachelines; write-hint 10 cache lines. */ - wh64 ($16) - ldl $31,0($17) - ldl $31,64($17) - lda $1,1*64($16) - - wh64 ($1) - ldl $31,128($17) - ldl $31,192($17) - lda $1,2*64($16) - - wh64 ($1) - ldl $31,256($17) - lda $18,118 - lda $1,3*64($16) - - wh64 ($1) - nop - lda $1,4*64($16) - lda $2,5*64($16) - - wh64 ($1) - wh64 ($2) - lda $1,6*64($16) - lda $2,7*64($16) - - wh64 ($1) - wh64 ($2) - lda $1,8*64($16) - lda $2,9*64($16) - - wh64 ($1) - wh64 ($2) - lda $19,10*64($16) - nop - - /* Main prefetching/write-hinting loop. */ -1: ldq $0,0($17) - ldq $1,8($17) - unop - unop - - unop - unop - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - unop - unop - - unop - unop - ldq $6,48($17) - ldq $7,56($17) - - ldl $31,320($17) - unop - unop - unop - - /* This gives the extra cycle of aeration above the minimum. */ - unop - unop - unop - unop - - wh64 ($19) - unop - unop - unop - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - unop - - unop - stq $2,16($16) - addq $17,64,$17 - stq $3,24($16) - - stq $4,32($16) - stq $5,40($16) - addq $19,64,$19 - unop - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 1b - - /* Prefetch the final 5 cache lines of the read stream. */ - lda $18,10 - ldl $31,320($17) - ldl $31,384($17) - ldl $31,448($17) - - ldl $31,512($17) - ldl $31,576($17) - nop - nop - - /* Non-prefetching, non-write-hinting cleanup loop for the - final 10 cache lines. 
*/ -2: ldq $0,0($17) - ldq $1,8($17) - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - ldq $6,48($17) - ldq $7,56($17) - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - addq $17,64,$17 - - stq $2,16($16) - stq $3,24($16) - stq $4,32($16) - stq $5,40($16) - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 2b - - ret - nop - unop - nop - - .end copy_page - EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S deleted file mode 100644 index f3e43375439743c54a2e58b2e7928339f50642bb..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-copy_user.S +++ /dev/null @@ -1,227 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-copy_user.S - * - * 21264 version contributed by Rick Gorton - * - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $0 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ - -#include -/* Allow an exception for an insn; exit if we get one. */ -#define EXI(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitin-99b($31); \ - .previous - -#define EXO(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitout-99b($31); \ - .previous - - .set noat - .align 4 - .globl __copy_user - .ent __copy_user - # Pipeline info: Slotting & Comments -__copy_user: - .prologue 0 - mov $18, $0 # .. .. .. E - subq $18, 32, $1 # .. .. E. .. : Is this going to be a small copy? - nop # .. E .. .. - beq $18, $zerolength # U .. .. .. : U L U L - - and $16,7,$3 # .. .. .. E : is leading dest misalignment - ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data - beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) - subq $3, 8, $3 # E .. .. .. : L U U L : trip counter -/* - * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) - * This loop aligns the destination a byte at a time - * We know we have at least one trip through this loop - */ -$aligndest: - EXI( ldbu $1,0($17) ) # .. .. .. L : Keep loads separate from stores - addq $16,1,$16 # .. .. E .. : Section 3.8 in the CWG - addq $3,1,$3 # .. E .. .. : - nop # E .. .. .. : U L U L - -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO( stb $1,-1($16) ) # .. .. .. L : - addq $17,1,$17 # .. .. E .. : Section 3.8 in the CWG - subq $0,1,$0 # .. E .. .. : - bne $3, $aligndest # U .. .. .. : U L U L - -/* - * If we fell through into here, we have a minimum of 33 - 7 bytes - * If we arrived via branch, we have a minimum of 32 bytes - */ -$destaligned: - and $17,7,$1 # .. .. .. E : Check _current_ source alignment - bic $0,7,$4 # .. .. E .. 
: number bytes as a quadword loop - EXI( ldq_u $3,0($17) ) # .. L .. .. : Forward fetch for fallthrough code - beq $1,$quadaligned # U .. .. .. : U L U L - -/* - * In the worst case, we've just executed an ldq_u here from 0($17) - * and we'll repeat it once if we take the branch - */ - -/* Misaligned quadword loop - not unrolled. Leave it that way. */ -$misquad: - EXI( ldq_u $2,8($17) ) # .. .. .. L : - subq $4,8,$4 # .. .. E .. : - extql $3,$17,$3 # .. U .. .. : - extqh $2,$17,$1 # U .. .. .. : U U L L - - bis $3,$1,$1 # .. .. .. E : - EXO( stq $1,0($16) ) # .. .. L .. : - addq $17,8,$17 # .. E .. .. : - subq $0,8,$0 # E .. .. .. : U L L U - - addq $16,8,$16 # .. .. .. E : - bis $2,$2,$3 # .. .. E .. : - nop # .. E .. .. : - bne $4,$misquad # U .. .. .. : U L U L - - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $0,$zerolength # U .. .. .. : U L U L - -/* We know we have at least one trip through the byte loop */ - EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad - addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) - nop # .. E .. .. : - br $31, $dirtyentry # L0 .. .. .. : L U U L -/* Do the trailing byte loop load, then hop into the store part of the loop */ - -/* - * A minimum of (33 - 7) bytes to do a quad at a time. - * Based upon the usage context, it's worth the effort to unroll this loop - * $0 - number of bytes to be moved - * $4 - number of bytes to move as quadwords - * $16 is current destination address - * $17 is current source address - */ -$quadaligned: - subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff - nop # .. .. E .. - nop # .. E .. .. - blt $2, $onequad # U .. .. .. : U L U L - -/* - * There is a significant assumption here that the source and destination - * addresses differ by more than 32 bytes. In this particular case, a - * sparsity of registers further bounds this to be a minimum of 8 bytes. - * But if this isn't met, then the output result will be incorrect. - * Furthermore, due to a lack of available registers, we really can't - * unroll this to be an 8x loop (which would enable us to use the wh64 - * instruction memory hint instruction). - */ -$unroll4: - EXI( ldq $1,0($17) ) # .. .. .. L - EXI( ldq $2,8($17) ) # .. .. L .. - subq $4,32,$4 # .. E .. .. - nop # E .. .. .. : U U L L - - addq $17,16,$17 # .. .. .. E - EXO( stq $1,0($16) ) # .. .. L .. - EXO( stq $2,8($16) ) # .. L .. .. - subq $0,16,$0 # E .. .. .. : U L L U - - addq $16,16,$16 # .. .. .. E - EXI( ldq $1,0($17) ) # .. .. L .. - EXI( ldq $2,8($17) ) # .. L .. .. - subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? - - EXO( stq $1,0($16) ) # .. .. .. L - EXO( stq $2,8($16) ) # .. .. L .. - subq $0,16,$0 # .. E .. .. - addq $17,16,$17 # E .. .. .. : U L L U - - nop # .. .. .. E - nop # .. .. E .. - addq $16,16,$16 # .. E .. .. - bgt $3,$unroll4 # U .. .. .. : U L U L - - nop - nop - nop - beq $4, $noquads - -$onequad: - EXI( ldq $1,0($17) ) - subq $4,8,$4 - addq $17,8,$17 - nop - - EXO( stq $1,0($16) ) - subq $0,8,$0 - addq $16,8,$16 - bne $4,$onequad - -$noquads: - nop - nop - nop - beq $0,$zerolength - -/* - * For small copies (or the tail of a larger copy), do a very simple byte loop. - * There's no point in doing a lot of complex alignment calculations to try to - * to quadword stuff for a small amount of data. - * $0 - remaining number of bytes left to copy - * $16 - current dest addr - * $17 - current source addr - */ - -$onebyteloop: - EXI ( ldbu $2,0($17) ) # .. .. .. 
L : No loads in the same quad - addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) - nop # .. E .. .. : - nop # E .. .. .. : U L U L - -$dirtyentry: -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO ( stb $2,-1($16) ) # .. .. .. L : - addq $17,1,$17 # .. .. E .. : quadpack as the load - subq $0,1,$0 # .. E .. .. : change count _after_ copy - bgt $0,$onebyteloop # U .. .. .. : U L U L - -$zerolength: -$exitin: -$exitout: # Destination for exception recovery(?) - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - ret $31,($26),1 # L0 .. .. .. : L U L U - - .end __copy_user - EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S deleted file mode 100644 index 9a73f90700a13a63cb3929d9ba3db7daab0250fc..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-csum_ipv6_magic.S +++ /dev/null @@ -1,153 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-csum_ipv6_magic.S - * 21264 version contributed by Rick Gorton - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Determining actual stalls (other than slotting) doesn't appear to be easy to do. - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - * Swap (takes form 0xaabb) - * Then shift it left by 48, so result is: - * 0xbbaa0000 00000000 - * Then turn it back into a sign extended 32-bit item - * 0xbbaa0000 - * - * Swap (an unsigned int) using Mike Burrows' 7-instruction sequence - * (we can't hide the 3-cycle latency of the unpkbw in the 6-instruction sequence) - * Assume input takes form 0xAABBCCDD - * - * Finally, original 'folding' approach is to split the long into 4 unsigned shorts - * add 4 ushorts, resulting in ushort/carry - * add carry bits + ushort --> ushort - * add carry bits + ushort --> ushort (in case the carry results in an overflow) - * Truncate to a ushort. (took 13 instructions) - * From doing some testing, using the approach in checksum.c:from64to16() - * results in the same outcome: - * split into 2 uints, add those, generating a ulong - * add the 3 low ushorts together, generating a uint - * a final add of the 2 lower ushorts - * truncating the result. - * - * Misalignment handling added by Ivan Kokshaysky - * The cost is 16 instructions (~8 cycles), including two extra loads which - * may cause additional delay in rare cases (load-load replay traps). 
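
(Aside: the from64to16()-style folding the comment refers to is compact in C. This is a portable sketch of exactly those steps, not the kernel's code.)

    #include <stdint.h>

    static uint16_t from64to16_sketch(uint64_t x)
    {
        /* split into 2 uints, add those: at most 33 significant bits left */
        uint64_t t = (uint32_t)x + (x >> 32);
        /* add the 3 low ushorts together: at most 18 significant bits left */
        uint64_t u = (t & 0xffff) + ((t >> 16) & 0xffff) + (t >> 32);
        /* a final add of the 2 lower ushorts, truncating the result */
        return (uint16_t)((u & 0xffff) + (u >> 16));
    }
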
- */ - -#include - .globl csum_ipv6_magic - .align 4 - .ent csum_ipv6_magic - .frame $30,0,$26,0 -csum_ipv6_magic: - .prologue 0 - - ldq_u $0,0($16) # L : Latency: 3 - inslh $18,7,$4 # U : 0000000000AABBCC - ldq_u $1,8($16) # L : Latency: 3 - sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00 - - and $16,7,$6 # E : src misalignment - ldq_u $5,15($16) # L : Latency: 3 - zapnot $20,15,$20 # U : zero extend incoming csum - ldq_u $2,0($17) # L : U L U L : Latency: 3 - - extql $0,$6,$0 # U : - extqh $1,$6,$22 # U : - ldq_u $3,8($17) # L : Latency: 3 - sll $19,24,$19 # U : U U L U : 0x000000aa bb000000 - - cmoveq $6,$31,$22 # E : src aligned? - ldq_u $23,15($17) # L : Latency: 3 - inswl $18,3,$18 # U : 000000CCDD000000 - addl $19,$7,$19 # E : U L U L : bbaabb00 - - or $0,$22,$0 # E : 1st src word complete - extql $1,$6,$1 # U : - or $18,$4,$18 # E : 000000CCDDAABBCC - extqh $5,$6,$5 # U : L U L U - - and $17,7,$6 # E : dst misalignment - extql $2,$6,$2 # U : - or $1,$5,$1 # E : 2nd src word complete - extqh $3,$6,$22 # U : L U L U : - - cmoveq $6,$31,$22 # E : dst aligned? - extql $3,$6,$3 # U : - addq $20,$0,$20 # E : begin summing the words - extqh $23,$6,$23 # U : L U L U : - - srl $18,16,$4 # U : 0000000000CCDDAA - or $2,$22,$2 # E : 1st dst word complete - zap $19,0x3,$19 # U : bbaa0000 - or $3,$23,$3 # E : U L U L : 2nd dst word complete - - cmpult $20,$0,$0 # E : - addq $20,$1,$20 # E : - zapnot $18,0xa,$18 # U : 00000000DD00BB00 - zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA - - or $18,$4,$18 # E : 00000000DDCCBBAA - nop # E : - cmpult $20,$1,$1 # E : - addq $20,$2,$20 # E : U L U L - - cmpult $20,$2,$2 # E : - addq $20,$3,$20 # E : - cmpult $20,$3,$3 # E : (1 cycle stall on $20) - addq $20,$18,$20 # E : U L U L (1 cycle stall on $20) - - cmpult $20,$18,$18 # E : - addq $20,$19,$20 # E : (1 cycle stall on $20) - addq $0,$1,$0 # E : merge the carries back into the csum - addq $2,$3,$2 # E : - - cmpult $20,$19,$19 # E : - addq $18,$19,$18 # E : (1 cycle stall on $19) - addq $0,$2,$0 # E : - addq $20,$18,$20 # E : U L U L : - /* (1 cycle stall on $18, 2 cycles on $20) */ - - addq $0,$20,$0 # E : - zapnot $0,15,$1 # U : Start folding output (1 cycle stall on $0) - nop # E : - srl $0,32,$0 # U : U L U L : (1 cycle stall on $0) - - addq $1,$0,$1 # E : Finished generating ulong - extwl $1,2,$2 # U : ushort[1] (1 cycle stall on $1) - zapnot $1,3,$0 # U : ushort[0] (1 cycle stall on $1) - extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1) - - addq $0,$2,$0 # E - addq $0,$1,$3 # E : Finished generating uint - /* (1 cycle stall on $0) */ - extwl $3,2,$1 # U : ushort[1] (1 cycle stall on $3) - nop # E : L U L U - - addq $1,$3,$0 # E : Final carry - not $0,$4 # E : complement (1 cycle stall on $0) - zapnot $4,3,$0 # U : clear upper garbage bits - /* (1 cycle stall on $4) */ - ret # L0 : L U L U - - .end csum_ipv6_magic - EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S deleted file mode 100644 index 137ff1a07356311d598cd8c254cb8214c8707ec7..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-divide.S +++ /dev/null @@ -1,263 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-divide.S - * - * 21264 version contributed by Rick Gorton - * - * Alpha division.. - */ - -/* - * The alpha chip doesn't provide hardware division, so we have to do it - * by hand. 
The compiler expects the functions - * - * __divqu: 64-bit unsigned long divide - * __remqu: 64-bit unsigned long remainder - * __divqs/__remqs: signed 64-bit - * __divlu/__remlu: unsigned 32-bit - * __divls/__remls: signed 32-bit - * - * These are not normal C functions: instead of the normal - * calling sequence, these expect their arguments in registers - * $24 and $25, and return the result in $27. Register $28 may - * be clobbered (assembly temporary), anything else must be saved. - * - * In short: painful. - * - * This is a rather simple bit-at-a-time algorithm: it's very good - * at dividing random 64-bit numbers, but the more usual case where - * the divisor is small is handled better by the DEC algorithm - * using lookup tables. This uses much less memory, though, and is - * nicer on the cache.. Besides, I don't know the copyright status - * of the DEC code. - */ - -/* - * My temporaries: - * $0 - current bit - * $1 - shifted divisor - * $2 - modulus/quotient - * - * $23 - return address - * $24 - dividend - * $25 - divisor - * - * $27 - quotient/modulus - * $28 - compare status - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include -#define halt .long 0 - -/* - * Select function type and registers - */ -#define mask $0 -#define divisor $1 -#define compare $28 -#define tmp1 $3 -#define tmp2 $4 - -#ifdef DIV -#define DIV_ONLY(x,y...) x,##y -#define MOD_ONLY(x,y...) -#define func(x) __div##x -#define modulus $2 -#define quotient $27 -#define GETSIGN(x) xor $24,$25,x -#define STACK 48 -#else -#define DIV_ONLY(x,y...) -#define MOD_ONLY(x,y...) x,##y -#define func(x) __rem##x -#define modulus $27 -#define quotient $2 -#define GETSIGN(x) bis $24,$24,x -#define STACK 32 -#endif - -/* - * For 32-bit operations, we need to extend to 64-bit - */ -#ifdef INTSIZE -#define ufunction func(lu) -#define sfunction func(l) -#define LONGIFY(x) zapnot x,15,x -#define SLONGIFY(x) addl x,0,x -#else -#define ufunction func(qu) -#define sfunction func(q) -#define LONGIFY(x) -#define SLONGIFY(x) -#endif - -.set noat -.align 4 -.globl ufunction -.ent ufunction -ufunction: - subq $30,STACK,$30 # E : - .frame $30,STACK,$23 - .prologue 0 - -7: stq $1, 0($30) # L : - bis $25,$25,divisor # E : - stq $2, 8($30) # L : L U L U - - bis $24,$24,modulus # E : - stq $0,16($30) # L : - bis $31,$31,quotient # E : - LONGIFY(divisor) # E : U L L U - - stq tmp1,24($30) # L : - LONGIFY(modulus) # E : - bis $31,1,mask # E : - DIV_ONLY(stq tmp2,32($30)) # L : L U U L - - beq divisor, 9f /* div by zero */ - /* - * In spite of the DIV_ONLY being either a non-instruction - * or an actual stq, the addition of the .align directive - * below ensures that label 1 is going to be nicely aligned - */ - - .align 4 -#ifdef INTSIZE - /* - * shift divisor left, using 3-bit shifts for - * 32-bit divides as we can't overflow. Three-bit - * shifts will result in looping three times less - * here, but can result in two loops more later. - * Thus using a large shift isn't worth it (and - * s8add pairs better than a sll..) 
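
(Aside: the trade-off reads more clearly in C. A loose sketch of the 32-bit normalization step, with illustrative names; the divisor is known non-zero by this point in the asm.)

    #include <stdint.h>

    /* 32-bit operands are zero-extended into 64-bit registers, so scaling
     * the divisor by 8 per iteration (one s8addq) can never overflow the
     * register; the price is that the divisor may overshoot the dividend
     * by up to 8x, costing a couple of extra trips when shifting back. */
    static void normalize_by_8(uint64_t *divisor, uint64_t *mask,
                               uint64_t dividend)
    {
        while (*divisor < dividend) {
            *divisor <<= 3;
            *mask    <<= 3;
        }
    }
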
- */ -1: cmpult divisor,modulus,compare # E : - s8addq divisor,$31,divisor # E : - s8addq mask,$31,mask # E : - bne compare,1b # U : U L U L -#else -1: cmpult divisor,modulus,compare # E : - nop # E : - nop # E : - blt divisor, 2f # U : U L U L - - addq divisor,divisor,divisor # E : - addq mask,mask,mask # E : - unop # E : - bne compare,1b # U : U L U L -#endif - - /* ok, start to go right again.. */ -2: - /* - * Keep things nicely bundled... use a nop instead of not - * having an instruction for DIV_ONLY - */ -#ifdef DIV - DIV_ONLY(addq quotient,mask,tmp2) # E : -#else - nop # E : -#endif - srl mask,1,mask # U : - cmpule divisor,modulus,compare # E : - subq modulus,divisor,tmp1 # E : - -#ifdef DIV - DIV_ONLY(cmovne compare,tmp2,quotient) # E : Latency 2, extra map slot - nop # E : as part of the cmovne - srl divisor,1,divisor # U : - nop # E : L U L U - - nop # E : - cmovne compare,tmp1,modulus # E : Latency 2, extra map slot - nop # E : as part of the cmovne - bne mask,2b # U : U L U L -#else - srl divisor,1,divisor # U : - cmovne compare,tmp1,modulus # E : Latency 2, extra map slot - nop # E : as part of the cmovne - bne mask,2b # U : U L L U -#endif - -9: ldq $1, 0($30) # L : - ldq $2, 8($30) # L : - nop # E : - nop # E : U U L L - - ldq $0,16($30) # L : - ldq tmp1,24($30) # L : - nop # E : - nop # E : - -#ifdef DIV - DIV_ONLY(ldq tmp2,32($30)) # L : -#else - nop # E : -#endif - addq $30,STACK,$30 # E : - ret $31,($23),1 # L0 : L U U L - .end ufunction -EXPORT_SYMBOL(ufunction) - -/* - * Uhh.. Ugly signed division. I'd rather not have it at all, but - * it's needed in some circumstances. There are different ways to - * handle this, really. This does: - * -a / b = a / -b = -(a / b) - * -a % b = -(a % b) - * a % -b = a % b - * which is probably not the best solution, but at least should - * have the property that (x/y)*y + (x%y) = x. - */ -.align 4 -.globl sfunction -.ent sfunction -sfunction: - subq $30,STACK,$30 # E : - .frame $30,STACK,$23 - .prologue 0 - bis $24,$25,$28 # E : - SLONGIFY($28) # E : - bge $28,7b # U : - - stq $24,0($30) # L : - subq $31,$24,$28 # E : - stq $25,8($30) # L : - nop # E : U L U L - - cmovlt $24,$28,$24 /* abs($24) */ # E : Latency 2, extra map slot - nop # E : as part of the cmov - stq $23,16($30) # L : - subq $31,$25,$28 # E : U L U L - - stq tmp1,24($30) # L : - cmovlt $25,$28,$25 /* abs($25) */ # E : Latency 2, extra map slot - nop # E : - bsr $23,ufunction # L0: L U L U - - ldq $24,0($30) # L : - ldq $25,8($30) # L : - GETSIGN($28) # E : - subq $31,$27,tmp1 # E : U U L L - - SLONGIFY($28) # E : - ldq $23,16($30) # L : - cmovlt $28,tmp1,$27 # E : Latency 2, extra map slot - nop # E : U L L U : as part of the cmov - - ldq tmp1,24($30) # L : - nop # E : as part of the cmov - addq $30,STACK,$30 # E : - ret $31,($23),1 # L0 : L U U L - .end sfunction -EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S deleted file mode 100644 index 56bf9e14eeeefadf510cbe4d52fba27c0f1f5701..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memchr.S +++ /dev/null @@ -1,193 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memchr.S - * - * 21264 version contributed by Rick Gorton - * - * Finds characters in a memory area. 
Optimized for the Alpha: - * - * - memory accessed as aligned quadwords only - * - uses cmpbge to compare 8 bytes in parallel - * - does binary search to find 0 byte in last - * quadword (HAKMEM needed 12 instructions to - * do this instead of the 9 instructions that - * binary search needs). - * - * For correctness consider that: - * - * - only minimum number of quadwords may be accessed - * - the third argument is an unsigned long - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ -#include - .set noreorder - .set noat - - .align 4 - .globl memchr - .ent memchr -memchr: - .frame $30,0,$26,0 - .prologue 0 - - # Hack -- if someone passes in (size_t)-1, hoping to just - # search til the end of the address space, we will overflow - # below when we find the address of the last byte. Given - # that we will never have a 56-bit address space, cropping - # the length is the easiest way to avoid trouble. - zap $18, 0x80, $5 # U : Bound length - beq $18, $not_found # U : - ldq_u $1, 0($16) # L : load first quadword Latency=3 - and $17, 0xff, $17 # E : L L U U : 00000000000000ch - - insbl $17, 1, $2 # U : 000000000000ch00 - cmpult $18, 9, $4 # E : small (< 1 quad) string? - or $2, $17, $17 # E : 000000000000chch - lda $3, -1($31) # E : U L L U - - sll $17, 16, $2 # U : 00000000chch0000 - addq $16, $5, $5 # E : Max search address - or $2, $17, $17 # E : 00000000chchchch - sll $17, 32, $2 # U : U L L U : chchchch00000000 - - or $2, $17, $17 # E : chchchchchchchch - extql $1, $16, $7 # U : $7 is upper bits - beq $4, $first_quad # U : - ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3 - - extqh $6, $16, $6 # U : 2 cycle stall for $6 - mov $16, $0 # E : - nop # E : - or $7, $6, $1 # E : L U L U $1 = quadword starting at $16 - - # Deal with the case where at most 8 bytes remain to be searched - # in $1. E.g.: - # $18 = 6 - # $1 = ????c6c5c4c3c2c1 -$last_quad: - negq $18, $6 # E : - xor $17, $1, $1 # E : - srl $3, $6, $6 # U : $6 = mask of $18 bits set - cmpbge $31, $1, $2 # E : L U L U - - nop - nop - and $2, $6, $2 # E : - beq $2, $not_found # U : U L U L - -$found_it: -#ifdef CONFIG_ALPHA_EV67 - /* - * Since we are guaranteed to have set one of the bits, we don't - * have to worry about coming back with a 0x40 out of cttz... - */ - cttz $2, $3 # U0 : - addq $0, $3, $0 # E : All done - nop # E : - ret # L0 : L U L U -#else - /* - * Slow and clunky. It can probably be improved. - * An exercise left for others. - */ - negq $2, $3 # E : - and $2, $3, $2 # E : - and $2, 0x0f, $1 # E : - addq $0, 4, $3 # E : - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop # E : keep with cmov - and $2, 0x33, $1 # E : - addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0 - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop # E : keep with cmov - and $2, 0x55, $1 # E : - addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0 - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop - nop - ret # L0 : L U L U -#endif - - # Deal with the case where $18 > 8 bytes remain to be - # searched. $16 may not be aligned. 
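
(Aside: the cmpbge-based search described in the header amounts to the classic SWAR zero-byte test. A portable sketch with illustrative names — the pattern is built here with a multiply rather than insbl/or, and the byte index is found with a simple scan where the asm uses a binary search, or cttz on EV67.)

    #include <stdint.h>

    /* Return the index (0..7) of the first byte of 'quad' equal to c,
       or -1 if none matches, mimicking one cmpbge step. */
    static int which_byte_matches(uint64_t quad, unsigned char c)
    {
        uint64_t pat = c * 0x0101010101010101ULL;  /* chchchchchchchch */
        uint64_t x = quad ^ pat;                   /* matching bytes -> 0x00 */
        uint64_t zero = (x - 0x0101010101010101ULL) & ~x
                        & 0x8080808080808080ULL;   /* 0x80 per zero byte */
        int i = 0;
        if (!zero)
            return -1;
        while (!(zero & 0xff)) {                   /* lowest set byte wins */
            zero >>= 8;
            i++;
        }
        return i;                                  /* little-endian order */
    }
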
- .align 4 -$first_quad: - andnot $16, 0x7, $0 # E : - insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff) - xor $1, $17, $1 # E : - or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff - - cmpbge $31, $1, $2 # E : - bne $2, $found_it # U : - # At least one byte left to process. - ldq $1, 8($0) # L : - subq $5, 1, $18 # E : U L U L - - addq $0, 8, $0 # E : - # Make $18 point to last quad to be accessed (the - # last quad may or may not be partial). - andnot $18, 0x7, $18 # E : - cmpult $0, $18, $2 # E : - beq $2, $final # U : U L U L - - # At least two quads remain to be accessed. - - subq $18, $0, $4 # E : $4 <- nr quads to be processed - and $4, 8, $4 # E : odd number of quads? - bne $4, $odd_quad_count # U : - # At least three quads remain to be accessed - mov $1, $4 # E : L U L U : move prefetched value to correct reg - - .align 4 -$unrolled_loop: - ldq $1, 8($0) # L : prefetch $1 - xor $17, $4, $2 # E : - cmpbge $31, $2, $2 # E : - bne $2, $found_it # U : U L U L - - addq $0, 8, $0 # E : - nop # E : - nop # E : - nop # E : - -$odd_quad_count: - xor $17, $1, $2 # E : - ldq $4, 8($0) # L : prefetch $4 - cmpbge $31, $2, $2 # E : - addq $0, 8, $6 # E : - - bne $2, $found_it # U : - cmpult $6, $18, $6 # E : - addq $0, 8, $0 # E : - nop # E : - - bne $6, $unrolled_loop # U : - mov $4, $1 # E : move prefetched value into $1 - nop # E : - nop # E : - -$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do - nop # E : - nop # E : - bne $18, $last_quad # U : - -$not_found: - mov $31, $0 # E : - nop # E : - nop # E : - ret # L0 : - - .end memchr - EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S deleted file mode 100644 index ffbd056b6eb2905d72d01b6f5bc65d9ac4a06340..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memcpy.S +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memcpy.S - * 21264 version by Rick Gorton - * - * Reasonably optimized memcpy() routine for the Alpha 21264 - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * - * Temp usage notes: - * $1,$2, - scratch - */ -#include - .set noreorder - .set noat - - .align 4 - .globl memcpy - .ent memcpy -memcpy: - .frame $30,0,$26,0 - .prologue 0 - - mov $16, $0 # E : copy dest to return - ble $18, $nomoredata # U : done with the copy? - xor $16, $17, $1 # E : are source and dest alignments the same? - and $1, 7, $1 # E : are they the same mod 8? - - bne $1, $misaligned # U : Nope - gotta do this the slow way - /* source and dest are same mod 8 address */ - and $16, 7, $1 # E : Are both 0mod8? - beq $1, $both_0mod8 # U : Yes - nop # E : - - /* - * source and dest are same misalignment. move a byte at a time - * until a 0mod8 alignment for both is reached. - * At least one byte more to move - */ - -$head_align: - ldbu $1, 0($17) # L : grab a byte - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - stb $1, 0($16) # L : - addq $16, 1, $16 # E : dest++ - and $16, 7, $1 # E : Are we at 0mod8 yet? 
- ble $18, $nomoredata # U : done with the copy? - bne $1, $head_align # U : - -$both_0mod8: - cmple $18, 127, $1 # E : Can we unroll the loop? - bne $1, $no_unroll # U : - and $16, 63, $1 # E : get mod64 alignment - beq $1, $do_unroll # U : no single quads to fiddle - -$single_head_quad: - ldq $1, 0($17) # L : get 8 bytes - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - nop # E : - - stq $1, 0($16) # L : store - addq $16, 8, $16 # E : dest += 8 - and $16, 63, $1 # E : get mod64 alignment - bne $1, $single_head_quad # U : still not fully aligned - -$do_unroll: - addq $16, 64, $7 # E : Initial (+1 trip) wh64 address - cmple $18, 127, $1 # E : Can we go through the unrolled loop? - bne $1, $tail_quads # U : Nope - nop # E : - -$unroll_body: - wh64 ($7) # L1 : memory subsystem hint: 64 bytes at - # ($7) are about to be over-written - ldq $6, 0($17) # L0 : bytes 0..7 - nop # E : - nop # E : - - ldq $4, 8($17) # L : bytes 8..15 - ldq $5, 16($17) # L : bytes 16..23 - addq $7, 64, $7 # E : Update next wh64 address - nop # E : - - ldq $3, 24($17) # L : bytes 24..31 - addq $16, 64, $1 # E : fallback value for wh64 - nop # E : - nop # E : - - addq $17, 32, $17 # E : src += 32 bytes - stq $6, 0($16) # L : bytes 0..7 - nop # E : - nop # E : - - stq $4, 8($16) # L : bytes 8..15 - stq $5, 16($16) # L : bytes 16..23 - subq $18, 192, $2 # E : At least two more trips to go? - nop # E : - - stq $3, 24($16) # L : bytes 24..31 - addq $16, 32, $16 # E : dest += 32 bytes - nop # E : - nop # E : - - ldq $6, 0($17) # L : bytes 0..7 - ldq $4, 8($17) # L : bytes 8..15 - cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use - # fallback wh64 address if < 2 more trips - nop # E : - - ldq $5, 16($17) # L : bytes 16..23 - ldq $3, 24($17) # L : bytes 24..31 - addq $16, 32, $16 # E : dest += 32 - subq $18, 64, $18 # E : count -= 64 - - addq $17, 32, $17 # E : src += 32 - stq $6, -32($16) # L : bytes 0..7 - stq $4, -24($16) # L : bytes 8..15 - cmple $18, 63, $1 # E : At least one more trip? - - stq $5, -16($16) # L : bytes 16..23 - stq $3, -8($16) # L : bytes 24..31 - nop # E : - beq $1, $unroll_body - -$tail_quads: -$no_unroll: - .align 4 - subq $18, 8, $18 # E : At least a quad left? - blt $18, $less_than_8 # U : Nope - nop # E : - nop # E : - -$move_a_quad: - ldq $1, 0($17) # L : fetch 8 - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - nop # E : - - stq $1, 0($16) # L : store 8 - addq $16, 8, $16 # E : dest += 8 - bge $18, $move_a_quad # U : - nop # E : - -$less_than_8: - .align 4 - addq $18, 8, $18 # E : add back for trailing bytes - ble $18, $nomoredata # U : All-done - nop # E : - nop # E : - - /* Trailing bytes */ -$tail_bytes: - subq $18, 1, $18 # E : count-- - ldbu $1, 0($17) # L : fetch a byte - addq $17, 1, $17 # E : src++ - nop # E : - - stb $1, 0($16) # L : store a byte - addq $16, 1, $16 # E : dest++ - bgt $18, $tail_bytes # U : more to be done? - nop # E : - - /* branching to exit takes 3 extra cycles, so replicate exit here */ - ret $31, ($26), 1 # L0 : - nop # E : - nop # E : - nop # E : - -$misaligned: - mov $0, $4 # E : dest temp - and $0, 7, $1 # E : dest alignment mod8 - beq $1, $dest_0mod8 # U : life doesnt totally suck - nop - -$aligndest: - ble $18, $nomoredata # U : - ldbu $1, 0($17) # L : fetch a byte - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - - stb $1, 0($4) # L : store it - addq $4, 1, $4 # E : dest++ - and $4, 7, $1 # E : dest 0mod8 yet? - bne $1, $aligndest # U : go until we are aligned. 
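
(Aside: the $mis_quad loop below is the standard Alpha unaligned-source technique. A rough portable C equivalent, assuming 64-bit little-endian like Alpha, with illustrative names.)

    #include <stdint.h>
    #include <stddef.h>

    /* Copy 'quads' aligned 8-byte stores to dst from an arbitrarily
       aligned src: keep one aligned quadword in hand, fetch the next,
       and splice the two (the extql/extqh merge) into each store. */
    static void misaligned_quads(uint64_t *dst, const unsigned char *src,
                                 size_t quads)
    {
        const uint64_t *p = (const uint64_t *)((uintptr_t)src & ~(uintptr_t)7);
        unsigned shift = ((uintptr_t)src & 7) * 8;
        uint64_t lo = *p++;                    /* seed (rotating) load: ldq_u */

        while (quads--) {
            uint64_t hi = *p++;                /* fetch next 8: ldq_u */
            *dst++ = shift ? (lo >> shift) | (hi << (64 - shift))
                           : lo;               /* extql | extqh merge */
            lo = hi;                           /* "rotate" the source data */
        }
    }
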
- - /* Source has unknown alignment, but dest is known to be 0mod8 */ -$dest_0mod8: - subq $18, 8, $18 # E : At least a quad left? - blt $18, $misalign_tail # U : Nope - ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes - nop # E : - -$mis_quad: - ldq_u $16, 8($17) # L : Fetch next 8 - extql $3, $17, $3 # U : masking - extqh $16, $17, $1 # U : masking - bis $3, $1, $1 # E : merged bytes to store - - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - stq $1, 0($4) # L : store 8 (aligned) - mov $16, $3 # E : "rotate" source data - - addq $4, 8, $4 # E : dest += 8 - bge $18, $mis_quad # U : More quads to move - nop - nop - -$misalign_tail: - addq $18, 8, $18 # E : account for tail stuff - ble $18, $nomoredata # U : - nop - nop - -$misalign_byte: - ldbu $1, 0($17) # L : fetch 1 - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - nop # E : - - stb $1, 0($4) # L : store - addq $4, 1, $4 # E : dest++ - bgt $18, $misalign_byte # U : more to go? - nop - - -$nomoredata: - ret $31, ($26), 1 # L0 : - nop # E : - nop # E : - nop # E : - - .end memcpy - EXPORT_SYMBOL(memcpy) - -/* For backwards module compatibility. */ -__memcpy = memcpy -.globl __memcpy diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S deleted file mode 100644 index 1cfcfbbea6f068fcc69200bfd7a1d4177e1d6b75..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memset.S +++ /dev/null @@ -1,605 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memset.S - * - * This is an efficient (and relatively small) implementation of the C library - * "memset()" function for the 21264 implementation of Alpha. - * - * 21264 version contributed by Rick Gorton - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * The algorithm for the leading and trailing quadwords remains the same, - * however the loop has been unrolled to enable better memory throughput, - * and the code has been replicated for each of the entry points: __memset - * and __memset16 to permit better scheduling to eliminate the stalling - * encountered during the mask replication. - * A future enhancement might be to put in a byte store loop for really - * small (say < 32 bytes) memset()s. Whether or not that change would be - * a win in the kernel would depend upon the contextual usage. - * WARNING: Maintaining this is going to be more work than the above version, - * as fixes will need to be made in multiple places. The performance gain - * is worth it. - */ -#include - .set noat - .set noreorder -.text - .globl memset - .globl __memset - .globl ___memset - .globl __memset16 - .globl __constant_c_memset - - .ent ___memset -.align 5 -___memset: - .frame $30,0,$26,0 - .prologue 0 - - /* - * Serious stalling happens. The only way to mitigate this is to - * undertake a major re-write to interleave the constant materialization - * with other parts of the fall-through code. This is important, even - * though it makes maintenance tougher. - * Do this later. - */ - and $17,255,$1 # E : 00000000000000ch - insbl $17,1,$2 # U : 000000000000ch00 - bis $16,$16,$0 # E : return value - ble $18,end_b # U : zero length requested? 
- - addq $18,$16,$6 # E : max address to write to - bis $1,$2,$17 # E : 000000000000chch - insbl $1,2,$3 # U : 0000000000ch0000 - insbl $1,3,$4 # U : 00000000ch000000 - - or $3,$4,$3 # E : 00000000chch0000 - inswl $17,4,$5 # U : 0000chch00000000 - xor $16,$6,$1 # E : will complete write be within one quadword? - inswl $17,6,$2 # U : chch000000000000 - - or $17,$3,$17 # E : 00000000chchchch - or $2,$5,$2 # E : chchchch00000000 - bic $1,7,$1 # E : fit within a single quadword? - and $16,7,$3 # E : Target addr misalignment - - or $17,$2,$17 # E : chchchchchchchch - beq $1,within_quad_b # U : - nop # E : - beq $3,aligned_b # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned_b: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. - */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad_b # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop_b # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. - */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign_b # U : - -$alignmod64_b: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64_b # U : - -$bigalign_b: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. 
- */ - -$do_wh64_b: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64_b # U : - - nop - nop - nop - beq $3, no_quad_b # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop_b: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop_b # U : more? - -no_quad_b: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end_b # U : All done? - ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_quad_b: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end_b: - nop - nop - nop - ret $31,($26),1 # L0 : - .end ___memset - EXPORT_SYMBOL(___memset) - - /* - * This is the original body of code, prior to replication and - * rescheduling. Leave it here, as there may be calls to this - * entry point. - */ -.align 4 - .ent __constant_c_memset -__constant_c_memset: - .frame $30,0,$26,0 - .prologue 0 - - addq $18,$16,$6 # E : max address to write to - bis $16,$16,$0 # E : return value - xor $16,$6,$1 # E : will complete write be within one quadword? - ble $18,end # U : zero length requested? - - bic $1,7,$1 # E : fit within a single quadword - beq $1,within_one_quad # U : - and $16,7,$3 # E : Target addr misalignment - beq $3,aligned # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. 
- */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. - */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign # U : - -$alignmod64: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64 # U : - -$bigalign: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - -$do_wh64: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64 # U : - - nop - nop - nop - beq $3, no_quad # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop # U : more? - -no_quad: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end # U : All done? 
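
The ldq/mskqh/insqh/bis/stq sequence that follows is the standard read-modify-write for the final 1..7 bytes: the old quadword is loaded, its bytes at and above the end address are kept, the fill pattern supplies the bytes below, and the merged quadword is stored back. A C model of that merge (sketch only; q is the last, partial quadword, pat the replicated fill, n the trailing-byte count, little-endian assumed):

    #include <stdint.h>

    /* Overwrite only the low n (1..7) bytes of *q with the fill
     * pattern, preserving the bytes beyond the memset region. */
    static void store_trailing(uint64_t *q, uint64_t pat, unsigned n)
    {
        uint64_t keep = ~0UL << (8 * n);     /* bytes n..7 of old data */
        *q = (*q & keep) | (pat & ~keep);    /* mskqh/insqh, bis, stq */
    }

One wide read-modify-write keeps the tail at a fixed four-instruction cost regardless of n, instead of paying for a byte loop.
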
- ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_one_quad: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end: - nop - nop - nop - ret $31,($26),1 # L0 : - .end __constant_c_memset - EXPORT_SYMBOL(__constant_c_memset) - - /* - * This is a replicant of the __constant_c_memset code, rescheduled - * to mask stalls. Note that entry point names also had to change - */ - .align 5 - .ent __memset16 - -__memset16: - .frame $30,0,$26,0 - .prologue 0 - - inswl $17,0,$5 # U : 000000000000c1c2 - inswl $17,2,$2 # U : 00000000c1c20000 - bis $16,$16,$0 # E : return value - addq $18,$16,$6 # E : max address to write to - - ble $18, end_w # U : zero length requested? - inswl $17,4,$3 # U : 0000c1c200000000 - inswl $17,6,$4 # U : c1c2000000000000 - xor $16,$6,$1 # E : will complete write be within one quadword? - - or $2,$5,$2 # E : 00000000c1c2c1c2 - or $3,$4,$17 # E : c1c2c1c200000000 - bic $1,7,$1 # E : fit within a single quadword - and $16,7,$3 # E : Target addr misalignment - - or $17,$2,$17 # E : c1c2c1c2c1c2c1c2 - beq $1,within_quad_w # U : - nop - beq $3,aligned_w # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned_w: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. - */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad_w # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop_w # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. 
- */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign_w # U : - -$alignmod64_w: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64_w # U : - -$bigalign_w: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - -$do_wh64_w: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64_w # U : - - nop - nop - nop - beq $3, no_quad_w # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop_w: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop_w # U : more? - -no_quad_w: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end_w # U : All done? - ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_quad_w: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end_w: - nop - nop - nop - ret $31,($26),1 # L0 : - - .end __memset16 - EXPORT_SYMBOL(__memset16) - -memset = ___memset -__memset = ___memset - EXPORT_SYMBOL(memset) - EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/ev6-stxcpy.S b/arch/alpha/lib/ev6-stxcpy.S deleted file mode 100644 index 65f5f7310d802d8f98cf91c61667a82fa367a9ff..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-stxcpy.S +++ /dev/null @@ -1,322 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-stxcpy.S - * 21264 version contributed by Rick Gorton - * - * Copy a null-terminated string from SRC to DST. - * - * This is an internal routine used by strcpy, stpcpy, and strcat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. 
- * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * - * On output: - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * - * Furthermore, v0, a3-a5, t11, and t12 are untouched. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - - .ent stxcpy_aligned - .align 4 -stxcpy_aligned: - .frame sp, 0, t9 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # E : build a mask against false zero - mskqh t2, a1, t2 # U : detection in the src word (stall) - mskqh t1, a1, t3 # U : - ornot t1, t2, t2 # E : (stall) - - mskql t0, a1, t0 # U : assemble the first output word - cmpbge zero, t2, t8 # E : bits set iff null found - or t0, t3, t1 # E : (stall) - bne t8, $a_eos # U : (stall) - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == a source word not containing a null. */ - /* Nops here to separate store quads from load quads */ - -$a_loop: - stq_u t1, 0(a0) # L : - addq a0, 8, a0 # E : - nop - nop - - ldq_u t1, 0(a1) # L : Latency=3 - addq a1, 8, a1 # E : - cmpbge zero, t1, t8 # E : (3 cycle stall) - beq t8, $a_loop # U : (stall for t8) - - /* Take care of the final (partial) word store. - On entry to this basic block we have: - t1 == the source word containing the null - t8 == the cmpbge mask that found it. */ -$a_eos: - negq t8, t6 # E : find low bit set - and t8, t6, t12 # E : (stall) - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # E : (stall) - bne t6, 1f # U : (stall) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t0, 0(a0) # L : Latency=3 - subq t12, 1, t6 # E : - zapnot t1, t6, t1 # U : clear src bytes >= null (stall) - or t12, t6, t8 # E : (stall) - - zap t0, t8, t0 # E : clear dst bytes <= null - or t0, t1, t1 # E : (stall) - nop - nop - -1: stq_u t1, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - .end stxcpy_aligned - - .align 4 - .ent __stxcpy - .globl __stxcpy -__stxcpy: - .frame sp, 0, t9 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t0 # E : - unop # E : - and t0, 7, t0 # E : (stall) - bne t0, $unaligned # U : (stall) - - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # L : load first src word - and a0, 7, t0 # E : take care not to load a word ... - addq a1, 8, a1 # E : - beq t0, stxcpy_aligned # U : ... 
if we wont need it (stall) - - ldq_u t0, 0(a0) # L : - br stxcpy_aligned # L0 : Latency=3 - nop - nop - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 4 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, for masking back in, if needed else 0 - t1 == the low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # L : - addq a1, 8, a1 # E : - extql t1, a1, t1 # U : (stall on a1) - extqh t2, a1, t4 # U : (stall on a1) - - mskql t0, a0, t0 # U : - or t1, t4, t1 # E : - mskqh t1, a0, t1 # U : (stall on t1) - or t0, t1, t1 # E : (stall on t1) - - or t1, t6, t6 # E : - cmpbge zero, t6, t8 # E : (stall) - lda t6, -1 # E : for masking just below - bne t8, $u_final # U : (stall) - - mskql t6, a1, t6 # U : mask out the bits we have - or t6, t2, t2 # E : already extracted before (stall) - cmpbge zero, t2, t8 # E : testing eos (stall) - bne t8, $u_late_head_exit # U : (stall) - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - stq_u t1, 0(a0) # L : store first output word - addq a0, 8, a0 # E : - extql t2, a1, t0 # U : position ho-bits of lo word - ldq_u t2, 8(a1) # U : read next high-order source word - - addq a1, 8, a1 # E : - cmpbge zero, t2, t8 # E : (stall for t2) - nop # E : - bne t8, $u_eos # U : (stall) - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. */ - - .align 3 -$u_loop: - extqh t2, a1, t1 # U : extract high bits for current word - addq a1, 8, a1 # E : (stall) - extql t2, a1, t3 # U : extract low bits for next time (stall) - addq a0, 8, a0 # E : - - or t0, t1, t1 # E : current dst word now complete - ldq_u t2, 0(a1) # L : Latency=3 load high word for next time - stq_u t1, -8(a0) # L : save the current word (stall) - mov t3, t0 # E : - - cmpbge zero, t2, t8 # E : test new word for eos - beq t8, $u_loop # U : (stall) - nop - nop - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - extqh t2, a1, t1 # U : - or t0, t1, t1 # E : first (partial) source word complete (stall) - cmpbge zero, t1, t8 # E : is the null in this first bit? (stall) - bne t8, $u_final # U : (stall) - -$u_late_head_exit: - stq_u t1, 0(a0) # L : the null was in the high-order bits - addq a0, 8, a0 # E : - extql t2, a1, t1 # U : - cmpbge zero, t1, t8 # E : (stall) - - /* Take care of a final (probably partial) result word. 
- On entry to this basic block: - t1 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # E : isolate low bit set - and t6, t8, t12 # E : (stall) - and t12, 0x80, t6 # E : avoid dest word load if we can (stall) - bne t6, 1f # U : (stall) - - ldq_u t0, 0(a0) # E : - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - zapnot t1, t6, t1 # U : kill source bytes >= null (stall) - - zap t0, t8, t0 # U : kill dest bytes <= null (2 cycle data stall) - or t0, t1, t1 # E : (stall) - nop - nop - -1: stq_u t1, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - /* Unaligned copy entry point. */ - .align 4 -$unaligned: - - ldq_u t1, 0(a1) # L : load first source word - and a0, 7, t4 # E : find dest misalignment - and a1, 7, t5 # E : find src misalignment - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - mov zero, t0 # E : - - mov zero, t6 # E : - beq t4, 1f # U : - ldq_u t0, 0(a0) # L : - lda t6, -1 # E : - - mskql t6, a0, t6 # U : - nop - nop - nop -1: - subq a1, t4, a1 # E : sub dest misalignment from src addr - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - cmplt t4, t5, t12 # E : - beq t12, $u_head # U : - lda t2, -1 # E : mask out leading garbage in source - - mskqh t2, t5, t2 # U : - ornot t1, t2, t3 # E : (stall) - cmpbge zero, t3, t8 # E : is there a zero? (stall) - beq t8, $u_head # U : (stall) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # L : - negq t8, t6 # E : build bitmask of bytes <= zero - and t6, t8, t12 # E : (stall) - and a1, 7, t5 # E : - - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - srl t12, t5, t12 # U : adjust final null return value - zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) - - and t1, t2, t1 # E : to source validity mask - extql t2, a1, t2 # U : - extql t1, a1, t1 # U : (stall) - andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) - - or t0, t1, t1 # e1 : and put it there - stq_u t1, 0(a0) # .. e0 : (stall) - ret (t9) # e1 : - nop - - .end __stxcpy - diff --git a/arch/alpha/lib/ev6-stxncpy.S b/arch/alpha/lib/ev6-stxncpy.S deleted file mode 100644 index 76da205282eec7aa1115568beb1f16f184e4a08d..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-stxncpy.S +++ /dev/null @@ -1,398 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-stxncpy.S - * 21264 version contributed by Rick Gorton - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. - * - * This is an internal routine used by strncpy, stpncpy, and strncat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * a2 = COUNT - * - * Furthermore, COUNT may not be zero. 
- * - * On output: - * t0 = last word written - * t10 = bitmask (with one bit set) indicating the byte position of - * the end of the range specified by COUNT - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * a2 = the number of full words left in COUNT - * - * Furthermore, v0, a3-a5, t11, and $at are untouched. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - - .ent stxncpy_aligned - .align 4 -stxncpy_aligned: - .frame sp, 0, t9, 0 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # E : build a mask against false zero - mskqh t2, a1, t2 # U : detection in the src word (stall) - mskqh t1, a1, t3 # U : - ornot t1, t2, t2 # E : (stall) - - mskql t0, a1, t0 # U : assemble the first output word - cmpbge zero, t2, t8 # E : bits set iff null found - or t0, t3, t0 # E : (stall) - beq a2, $a_eoc # U : - - bne t8, $a_eos # U : - nop - nop - nop - - /* On entry to this basic block: - t0 == a source word not containing a null. */ - - /* - * nops here to: - * separate store quads from load quads - * limit of 1 bcond/quad to permit training - */ -$a_loop: - stq_u t0, 0(a0) # L : - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - nop - - ldq_u t0, 0(a1) # L : - addq a1, 8, a1 # E : - cmpbge zero, t0, t8 # E : - beq a2, $a_eoc # U : - - beq t8, $a_loop # U : - nop - nop - nop - - /* Take care of the final (partial) word store. At this point - the end-of-count bit is set in t8 iff it applies. - - On entry to this basic block we have: - t0 == the source word containing the null - t8 == the cmpbge mask that found it. */ - -$a_eos: - negq t8, t12 # E : find low bit set - and t8, t12, t12 # E : (stall) - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # E : (stall) - bne t6, 1f # U : (stall) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t1, 0(a0) # L : - subq t12, 1, t6 # E : - or t12, t6, t8 # E : (stall) - zapnot t0, t8, t0 # U : clear src bytes > null (stall) - - zap t1, t8, t1 # .. e1 : clear dst bytes <= null - or t0, t1, t0 # e1 : (stall) - nop - nop - -1: stq_u t0, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - /* Add the end-of-count bit to the eos detection bitmask. */ -$a_eoc: - or t10, t8, t8 # E : - br $a_eos # L0 : Latency=3 - nop - nop - - .end stxncpy_aligned - - .align 4 - .ent __stxncpy - .globl __stxncpy -__stxncpy: - .frame sp, 0, t9, 0 - .prologue 0 - - /* Are source and destination co-aligned? 
*/ - xor a0, a1, t1 # E : - and a0, 7, t0 # E : find dest misalignment - and t1, 7, t1 # E : (stall) - addq a2, t0, a2 # E : bias count by dest misalignment (stall) - - subq a2, 1, a2 # E : - and a2, 7, t2 # E : (stall) - srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 (stall) - addq zero, 1, t10 # E : - - sll t10, t2, t10 # U : t10 = bitmask of last count byte - bne t1, $unaligned # U : - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # L : load first src word - addq a1, 8, a1 # E : - - beq t0, stxncpy_aligned # U : avoid loading dest word if not needed - ldq_u t0, 0(a0) # L : - nop - nop - - br stxncpy_aligned # .. e1 : - nop - nop - nop - - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 4 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, unmasked - t1 == the shifted low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # L : Latency=3 load second src word - addq a1, 8, a1 # E : - mskql t0, a0, t0 # U : mask trailing garbage in dst - extqh t2, a1, t4 # U : (3 cycle stall on t2) - - or t1, t4, t1 # E : first aligned src word complete (stall) - mskqh t1, a0, t1 # U : mask leading garbage in src (stall) - or t0, t1, t0 # E : first output word complete (stall) - or t0, t6, t6 # E : mask original data for zero test (stall) - - cmpbge zero, t6, t8 # E : - beq a2, $u_eocfin # U : - lda t6, -1 # E : - nop - - bne t8, $u_final # U : - mskql t6, a1, t6 # U : mask out bits already seen - stq_u t0, 0(a0) # L : store first output word - or t6, t2, t2 # E : (stall) - - cmpbge zero, t2, t8 # E : find nulls in second partial - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - bne t8, $u_late_head_exit # U : - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - extql t2, a1, t1 # U : position hi-bits of lo word - beq a2, $u_eoc # U : - ldq_u t2, 8(a1) # L : read next high-order source word - addq a1, 8, a1 # E : - - extqh t2, a1, t0 # U : position lo-bits of hi word (stall) - cmpbge zero, t2, t8 # E : - nop - bne t8, $u_eos # U : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. 
*/ - - .align 4 -$u_loop: - or t0, t1, t0 # E : current dst word now complete - subq a2, 1, a2 # E : decrement word count - extql t2, a1, t1 # U : extract low bits for next time - addq a0, 8, a0 # E : - - stq_u t0, -8(a0) # U : save the current word - beq a2, $u_eoc # U : - ldq_u t2, 8(a1) # U : Latency=3 load high word for next time - addq a1, 8, a1 # E : - - extqh t2, a1, t0 # U : extract low bits (2 cycle stall) - cmpbge zero, t2, t8 # E : test new word for eos - nop - beq t8, $u_loop # U : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - or t0, t1, t0 # E : first (partial) source word complete - nop - cmpbge zero, t0, t8 # E : is the null in this first bit? (stall) - bne t8, $u_final # U : (stall) - - stq_u t0, 0(a0) # L : the null was in the high-order bits - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - nop - -$u_late_head_exit: - extql t2, a1, t0 # U : - cmpbge zero, t0, t8 # E : - or t8, t10, t6 # E : (stall) - cmoveq a2, t6, t8 # E : Latency=2, extra map slot (stall) - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t0 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # E : isolate low bit set - and t6, t8, t12 # E : (stall) - and t12, 0x80, t6 # E : avoid dest word load if we can (stall) - bne t6, 1f # U : (stall) - - ldq_u t1, 0(a0) # L : - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - zapnot t0, t8, t0 # U : kill source bytes > null - - zap t1, t8, t1 # U : kill dest bytes <= null - or t0, t1, t0 # E : (stall) - nop - nop - -1: stq_u t0, 0(a0) # L : - ret (t9) # L0 : Latency=3 - - /* Got to end-of-count before end of string. - On entry to this basic block: - t1 == the shifted high-order bits from the previous source word */ -$u_eoc: - and a1, 7, t6 # E : avoid final load if possible - sll t10, t6, t6 # U : (stall) - and t6, 0xff, t6 # E : (stall) - bne t6, 1f # U : (stall) - - ldq_u t2, 8(a1) # L : load final src word - nop - extqh t2, a1, t0 # U : extract low bits for last word (stall) - or t1, t0, t1 # E : (stall) - -1: cmpbge zero, t1, t8 # E : - mov t1, t0 # E : - -$u_eocfin: # end-of-count, final word - or t10, t8, t8 # E : - br $u_final # L0 : Latency=3 - - /* Unaligned copy entry point. */ - .align 4 -$unaligned: - - ldq_u t1, 0(a1) # L : load first source word - and a0, 7, t4 # E : find dest misalignment - and a1, 7, t5 # E : find src misalignment - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - mov zero, t0 # E : - - mov zero, t6 # E : - beq t4, 1f # U : - ldq_u t0, 0(a0) # L : - lda t6, -1 # E : - - mskql t6, a0, t6 # U : - nop - nop - subq a1, t4, a1 # E : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - -1: cmplt t4, t5, t12 # E : - extql t1, a1, t1 # U : shift src into place - lda t2, -1 # E : for creating masks later - beq t12, $u_head # U : (stall) - - extql t2, a1, t2 # U : - cmpbge zero, t1, t8 # E : is there a zero? 
- andnot t2, t6, t2 # E : dest mask for a single word copy - or t8, t10, t5 # E : test for end-of-count too - - cmpbge zero, t2, t3 # E : - cmoveq a2, t5, t8 # E : Latency=2, extra map slot - nop # E : keep with cmoveq - andnot t8, t3, t8 # E : (stall) - - beq t8, $u_head # U : - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - ldq_u t0, 0(a0) # L : - negq t8, t6 # E : build bitmask of bytes <= zero - mskqh t1, t4, t1 # U : - - and t6, t8, t12 # E : - subq t12, 1, t6 # E : (stall) - or t6, t12, t8 # E : (stall) - zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) - - zapnot t1, t8, t1 # U : to source validity mask - andnot t0, t2, t0 # E : zero place for source to reside - or t0, t1, t0 # E : and put it there (stall both t0, t1) - stq_u t0, 0(a0) # L : (stall) - - ret (t9) # L0 : Latency=3 - nop - nop - nop - - .end __stxncpy diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S deleted file mode 100644 index ec3096a9e8d409558f7a29d19868688f7971dc81..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strcat.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strcat.S - * 21264 version contributed by Rick Gorton - * - * Append a null-terminated string from SRC to DST. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Commentary: It seems bogus to walk the input string twice - once - * to determine the length, and then again while doing the copy. - * A significant (future) enhancement would be to only read the input - * string once. - */ - -#include - .text - - .align 4 - .globl strcat - .ent strcat -strcat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # E : set up return value - /* Find the end of the string. */ - ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) - lda $2, -1 # E : - insqh $2, $16, $2 # U : - - andnot $16, 7, $16 # E : - or $2, $1, $1 # E : - cmpbge $31, $1, $2 # E : bits set iff byte == 0 - bne $2, $found # U : - -$loop: ldq $1, 8($16) # L : - addq $16, 8, $16 # E : - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: cttz $2, $3 # U0 : - addq $16, $3, $16 # E : - /* Now do the append. */ - mov $26, $23 # E : - br __stxcpy # L0 : - - .end strcat - EXPORT_SYMBOL(strcat) diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S deleted file mode 100644 index fbf89e0b6dc3b7bbb70560f1852728c01f3bf26d..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strchr.S +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strchr.S - * 21264 version contributed by Rick Gorton - * - * Return the address of a given character within a null-terminated - * string, or null if it is not found. 
- * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ -#include -#include - - .set noreorder - .set noat - - .align 4 - .globl strchr - .ent strchr -strchr: - .frame sp, 0, ra - .prologue 0 - - ldq_u t0, 0(a0) # L : load first quadword Latency=3 - and a1, 0xff, t3 # E : 00000000000000ch - insbl a1, 1, t5 # U : 000000000000ch00 - insbl a1, 7, a2 # U : ch00000000000000 - - insbl t3, 6, a3 # U : 00ch000000000000 - or t5, t3, a1 # E : 000000000000chch - andnot a0, 7, v0 # E : align our loop pointer - lda t4, -1 # E : build garbage mask - - mskqh t4, a0, t4 # U : only want relevant part of first quad - or a2, a3, a2 # E : chch000000000000 - inswl a1, 2, t5 # E : 00000000chch0000 - inswl a1, 4, a3 # E : 0000chch00000000 - - or a1, a2, a1 # E : chch00000000chch - or a3, t5, t5 # E : 0000chchchch0000 - cmpbge zero, t0, t2 # E : bits set iff byte == zero - cmpbge zero, t4, t4 # E : bits set iff byte is garbage - - /* This quad is _very_ serialized. Lots of stalling happens */ - or t5, a1, a1 # E : chchchchchchchch - xor t0, a1, t1 # E : make bytes == c zero - cmpbge zero, t1, t3 # E : bits set iff byte == c - or t2, t3, t0 # E : bits set iff char match or zero match - - andnot t0, t4, t0 # E : clear garbage bits - cttz t0, a2 # U0 : speculative (in case we get a match) - nop # E : - bne t0, $found # U : - - /* - * Yuk. This loop is going to stall like crazy waiting for the - * data to be loaded. Not much can be done about it unless it's - * unrolled multiple times - is that safe to do in kernel space? - * Or would exception handling recovery code do the trick here? - */ -$loop: ldq t0, 8(v0) # L : Latency=3 - addq v0, 8, v0 # E : - xor t0, a1, t1 # E : - cmpbge zero, t0, t2 # E : bits set iff byte == 0 - - cmpbge zero, t1, t3 # E : bits set iff byte == c - or t2, t3, t0 # E : - cttz t3, a2 # U0 : speculative (in case we get a match) - beq t0, $loop # U : - -$found: negq t0, t1 # E : clear all but least set bit - and t0, t1, t0 # E : - and t0, t3, t1 # E : bit set iff byte was the char - addq v0, a2, v0 # E : Add in the bit number from above - - cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2 - nop - nop - ret # L0 : - - .end strchr - EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S deleted file mode 100644 index b73106ffbbc7c7e32e286a92ec9f2cc276038bfd..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strlen.S +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strlen.S - * 21264 version by Rick Gorton - * - * Finds length of a 0-terminated string. 
Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ -#include - .set noreorder - .set noat - - .globl strlen - .ent strlen - .align 4 -strlen: - ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) - lda $2, -1($31) # E : - insqh $2, $16, $2 # U : - andnot $16, 7, $0 # E : - - or $2, $1, $1 # E : - cmpbge $31, $1, $2 # E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0 - nop # E : - bne $2, $found # U : - -$loop: ldq $1, 8($0) # L : - addq $0, 8, $0 # E : addr += 8 - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: - cttz $2, $3 # U0 : - addq $0, $3, $0 # E : - subq $0, $16, $0 # E : - ret $31, ($26) # L0 : - - .end strlen - EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S deleted file mode 100644 index ceb0ca528789aa67026ed17e1e2b6db7245de242..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strncat.S +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strncat.S - * 21264 version contributed by Rick Gorton - * - * Append no more than COUNT characters from the null-terminated string SRC - * to the null-terminated string DST. Always null-terminate the new DST. - * - * This differs slightly from the semantics in libc in that we never write - * past count, whereas libc may write to count+1. This follows the generic - * implementation in lib/string.c and is, IMHO, more sensible. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - .text - - .align 4 - .globl strncat - .ent strncat -strncat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - beq $18, $zerocount # U : - /* Find the end of the string. */ - ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) - lda $2, -1($31) # E : - - insqh $2, $0, $2 # U : - andnot $16, 7, $16 # E : - nop # E : - or $2, $1, $1 # E : - - nop # E : - nop # E : - cmpbge $31, $1, $2 # E : bits set iff byte == 0 - bne $2, $found # U : - -$loop: ldq $1, 8($16) # L : - addq $16, 8, $16 # E : - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: cttz $2, $3 # U0 : - addq $16, $3, $16 # E : - nop # E : - bsr $23, __stxncpy # L0 :/* Now do the append. */ - - /* Worry about the null termination. */ - - zapnot $1, $27, $2 # U : was last byte a null? - cmplt $27, $24, $5 # E : did we fill the buffer completely? - bne $2, 0f # U : - ret # L0 : - -0: or $5, $18, $2 # E : - nop - bne $2, 2f # U : - and $24, 0x80, $3 # E : no zero next byte - - nop # E : - bne $3, 1f # U : - /* Here there are bytes left in the current word. 
Clear one. */ - addq $24, $24, $24 # E : end-of-count bit <<= 1 - nop # E : - -2: zap $1, $24, $1 # U : - nop # E : - stq_u $1, 0($16) # L : - ret # L0 : - -1: /* Here we must clear the first byte of the next DST word */ - stb $31, 8($16) # L : - nop # E : - nop # E : - ret # L0 : - -$zerocount: - nop # E : - nop # E : - nop # E : - ret # L0 : - - .end strncat - EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S deleted file mode 100644 index 7f80e398530f564666cfac6f8b1fa8c836b028d4..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strrchr.S +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strrchr.S - * 21264 version by Rick Gorton - * - * Finds length of a 0-terminated string. Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ - -#include -#include - - .set noreorder - .set noat - - .align 4 - .ent strrchr - .globl strrchr -strrchr: - .frame sp, 0, ra - .prologue 0 - - and a1, 0xff, t2 # E : 00000000000000ch - insbl a1, 1, t4 # U : 000000000000ch00 - insbl a1, 2, t5 # U : 0000000000ch0000 - ldq_u t0, 0(a0) # L : load first quadword Latency=3 - - mov zero, t6 # E : t6 is last match aligned addr - or t2, t4, a1 # E : 000000000000chch - sll t5, 8, t3 # U : 00000000ch000000 - mov zero, t8 # E : t8 is last match byte compare mask - - andnot a0, 7, v0 # E : align source addr - or t5, t3, t3 # E : 00000000chch0000 - sll a1, 32, t2 # U : 0000chch00000000 - sll a1, 48, t4 # U : chch000000000000 - - or t4, a1, a1 # E : chch00000000chch - or t2, t3, t2 # E : 0000chchchch0000 - or a1, t2, a1 # E : chchchchchchchch - lda t5, -1 # E : build garbage mask - - cmpbge zero, t0, t1 # E : bits set iff byte == zero - mskqh t5, a0, t4 # E : Complete garbage mask - xor t0, a1, t2 # E : make bytes == c zero - cmpbge zero, t4, t4 # E : bits set iff byte is garbage - - cmpbge zero, t2, t3 # E : bits set iff byte == c - andnot t1, t4, t1 # E : clear garbage from null test - andnot t3, t4, t3 # E : clear garbage from char test - bne t1, $eos # U : did we already hit the terminator? - - /* Character search main loop */ -$loop: - ldq t0, 8(v0) # L : load next quadword - cmovne t3, v0, t6 # E : save previous comparisons match - nop # : Latency=2, extra map slot (keep nop with cmov) - nop - - cmovne t3, t3, t8 # E : Latency=2, extra map slot - nop # : keep with cmovne - addq v0, 8, v0 # E : - xor t0, a1, t2 # E : - - cmpbge zero, t0, t1 # E : bits set iff byte == zero - cmpbge zero, t2, t3 # E : bits set iff byte == c - beq t1, $loop # U : if we havnt seen a null, loop - nop - - /* Mask out character matches after terminator */ -$eos: - negq t1, t4 # E : isolate first null byte match - and t1, t4, t4 # E : - subq t4, 1, t5 # E : build a mask of the bytes up to... - or t4, t5, t4 # E : ... 
and including the null - - and t3, t4, t3 # E : mask out char matches after null - cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot - nop # : Keep with cmovne - nop - - cmovne t3, v0, t6 # E : - nop # : Keep with cmovne - /* Locate the address of the last matched character */ - ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) - nop - - cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen - nop # E : hide the cmov latency (2) behind ctlz latency - lda t5, 0x3f($31) # E : - subq t5, t2, t5 # E : Normalize leading zero count - - addq t6, t5, v0 # E : and add to quadword address - ret # L0 : Latency=3 - nop - nop - - .end strrchr - EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S deleted file mode 100644 index c13d3eca2e0592736dd17112ff836dcac5c0b7cb..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memchr.S +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright (C) 1996 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David Mosberger (davidm@cs.arizona.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* Finds characters in a memory area. Optimized for the Alpha: - - - memory accessed as aligned quadwords only - - uses cmpbge to compare 8 bytes in parallel - - does binary search to find 0 byte in last - quadword (HAKMEM needed 12 instructions to - do this instead of the 9 instructions that - binary search needs). - -For correctness consider that: - - - only minimum number of quadwords may be accessed - - the third argument is an unsigned long -*/ -#include - .set noreorder - .set noat - - .globl memchr - .ent memchr -memchr: - .frame $30,0,$26,0 - .prologue 0 - - # Hack -- if someone passes in (size_t)-1, hoping to just - # search til the end of the address space, we will overflow - # below when we find the address of the last byte. Given - # that we will never have a 56-bit address space, cropping - # the length is the easiest way to avoid trouble. - zap $18, 0x80, $5 #-e0 : - - beq $18, $not_found # .. e1 : - ldq_u $1, 0($16) # e1 : load first quadword - insbl $17, 1, $2 # .. e0 : $2 = 000000000000ch00 - and $17, 0xff, $17 #-e0 : $17 = 00000000000000ch - cmpult $18, 9, $4 # .. e1 : - or $2, $17, $17 # e0 : $17 = 000000000000chch - lda $3, -1($31) # .. e1 : - sll $17, 16, $2 #-e0 : $2 = 00000000chch0000 - addq $16, $5, $5 # .. e1 : - or $2, $17, $17 # e1 : $17 = 00000000chchchch - unop # : - sll $17, 32, $2 #-e0 : $2 = chchchch00000000 - or $2, $17, $17 # e1 : $17 = chchchchchchchch - extql $1, $16, $7 # e0 : - beq $4, $first_quad # .. e1 : - - ldq_u $6, -1($5) #-e1 : eight or less bytes to search - extqh $6, $16, $6 # .. e0 : - mov $16, $0 # e0 : - or $7, $6, $1 # .. 
e1 : $1 = quadword starting at $16 - - # Deal with the case where at most 8 bytes remain to be searched - # in $1. E.g.: - # $18 = 6 - # $1 = ????c6c5c4c3c2c1 -$last_quad: - negq $18, $6 #-e0 : - xor $17, $1, $1 # .. e1 : - srl $3, $6, $6 # e0 : $6 = mask of $18 bits set - cmpbge $31, $1, $2 # .. e1 : - and $2, $6, $2 #-e0 : - beq $2, $not_found # .. e1 : - -$found_it: - # Now, determine which byte matched: - negq $2, $3 # e0 : - and $2, $3, $2 # e1 : - - and $2, 0x0f, $1 #-e0 : - addq $0, 4, $3 # .. e1 : - cmoveq $1, $3, $0 # e0 : - - addq $0, 2, $3 # .. e1 : - and $2, 0x33, $1 #-e0 : - cmoveq $1, $3, $0 # .. e1 : - - and $2, 0x55, $1 # e0 : - addq $0, 1, $3 # .. e1 : - cmoveq $1, $3, $0 #-e0 : - -$done: ret # .. e1 : - - # Deal with the case where $18 > 8 bytes remain to be - # searched. $16 may not be aligned. - .align 4 -$first_quad: - andnot $16, 0x7, $0 #-e1 : - insqh $3, $16, $2 # .. e0 : $2 = 0000ffffffffffff ($16<0:2> ff) - xor $1, $17, $1 # e0 : - or $1, $2, $1 # e1 : $1 = ====ffffffffffff - cmpbge $31, $1, $2 #-e0 : - bne $2, $found_it # .. e1 : - - # At least one byte left to process. - - ldq $1, 8($0) # e0 : - subq $5, 1, $18 # .. e1 : - addq $0, 8, $0 #-e0 : - - # Make $18 point to last quad to be accessed (the - # last quad may or may not be partial). - - andnot $18, 0x7, $18 # .. e1 : - cmpult $0, $18, $2 # e0 : - beq $2, $final # .. e1 : - - # At least two quads remain to be accessed. - - subq $18, $0, $4 #-e0 : $4 <- nr quads to be processed - and $4, 8, $4 # e1 : odd number of quads? - bne $4, $odd_quad_count # e1 : - - # At least three quads remain to be accessed - - mov $1, $4 # e0 : move prefetched value to correct reg - - .align 4 -$unrolled_loop: - ldq $1, 8($0) #-e0 : prefetch $1 - xor $17, $4, $2 # .. e1 : - cmpbge $31, $2, $2 # e0 : - bne $2, $found_it # .. e1 : - - addq $0, 8, $0 #-e0 : -$odd_quad_count: - xor $17, $1, $2 # .. e1 : - ldq $4, 8($0) # e0 : prefetch $4 - cmpbge $31, $2, $2 # .. e1 : - addq $0, 8, $6 #-e0 : - bne $2, $found_it # .. e1 : - - cmpult $6, $18, $6 # e0 : - addq $0, 8, $0 # .. e1 : - bne $6, $unrolled_loop #-e1 : - - mov $4, $1 # e0 : move prefetched value into $1 -$final: subq $5, $0, $18 # .. e1 : $18 <- number of bytes left to do - bne $18, $last_quad # e1 : - -$not_found: - mov $31, $0 #-e0 : - ret # .. e1 : - - .end memchr - EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S deleted file mode 100644 index 42d1922d0edfca212abebd5bbc674257723ba0ac..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memmove.S +++ /dev/null @@ -1,183 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/memmove.S - * - * Barely optimized memmove routine for Alpha EV5. - * - * This is hand-massaged output from the original memcpy.c. We defer to - * memcpy whenever possible; the backwards copy loops are not unrolled. - */ -#include - .set noat - .set noreorder - .text - - .align 4 - .globl memmove - .ent memmove -memmove: - ldgp $29, 0($27) - unop - nop - .prologue 1 - - addq $16,$18,$4 - addq $17,$18,$5 - cmpule $4,$17,$1 /* dest + n <= src */ - cmpule $5,$16,$2 /* dest >= src + n */ - - bis $1,$2,$1 - mov $16,$0 - xor $16,$17,$2 - bne $1,memcpy !samegp - - and $2,7,$2 /* Test for src/dest co-alignment. 
*/ - and $16,7,$1 - cmpule $16,$17,$3 - bne $3,$memmove_up /* dest < src */ - - and $4,7,$1 - bne $2,$misaligned_dn - unop - beq $1,$skip_aligned_byte_loop_head_dn - -$aligned_byte_loop_head_dn: - lda $4,-1($4) - lda $5,-1($5) - unop - ble $18,$egress - - ldq_u $3,0($5) - ldq_u $2,0($4) - lda $18,-1($18) - extbl $3,$5,$1 - - insbl $1,$4,$1 - mskbl $2,$4,$2 - bis $1,$2,$1 - and $4,7,$6 - - stq_u $1,0($4) - bne $6,$aligned_byte_loop_head_dn - -$skip_aligned_byte_loop_head_dn: - lda $18,-8($18) - blt $18,$skip_aligned_word_loop_dn - -$aligned_word_loop_dn: - ldq $1,-8($5) - nop - lda $5,-8($5) - lda $18,-8($18) - - stq $1,-8($4) - nop - lda $4,-8($4) - bge $18,$aligned_word_loop_dn - -$skip_aligned_word_loop_dn: - lda $18,8($18) - bgt $18,$byte_loop_tail_dn - unop - ret $31,($26),1 - - .align 4 -$misaligned_dn: - nop - fnop - unop - beq $18,$egress - -$byte_loop_tail_dn: - ldq_u $3,-1($5) - ldq_u $2,-1($4) - lda $5,-1($5) - lda $4,-1($4) - - lda $18,-1($18) - extbl $3,$5,$1 - insbl $1,$4,$1 - mskbl $2,$4,$2 - - bis $1,$2,$1 - stq_u $1,0($4) - bgt $18,$byte_loop_tail_dn - br $egress - -$memmove_up: - mov $16,$4 - mov $17,$5 - bne $2,$misaligned_up - beq $1,$skip_aligned_byte_loop_head_up - -$aligned_byte_loop_head_up: - unop - ble $18,$egress - ldq_u $3,0($5) - ldq_u $2,0($4) - - lda $18,-1($18) - extbl $3,$5,$1 - insbl $1,$4,$1 - mskbl $2,$4,$2 - - bis $1,$2,$1 - lda $5,1($5) - stq_u $1,0($4) - lda $4,1($4) - - and $4,7,$6 - bne $6,$aligned_byte_loop_head_up - -$skip_aligned_byte_loop_head_up: - lda $18,-8($18) - blt $18,$skip_aligned_word_loop_up - -$aligned_word_loop_up: - ldq $1,0($5) - nop - lda $5,8($5) - lda $18,-8($18) - - stq $1,0($4) - nop - lda $4,8($4) - bge $18,$aligned_word_loop_up - -$skip_aligned_word_loop_up: - lda $18,8($18) - bgt $18,$byte_loop_tail_up - unop - ret $31,($26),1 - - .align 4 -$misaligned_up: - nop - fnop - unop - beq $18,$egress - -$byte_loop_tail_up: - ldq_u $3,0($5) - ldq_u $2,0($4) - lda $18,-1($18) - extbl $3,$5,$1 - - insbl $1,$4,$1 - mskbl $2,$4,$2 - bis $1,$2,$1 - stq_u $1,0($4) - - lda $5,1($5) - lda $4,1($4) - nop - bgt $18,$byte_loop_tail_up - -$egress: - ret $31,($26),1 - nop - nop - nop - - .end memmove - EXPORT_SYMBOL(memmove) diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S deleted file mode 100644 index 00393e30df259150a5bafe4d0e370987c3fce1e9..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memset.S +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/alpha/lib/memset.S - * - * This is an efficient (and small) implementation of the C library "memset()" - * function for the alpha. - * - * (C) Copyright 1996 Linus Torvalds - * - * This routine is "moral-ware": you are free to use it any way you wish, and - * the only obligation I put on you is a moral one: if you make any improvements - * to the routine, please send me your improvements for me to use similarly. - * - * The scheduling comments are according to the EV5 documentation (and done by - * hand, so they might well be incorrect, please do tell me about it..) - */ -#include - .set noat - .set noreorder -.text - .globl memset - .globl __memset - .globl ___memset - .globl __memset16 - .globl __constant_c_memset - - .ent ___memset -.align 5 -___memset: - .frame $30,0,$26,0 - .prologue 0 - - and $17,255,$1 /* E1 */ - insbl $17,1,$17 /* .. 
E0 */ - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - sll $17,16,$1 /* E1 (p-c latency, next cycle) */ - - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - sll $17,32,$1 /* E1 (p-c latency, next cycle) */ - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - ldq_u $31,0($30) /* .. E1 */ - -.align 5 -__constant_c_memset: - addq $18,$16,$6 /* E0 */ - bis $16,$16,$0 /* .. E1 */ - xor $16,$6,$1 /* E0 */ - ble $18,end /* .. E1 */ - - bic $1,7,$1 /* E0 */ - beq $1,within_one_quad /* .. E1 (note EV5 zero-latency forwarding) */ - and $16,7,$3 /* E0 */ - beq $3,aligned /* .. E1 (note EV5 zero-latency forwarding) */ - - ldq_u $4,0($16) /* E0 */ - bis $16,$16,$5 /* .. E1 */ - insql $17,$16,$2 /* E0 */ - subq $3,8,$3 /* .. E1 */ - - addq $18,$3,$18 /* E0 $18 is new count ($3 is negative) */ - mskql $4,$16,$4 /* .. E1 (and possible load stall) */ - subq $16,$3,$16 /* E0 $16 is new aligned destination */ - bis $2,$4,$1 /* .. E1 */ - - bis $31,$31,$31 /* E0 */ - ldq_u $31,0($30) /* .. E1 */ - stq_u $1,0($5) /* E0 */ - bis $31,$31,$31 /* .. E1 */ - -.align 4 -aligned: - sra $18,3,$3 /* E0 */ - and $18,7,$18 /* .. E1 */ - bis $16,$16,$5 /* E0 */ - beq $3,no_quad /* .. E1 */ - -.align 3 -loop: - stq $17,0($5) /* E0 */ - subq $3,1,$3 /* .. E1 */ - addq $5,8,$5 /* E0 */ - bne $3,loop /* .. E1 */ - -no_quad: - bis $31,$31,$31 /* E0 */ - beq $18,end /* .. E1 */ - ldq $7,0($5) /* E0 */ - mskqh $7,$6,$2 /* .. E1 (and load stall) */ - - insqh $17,$6,$4 /* E0 */ - bis $2,$4,$1 /* .. E1 */ - stq $1,0($5) /* E0 */ - ret $31,($26),1 /* .. E1 */ - -.align 3 -within_one_quad: - ldq_u $1,0($16) /* E0 */ - insql $17,$16,$2 /* E1 */ - mskql $1,$16,$4 /* E0 (after load stall) */ - bis $2,$4,$2 /* E0 */ - - mskql $2,$6,$4 /* E0 */ - mskqh $1,$6,$2 /* .. E1 */ - bis $2,$4,$1 /* E0 */ - stq_u $1,0($16) /* E0 */ - -end: - ret $31,($26),1 /* E1 */ - .end ___memset -EXPORT_SYMBOL(___memset) -EXPORT_SYMBOL(__constant_c_memset) - - .align 5 - .ent __memset16 -__memset16: - .prologue 0 - - inswl $17,0,$1 /* E0 */ - inswl $17,2,$2 /* E0 */ - inswl $17,4,$3 /* E0 */ - or $1,$2,$1 /* .. E1 */ - inswl $17,6,$4 /* E0 */ - or $1,$3,$1 /* .. E1 */ - or $1,$4,$17 /* E0 */ - br __constant_c_memset /* .. E1 */ - - .end __memset16 -EXPORT_SYMBOL(__memset16) - -memset = ___memset -__memset = ___memset - EXPORT_SYMBOL(memset) - EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S deleted file mode 100644 index 055877dccd276869895b8a48ad6d6c2590c019e2..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strcat.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strcat.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Append a null-terminated string from SRC to DST. - */ -#include - - .text - - .align 3 - .globl strcat - .ent strcat -strcat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - - /* Find the end of the string. */ - - ldq_u $1, 0($16) # load first quadword (a0 may be misaligned) - lda $2, -1 - insqh $2, $16, $2 - andnot $16, 7, $16 - or $2, $1, $1 - cmpbge $31, $1, $2 # bits set iff byte == 0 - bne $2, $found - -$loop: ldq $1, 8($16) - addq $16, 8, $16 - cmpbge $31, $1, $2 - beq $2, $loop - -$found: negq $2, $3 # clear all but least set bit - and $2, $3, $2 - - and $2, 0xf0, $3 # binary search for that set bit - and $2, 0xcc, $4 - and $2, 0xaa, $5 - cmovne $3, 4, $3 - cmovne $4, 2, $4 - cmovne $5, 1, $5 - addq $3, $4, $3 - addq $16, $5, $16 - addq $16, $3, $16 - - /* Now do the append. 
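strcat above splits into two steps: scan to the terminator a quadword at a time, then branch into the shared copy core. A C sketch of that structure, with stx_copy as a hypothetical stand-in for what __stxcpy does (the real routine also reports the position of the last byte written through registers).

```c
#include <stddef.h>

/* Hypothetical stand-in for __stxcpy: copy through the terminator and
 * return where the terminator landed. */
static char *stx_copy(char *dst, const char *src)
{
    while ((*dst = *src++))
        dst++;
    return dst;
}

char *my_strcat(char *dst, const char *src)
{
    char *end = dst;
    while (*end)         /* the assembly scans 8 bytes per cmpbge instead */
        end++;
    stx_copy(end, src);  /* delegate the actual append */
    return dst;          /* strcat returns the original DST */
}
```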
*/ - - mov $26, $23 - br __stxcpy - - .end strcat -EXPORT_SYMBOL(strcat); diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S deleted file mode 100644 index 17871dd00280489f80b381a2610ca9b547058c9c..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strchr.S +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strchr.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Return the address of a given character within a null-terminated - * string, or null if it is not found. - */ -#include -#include - - .set noreorder - .set noat - - .align 3 - .globl strchr - .ent strchr -strchr: - .frame sp, 0, ra - .prologue 0 - - zapnot a1, 1, a1 # e0 : zero extend the search character - ldq_u t0, 0(a0) # .. e1 : load first quadword - sll a1, 8, t5 # e0 : replicate the search character - andnot a0, 7, v0 # .. e1 : align our loop pointer - or t5, a1, a1 # e0 : - lda t4, -1 # .. e1 : build garbage mask - sll a1, 16, t5 # e0 : - cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero - mskqh t4, a0, t4 # e0 : - or t5, a1, a1 # .. e1 : - sll a1, 32, t5 # e0 : - cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage - or t5, a1, a1 # e0 : - xor t0, a1, t1 # .. e1 : make bytes == c zero - cmpbge zero, t1, t3 # e0 : bits set iff byte == c - or t2, t3, t0 # e1 : bits set iff char match or zero match - andnot t0, t4, t0 # e0 : clear garbage bits - bne t0, $found # .. e1 (zdb) - -$loop: ldq t0, 8(v0) # e0 : - addq v0, 8, v0 # .. e1 : - nop # e0 : - xor t0, a1, t1 # .. e1 (ev5 data stall) - cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 - cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c - or t2, t3, t0 # e0 : - beq t0, $loop # .. e1 (zdb) - -$found: negq t0, t1 # e0 : clear all but least set bit - and t0, t1, t0 # e1 (stall) - - and t0, t3, t1 # e0 : bit set iff byte was the char - beq t1, $retnull # .. e1 (zdb) - - and t0, 0xf0, t2 # e0 : binary search for that set bit - and t0, 0xcc, t3 # .. e1 : - and t0, 0xaa, t4 # e0 : - cmovne t2, 4, t2 # .. e1 : - cmovne t3, 2, t3 # e0 : - cmovne t4, 1, t4 # .. e1 : - addq t2, t3, t2 # e0 : - addq v0, t4, v0 # .. e1 : - addq v0, t2, v0 # e0 : - ret # .. e1 : - -$retnull: - mov zero, v0 # e0 : - ret # .. e1 : - - .end strchr - EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S deleted file mode 100644 index cb74ad23a90df8a79a38ea157ac434bc7a7ac567..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strcpy.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strcpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy a null-terminated string from SRC to DST. Return a pointer - * to the null-terminator in the source. - */ -#include - .text - - .align 3 - .globl strcpy - .ent strcpy -strcpy: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - mov $26, $23 # set up return address - unop - br __stxcpy # do the copy - - .end strcpy - EXPORT_SYMBOL(strcpy) diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S deleted file mode 100644 index dd882fe4d7e3076469dbdba9d6c3a6135583c75a..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strlen.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) - * - * Finds length of a 0-terminated string. 
Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - does binary search to find 0 byte in last - * quadword (HAKMEM needed 12 instructions to - * do this instead of the 9 instructions that - * binary search needs). - */ -#include - .set noreorder - .set noat - - .align 3 - - .globl strlen - .ent strlen - -strlen: - ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) - lda $2, -1($31) - insqh $2, $16, $2 - andnot $16, 7, $0 - or $2, $1, $1 - cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 - bne $2, found - -loop: ldq $1, 8($0) - addq $0, 8, $0 # addr += 8 - nop # helps dual issue last two insns - cmpbge $31, $1, $2 - beq $2, loop - -found: blbs $2, done # make aligned case fast - negq $2, $3 - and $2, $3, $2 - - and $2, 0x0f, $1 - addq $0, 4, $3 - cmoveq $1, $3, $0 - - and $2, 0x33, $1 - addq $0, 2, $3 - cmoveq $1, $3, $0 - - and $2, 0x55, $1 - addq $0, 1, $3 - cmoveq $1, $3, $0 - -done: subq $0, $16, $0 - ret $31, ($26) - - .end strlen - EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S deleted file mode 100644 index 522fee3e26ac148f8d0175f1964d7f586aa2fb64..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strncat.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strncat.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Append no more than COUNT characters from the null-terminated string SRC - * to the null-terminated string DST. Always null-terminate the new DST. - * - * This differs slightly from the semantics in libc in that we never write - * past count, whereas libc may write to count+1. This follows the generic - * implementation in lib/string.c and is, IMHO, more sensible. - */ -#include - .text - - .align 3 - .globl strncat - .ent strncat -strncat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - beq $18, $zerocount - - /* Find the end of the string. */ - - ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) - lda $2, -1($31) - insqh $2, $16, $2 - andnot $16, 7, $16 - or $2, $1, $1 - cmpbge $31, $1, $2 # bits set iff byte == 0 - bne $2, $found - -$loop: ldq $1, 8($16) - addq $16, 8, $16 - cmpbge $31, $1, $2 - beq $2, $loop - -$found: negq $2, $3 # clear all but least set bit - and $2, $3, $2 - - and $2, 0xf0, $3 # binary search for that set bit - and $2, 0xcc, $4 - and $2, 0xaa, $5 - cmovne $3, 4, $3 - cmovne $4, 2, $4 - cmovne $5, 1, $5 - addq $3, $4, $3 - addq $16, $5, $16 - addq $16, $3, $16 - - /* Now do the append. */ - - bsr $23, __stxncpy - - /* Worry about the null termination. */ - - zapnot $1, $27, $2 # was last byte a null? - bne $2, 0f - ret - -0: cmplt $27, $24, $2 # did we fill the buffer completely? - or $2, $18, $2 - bne $2, 2f - - and $24, 0x80, $2 # no zero next byte - bne $2, 1f - - /* Here there are bytes left in the current word. Clear one. */ - addq $24, $24, $24 # end-of-count bit <<= 1 -2: zap $1, $24, $1 - stq_u $1, 0($16) - ret - -1: /* Here we must read the next DST word and clear the first byte. 
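The strlen scan above, like the identical end-of-string search in strcat/strncat, reads one aligned quadword per iteration and lets cmpbge flag any zero byte. C has no cmpbge, but the classic haszero bit trick supports a portable sketch of the same word-at-a-time idea; since that trick is only trustworthy as a yes/no answer, the exact terminator is confirmed bytewise at the end. HASZERO and my_strlen are illustrative names.

```c
#include <stdint.h>
#include <string.h>

/* Nonzero iff some byte of x is zero. */
#define HASZERO(x) (((x) - 0x0101010101010101ULL) & ~(x) & 0x8080808080808080ULL)

size_t my_strlen(const char *s)
{
    const char *p = s;

    /* Reach 8-byte alignment bytewise; the assembly instead loads the first
     * aligned quadword and forces the leading garbage bytes to 0xff. */
    while ((uintptr_t)p & 7) {
        if (!*p)
            return (size_t)(p - s);
        p++;
    }
    for (;;) {
        uint64_t w;
        memcpy(&w, p, sizeof w);   /* one aligned 8-byte load per step */
        if (HASZERO(w))
            break;                 /* some byte in this word is zero */
        p += 8;
    }
    while (*p)                     /* pin down the exact terminator */
        p++;
    return (size_t)(p - s);
}
```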
*/ - ldq_u $1, 8($16) - zap $1, 1, $1 - stq_u $1, 8($16) - -$zerocount: - ret - - .end strncat - EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S deleted file mode 100644 index cc57fad8b7ca7728c28824dbc506357a562b3b8a..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strncpy.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strncpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. If SRC does not cover all of COUNT, the balance is - * zeroed. - * - * Or, rather, if the kernel cared about that weird ANSI quirk. This - * version has cropped that bit o' nastiness as well as assuming that - * __stxncpy is in range of a branch. - */ -#include - .set noat - .set noreorder - - .text - - .align 4 - .globl strncpy - .ent strncpy -strncpy: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set return value now - beq $18, $zerolen - unop - bsr $23, __stxncpy # do the work of the copy - - unop - bne $18, $multiword # do we have full words left? - subq $24, 1, $3 # nope - subq $27, 1, $4 - - or $3, $24, $3 # clear the bits between the last - or $4, $27, $4 # written byte and the last byte in COUNT - andnot $3, $4, $4 - zap $1, $4, $1 - - stq_u $1, 0($16) - ret - - .align 4 -$multiword: - subq $27, 1, $2 # clear the final bits in the prev word - or $2, $27, $2 - zapnot $1, $2, $1 - subq $18, 1, $18 - - stq_u $1, 0($16) - addq $16, 8, $16 - unop - beq $18, 1f - - nop - unop - nop - blbc $18, 0f - - stq_u $31, 0($16) # zero one word - subq $18, 1, $18 - addq $16, 8, $16 - beq $18, 1f - -0: stq_u $31, 0($16) # zero two words - subq $18, 2, $18 - stq_u $31, 8($16) - addq $16, 16, $16 - bne $18, 0b - -1: ldq_u $1, 0($16) # clear the leading bits in the final word - subq $24, 1, $2 - or $2, $24, $2 - - zap $1, $2, $1 - stq_u $1, 0($16) -$zerolen: - ret - - .end strncpy - EXPORT_SYMBOL(strncpy) diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S deleted file mode 100644 index 7650ba99b7e2c877cb26d97351fa09bfe50b0e5e..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strrchr.S +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strrchr.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Return the address of the last occurrence of a given character - * within a null-terminated string, or null if it is not found. - */ -#include -#include - - .set noreorder - .set noat - - .align 3 - .ent strrchr - .globl strrchr -strrchr: - .frame sp, 0, ra - .prologue 0 - - zapnot a1, 1, a1 # e0 : zero extend our test character - mov zero, t6 # .. e1 : t6 is last match aligned addr - sll a1, 8, t5 # e0 : replicate our test character - mov zero, t8 # .. e1 : t8 is last match byte compare mask - or t5, a1, a1 # e0 : - ldq_u t0, 0(a0) # .. e1 : load first quadword - sll a1, 16, t5 # e0 : - andnot a0, 7, v0 # .. e1 : align source addr - or t5, a1, a1 # e0 : - lda t4, -1 # .. e1 : build garbage mask - sll a1, 32, t5 # e0 : - cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero - mskqh t4, a0, t4 # e0 : - or t5, a1, a1 # .. e1 : character replication complete - xor t0, a1, t2 # e0 : make bytes == c zero - cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage - cmpbge zero, t2, t3 # e0 : bits set iff byte == c - andnot t1, t4, t1 # .. e1 : clear garbage from null test - andnot t3, t4, t3 # e0 : clear garbage from char test - bne t1, $eos # .. 
e1 : did we already hit the terminator? - - /* Character search main loop */ -$loop: - ldq t0, 8(v0) # e0 : load next quadword - cmovne t3, v0, t6 # .. e1 : save previous comparisons match - cmovne t3, t3, t8 # e0 : - addq v0, 8, v0 # .. e1 : - xor t0, a1, t2 # e0 : - cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero - cmpbge zero, t2, t3 # e0 : bits set iff byte == c - beq t1, $loop # .. e1 : if we havnt seen a null, loop - - /* Mask out character matches after terminator */ -$eos: - negq t1, t4 # e0 : isolate first null byte match - and t1, t4, t4 # e1 : - subq t4, 1, t5 # e0 : build a mask of the bytes up to... - or t4, t5, t4 # e1 : ... and including the null - - and t3, t4, t3 # e0 : mask out char matches after null - cmovne t3, t3, t8 # .. e1 : save it, if match found - cmovne t3, v0, t6 # e0 : - - /* Locate the address of the last matched character */ - - /* Retain the early exit for the ev4 -- the ev5 mispredict penalty - is 5 cycles -- the same as just falling through. */ - beq t8, $retnull # .. e1 : - - and t8, 0xf0, t2 # e0 : binary search for the high bit set - cmovne t2, t2, t8 # .. e1 (zdb) - cmovne t2, 4, t2 # e0 : - and t8, 0xcc, t1 # .. e1 : - cmovne t1, t1, t8 # e0 : - cmovne t1, 2, t1 # .. e1 : - and t8, 0xaa, t0 # e0 : - cmovne t0, 1, t0 # .. e1 (zdb) - addq t2, t1, t1 # e0 : - addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix - addq v0, t1, v0 # e0 : - ret # .. e1 : - -$retnull: - mov zero, v0 # e0 : - ret # .. e1 : - - .end strrchr - EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/stxcpy.S b/arch/alpha/lib/stxcpy.S deleted file mode 100644 index 58723b0a36d4a12c7c28e8be821876152f2b8584..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/stxcpy.S +++ /dev/null @@ -1,290 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/stxcpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy a null-terminated string from SRC to DST. - * - * This is an internal routine used by strcpy, stpcpy, and strcat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * - * On output: - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * - * Furthermore, v0, a3-a5, t11, and t12 are untouched. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - .ent stxcpy_aligned - .align 3 -stxcpy_aligned: - .frame sp, 0, t9 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # e1 : build a mask against false zero - mskqh t2, a1, t2 # e0 : detection in the src word - mskqh t1, a1, t3 # e0 : - ornot t1, t2, t2 # .. e1 : - mskql t0, a1, t0 # e0 : assemble the first output word - cmpbge zero, t2, t8 # .. e1 : bits set iff null found - or t0, t3, t1 # e0 : - bne t8, $a_eos # .. e1 : - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == a source word not containing a null. */ - -$a_loop: - stq_u t1, 0(a0) # e0 : - addq a0, 8, a0 # .. 
e1 : - ldq_u t1, 0(a1) # e0 : - addq a1, 8, a1 # .. e1 : - cmpbge zero, t1, t8 # e0 (stall) - beq t8, $a_loop # .. e1 (zdb) - - /* Take care of the final (partial) word store. - On entry to this basic block we have: - t1 == the source word containing the null - t8 == the cmpbge mask that found it. */ -$a_eos: - negq t8, t6 # e0 : find low bit set - and t8, t6, t12 # e1 (stall) - - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # e0 : - bne t6, 1f # .. e1 (zdb) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t0, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - zapnot t1, t6, t1 # e0 : clear src bytes >= null - or t12, t6, t8 # .. e1 : - zap t0, t8, t0 # e0 : clear dst bytes <= null - or t0, t1, t1 # e1 : - -1: stq_u t1, 0(a0) # e0 : - ret (t9) # .. e1 : - - .end stxcpy_aligned - - .align 3 - .ent __stxcpy - .globl __stxcpy -__stxcpy: - .frame sp, 0, t9 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t0 # e0 : - unop # : - and t0, 7, t0 # e0 : - bne t0, $unaligned # .. e1 : - - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # e0 : load first src word - and a0, 7, t0 # .. e1 : take care not to load a word ... - addq a1, 8, a1 # e0 : - beq t0, stxcpy_aligned # .. e1 : ... if we wont need it - ldq_u t0, 0(a0) # e0 : - br stxcpy_aligned # .. e1 : - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 3 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, for masking back in, if needed else 0 - t1 == the low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # e0 : - addq a1, 8, a1 # .. e1 : - - extql t1, a1, t1 # e0 : - extqh t2, a1, t4 # e0 : - mskql t0, a0, t0 # e0 : - or t1, t4, t1 # .. e1 : - mskqh t1, a0, t1 # e0 : - or t0, t1, t1 # e1 : - - or t1, t6, t6 # e0 : - cmpbge zero, t6, t8 # .. e1 : - lda t6, -1 # e0 : for masking just below - bne t8, $u_final # .. e1 : - - mskql t6, a1, t6 # e0 : mask out the bits we have - or t6, t2, t2 # e1 : already extracted before - cmpbge zero, t2, t8 # e0 : testing eos - bne t8, $u_late_head_exit # .. e1 (zdb) - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - stq_u t1, 0(a0) # e0 : store first output word - addq a0, 8, a0 # .. e1 : - extql t2, a1, t0 # e0 : position ho-bits of lo word - ldq_u t2, 8(a1) # .. e1 : read next high-order source word - addq a1, 8, a1 # e0 : - cmpbge zero, t2, t8 # .. e1 : - nop # e0 : - bne t8, $u_eos # .. e1 : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. 
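The $u_loop that follows assembles every aligned destination word from two adjacent aligned source words, extqh supplying the high part and extql the carry for the next round. A C sketch of that funnel shift with the null detection stripped out; it assumes little-endian order as on Alpha, a source misalignment of 1..7 bytes, and that one source word past the copied range is readable.

```c
#include <stddef.h>
#include <stdint.h>

void copy_shifted(uint64_t *dst, const uint64_t *src_aligned,
                  unsigned shift,    /* source misalignment, 1..7 bytes */
                  size_t n_words)
{
    unsigned lo = 8 * shift;         /* bits taken from the current word */
    unsigned hi = 64 - lo;           /* bits carried in from the next    */
    uint64_t prev = src_aligned[0];

    for (size_t i = 0; i < n_words; i++) {
        uint64_t next = src_aligned[i + 1];   /* reads one word past the end */
        dst[i] = (prev >> lo) | (next << hi); /* assemble a full dst word    */
        prev = next;
    }
}
```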
*/ - - .align 3 -$u_loop: - extqh t2, a1, t1 # e0 : extract high bits for current word - addq a1, 8, a1 # .. e1 : - extql t2, a1, t3 # e0 : extract low bits for next time - addq a0, 8, a0 # .. e1 : - or t0, t1, t1 # e0 : current dst word now complete - ldq_u t2, 0(a1) # .. e1 : load high word for next time - stq_u t1, -8(a0) # e0 : save the current word - mov t3, t0 # .. e1 : - cmpbge zero, t2, t8 # e0 : test new word for eos - beq t8, $u_loop # .. e1 : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - extqh t2, a1, t1 # e0 : - or t0, t1, t1 # e1 : first (partial) source word complete - - cmpbge zero, t1, t8 # e0 : is the null in this first bit? - bne t8, $u_final # .. e1 (zdb) - -$u_late_head_exit: - stq_u t1, 0(a0) # e0 : the null was in the high-order bits - addq a0, 8, a0 # .. e1 : - extql t2, a1, t1 # e0 : - cmpbge zero, t1, t8 # .. e1 : - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t1 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # e0 : isolate low bit set - and t6, t8, t12 # e1 : - - and t12, 0x80, t6 # e0 : avoid dest word load if we can - bne t6, 1f # .. e1 (zdb) - - ldq_u t0, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t6, t12, t8 # e0 : - zapnot t1, t6, t1 # .. e1 : kill source bytes >= null - zap t0, t8, t0 # e0 : kill dest bytes <= null - or t0, t1, t1 # e1 : - -1: stq_u t1, 0(a0) # e0 : - ret (t9) # .. e1 : - - /* Unaligned copy entry point. */ - .align 3 -$unaligned: - - ldq_u t1, 0(a1) # e0 : load first source word - - and a0, 7, t4 # .. e1 : find dest misalignment - and a1, 7, t5 # e0 : find src misalignment - - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - - mov zero, t0 # .. e1 : - mov zero, t6 # e0 : - beq t4, 1f # .. e1 : - ldq_u t0, 0(a0) # e0 : - lda t6, -1 # .. e1 : - mskql t6, a0, t6 # e0 : -1: - subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - - cmplt t4, t5, t12 # e0 : - beq t12, $u_head # .. e1 (zdb) - - lda t2, -1 # e1 : mask out leading garbage in source - mskqh t2, t5, t2 # e0 : - nop # e0 : - ornot t1, t2, t3 # .. e1 : - cmpbge zero, t3, t8 # e0 : is there a zero? - beq t8, $u_head # .. e1 (zdb) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # e0 : - - negq t8, t6 # .. e1 : build bitmask of bytes <= zero - and t6, t8, t12 # e0 : - and a1, 7, t5 # .. e1 : - subq t12, 1, t6 # e0 : - or t6, t12, t8 # e1 : - srl t12, t5, t12 # e0 : adjust final null return value - - zapnot t2, t8, t2 # .. e1 : prepare source word; mirror changes - and t1, t2, t1 # e1 : to source validity mask - extql t2, a1, t2 # .. e0 : - extql t1, a1, t1 # e0 : - - andnot t0, t2, t0 # .. e1 : zero place for source to reside - or t0, t1, t1 # e1 : and put it there - stq_u t1, 0(a0) # .. 
e0 : - ret (t9) # e1 : - - .end __stxcpy diff --git a/arch/alpha/lib/stxncpy.S b/arch/alpha/lib/stxncpy.S deleted file mode 100644 index 011d9091c6e16deb49f8d7b76c47e384bf599b23..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/stxncpy.S +++ /dev/null @@ -1,346 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/stxncpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. - * - * This is an internal routine used by strncpy, stpncpy, and strncat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * a2 = COUNT - * - * Furthermore, COUNT may not be zero. - * - * On output: - * t0 = last word written - * t10 = bitmask (with one bit set) indicating the byte position of - * the end of the range specified by COUNT - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * a2 = the number of full words left in COUNT - * - * Furthermore, v0, a3-a5, t11, and $at are untouched. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - .ent stxncpy_aligned - .align 3 -stxncpy_aligned: - .frame sp, 0, t9, 0 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # e1 : build a mask against false zero - mskqh t2, a1, t2 # e0 : detection in the src word - mskqh t1, a1, t3 # e0 : - ornot t1, t2, t2 # .. e1 : - mskql t0, a1, t0 # e0 : assemble the first output word - cmpbge zero, t2, t8 # .. e1 : bits set iff null found - or t0, t3, t0 # e0 : - beq a2, $a_eoc # .. e1 : - bne t8, $a_eos # .. e1 : - - /* On entry to this basic block: - t0 == a source word not containing a null. */ - -$a_loop: - stq_u t0, 0(a0) # e0 : - addq a0, 8, a0 # .. e1 : - ldq_u t0, 0(a1) # e0 : - addq a1, 8, a1 # .. e1 : - subq a2, 1, a2 # e0 : - cmpbge zero, t0, t8 # .. e1 (stall) - beq a2, $a_eoc # e1 : - beq t8, $a_loop # e1 : - - /* Take care of the final (partial) word store. At this point - the end-of-count bit is set in t8 iff it applies. - - On entry to this basic block we have: - t0 == the source word containing the null - t8 == the cmpbge mask that found it. */ - -$a_eos: - negq t8, t12 # e0 : find low bit set - and t8, t12, t12 # e1 (stall) - - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # e0 : - bne t6, 1f # .. e1 (zdb) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t1, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t12, t6, t8 # e0 : - unop # - zapnot t0, t8, t0 # e0 : clear src bytes > null - zap t1, t8, t1 # .. e1 : clear dst bytes <= null - or t0, t1, t0 # e1 : - -1: stq_u t0, 0(a0) # e0 : - ret (t9) # e1 : - - /* Add the end-of-count bit to the eos detection bitmask. 
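$a_eoc below works because the end-of-count position was encoded up front as a single-bit byte mask (t10) that can simply be OR-ed into the cmpbge result, making "count expired" look like "terminator found". A sketch of that setup arithmetic, ignoring the bias by destination misalignment that the real entry code applies to COUNT first.

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t count = 13;  /* example COUNT */

    uint64_t full_loops = (count - 1) >> 3;                   /* srl a2, 3, a2 */
    uint8_t  eoc_mask   = (uint8_t)(1u << ((count - 1) & 7)); /* sll t10, t2   */

    /* 13 bytes = one full quadword, then count expires at byte 4 of the
     * next word; OR-ing eoc_mask into a cmpbge result makes that byte
     * behave like a terminator. */
    assert(full_loops == 1);
    assert(eoc_mask == 0x10);
    return 0;
}
```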
*/ -$a_eoc: - or t10, t8, t8 - br $a_eos - - .end stxncpy_aligned - - .align 3 - .ent __stxncpy - .globl __stxncpy -__stxncpy: - .frame sp, 0, t9, 0 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t1 # e0 : - and a0, 7, t0 # .. e1 : find dest misalignment - and t1, 7, t1 # e0 : - addq a2, t0, a2 # .. e1 : bias count by dest misalignment - subq a2, 1, a2 # e0 : - and a2, 7, t2 # e1 : - srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 - addq zero, 1, t10 # .. e1 : - sll t10, t2, t10 # e0 : t10 = bitmask of last count byte - bne t1, $unaligned # .. e1 : - - /* We are co-aligned; take care of a partial first word. */ - - ldq_u t1, 0(a1) # e0 : load first src word - addq a1, 8, a1 # .. e1 : - - beq t0, stxncpy_aligned # avoid loading dest word if not needed - ldq_u t0, 0(a0) # e0 : - br stxncpy_aligned # .. e1 : - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 3 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, unmasked - t1 == the shifted low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # e0 : load second src word - addq a1, 8, a1 # .. e1 : - mskql t0, a0, t0 # e0 : mask trailing garbage in dst - extqh t2, a1, t4 # e0 : - or t1, t4, t1 # e1 : first aligned src word complete - mskqh t1, a0, t1 # e0 : mask leading garbage in src - or t0, t1, t0 # e0 : first output word complete - or t0, t6, t6 # e1 : mask original data for zero test - cmpbge zero, t6, t8 # e0 : - beq a2, $u_eocfin # .. e1 : - lda t6, -1 # e0 : - bne t8, $u_final # .. e1 : - - mskql t6, a1, t6 # e0 : mask out bits already seen - nop # .. e1 : - stq_u t0, 0(a0) # e0 : store first output word - or t6, t2, t2 # .. e1 : - cmpbge zero, t2, t8 # e0 : find nulls in second partial - addq a0, 8, a0 # .. e1 : - subq a2, 1, a2 # e0 : - bne t8, $u_late_head_exit # .. e1 : - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - extql t2, a1, t1 # e0 : position hi-bits of lo word - beq a2, $u_eoc # .. e1 : - ldq_u t2, 8(a1) # e0 : read next high-order source word - addq a1, 8, a1 # .. e1 : - extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall) - cmpbge zero, t2, t8 # .. e1 : - nop # e0 : - bne t8, $u_eos # .. e1 : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. */ - - .align 3 -$u_loop: - or t0, t1, t0 # e0 : current dst word now complete - subq a2, 1, a2 # .. e1 : decrement word count - stq_u t0, 0(a0) # e0 : save the current word - addq a0, 8, a0 # .. e1 : - extql t2, a1, t1 # e0 : extract high bits for next time - beq a2, $u_eoc # .. 
e1 : - ldq_u t2, 8(a1) # e0 : load high word for next time - addq a1, 8, a1 # .. e1 : - nop # e0 : - cmpbge zero, t2, t8 # e1 : test new word for eos (stall) - extqh t2, a1, t0 # e0 : extract low bits for current word - beq t8, $u_loop # .. e1 : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - or t0, t1, t0 # e0 : first (partial) source word complete - nop # .. e1 : - cmpbge zero, t0, t8 # e0 : is the null in this first bit? - bne t8, $u_final # .. e1 (zdb) - - stq_u t0, 0(a0) # e0 : the null was in the high-order bits - addq a0, 8, a0 # .. e1 : - subq a2, 1, a2 # e1 : - -$u_late_head_exit: - extql t2, a1, t0 # .. e0 : - cmpbge zero, t0, t8 # e0 : - or t8, t10, t6 # e1 : - cmoveq a2, t6, t8 # e0 : - nop # .. e1 : - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t0 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # e0 : isolate low bit set - and t6, t8, t12 # e1 : - - and t12, 0x80, t6 # e0 : avoid dest word load if we can - bne t6, 1f # .. e1 (zdb) - - ldq_u t1, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t6, t12, t8 # e0 : - zapnot t0, t8, t0 # .. e1 : kill source bytes > null - zap t1, t8, t1 # e0 : kill dest bytes <= null - or t0, t1, t0 # e1 : - -1: stq_u t0, 0(a0) # e0 : - ret (t9) # .. e1 : - - /* Got to end-of-count before end of string. - On entry to this basic block: - t1 == the shifted high-order bits from the previous source word */ -$u_eoc: - and a1, 7, t6 # e1 : - sll t10, t6, t6 # e0 : - and t6, 0xff, t6 # e0 : - bne t6, 1f # .. e1 : - - ldq_u t2, 8(a1) # e0 : load final src word - nop # .. e1 : - extqh t2, a1, t0 # e0 : extract low bits for last word - or t1, t0, t1 # e1 : - -1: cmpbge zero, t1, t8 - mov t1, t0 - -$u_eocfin: # end-of-count, final word - or t10, t8, t8 - br $u_final - - /* Unaligned copy entry point. */ - .align 3 -$unaligned: - - ldq_u t1, 0(a1) # e0 : load first source word - - and a0, 7, t4 # .. e1 : find dest misalignment - and a1, 7, t5 # e0 : find src misalignment - - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - - mov zero, t0 # .. e1 : - mov zero, t6 # e0 : - beq t4, 1f # .. e1 : - ldq_u t0, 0(a0) # e0 : - lda t6, -1 # .. e1 : - mskql t6, a0, t6 # e0 : - subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - -1: cmplt t4, t5, t12 # e1 : - extql t1, a1, t1 # .. e0 : shift src into place - lda t2, -1 # e0 : for creating masks later - beq t12, $u_head # .. e1 : - - extql t2, a1, t2 # e0 : - cmpbge zero, t1, t8 # .. e1 : is there a zero? - andnot t2, t6, t2 # e0 : dest mask for a single word copy - or t8, t10, t5 # .. e1 : test for end-of-count too - cmpbge zero, t2, t3 # e0 : - cmoveq a2, t5, t8 # .. e1 : - andnot t8, t3, t8 # e0 : - beq t8, $u_head # .. e1 (zdb) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. 
(Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # e0 : - negq t8, t6 # .. e1 : build bitmask of bytes <= zero - mskqh t1, t4, t1 # e0 : - and t6, t8, t12 # .. e1 : - subq t12, 1, t6 # e0 : - or t6, t12, t8 # e1 : - - zapnot t2, t8, t2 # e0 : prepare source word; mirror changes - zapnot t1, t8, t1 # .. e1 : to source validity mask - - andnot t0, t2, t0 # e0 : zero place for source to reside - or t0, t1, t0 # e1 : and put it there - stq_u t0, 0(a0) # e0 : - ret (t9) # .. e1 : - - .end __stxncpy diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S deleted file mode 100644 index d6373ec1bff9e99a9f65368d713a2f42ecd0bf15..0000000000000000000000000000000000000000 --- a/arch/alpha/math-emu/qrnnd.S +++ /dev/null @@ -1,163 +0,0 @@ - # Alpha 21064 __udiv_qrnnd - # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. - - # This file is part of GCC. - - # The GNU MP Library is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by - # the Free Software Foundation; either version 2 of the License, or (at your - # option) any later version. - - # In addition to the permissions in the GNU General Public License, the - # Free Software Foundation gives you unlimited permission to link the - # compiled version of this file with other programs, and to distribute - # those programs without any restriction coming from the use of this - # file. (The General Public License restrictions do apply in other - # respects; for example, they cover modification of the file, and - # distribution when not linked into another program.) - - # This file is distributed in the hope that it will be useful, but - # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public - # License for more details. - - # You should have received a copy of the GNU General Public License - # along with GCC; see the file COPYING. If not, write to the - # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - # MA 02111-1307, USA. 
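__udiv_qrnnd below is a restoring shift-and-subtract divider: the 128-bit numerator n1:n0 shifts left one bit per step, d is subtracted whenever it fits, and the vacated low bits of n0 accumulate the quotient; the assembly unrolls four steps per iteration, hence the loop count of 16. A C sketch under the usual precondition n1 < d, restricted to d below 2^63 so the $largedivisor path can be ignored.

```c
#include <stdint.h>

uint64_t udiv_qrnnd(uint64_t *rem, uint64_t n1, uint64_t n0, uint64_t d)
{
    for (int i = 0; i < 64; i++) {
        uint64_t carry = n0 >> 63;   /* cmplt n0,0,tmp: the bit shifted out */
        n1 = (n1 << 1) | carry;      /* 128-bit left shift, high half       */
        n0 <<= 1;                    /* ... and low half                    */
        if (n1 >= d) {               /* cmpule d,n1,qb                      */
            n1 -= d;                 /* restoring subtract                  */
            n0 |= 1;                 /* record this quotient bit            */
        }
    }
    *rem = n1;                       /* stq n1,0(rem_ptr) */
    return n0;                       /* quotient in $0    */
}
```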
- - .set noreorder - .set noat - - .text - - .globl __udiv_qrnnd - .ent __udiv_qrnnd -__udiv_qrnnd: - .frame $30,0,$26,0 - .prologue 0 - -#define cnt $2 -#define tmp $3 -#define rem_ptr $16 -#define n1 $17 -#define n0 $18 -#define d $19 -#define qb $20 -#define AT $at - - ldiq cnt,16 - blt d,$largedivisor - -$loop1: cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - subq cnt,1,cnt - bgt cnt,$loop1 - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - -$largedivisor: - and n0,1,$4 - - srl n0,1,n0 - sll n1,63,tmp - or tmp,n0,n0 - srl n1,1,n1 - - and d,1,$6 - srl d,1,$5 - addq $5,$6,$5 - -$loop2: cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - subq cnt,1,cnt - bgt cnt,$loop2 - - addq n1,n1,n1 - addq $4,n1,n1 - bne $6,$Odd - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - -$Odd: - /* q' in n0. r' in n1 */ - addq n1,n0,n1 - - cmpult n1,n0,tmp # tmp := carry from addq - subq n1,d,AT - addq n0,tmp,n0 - cmovne tmp,AT,n1 - - cmpult n1,d,tmp - addq n0,1,AT - cmoveq tmp,AT,n0 - subq n1,d,AT - cmoveq tmp,AT,n1 - - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - - .end __udiv_qrnnd diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S deleted file mode 100644 index 02c4614847611e5e55bbe88f00d102cd73ef0f84..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/ctx_sw_asm.S +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * Vineetg: Aug 2009 - * -Moved core context switch macro out of entry.S into this file. - * -This is the more "natural" hand written assembler - */ - -#include -#include /* For the SAVE_* macros */ -#include - -#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) - -;################### Low Level Context Switch ########################## - - .section .sched.text,"ax",@progbits - .align 4 - .global __switch_to - .type __switch_to, @function -__switch_to: - CFI_STARTPROC - - /* Save regs on kernel mode stack of task */ - st.a blink, [sp, -4] - st.a fp, [sp, -4] - SAVE_CALLEE_SAVED_KERNEL - - /* Save the now KSP in task->thread.ksp */ -#if KSP_WORD_OFF <= 255 - st.as sp, [r0, KSP_WORD_OFF] -#else - /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */ - add2 r24, r0, KSP_WORD_OFF - st sp, [r24] -#endif - /* - * Return last task in r0 (return reg) - * On ARC, Return reg = First Arg reg = r0. 
- * Since we already have last task in r0, - * don't need to do anything special to return it - */ - - /* - * switch to new task, contained in r1 - * Temp reg r3 is required to get the ptr to store val - */ - SET_CURR_TASK_ON_CPU r1, r3 - - /* reload SP with kernel mode stack pointer in task->thread.ksp */ - ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4] - - /* restore the registers */ - RESTORE_CALLEE_SAVED_KERNEL - ld.ab fp, [sp, 4] - ld.ab blink, [sp, 4] - j [blink] - -END_CFI(__switch_to) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S deleted file mode 100644 index 12d5f12d10d23b96cbb2ace74a2629aa1ac7257a..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry-arcv2.S +++ /dev/null @@ -1,263 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling - * - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) - */ - -#include /* ARC_{EXTRY,EXIT} */ -#include /* SAVE_ALL_{INT1,INT2,TRAP...} */ -#include -#include -#include - -; A maximum number of supported interrupts in the core interrupt controller. -; This number is not equal to the maximum interrupt number (256) because -; first 16 lines are reserved for exceptions and are not configurable. -#define NR_CPU_IRQS 240 - - .cpu HS - -#define VECTOR .word - -;############################ Vector Table ################################# - - .section .vector,"a",@progbits - .align 4 - -# Initial 16 slots are Exception Vectors -VECTOR res_service ; Reset Vector -VECTOR mem_service ; Mem exception -VECTOR instr_service ; Instrn Error -VECTOR EV_MachineCheck ; Fatal Machine check -VECTOR EV_TLBMissI ; Intruction TLB miss -VECTOR EV_TLBMissD ; Data TLB miss -VECTOR EV_TLBProtV ; Protection Violation -VECTOR EV_PrivilegeV ; Privilege Violation -VECTOR EV_SWI ; Software Breakpoint -VECTOR EV_Trap ; Trap exception -VECTOR EV_Extension ; Extn Instruction Exception -VECTOR EV_DivZero ; Divide by Zero -VECTOR EV_DCError ; Data Cache Error -VECTOR EV_Misaligned ; Misaligned Data Access -VECTOR reserved ; Reserved slots -VECTOR reserved ; Reserved slots - -# Begin Interrupt Vectors -VECTOR handle_interrupt ; (16) Timer0 -VECTOR handle_interrupt ; unused (Timer1) -VECTOR handle_interrupt ; unused (WDT) -VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI) -VECTOR handle_interrupt ; (20) perf Interrupt -VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI) -VECTOR handle_interrupt ; unused -VECTOR handle_interrupt ; (23) unused -# End of fixed IRQs - -.rept NR_CPU_IRQS - 8 - VECTOR handle_interrupt -.endr - - .section .text, "ax",@progbits - -reserved: - flag 1 ; Unexpected event, halt - -;##################### Interrupt Handling ############################## - -ENTRY(handle_interrupt) - - INTERRUPT_PROLOGUE - - # irq control APIs local_irq_save/restore/disable/enable fiddle with - # global interrupt enable bits in STATUS32 (.IE for 1 prio, .E[] for 2 prio) - # However a taken interrupt doesn't clear these bits. Thus irqs_disabled() - # query in hard ISR path would return false (since .IE is set) which would - # trips genirq interrupt handling asserts. - # - # So do a "soft" disable of interrutps here. - # - # Note this disable is only for consistent book-keeping as further interrupts - # will be disabled anyways even w/o this. 
Hardware tracks active interrupts - # seperately in AUX_IRQ_ACT.active and will not take new interrupts - # unless this one returns (or higher prio becomes pending in 2-prio scheme) - - IRQ_DISABLE - - ; icause is banked: one per priority level - ; so a higher prio interrupt taken here won't clobber prev prio icause - lr r0, [ICAUSE] - mov blink, ret_from_exception - - b.d arch_do_IRQ - mov r1, sp - -END(handle_interrupt) - -;################### Non TLB Exception Handling ############################# - -ENTRY(EV_SWI) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_SWI) - -ENTRY(EV_DivZero) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_DivZero) - -ENTRY(EV_DCError) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_DCError) - -; --------------------------------------------- -; Memory Error Exception Handler -; - Unlike ARCompact, handles Bus errors for both User/Kernel mode, -; Instruction fetch or Data access, under a single Exception Vector -; --------------------------------------------- - -ENTRY(mem_service) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_memory_error - b ret_from_exception -END(mem_service) - -ENTRY(EV_Misaligned) - - EXCEPTION_PROLOGUE - - lr r0, [efa] ; Faulting Data address - mov r1, sp - - FAKE_RET_FROM_EXCPN - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception -END(EV_Misaligned) - -; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - lr r0, [efa] ; Faulting Data address - mov r1, sp ; pt_regs - - FAKE_RET_FROM_EXCPN - - mov blink, ret_from_exception - b do_page_fault - -END(EV_TLBProtV) - -; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they -; need to call do_page_fault(). -; ECR in pt_regs provides whether access was R/W/X - -.global call_do_page_fault -.set call_do_page_fault, EV_TLBProtV - -;############# Common Handlers for ARCompact and ARCv2 ############## - -#include "entry.S" - -;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -.Lrestore_regs: -restore_regs: - - # Interrpts are actually disabled from this point on, but will get - # reenabled after we return from interrupt/exception. - # But irq tracer needs to be told now... 
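The ARCv2 dispatch just below keys off AUX_IRQ_ACT: its low 16 bits hold one active flag per interrupt priority, so a zero field means this is an exception or pure-kernel return rather than an interrupt return. A sketch of that decision; the function and enum names are illustrative.

```c
#include <stdint.h>

enum return_path { RET_FROM_EXCEPTION, RET_FROM_INTERRUPT };

static enum return_path pick_return_path(uint32_t aux_irq_act)
{
    uint32_t active = aux_irq_act & 0xffffu;  /* bmsk r11, r10, 15 */

    return active ? RET_FROM_INTERRUPT        /* INTERRUPT_EPILOGUE, rtie */
                  : RET_FROM_EXCEPTION;       /* EXCEPTION_EPILOGUE, rtie */
}
```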
- TRACE_ASM_IRQ_ENABLE - - ld r0, [sp, PT_status32] ; U/K mode at time of entry - lr r10, [AUX_IRQ_ACT] - - bmsk r11, r10, 15 ; extract AUX_IRQ_ACT.active - breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception - -;####### Return from Intr ####### - -.Lisr_ret: - -debug_marker_l1: - ; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot - btst r0, STATUS_DE_BIT ; Z flag set if bit clear - bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set - - ; Handle special case #1: (Entry via Exception, Return via IRQ) - ; - ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig - ; task now returning to U mode (riding the Intr) - ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP - ; won't be switched to correct U mode value (from AUX_SP) - ; So force AUX_IRQ_ACT.U for such a case - - btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U) - bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U - sr r11, [AUX_IRQ_ACT] - - INTERRUPT_EPILOGUE - rtie - -;####### Return from Exception / pure kernel mode ####### - -.Lexcept_ret: ; Expects r0 has PT_status32 - -debug_marker_syscall: - EXCEPTION_EPILOGUE - rtie - -;####### Return from Intr to insn in delay slot ####### - -; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ) -; -; Intr returning to a Delay Slot (DS) insn -; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig -; entry was via Exception in DS which got preempted in kernel). -; -; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround -; -; Solution is to drop out of interrupt context into pure kernel mode -; and return from pure kernel mode which does right things for delay slot - -.Lintr_ret_to_delay_slot: -debug_marker_ds: - - ld r2, [@intr_to_DE_cnt] - add r2, r2, 1 - st r2, [@intr_to_DE_cnt] - - ; drop out of interrupt context (clear AUX_IRQ_ACT.active) - bmskn r11, r10, 15 - sr r11, [AUX_IRQ_ACT] - b .Lexcept_ret - -END(ret_from_exception) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S deleted file mode 100644 index 5cb0cd7e4eabaff15d8e4bc3afa356042615b8fe..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry-compact.S +++ /dev/null @@ -1,403 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA - * - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * vineetg: May 2011 - * -Userspace unaligned access emulation - * - * vineetg: Feb 2011 (ptrace low level code fixes) - * -traced syscall return code (r0) was not saved into pt_regs for restoring - * into user reg-file when traded task rets to user space. - * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) - * were not invoking post-syscall trace hook (jumping directly into - * ret_from_system_call) - * - * vineetg: Nov 2010: - * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) - * -To maintain the slot size of 8 bytes/vector, added nop, which is - * not executed at runtime. - * - * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) - * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well - * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't - * need ptregs anymore - * - * Vineetg: Oct 2009 - * -In a rare scenario, Process gets a Priv-V exception and gets scheduled - * out. 
Since we don't do FAKE RTIE for Priv-V, CPU exception state remains - * active (AE bit enabled). This causes a double fault for a subseq valid - * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. - * Instr Error could also cause similar scenario, so same there as well. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, - * setting it and not clearing it clears ZOL context - * - * Vineetg: May 16th, 2008 - * - r25 now contains the Current Task when in kernel - * - * Vineetg: Dec 22, 2007 - * Minor Surgery of Low Level ISR to make it SMP safe - * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR - * - _current_task is made an array of NR_CPUS - * - Access of _current_task wrapped inside a macro so that if hardware - * team agrees for a dedicated reg, no other code is touched - * - * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 - */ - -#include -#include /* {ENTRY,EXIT} */ -#include -#include - - .cpu A7 - -;############################ Vector Table ################################# - -.macro VECTOR lbl -#if 1 /* Just in case, build breaks */ - j \lbl -#else - b \lbl - nop -#endif -.endm - - .section .vector, "ax",@progbits - .align 4 - -/* Each entry in the vector table must occupy 2 words. Since it is a jump - * across sections (.vector to .text) we are guaranteed that 'j somewhere' - * will use the 'j limm' form of the instruction as long as somewhere is in - * a section other than .vector. 
- */ - -; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Reset Vector (0x0) -VECTOR mem_service ; 0x8, Mem exception (0x1) -VECTOR instr_service ; 0x10, Instrn Error (0x2) - -; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -.rept 28 -VECTOR handle_interrupt_level1 ; Other devices -.endr - -/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ - -; ******************** Exceptions ********************** -VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) -VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21) -VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) -VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) - ; or Misaligned Access -VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) -VECTOR EV_Trap ; 0x128, Trap exception (0x25) -VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26) - -.rept 24 -VECTOR reserved ; Reserved Exceptions -.endr - - -;##################### Scratch Mem for IRQ stack switching ############# - -ARCFP_DATA int1_saved_reg - .align 32 - .type int1_saved_reg, @object - .size int1_saved_reg, 4 -int1_saved_reg: - .zero 4 - -/* Each Interrupt level needs its own scratch */ -ARCFP_DATA int2_saved_reg - .type int2_saved_reg, @object - .size int2_saved_reg, 4 -int2_saved_reg: - .zero 4 - -; --------------------------------------------- - .section .text, "ax",@progbits - - -reserved: - flag 1 ; Unexpected event, halt - -;##################### Interrupt Handling ############################## - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -; --------------------------------------------- -; Level 2 ISR: Can interrupt a Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level2) - - INTERRUPT_PROLOGUE 2 - - ;------------------------------------------------------ - ; if L2 IRQ interrupted a L1 ISR, disable preemption - ; - ; This is to avoid a potential L1-L2-L1 scenario - ; -L1 IRQ taken - ; -L2 interrupts L1 (before L1 ISR could run) - ; -preemption off IRQ, user task in syscall picked to run - ; -RTIE to userspace - ; Returns from L2 context fine - ; But both L1 and L2 re-enabled, so another L1 can be taken - ; while prev L1 is still unserviced - ; - ;------------------------------------------------------ - - ; L2 interrupting L1 implies both L2 and L1 active - ; However both A2 and A1 are NOT set in STATUS32, thus - ; need to check STATUS32_L2 to determine if L1 was active - - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - - ; bump thread_info->preempt_count (Disable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - add r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -1: - ;------------------------------------------------------ - ; setup params for Linux common ISR and invoke it - ;------------------------------------------------------ - lr r0, [icause2] - and r0, r0, 0x1f - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x2 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception - -END(handle_interrupt_level2) - -#endif - -; --------------------------------------------- -; User Mode Memory Bus Error Interrupt Handler -; (Kernel mode memory errors handled via separate exception vectors) -; --------------------------------------------- -ENTRY(mem_service) - - INTERRUPT_PROLOGUE 2 - - mov r0, ilink2 - mov r1, sp - - 
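mem_service here must drop out of the level-2 interrupt context before C code can queue the SIGBUS, which it does by clearing the A2 active flag, re-enabling both interrupt levels, and faking an RTIE, as the following instructions show. A sketch of that STATUS32 edit; the bit positions are assumptions for illustration, not authoritative ARC definitions.

```c
#include <stdint.h>

#define STATUS_A2_BIT  4              /* assumed position, illustrative */
#define STATUS_E1_MASK (1u << 1)      /* level-1 interrupt enable       */
#define STATUS_E2_MASK (1u << 2)      /* level-2 interrupt enable       */

static uint32_t drop_to_kernel_mode(uint32_t status32)
{
    status32 &= ~(1u << STATUS_A2_BIT);           /* bclr r3, r3, A2      */
    status32 |= STATUS_E1_MASK | STATUS_E2_MASK;  /* or r3, r3, E1|E2     */
    return status32;                              /* sr r3, [status32_l2] */
}
```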
; User process needs to be killed with SIGBUS, but first need to get - ; out of the L2 interrupt context (drop to pure kernel mode) and jump - ; off to "C" code where SIGBUS in enqueued - lr r3, [status32] - bclr r3, r3, STATUS_A2_BIT - or r3, r3, (STATUS_E1_MASK|STATUS_E2_MASK) - sr r3, [status32_l2] - mov ilink2, 1f - rtie -1: - bl do_memory_error - b ret_from_exception -END(mem_service) - -; --------------------------------------------- -; Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level1) - - INTERRUPT_PROLOGUE 1 - - lr r0, [icause1] - and r0, r0, 0x1f - -#ifdef CONFIG_TRACE_IRQFLAGS - ; icause1 needs to be read early, before calling tracing, which - ; can clobber scratch regs, hence use of stack to stash it - push r0 - TRACE_ASM_IRQ_DISABLE - pop r0 -#endif - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x1 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception -END(handle_interrupt_level1) - -;################### Non TLB Exception Handling ############################# - -; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - mov r2, r10 ; ECR set into r10 already - lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) - - ; Exception auto-disables further Intr/exceptions. - ; Re-enable them by pretending to return from exception - ; (so rest of handler executes in pure K mode) - - FAKE_RET_FROM_EXCPN - - mov r1, sp ; Handle to pt_regs - - ;------ (5) Type of Protection Violation? ---------- - ; - ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violation : 00_23_(00|01|02|03)_00 - ; x r w r+w - ; -Unaligned Access : 00_23_04_00 - ; - bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f - - ;========= (6a) Access Violation Processing ======== - bl do_page_fault - b ret_from_exception - - ;========== (6b) Non aligned access ============ -4: - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception - -END(EV_TLBProtV) - -; Wrapper for Linux page fault handler called from EV_TLBMiss* -; Very similar to ProtV handler case (6a) above, but avoids the extra checks -; for Misaligned access -; -ENTRY(call_do_page_fault) - - EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp - FAKE_RET_FROM_EXCPN - - mov blink, ret_from_exception - b do_page_fault - -END(call_do_page_fault) - -;############# Common Handlers for ARCompact and ARCv2 ############## - -#include "entry.S" - -;############# Return from Intr/Excp/Trap (ARC Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -.Lrestore_regs: - - # Interrupts are actually disabled from this point on, but will get - # reenabled after we return from interrupt/exception. - # But irq tracer needs to be told now... - TRACE_ASM_IRQ_ENABLE - - lr r10, [status32] - - ; Restore REG File. In case multiple Events outstanding, - ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None - ; Note that we use realtime STATUS32 (not pt_regs->status32) to - ; decide that. 
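The test just below reads the realtime STATUS32, where A1/A2 record which interrupt levels are still active, and picks the unwind path in the same priority order rtie would use. A sketch of that three-way choice; the mask values are illustrative stand-ins for the ARC definitions.

```c
#include <stdint.h>

#define STATUS_A1_MASK (1u << 3)  /* assumed positions, illustrative */
#define STATUS_A2_MASK (1u << 4)

enum ret_path { RET_EXCEPTION_OR_KERNEL, RET_L1_INTERRUPT, RET_L2_INTERRUPT };

static enum ret_path pick_compact_return(uint32_t status32)
{
    if (!(status32 & (STATUS_A1_MASK | STATUS_A2_MASK)))
        return RET_EXCEPTION_OR_KERNEL;  /* bz .Lexcep_or_pure_K_ret   */
    if (status32 & STATUS_A2_MASK)       /* bbit0 r10, STATUS_A2_BIT   */
        return RET_L2_INTERRUPT;         /* higher level unwinds first */
    return RET_L1_INTERRUPT;
}
```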
- - and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) - bz .Lexcep_or_pure_K_ret - - ; Returning from Interrupts (Level 1 or 2) - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - - ; Level 2 interrupt return Path - from hardware standpoint - bbit0 r10, STATUS_A2_BIT, not_level2_interrupt - - ;------------------------------------------------------------------ - ; However the context returning might not have taken L2 intr itself - ; e.g. Task 'A' user-code -> L2 intr -> schedule -> 'B' user-code ret - ; Special considerations needed for the context which took L2 intr - - ld r9, [sp, PT_event] ; Ensure this is L2 intr context - brne r9, event_IRQ2, 149f - - ;------------------------------------------------------------------ - ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier - ; so that sched doesn't move to new task, causing L1 to be delayed - ; non-deterministically. Now that we've achieved that, let's reset - ; things to what they were, before returning from L2 context - ;---------------------------------------------------------------- - - ld r9, [sp, PT_status32] ; get status32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal - - ; decrement thread_info->preempt_count (re-enable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - - ; paranoid check: given A1 was active when A2 happened, preempt count - ; must not be 0 because we would have incremented it. - ; If this does happen we simply HALT as it means a BUG !!! - cmp r9, 0 - bnz 2f - flag 1 - -2: - sub r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -149: - INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt -debug_marker_l2: - rtie - -not_level2_interrupt: - -#endif - - INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt -debug_marker_l1: - rtie - -.Lexcep_or_pure_K_ret: - - ; this case is for syscalls or Exceptions or pure kernel mode - - EXCEPTION_EPILOGUE -debug_marker_syscall: - rtie - -END(ret_from_exception) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S deleted file mode 100644 index ea74a1eee5d9dfabed168720474e67a5089d3cef..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry.S +++ /dev/null @@ -1,358 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Common Low Level Interrupts/Traps/Exceptions (non-TLB) Handling for ARC - * (included from entry-<isa>.S) - * - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/*------------------------------------------------------------------ - * Function ABI - *------------------------------------------------------------------ - * - * Arguments r0 - r7 - * Caller Saved Registers r0 - r12 - * Callee Saved Registers r13 - r25 - * Global Pointer (gp) r26 - * Frame Pointer (fp) r27 - * Stack Pointer (sp) r28 - * Branch link register (blink) r31 - *------------------------------------------------------------------ - */ - -;################### Special Sys Call Wrappers ########################## - -ENTRY(sys_clone_wrapper) - SAVE_CALLEE_SAVED_USER - bl @sys_clone - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b .Lret_from_system_call -END(sys_clone_wrapper) - -ENTRY(ret_from_fork) - ; when the forked child comes here from the __switch_to function - ; r0 has the last task pointer.
- ; put last task in scheduler queue - jl @schedule_tail - - ld r9, [sp, PT_status32] - brne r9, 0, 1f - - jl.d [r14] ; kernel thread entry point - mov r0, r13 ; (see PF_KTHREAD block in copy_thread) - -1: - ; Return to user space - ; 1. Any forked task (Reach here via BRne above) - ; 2. First ever init task (Reach here via return from JL above) - ; This is the historic "kernel_execve" use-case, to return to init - ; user mode, in a round about way since that is always done from - ; a kernel thread which is executed via JL above but always returns - ; out whenever kernel_execve (now inline do_fork()) is involved - b ret_from_exception -END(ret_from_fork) - -;################### Non TLB Exception Handling ############################# - -; --------------------------------------------- -; Instruction Error Exception Handler -; --------------------------------------------- - -ENTRY(instr_service) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_insterror_or_kprobe - b ret_from_exception -END(instr_service) - -; --------------------------------------------- -; Machine Check Exception Handler -; --------------------------------------------- - -ENTRY(EV_MachineCheck) - - EXCEPTION_PROLOGUE - - lr r2, [ecr] - lr r0, [efa] - mov r1, sp - - ; hardware auto-disables MMU, re-enable it to allow kernel vaddr - ; access for say stack unwinding of modules for crash dumps - lr r3, [ARC_REG_PID] - or r3, r3, MMU_ENABLE - sr r3, [ARC_REG_PID] - - lsr r3, r2, 8 - bmsk r3, r3, 7 - brne r3, ECR_C_MCHK_DUP_TLB, 1f - - bl do_tlb_overlap_fault - b ret_from_exception - -1: - ; DEAD END: can't do much, display Regs and HALT - SAVE_CALLEE_SAVED_USER - - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - j do_machine_check_fault - -END(EV_MachineCheck) - -; --------------------------------------------- -; Privilege Violation Exception Handler -; --------------------------------------------- -ENTRY(EV_PrivilegeV) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_privilege_fault - b ret_from_exception -END(EV_PrivilegeV) - -; --------------------------------------------- -; Extension Instruction Exception Handler -; --------------------------------------------- -ENTRY(EV_Extension) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_extension_fault - b ret_from_exception -END(EV_Extension) - -;################ Trap Handling (Syscall, Breakpoint) ################## - -; --------------------------------------------- -; syscall Tracing -; --------------------------------------------- -tracesys: - ; save EFA in case tracer wants the PC of traced task - ; using ERET won't work since next-PC has already committed - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 - st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address - - ; PRE Sys Call Ptrace hook - mov r0, sp ; pt_regs needed - bl @syscall_trace_entry - - ; Tracing code now returns the syscall num (orig or modif) - mov r8, r0 - - ; Do the Sys Call as we normally would. - ; Validate the Sys Call number - cmp r8, NR_syscalls - mov.hi r0, -ENOSYS - bhi tracesys_exit - - ; Restore the sys-call args. Mere invocation of the hook abv could have - ; clobbered them (since they are in scratch regs). 
The tracer could also - ; have deliberately changed the syscall args: r0-r7 - ld r0, [sp, PT_r0] - ld r1, [sp, PT_r1] - ld r2, [sp, PT_r2] - ld r3, [sp, PT_r3] - ld r4, [sp, PT_r4] - ld r5, [sp, PT_r5] - ld r6, [sp, PT_r6] - ld r7, [sp, PT_r7] - ld.as r9, [sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler - -tracesys_exit: - st r0, [sp, PT_r0] ; sys call return value in pt_regs - - ;POST Sys Call Ptrace Hook - bl @syscall_trace_exit - b ret_from_exception ; NOT ret_from_system_call as it saves r0, which - ; we'd already done before calling the post hook above - -; --------------------------------------------- -; Breakpoint TRAP -; --------------------------------------------- -trap_with_param: - mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc - mov r1, sp - - ; Save callee regs in case gdb wants to have a look - ; SP will grow up by size of CALLEE Reg-File - ; NOTE: clobbers r12 - SAVE_CALLEE_SAVED_USER - - ; save location of saved Callee Regs @ thread_struct->pc - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - ; Call the trap handler - bl do_non_swi_trap - - ; unwind stack to discard Callee saved Regs - DISCARD_CALLEE_SAVED_USER - - b ret_from_exception - -; --------------------------------------------- -; syscall TRAP -; ABI: (r0-r7) up to 8 args, (r8) syscall number -; --------------------------------------------- - -ENTRY(EV_Trap) - - EXCEPTION_PROLOGUE - - lr r12, [efa] - - FAKE_RET_FROM_EXCPN - - ;============ TRAP 1 : breakpoints - ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) - bmsk.f 0, r10, 7 - bnz trap_with_param - - ;============ TRAP (no param): syscall top level - - ; If syscall tracing ongoing, invoke pre-post-hooks - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys ; this never comes back - - ;============ Normal syscall case - - ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls - mov.hi r0, -ENOSYS - bhi .Lret_from_system_call - - ; Offset into the syscall_table and call handler - ld.as r9,[sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler - -.Lret_from_system_call: - - st r0, [sp, PT_r0] ; sys call return value in pt_regs - - ; fall through to ret_from_exception -END(EV_Trap) - -;############# Return from Intr/Excp/Trap (Linux Specifics) ############## -; -; If ret to user mode do we need to handle signals, schedule() et al. - -ENTRY(ret_from_exception) - - ; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32 - ld r8, [sp, PT_status32] ; returning to User/Kernel Mode - - bbit0 r8, STATUS_U_BIT, resume_kernel_mode - - ; Before returning to User mode check-for-and-complete any pending work - ; such as rescheduling/signal-delivery etc. -resume_user_mode_begin: - - ; Disable IRQs to ensure that chk for pending work itself is atomic - ; (and we don't end up missing a NEED_RESCHED/SIGPENDING due to an - ; interim IRQ).
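(The slow-path bookkeeping below is easier to follow as a C-flavoured loop. This is a sketch under the assumptions spelled out in the comments: the TIF_*/_TIF_* flags are the real ones tested by the assembly, while irqs_disable(), irqs_enable() and the regs variable are illustrative stand-ins.)

    /* Hedged sketch of the resume_user_mode_begin loop. */
    for (;;) {
            irqs_disable();                 /* make the flag check atomic */
            unsigned long flags = current_thread_info()->flags;
            if (!(flags & _TIF_WORK_MASK))
                    break;                  /* fast path: restore regs */
            if (flags & _TIF_NEED_RESCHED) {
                    schedule();             /* then re-check from the top */
                    continue;
            }
            irqs_enable();
            if (flags & _TIF_SIGPENDING)
                    do_signal(regs);        /* callee regs saved around it */
            else if (flags & _TIF_NOTIFY_RESUME)
                    do_notify_resume(regs);
    }
    /* falls through to .Lrestore_regs */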
- IRQ_DISABLE r10 - - ; Fast Path return to user mode if no pending work - GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, _TIF_WORK_MASK - bz .Lrestore_regs - - ; --- (Slow Path #1) task preemption --- - bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals - mov blink, resume_user_mode_begin ; tail-call to U mode ret chks - j @schedule ; BTST+Bnz causes relo error in link - -.Lchk_pend_signals: - IRQ_ENABLE r10 - - ; --- (Slow Path #2) pending signal --- - mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() - - GET_CURR_THR_INFO_FLAGS r9 - bbit0 r9, TIF_SIGPENDING, .Lchk_notify_resume - - ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs - ; in pt_reg since the "C" ABI (kernel code) will automatically - ; save/restore callee-saved regs. - ; - ; However, here we need to explicitly save callee regs because - ; (i) If this signal causes coredump - full regfile needed - ; (ii) If signal is SIGTRAP/SIGSTOP, task is being traced thus - ; tracer might call PEEKUSR(CALLEE reg) - ; - ; NOTE: SP will grow up by size of CALLEE Reg-File - SAVE_CALLEE_SAVED_USER ; clobbers r12 - - ; save location of saved Callee Regs @ thread_struct->callee - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - bl @do_signal - - ; Ideally we want to discard the Callee reg above, however if this was - ; a tracing signal, tracer could have done a POKEUSR(CALLEE reg) - RESTORE_CALLEE_SAVED_USER - - b resume_user_mode_begin ; loop back to start of U mode ret - - ; --- (Slow Path #3) notify_resume --- -.Lchk_notify_resume: - btst r9, TIF_NOTIFY_RESUME - blnz @do_notify_resume - b resume_user_mode_begin ; unconditionally back to U mode ret chks - ; for single exit point from this block - -resume_kernel_mode: - - ; Disable Interrupts from this point on - ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() - ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe - IRQ_DISABLE r9 - -#ifdef CONFIG_PREEMPT - - ; Can't preempt if preemption disabled - GET_CURR_THR_INFO_FROM_SP r10 - ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] - brne r8, 0, .Lrestore_regs - - ; check if this task's NEED_RESCHED flag set - ld r9, [r10, THREAD_INFO_FLAGS] - bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs - - ; Invoke PREEMPTION - jl preempt_schedule_irq - - ; preempt_schedule_irq() always returns with IRQ disabled -#endif - - b .Lrestore_regs - -##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-.S - diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S deleted file mode 100644 index 6f41265f62505cf6850c470a3dc09db1b7066b0c..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/head.S +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ARC CPU startup Code - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) - * - * Vineetg: Dec 2007 - * -Check if we are running on Simulator or on real hardware - * to skip certain things during boot on simulator - */ - -#include -#include -#include -#include -#include -#include - -.macro CPU_EARLY_SETUP - - ; Set up the Vector Table (in case an exception happens in early boot) - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] - - ; Disable I-cache/D-cache if kernel so configured - lr r5, [ARC_REG_IC_BCR] - breq r5, 0, 1f ; I$ doesn't exist - lr r5, [ARC_REG_IC_CTRL] -#ifdef CONFIG_ARC_HAS_ICACHE - bclr r5, r5, 0 ; 0 - Enable, 1 - Disable -#else - bset r5, r5, 0 ; I$ exists, but is not used -#endif - sr r5, [ARC_REG_IC_CTRL] - -1: - lr r5, [ARC_REG_DC_BCR] - breq r5, 0, 1f ; D$ doesn't exist - lr r5, [ARC_REG_DC_CTRL] - bclr r5, r5, 6 ; Invalidate (discard w/o wback) -#ifdef CONFIG_ARC_HAS_DCACHE - bclr r5, r5, 0 ; Enable (+Inv) -#else - bset r5, r5, 0 ; Disable (+Inv) -#endif - sr r5, [ARC_REG_DC_CTRL] - -1: - -#ifdef CONFIG_ISA_ARCV2 - ; Unaligned access is disabled at reset, so re-enable early as - ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access - ; by default - lr r5, [status32] -#ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS - bset r5, r5, STATUS_AD_BIT -#else - ; Although disabled at reset, bootloader might have enabled it - bclr r5, r5, STATUS_AD_BIT -#endif - kflag r5 -#endif -.endm - - .section .init.text, "ax",@progbits - -;---------------------------------------------------------------- -; Default Reset Handler (jumped into from Reset vector) -; - Don't clobber r0,r1,r2 as they might have u-boot provided args -; - Platforms can override this weak version if needed -;---------------------------------------------------------------- -WEAK(res_service) - j stext -END(res_service) - -;---------------------------------------------------------------- -; Kernel Entry point -;---------------------------------------------------------------- -ENTRY(stext) - - CPU_EARLY_SETUP - -#ifdef CONFIG_SMP - GET_CPU_ID r5 - cmp r5, 0 - mov.nz r0, r5 - bz .Lmaster_proceed - - ; Non-Masters wait for Master to boot enough and bring them up - ; when they resume, tail-call to entry point - mov blink, @first_lines_of_secondary - j arc_platform_smp_wait_to_boot - -.Lmaster_proceed: -#endif - - ; Clear BSS before updating any globals - ; XXX: use ZOL here - mov r5, __bss_start - sub r6, __bss_stop, r5 - lsr.f lp_count, r6, 2 - lpnz 1f - st.ab 0, [r5, 4] -1: - - ; Uboot - kernel ABI - ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 - ; r1 = magic number (always zero as of now) - ; r2 = pointer to uboot provided cmdline or external DTB in mem - ; These are handled later in handle_uboot_args() - st r0, [@uboot_tag] - st r1, [@uboot_magic] - st r2, [@uboot_arg] - - ; setup "current" tsk and optionally cache it in dedicated r25 - mov r9, @init_task - SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch - - ; setup stack (fp, sp) - mov fp, 0 - - ; tsk->thread_info is really a PAGE, whose bottom hosts the stack - GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base (output) - - j start_kernel ; "C" entry point -END(stext) - -#ifdef CONFIG_SMP -;---------------------------------------------------------------- -; First lines of code run by secondary before jumping to 'C' -;---------------------------------------------------------------- - .section .text, "ax",@progbits -ENTRY(first_lines_of_secondary) - - ; setup per-cpu idle task as "current" on this CPU - ld r0, [@secondary_idle_tsk] - SET_CURR_TASK_ON_CPU r0, r1 - - ; setup stack (fp, sp) - mov
fp, 0 - - ; set its stack base to tsk->thread_info bottom - GET_TSK_STACK_BASE r0, sp - - j start_kernel_secondary -END(first_lines_of_secondary) -#endif diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S deleted file mode 100644 index 6c693a9d29b6d79c3f09af1bb3176b41103e7b8f..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/vmlinux.lds.S +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include -#include -#include -#include - -OUTPUT_ARCH(arc) -ENTRY(res_service) - -#ifdef CONFIG_CPU_BIG_ENDIAN -jiffies = jiffies_64 + 4; -#else -jiffies = jiffies_64; -#endif - -SECTIONS -{ - /* - * ICCM starts at 0x8000_0000. So if kernel is relocated to some other - * address, make sure peripheral at 0x8z doesn't clash with ICCM - * Essentially vector is also in ICCM. - */ - - . = CONFIG_LINUX_LINK_BASE; - - _int_vec_base_lds = .; - .vector : { - *(.vector) - . = ALIGN(PAGE_SIZE); - } - -#ifdef CONFIG_ARC_HAS_ICCM - .text.arcfp : { - *(.text.arcfp) - . = ALIGN(CONFIG_ARC_ICCM_SZ * 1024); - } -#endif - - /* - * The reason for having a separate subsection .init.ramfs is to - * prevent objdump from including it in kernel dumps - * - * Reason for having .init.ramfs above .init is to make sure that the - * binary blob is tucked away to one side, reducing the displacement - * between .init.text and .text, avoiding any possible relocation - * errors because of calls from .init.text to .text - * Yes such calls do exist. e.g. - * decompress_inflate.c:gunzip( ) -> zlib_inflate_workspace( ) - */ - - __init_begin = .; - - .init.ramfs : { INIT_RAM_FS } - - . = ALIGN(PAGE_SIZE); - _stext = .; - - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(L1_CACHE_BYTES) - - /* INIT_DATA_SECTION open-coded: special INIT_RAM_FS handling */ - .init.data : { - INIT_DATA - INIT_SETUP(L1_CACHE_BYTES) - INIT_CALLS - CON_INITCALL - } - - .init.arch.info : { - __arch_info_begin = .; - *(.arch.info.init) - __arch_info_end = .; - } - - PERCPU_SECTION(L1_CACHE_BYTES) - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : { - _text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } - EXCEPTION_TABLE(L1_CACHE_BYTES) - _etext = .; - - _sdata = .; - RO_DATA_SECTION(PAGE_SIZE) - - /* - * 1. this is .data essentially - * 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned - */ - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - - _edata = .; - - BSS_SECTION(4, 4, 4) - -#ifdef CONFIG_ARC_DW2_UNWIND - . = ALIGN(PAGE_SIZE); - .eh_frame : { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#else - /DISCARD/ : { *(.eh_frame) } -#endif - - NOTES - - . = ALIGN(PAGE_SIZE); - _end = . ; - - STABS_DEBUG - DISCARDS - - .arcextmap 0 : { - *(.gnu.linkonce.arcextmap.*) - *(.arcextmap.*) - } - -#ifndef CONFIG_DEBUG_INFO - /DISCARD/ : { *(.debug_frame) } - /DISCARD/ : { *(.debug_aranges) } - /DISCARD/ : { *(.debug_pubnames) } - /DISCARD/ : { *(.debug_info) } - /DISCARD/ : { *(.debug_abbrev) } - /DISCARD/ : { *(.debug_line) } - /DISCARD/ : { *(.debug_str) } - /DISCARD/ : { *(.debug_loc) } - /DISCARD/ : { *(.debug_macinfo) } - /DISCARD/ : { *(.debug_ranges) } -#endif - -#ifdef CONFIG_ARC_HAS_DCCM - . = CONFIG_ARC_DCCM_BASE; - __arc_dccm_base = .; - .data.arcfp : { - *(.data.arcfp) - } - .
= ALIGN(CONFIG_ARC_DCCM_SZ * 1024); -#endif -} diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S deleted file mode 100644 index d6dc5e9bc49bfe1d9ae3c77f040f4b85a08afd33..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcmp.S +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -#ifdef __LITTLE_ENDIAN__ -#define WORD2 r2 -#define SHIFT r3 -#else /* BIG ENDIAN */ -#define WORD2 r3 -#define SHIFT r2 -#endif - -ENTRY_CFI(memcmp) - or r12,r0,r1 - asl_s r12,r12,30 - sub r3,r2,1 - brls r2,r12,.Lbytewise - ld r4,[r0,0] - ld r5,[r1,0] - lsr.f lp_count,r3,3 -#ifdef CONFIG_ISA_ARCV2 - /* In ARCv2 a branch can't be the last instruction in a zero overhead - * loop. - * So we move the branch to the start of the loop, duplicate it - * after the end, and set up r12 so that the branch isn't taken - * initially. - */ - mov_s r12,WORD2 - lpne .Loop_end - brne WORD2,r12,.Lodd - ld WORD2,[r0,4] -#else - lpne .Loop_end - ld_s WORD2,[r0,4] -#endif - ld_s r12,[r1,4] - brne r4,r5,.Leven - ld.a r4,[r0,8] - ld.a r5,[r1,8] -#ifdef CONFIG_ISA_ARCV2 -.Loop_end: - brne WORD2,r12,.Lodd -#else - brne WORD2,r12,.Lodd -.Loop_end: -#endif - asl_s SHIFT,SHIFT,3 - bhs_s .Last_cmp - brne r4,r5,.Leven - ld r4,[r0,4] - ld r5,[r1,4] -#ifdef __LITTLE_ENDIAN__ - nop_s - ; one more load latency cycle -.Last_cmp: - xor r0,r4,r5 - bset r0,r0,SHIFT - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - b.d .Leven_cmp - and r1,r1,24 -.Leven: - xor r0,r4,r5 - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - ; slow track insn - and r1,r1,24 -.Leven_cmp: - asl r2,r4,r1 - asl r12,r5,r1 - lsr_s r2,r2,1 - lsr_s r12,r12,1 - j_s.d [blink] - sub r0,r2,r12 - .balign 4 -.Lodd: - xor r0,WORD2,r12 - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - ; slow track insn - and r1,r1,24 - asl_s r2,r2,r1 - asl_s r12,r12,r1 - lsr_s r2,r2,1 - lsr_s r12,r12,1 - j_s.d [blink] - sub r0,r2,r12 -#else /* BIG ENDIAN */ -.Last_cmp: - neg_s SHIFT,SHIFT - lsr r4,r4,SHIFT - lsr r5,r5,SHIFT - ; slow track insn -.Leven: - sub.f r0,r4,r5 - mov.ne r0,1 - j_s.d [blink] - bset.cs r0,r0,31 -.Lodd: - cmp_s WORD2,r12 - mov_s r0,1 - j_s.d [blink] - bset.cs r0,r0,31 -#endif /* ENDIAN */ - .balign 4 -.Lbytewise: - breq r2,0,.Lnil - ldb r4,[r0,0] - ldb r5,[r1,0] - lsr.f lp_count,r3 -#ifdef CONFIG_ISA_ARCV2 - mov r12,r3 - lpne .Lbyte_end - brne r3,r12,.Lbyte_odd -#else - lpne .Lbyte_end -#endif - ldb_s r3,[r0,1] - ldb r12,[r1,1] - brne r4,r5,.Lbyte_even - ldb.a r4,[r0,2] - ldb.a r5,[r1,2] -#ifdef CONFIG_ISA_ARCV2 -.Lbyte_end: - brne r3,r12,.Lbyte_odd -#else - brne r3,r12,.Lbyte_odd -.Lbyte_end: -#endif - bcc .Lbyte_even - brne r4,r5,.Lbyte_even - ldb_s r3,[r0,1] - ldb_s r12,[r1,1] -.Lbyte_odd: - j_s.d [blink] - sub r0,r3,r12 -.Lbyte_even: - j_s.d [blink] - sub r0,r4,r5 -.Lnil: - j_s.d [blink] - mov r0,0 -END_CFI(memcmp) diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S deleted file mode 100644 index f2e239e219b2aad6c5cb8014d1c55551bf9fdecf..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-700.S +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) - */ - -#include - -ENTRY_CFI(memcpy) - or r3,r0,r1 - asl_s r3,r3,30 - mov_s r5,r0 - brls.d r2,r3,.Lcopy_bytewise - sub.f r3,r2,1 - ld_s r12,[r1,0] - asr.f lp_count,r3,3 - bbit0.d r3,2,.Lnox4 - bmsk_s r2,r2,1 - st.ab r12,[r5,4] - ld.a r12,[r1,4] -.Lnox4: - lppnz .Lendloop - ld_s r3,[r1,4] - st.ab r12,[r5,4] - ld.a r12,[r1,8] - st.ab r3,[r5,4] -.Lendloop: - breq r2,0,.Last_store - ld r3,[r5,0] -#ifdef __LITTLE_ENDIAN__ - add3 r2,-1,r2 - ; uses long immediate - xor_s r12,r12,r3 - bmsk r12,r12,r2 - xor_s r12,r12,r3 -#else /* BIG ENDIAN */ - sub3 r2,31,r2 - ; uses long immediate - xor_s r3,r3,r12 - bmsk r3,r3,r2 - xor_s r12,r12,r3 -#endif /* ENDIAN */ -.Last_store: - j_s.d [blink] - st r12,[r5,0] - - .balign 4 -.Lcopy_bytewise: - jcs [blink] - ldb_s r12,[r1,0] - lsr.f lp_count,r3 - bhs_s .Lnox1 - stb.ab r12,[r5,1] - ldb.a r12,[r1,1] -.Lnox1: - lppnz .Lendbloop - ldb_s r3,[r1,1] - stb.ab r12,[r5,1] - ldb.a r12,[r1,2] - stb.ab r3,[r5,1] -.Lendbloop: - j_s.d [blink] - stb r12,[r5,0] -END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S deleted file mode 100644 index 28993a73fdde637a3c0fb7c8f58a99d1ceb61120..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-archs-unaligned.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * ARCv2 memcpy implementation optimized for unaligned memory access using. - * - * Copyright (C) 2019 Synopsys - * Author: Eugeniy Paltsev - */ - -#include - -#ifdef CONFIG_ARC_HAS_LL64 -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY_CFI(memcpy) - mov r3, r0 ; don;t clobber ret val - - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1, 1] - stb.ab r5, [r3, 1] -.Lcopyremainingbytes: - - j [blink] -END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S deleted file mode 100644 index 0051a84f60c0553bad763d53b1d86f331e6da491..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-archs.S +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) - */ - -#include - -#ifdef __LITTLE_ENDIAN__ -# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM -# define MERGE_2(RX,RY,IMM) -# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM -#else -# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 -#endif - -#ifdef CONFIG_ARC_HAS_LL64 -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY_CFI(memcpy) - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don;t clobber ret val - -;;; if size <= 8 - cmp r2, 8 - bls.d @.Lsmallchunk - mov.f lp_count, r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - ldb.ab r5, [r1,1] - sub r2, r2, 1 - stb.ab r5, [r3,1] -.Laligndestination: - -;;; Check the alignment of the source - and.f r4, r1, 0x03 - bnz.d @.Lsourceunaligned - -;;; CASE 0: Both source and destination are 32bit aligned -;;; Convert len to Dwords, unfold x4 - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes -.Lsmallchunk: - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1,1] - stb.ab r5, [r3,1] -.Lcopyremainingbytes: - - j [blink] -;;; END CASE 0 - -.Lsourceunaligned: - cmp r4, 2 - beq.d @.LunalignedOffby2 - sub r2, r2, 1 - - bhi.d @.LunalignedOffby3 - ldb.ab r5, [r1, 1] - -;;; CASE 1: The source is unaligned, off by 1 - ;; Hence I need to read 1 byte for a 16bit alignment - ;; and 2bytes to reach 32bit alignment - ldh.ab r6, [r1, 2] - sub r2, r2, 2 - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 - MERGE_1 (r6, r6, 8) - MERGE_2 (r5, r5, 24) - or r5, r5, r6 - - ;; Both src and dst are aligned - lpnz @.Lcopy8bytes_1 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 24) - or r7, r7, r5 - SHIFT_2 (r5, r6, 8) - - SHIFT_1 (r9, r8, 24) - or r9, r9, r5 - SHIFT_2 (r5, r8, 8) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_1: - - ;; Write back the remaining 16bits - EXTRACT_1 (r6, r5, 16) - sth.ab r6, [r3, 2] - ;; Write back the remaining 8bits - EXTRACT_2 (r5, r5, 16) - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_1 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_1: - j [blink] - -.LunalignedOffby2: -;;; CASE 2: The source is unaligned, off by 2 - ldh.ab r5, [r1, 2] - sub r2, r2, 1 - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.nz r5, r5, 16 -#endif - lpnz @.Lcopy8bytes_2 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 16) - or r7, r7, r5 - SHIFT_2 (r5, r6, 16) - - SHIFT_1 (r9, r8, 16) - or r9, r9, r5 - SHIFT_2 (r5, r8, 16) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_2: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 16 -#endif - sth.ab r5, 
[r3, 2] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_2 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_2: - j [blink] - -.LunalignedOffby3: -;;; CASE 3: The source is unaligned, off by 3 -;;; Hence, I need to read 1byte for achieve the 32bit alignment - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.ne r5, r5, 24 -#endif - lpnz @.Lcopy8bytes_3 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 8) - or r7, r7, r5 - SHIFT_2 (r5, r6, 24) - - SHIFT_1 (r9, r8, 8) - or r9, r9, r5 - SHIFT_2 (r5, r8, 24) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_3: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 24 -#endif - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_3 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_3: - j [blink] - -END_CFI(memcpy) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S deleted file mode 100644 index d2e09fece5bcffdb556c02bbd7d91781e869a229..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memset-archs.S +++ /dev/null @@ -1,143 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - */ - -#include -#include - -/* - * The memset implementation below is optimized to use prefetchw and prealloc - * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) - * If you want to implement optimized memset for other possible L1 data cache - * line lengths (32B and 128B) you should rewrite code carefully checking - * we don't call any prefetchw/prealloc instruction for L1 cache lines which - * don't belongs to memset area. - */ - -#if L1_CACHE_SHIFT == 6 - -.macro PREALLOC_INSTR reg, off - prealloc [\reg, \off] -.endm - -.macro PREFETCHW_INSTR reg, off - prefetchw [\reg, \off] -.endm - -#else - -.macro PREALLOC_INSTR reg, off -.endm - -.macro PREFETCHW_INSTR reg, off -.endm - -#endif - -ENTRY_CFI(memset) - PREFETCHW_INSTR r0, 0 ; Prefetch the first write location - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don't clobber ret val - -;;; if length < 8 - brls.d.nt r2, 8, .Lsmallchunk - mov.f lp_count,r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - stb.ab r1, [r3,1] - sub r2, r2, 1 -.Laligndestination: - -;;; Destination is aligned - and r1, r1, 0xFF - asl r4, r1, 8 - or r4, r4, r1 - asl r5, r4, 16 - or r5, r5, r4 - mov r4, r5 - - sub3 lp_count, r2, 8 - cmp r2, 64 - bmsk.hi r2, r2, 5 - mov.ls lp_count, 0 - add3.hi r2, r2, 8 - -;;; Convert len to Dwords, unfold x8 - lsr.f lp_count, lp_count, 6 - - lpnz @.Lset64bytes - ;; LOOP START - PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching - -#ifdef CONFIG_ARC_HAS_LL64 - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset64bytes: - - lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes - lpnz .Lset32bytes - ;; LOOP START -#ifdef CONFIG_ARC_HAS_LL64 - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab 
r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset32bytes: - - and.f lp_count, r2, 0x1F ;Last remaining 31 bytes -.Lsmallchunk: - lpnz .Lcopy3bytes - ;; LOOP START - stb.ab r1, [r3, 1] -.Lcopy3bytes: - - j [blink] - -END_CFI(memset) - -ENTRY_CFI(memzero) - ; adjust bzero args to memset args - mov r2, r1 - b.d memset ;tail call so need to tinker with blink - mov r1, 0 -END_CFI(memzero) diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S deleted file mode 100644 index 9f35960da114182706158037dff960f400088d4a..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memset.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ - -ENTRY_CFI(memset) - mov_s r4,r0 - or r12,r0,r2 - bmsk.f r12,r12,1 - extb_s r1,r1 - asl r3,r1,8 - beq.d .Laligned - or_s r1,r1,r3 - brls r2,SMALL,.Ltiny - add r3,r2,r0 - stb r1,[r3,-1] - bclr_s r3,r3,0 - stw r1,[r3,-2] - bmsk.f r12,r0,1 - add_s r2,r2,r12 - sub.ne r2,r2,4 - stb.ab r1,[r4,1] - and r4,r4,-2 - stw.ab r1,[r4,2] - and r4,r4,-4 -.Laligned: ; This code address should be aligned for speed. - asl r3,r1,16 - lsr.f lp_count,r2,2 - or_s r1,r1,r3 - lpne .Loop_end - st.ab r1,[r4,4] -.Loop_end: - j_s [blink] - - .balign 4 -.Ltiny: - mov.f lp_count,r2 - lpne .Ltiny_end - stb.ab r1,[r4,1] -.Ltiny_end: - j_s [blink] -END_CFI(memset) - -; memzero: @r0 = mem, @r1 = size_t -; memset: @r0 = mem, @r1 = char, @r2 = size_t - -ENTRY_CFI(memzero) - ; adjust bzero args to memset args - mov r2, r1 - mov r1, 0 - b memset ;tail call so need to tinker with blink -END_CFI(memzero) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S deleted file mode 100644 index d52e2833f9ed73ad702870bf400c16b26c0b5929..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strchr-700.S +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* ARC700 has a relatively long pipeline and branch prediction, so we want - to avoid branches that are hard to predict. On the other hand, the - presence of the norm instruction makes it easier to operate on whole - words branch-free. */ - -#include - -ENTRY_CFI(strchr) - extb_s r1,r1 - asl r5,r1,8 - bmsk r2,r0,1 - or r5,r5,r1 - mov_s r3,0x01010101 - breq.d r2,r0,.Laligned - asl r4,r5,16 - sub_s r0,r0,r2 - asl r7,r2,3 - ld_s r2,[r0] -#ifdef __LITTLE_ENDIAN__ - asl r7,r3,r7 -#else - lsr r7,r3,r7 -#endif - or r5,r5,r4 - ror r4,r3 - sub r12,r2,r7 - bic_s r12,r12,r2 - and r12,r12,r4 - brne.d r12,0,.Lfound0_ua - xor r6,r2,r5 - ld.a r2,[r0,4] - sub r12,r6,r7 - bic r12,r12,r6 -#ifdef __LITTLE_ENDIAN__ - and r7,r12,r4 - breq r7,0,.Loop ; For speed, we want this branch to be unaligned. - b .Lfound_char ; Likewise this one. -#else - and r12,r12,r4 - breq r12,0,.Loop ; For speed, we want this branch to be unaligned. - lsr_s r12,r12,7 - bic r2,r7,r6 - b.d .Lfound_char_b - and_s r2,r2,r12 -#endif -; /* We require this code address to be unaligned for speed... */ -.Laligned: - ld_s r2,[r0] - or r5,r5,r4 - ror r4,r3 -; /* ... so that this code address is aligned, for itself and ... 
*/ -.Loop: - sub r12,r2,r3 - bic_s r12,r12,r2 - and r12,r12,r4 - brne.d r12,0,.Lfound0 - xor r6,r2,r5 - ld.a r2,[r0,4] - sub r12,r6,r3 - bic r12,r12,r6 - and r7,r12,r4 - breq r7,0,.Loop /* ... so that this branch is unaligned. */ - ; Found searched-for character. r0 has already advanced to next word. -#ifdef __LITTLE_ENDIAN__ -/* We only need the information about the first matching byte - (i.e. the least significant matching byte) to be exact, - hence there is no problem with carry effects. */ -.Lfound_char: - sub r3,r7,1 - bic r3,r3,r7 - norm r2,r3 - sub_s r0,r0,1 - asr_s r2,r2,3 - j.d [blink] - sub_s r0,r0,r2 - - .balign 4 -.Lfound0_ua: - mov r3,r7 -.Lfound0: - sub r3,r6,r3 - bic r3,r3,r6 - and r2,r3,r4 - or_s r12,r12,r2 - sub_s r3,r12,1 - bic_s r3,r3,r12 - norm r3,r3 - add_s r0,r0,3 - asr_s r12,r3,3 - asl.f 0,r2,r3 - sub_s r0,r0,r12 - j_s.d [blink] - mov.pl r0,0 -#else /* BIG ENDIAN */ -.Lfound_char: - lsr r7,r7,7 - - bic r2,r7,r6 -.Lfound_char_b: - norm r2,r2 - sub_s r0,r0,4 - asr_s r2,r2,3 - j.d [blink] - add_s r0,r0,r2 - -.Lfound0_ua: - mov_s r3,r7 -.Lfound0: - asl_s r2,r2,7 - or r7,r6,r4 - bic_s r12,r12,r2 - sub r2,r7,r3 - or r2,r2,r6 - bic r12,r2,r12 - bic.f r3,r4,r12 - norm r3,r3 - - add.pl r3,r3,1 - asr_s r12,r3,3 - asl.f 0,r2,r3 - add_s r0,r0,r12 - j_s.d [blink] - mov.mi r0,0 -#endif /* ENDIAN */ -END_CFI(strchr) diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S deleted file mode 100644 index 7cffb37174408b2a43724c9892b80104c6c6a3b5..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcmp-archs.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -ENTRY_CFI(strcmp) - or r2, r0, r1 - bmsk_s r2, r2, 1 - brne r2, 0, @.Lcharloop - -;;; s1 and s2 are word aligned - ld.ab r2, [r0, 4] - - mov_s r12, 0x01010101 - ror r11, r12 - .align 4 -.LwordLoop: - ld.ab r3, [r1, 4] - ;; Detect NULL char in str1 - sub r4, r2, r12 - ld.ab r5, [r0, 4] - bic r4, r4, r2 - and r4, r4, r11 - brne.d.nt r4, 0, .LfoundNULL - ;; Check if the read locations are the same - cmp r2, r3 - beq.d .LwordLoop - mov.eq r2, r5 - - ;; A match is found, spot it out -#ifdef __LITTLE_ENDIAN__ - swape r3, r3 - mov_s r0, 1 - swape r2, r2 -#else - mov_s r0, 1 -#endif - cmp_s r2, r3 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.LfoundNULL: -#ifdef __BIG_ENDIAN__ - swape r4, r4 - swape r2, r2 - swape r3, r3 -#endif - ;; Find null byte - ffs r0, r4 - bmsk r2, r2, r0 - bmsk r3, r3, r0 - swape r2, r2 - swape r3, r3 - ;; make the return value - sub.f r0, r2, r3 - mov.hi r0, 1 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.Lcharloop: - ldb.ab r2, [r0, 1] - ldb.ab r3, [r1, 1] - nop - breq r2, 0, .Lcmpend - breq r2, r3, .Lcharloop - - .align 4 -.Lcmpend: - j_s.d [blink] - sub r0, r2, r3 -END_CFI(strcmp) diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S deleted file mode 100644 index b20c98fb3b2382003e87a1fb395cc6abfe65e642..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcmp.S +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* This is optimized primarily for the ARC700. 
- It would be possible to speed up the loops by one cycle / word - respectively one cycle / byte by forcing double source 1 alignment, unrolling - by a factor of two, and speculatively loading the second word / byte of - source 1; however, that would increase the overhead for loop setup / finish, - and strcmp might often terminate early. */ - -#include - -ENTRY_CFI(strcmp) - or r2,r0,r1 - bmsk_s r2,r2,1 - brne r2,0,.Lcharloop - mov_s r12,0x01010101 - ror r5,r12 -.Lwordloop: - ld.ab r2,[r0,4] - ld.ab r3,[r1,4] - nop_s - sub r4,r2,r12 - bic r4,r4,r2 - and r4,r4,r5 - brne r4,0,.Lfound0 - breq r2,r3,.Lwordloop -#ifdef __LITTLE_ENDIAN__ - xor r0,r2,r3 ; mask for difference - sub_s r1,r0,1 - bic_s r0,r0,r1 ; mask for least significant difference bit - sub r1,r5,r0 - xor r0,r5,r1 ; mask for least significant difference byte - and_s r2,r2,r0 - and_s r3,r3,r0 -#endif /* LITTLE ENDIAN */ - cmp_s r2,r3 - mov_s r0,1 - j_s.d [blink] - bset.lo r0,r0,31 - - .balign 4 -#ifdef __LITTLE_ENDIAN__ -.Lfound0: - xor r0,r2,r3 ; mask for difference - or r0,r0,r4 ; or in zero indicator - sub_s r1,r0,1 - bic_s r0,r0,r1 ; mask for least significant difference bit - sub r1,r5,r0 - xor r0,r5,r1 ; mask for least significant difference byte - and_s r2,r2,r0 - and_s r3,r3,r0 - sub.f r0,r2,r3 - mov.hi r0,1 - j_s.d [blink] - bset.lo r0,r0,31 -#else /* BIG ENDIAN */ - /* The zero-detection above can mis-detect 0x01 bytes as zeroes - because of carry-propagation from a less significant zero byte. - We can compensate for this by checking that bit0 is zero. - This compensation is not necessary in the step where we - get a low estimate for r2, because in any affected bytes - we already have 0x00 or 0x01, which will remain unchanged - when bit 7 is cleared. */ - .balign 4 -.Lfound0: - lsr r0,r4,8 - lsr_s r1,r2 - bic_s r2,r2,r0 ; get low estimate for r2 and get ... - bic_s r0,r0,r1 ; - or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... - cmp_s r3,r2 ; ... be independent of trailing garbage - or_s r2,r2,r0 ; likewise for r3 > r2 - bic_s r3,r3,r0 - rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 - cmp_s r2,r3 - j_s.d [blink] - bset.lo r0,r0,31 -#endif /* ENDIAN */ - - .balign 4 -.Lcharloop: - ldb.ab r2,[r0,1] - ldb.ab r3,[r1,1] - nop_s - breq r2,0,.Lcmpend - breq r2,r3,.Lcharloop -.Lcmpend: - j_s.d [blink] - sub r0,r2,r3 -END_CFI(strcmp) diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S deleted file mode 100644 index 6e2294d13e2f1f9105b6524c7b52950e20ed906e..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcpy-700.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* If dst and src are 4 byte aligned, copy 8 bytes at a time. - If the src is 4, but not 8 byte aligned, we first read 4 bytes to get - it 8 byte aligned. Thus, we can do a little read-ahead, without - dereferencing a cache line that we should not touch. - Note that short and long instructions have been scheduled to avoid - branch stalls. - The beq_s to r3z could be made unaligned & long to avoid a stall - there, but it is not likely to be taken often, and it - would also be likely to cost an unaligned mispredict at the next call.
*/ - -#include - -ENTRY_CFI(strcpy) - or r2,r0,r1 - bmsk_s r2,r2,1 - brne.d r2,0,charloop - mov_s r10,r0 - ld_s r3,[r1,0] - mov r8,0x01010101 - bbit0.d r1,2,loop_start - ror r12,r8 - sub r2,r3,r8 - bic_s r2,r2,r3 - tst_s r2,r12 - bne r3z - mov_s r4,r3 - .balign 4 -loop: - ld.a r3,[r1,4] - st.ab r4,[r10,4] -loop_start: - ld.a r4,[r1,4] - sub r2,r3,r8 - bic_s r2,r2,r3 - tst_s r2,r12 - bne_s r3z - st.ab r3,[r10,4] - sub r2,r4,r8 - bic r2,r2,r4 - tst r2,r12 - beq loop - mov_s r3,r4 -#ifdef __LITTLE_ENDIAN__ -r3z: bmsk.f r1,r3,7 - lsr_s r3,r3,8 -#else -r3z: lsr.f r1,r3,24 - asl_s r3,r3,8 -#endif - bne.d r3z - stb.ab r1,[r10,1] - j_s [blink] - - .balign 4 -charloop: - ldb.ab r3,[r1,1] - - - brne.d r3,0,charloop - stb.ab r3,[r10,1] - j [blink] -END_CFI(strcpy) diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S deleted file mode 100644 index dae428ceb87af8449f1ad1e7bedcd8365cd8a64d..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strlen.S +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -ENTRY_CFI(strlen) - or r3,r0,7 - ld r2,[r3,-7] - ld.a r6,[r3,-3] - mov r4,0x01010101 - ; uses long immediate -#ifdef __LITTLE_ENDIAN__ - asl_s r1,r0,3 - btst_s r0,2 - asl r7,r4,r1 - ror r5,r4 - sub r1,r2,r7 - bic_s r1,r1,r2 - mov.eq r7,r4 - sub r12,r6,r7 - bic r12,r12,r6 - or.eq r12,r12,r1 - and r12,r12,r5 - brne r12,0,.Learly_end -#else /* BIG ENDIAN */ - ror r5,r4 - btst_s r0,2 - mov_s r1,31 - sub3 r7,r1,r0 - sub r1,r2,r4 - bic_s r1,r1,r2 - bmsk r1,r1,r7 - sub r12,r6,r4 - bic r12,r12,r6 - bmsk.ne r12,r12,r7 - or.eq r12,r12,r1 - and r12,r12,r5 - brne r12,0,.Learly_end -#endif /* ENDIAN */ - -.Loop: - ld_s r2,[r3,4] - ld.a r6,[r3,8] - ; stall for load result - sub r1,r2,r4 - bic_s r1,r1,r2 - sub r12,r6,r4 - bic r12,r12,r6 - or r12,r12,r1 - and r12,r12,r5 - breq r12,0,.Loop -.Lend: - and.f r1,r1,r5 - sub.ne r3,r3,4 - mov.eq r1,r12 -#ifdef __LITTLE_ENDIAN__ - sub_s r2,r1,1 - bic_s r2,r2,r1 - norm r1,r2 - sub_s r0,r0,3 - lsr_s r1,r1,3 - sub r0,r3,r0 - j_s.d [blink] - sub r0,r0,r1 -#else /* BIG ENDIAN */ - lsr_s r1,r1,7 - mov.eq r2,r6 - bic_s r1,r1,r2 - norm r1,r1 - sub r0,r3,r0 - lsr_s r1,r1,3 - j_s.d [blink] - add r0,r0,r1 -#endif /* ENDIAN */ -.Learly_end: - b.d .Lend - sub_s.ne r1,r1,r1 -END_CFI(strlen) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S deleted file mode 100644 index c55d95dd2f3949f0fae10451ce8e02691ca03d9a..0000000000000000000000000000000000000000 --- a/arch/arc/mm/tlbex.S +++ /dev/null @@ -1,413 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TLB Exception Handling for ARC - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * Vineetg: April 2011 : - * -MMU v1: moved out legacy code into a separate file - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, - * helps avoid a shift when preparing PD0 from PTE - * - * Vineetg: July 2009 - * -For MMU V2, we need not do heuristics at the time of committing a D-TLB - * entry, so that it doesn't knock out its I-TLB entry - * -Some more fine tuning: - * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc - * - * Vineetg: July 2009 - * -Practically rewrote the I/D TLB Miss handlers - * Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
- * Hence Leaner by 1.5 K - * Used Conditional arithmetic to replace excessive branching - * Also used short instructions wherever possible - * - * Vineetg: Aug 13th 2008 - * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing - * more information in case of a Fatality - * - * Vineetg: March 25th Bug #92690 - * -Added Debug Code to check if sw-ASID == hw-ASID - - * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_ISA_ARCOMPACT -;----------------------------------------------------------------- -; ARC700 Exception Handling doesn't auto-switch stack and it only provides -; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" -; -; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a -; "global" is used to free-up FIRST core reg to be able to code the rest of -; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). -; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 -; need to be saved as well by extending the "global" to be 4 words. Hence -; ".size ex_saved_reg1, 16" -; [All of this dance is to avoid stack switching for each TLB Miss, since we -; only need to save only a handful of regs, as opposed to complete reg file] -; -; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST -; core reg as it will not be SMP safe. -; Thus scratch AUX reg is used (and no longer used to cache task PGD). -; To save the rest of 3 regs - per cpu, the global is made "per-cpu". -; Epilogue thus has to locate the "per-cpu" storage for regs. -; To avoid cache line bouncing the per-cpu global is aligned/sized per -; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence -; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" - -; As simple as that.... 
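(Concretely, the per-CPU slot addressing described above works out to roughly the following C sketch; ex_saved_reg1 and L1_CACHE_SHIFT are the real symbols, while the smp_processor_id() indexing shown here is an illustrative stand-in for the GET_CPU_ID macro.)

    /* Hedged sketch: each CPU gets a cache-line-sized stash inside
     * ex_saved_reg1, so the TLB-miss fast path never touches the stack. */
    u32 *slot = (u32 *)((char *)ex_saved_reg1 +
                        (smp_processor_id() << L1_CACHE_SHIFT));
    /* r0 itself goes to the SCRATCH_DATA0 aux reg; r1-r3 go here */
    slot[1] = r1;
    slot[2] = r2;
    slot[3] = r3;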
-;-------------------------------------------------------------------------- - -; scratch memory to save [r0-r3] used to code TLB refill Handler -ARCFP_DATA ex_saved_reg1 - .align 1 << L1_CACHE_SHIFT - .type ex_saved_reg1, @object -#ifdef CONFIG_SMP - .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) -ex_saved_reg1: - .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) -#else - .size ex_saved_reg1, 16 -ex_saved_reg1: - .zero 16 -#endif - -.macro TLBMISS_FREEUP_REGS -#ifdef CONFIG_SMP - sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with - GET_CPU_ID r0 ; get to per cpu scratch mem, - asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu - add r0, @ex_saved_reg1, r0 -#else - st r0, [@ex_saved_reg1] - mov_s r0, @ex_saved_reg1 -#endif - st_s r1, [r0, 4] - st_s r2, [r0, 8] - st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; the one in Linux data structures - - tlb_paranoid_check_asm -.endm - -.macro TLBMISS_RESTORE_REGS -#ifdef CONFIG_SMP - GET_CPU_ID r0 ; get to per cpu scratch mem - asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide - add r0, @ex_saved_reg1, r0 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - lr r0, [ARC_REG_SCRATCH_DATA0] -#else - mov_s r0, @ex_saved_reg1 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - ld_s r0, [r0] -#endif -.endm - -#else /* ARCv2 */ - -.macro TLBMISS_FREEUP_REGS - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 -.endm - -.macro TLBMISS_RESTORE_REGS - POP r3 - POP r2 - POP r1 - POP r0 -.endm - -#endif - -;============================================================================ -; Troubleshooting Stuff -;============================================================================ - -; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid -; When Creating TLB Entries, instead of doing 3 dependent loads from memory, -; we use the MMU PID Reg to get current ASID. -; In bizarre scenarios SW and HW ASID can get out-of-sync, which is trouble. -; So we try to detect this in the TLB Miss handler - -.macro tlb_paranoid_check_asm - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - - GET_CURR_TASK_ON_CPU r3 - ld r0, [r3, TASK_ACT_MM] - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] - breq r0, 0, 55f ; Error if no ASID allocated - - lr r1, [ARC_REG_PID] - and r1, r1, 0xFF - - and r2, r0, 0xFF ; MMU PID bits only for comparison - breq r1, r2, 5f - -55: - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r2, [erstatus] - bbit0 r2, STATUS_U_BIT, 5f - - ; We sure are in troubled waters; flag the error, but to do so - ; need to switch to kernel mode stack to call error routine - GET_TSK_STACK_BASE r3, sp - - ; Call printk to shout out loud - mov r2, 1 - j print_asid_mismatch - -5: ; ASIDs match so proceed normally - nop - -#endif - -.endm - -;============================================================================ -;TLB Miss handling Code -;============================================================================ - -;----------------------------------------------------------------------------- -; This macro does the page-table lookup for the faulting address.
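(Equivalently, in C: a sketch of the walk the macro performs. PGDIR_SHIFT, PAGE_SHIFT, PAGE_MASK and PTRS_PER_PTE are the real kernel constants; read_efa(), current_pgd() and the local variables are illustrative stand-ins. The assembly avoids the implied multiply by the PTE size by simply shifting less in step (1) of the comments below.)

    /* Hedged sketch of the two-level lookup coded in LOAD_FAULT_PTE. */
    unsigned long vaddr = read_efa();                /* faulting address */
    unsigned long *pgd  = current_pgd();             /* SCRATCH_DATA0 or mm->pgd */
    unsigned long pgd_ent = pgd[vaddr >> PGDIR_SHIFT];
    if (!pgd_ent)
            return do_slow_path_pf();                /* no page table: fault */
    {
            unsigned long *ptbl = (unsigned long *)(pgd_ent & PAGE_MASK);
            unsigned long pte = ptbl[(vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)];
            /* pte lands in r0, &ptbl[index] in r1, vaddr in r2 */
    }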
-; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address -.macro LOAD_FAULT_PTE - - lr r2, [efa] - -#ifndef CONFIG_SMP - lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd -#else - GET_CURR_TASK_ON_CPU r1 - ld r1, [r1, TASK_ACT_MM] - ld r1, [r1, MM_PGD] -#endif - - lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr - tst r3, r3 - bz do_slow_path_pf ; if no Page Table, do page fault - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) - add2.nz r1, r1, r0 - bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk - mov.nz r0, r3 - -#endif - and r1, r3, PAGE_MASK - - ; Get the PTE entry: The idea is - ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr - ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = (pgtbl + y * 4) - -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ -#else -#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ -#endif - - ; multiply in step (3) above avoided by shifting lesser in step (1) - lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) - and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) - ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) - ; r1: PTE ptr - -2: - -.endm - -;----------------------------------------------------------------- -; Convert Linux PTE entry into TLB entry -; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu -; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) -; IN: r0 = PTE, r1 = ptr to PTE - -.macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) - and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) - - and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE - or r3, r3, r2 - - sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C -#ifdef CONFIG_ARC_HAS_PAE40 - ld r3, [r1, 4] ; paddr[39..32] - sr r3, [ARC_REG_TLBPD1HI] -#endif - - and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb - - lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid - - or r3, r3, r2 ; S | vaddr | {sasid|asid} - sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 -.endm - -;----------------------------------------------------------------- -; Commit the TLB entry into MMU - -.macro COMMIT_ENTRY_TO_MMU -#if (CONFIG_ARC_MMU_VER < 4) - -#ifdef CONFIG_EZNPS_MTM_EXT - /* verify if entry for this vaddr+ASID already exists */ - sr TLBProbe, [ARC_REG_TLBCOMMAND] - lr r0, [ARC_REG_TLBINDEX] - bbit0 r0, 31, 88f -#endif - - /* Get free TLB slot: Set = computed from vaddr, way = random */ - sr TLBGetIndex, [ARC_REG_TLBCOMMAND] - - /* Commit the Write */ -#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ - sr TLBWriteNI, [ARC_REG_TLBCOMMAND] -#else - sr TLBWrite, [ARC_REG_TLBCOMMAND] -#endif - -#else - sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] -#endif - -88: -.endm - - -ARCFP_CODE ;Fast Path Code, candidate for ICCM - -;----------------------------------------------------------------------------- -; I-TLB Miss Exception Handler -;----------------------------------------------------------------------------- - -ENTRY(EV_TLBMissI) - - TLBMISS_FREEUP_REGS - - ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA - LOAD_FAULT_PTE - - ;---------------------------------------------------------------- - ; VERIFY_PTE: Check if PTE permissions approp for executing code - cmp_s r2, VMALLOC_START - mov_s r2, 
(_PAGE_PRESENT | _PAGE_EXECUTE) - or.hs r2, r2, _PAGE_GLOBAL - - and r3, r0, r2 ; Mask out NON Flag bits from PTE - xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) - bnz do_slow_path_pf - - ; Let Linux VM know that the page was accessed - or r0, r0, _PAGE_ACCESSED ; set Accessed Bit - st_s r0, [r1] ; Write back PTE - - CONV_PTE_TO_TLB - COMMIT_ENTRY_TO_MMU - TLBMISS_RESTORE_REGS -EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation - rtie - -END(EV_TLBMissI) - -;----------------------------------------------------------------------------- -; D-TLB Miss Exception Handler -;----------------------------------------------------------------------------- - -ENTRY(EV_TLBMissD) - - TLBMISS_FREEUP_REGS - - ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed - ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA - LOAD_FAULT_PTE - - ;---------------------------------------------------------------- - ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) - - cmp_s r2, VMALLOC_START - mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE - or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only - - ; Linux PTE [RWX] bits are semantically overloaded: - ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) - ; -Otherwise they are user-mode permissions, and those are exactly - ; same for kernel mode as well (e.g. copy_(to|from)_user) - - lr r3, [ecr] - btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access - or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE - btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access - or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE - ; Above laddering takes care of XCHG access (both R and W) - - ; By now, r2 setup with all the Flags we need to check in PTE - and r3, r0, r2 ; Mask out NON Flag bits from PTE - brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) - - ;---------------------------------------------------------------- - ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty - lr r3, [ecr] - or r0, r0, _PAGE_ACCESSED ; Accessed bit always - btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well - st_s r0, [r1] ; Write back PTE - - CONV_PTE_TO_TLB - -#if (CONFIG_ARC_MMU_VER == 1) - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. - ; But only for old MMU or one with Metal Fix - TLB_WRITE_HEURISTICS -#endif - - COMMIT_ENTRY_TO_MMU - TLBMISS_RESTORE_REGS -EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation - rtie - -;-------- Common routine to call Linux Page Fault Handler ----------- -do_slow_path_pf: - -#ifdef CONFIG_ISA_ARCV2 - ; Set Z flag if exception in U mode. Hardware micro-ops do this on any - ; taken interrupt/exception, and thus is already the case at the entry - ; above, but ensuing code would have already clobbered. - ; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set - - lr r2, [erstatus] - and r2, r2, STATUS_U_MASK - bxor.f 0, r2, STATUS_U_BIT -#endif - - ; Restore the 4-scratch regs saved by fast path miss handler - TLBMISS_RESTORE_REGS - - ; Slow path TLB Miss handled as a regular ARC Exception - ; (stack switching / save the complete reg-file). 
- b call_do_page_fault
-END(EV_TLBMissD)
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
deleted file mode 100644
index 3f18c0108e7287b76b73080095d24a0437a82a74..0000000000000000000000000000000000000000
--- a/arch/arc/plat-eznps/entry.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*******************************************************************************
-
- EZNPS CPU startup Code
- Copyright(c) 2012 EZchip Technologies.
-
-
-*******************************************************************************/
-#include
-#include
-#include
-#include
-
- .cpu A7
-
- .section .init.text, "ax",@progbits
- .align 1024 ; HW requirement for restart first PC
-
-ENTRY(res_service)
-#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
- ; There is no work for HW thread id != 0
- lr r3, [CTOP_AUX_THREAD_ID]
- cmp r3, 0
- jne stext
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
- ; With no cache coherency mechanism, D$ needs to be used very carefully.
- ; Address space:
- ; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
- ; 2G-3G: We disable D$ by setting this bit.
- ; 3G-4G: D$ is disabled by architecture.
- ; FMT huge pages for user applications reside at 0-2G.
- ; Only FMT pages are left to use D$; each such page has a
- ; disable/enable bit for cacheability.
- ; Programmers will use FMT pages for private data, so cache coherency
- ; would not be a problem.
- ; First thing we do is invalidate D$
- sr 1, [ARC_REG_DC_IVDC]
- sr HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
-#endif
-
-#ifdef CONFIG_SMP
- ; We set the logical cpuid to be used by GET_CPUID.
- ; We do not use the physical cpuid since we want ids to be contiguous
- ; across cpus on the same quad cluster.
- ; This is useful for applications that use shared resources of a quad
- ; cluster, such as SRAMs.
- lr r3, [CTOP_AUX_CORE_ID]
- sr r3, [CTOP_AUX_LOGIC_CORE_ID]
- lr r3, [CTOP_AUX_CLUSTER_ID]
- ; The logical id is achieved by swapping the 2 middle bits of the
- ; cluster id (4 bits).
- ; r3 is used since we use a short instruction and need a q-class reg
- .short CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
- .word CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
- sr r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
-#endif
-
- j stext
-END(res_service)
diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S
deleted file mode 100644
index 5c476bd2b4ce9c9bf9c7a37b684e20ddf46135f3..0000000000000000000000000000000000000000
--- a/arch/arm/boot/bootp/init.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/boot/bootp/init.S
- *
- * Copyright (C) 2000-2003 Russell King.
- *
- * "Header" file for splitting kernel + initrd. Note that we pass
- * r0 through to r3 straight through.
- *
- * This demonstrates how to append code to the start of the kernel
- * zImage, and boot the kernel without copying it around. This
- * example would be simpler if we didn't have an object of unknown
- * size immediately following the kernel: then we could build this
- * into a binary blob and concatenate the zImage using the cat command.
- */
- .section .start,#alloc,#execinstr
- .type _start, #function
- .globl _start
-
-_start: add lr, pc, #-0x8 @ lr = current load addr
- adr r13, data
- ldmia r13!, {r4-r6} @ r5 = dest, r6 = length
- add r4, r4, lr @ r4 = initrd_start + load addr
- bl move @ move the initrd
-
-/*
- * Setup the initrd parameters to pass to the kernel. This can only be
- * passed in via the tagged list.
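
What the tagged list referred to here looks like may be easier to follow in C than in the register-level walk that follows: verify (or synthesize) the leading ATAG_CORE header, advance to the zero-length terminator, then append an ATAG_INITRD2 entry and re-terminate. The tag constants are the documented boot-protocol values; the helper itself is a sketch, not kernel code.

#include <stdint.h>

#define ATAG_CORE    0x54410001u
#define ATAG_INITRD2 0x54420005u

struct atag_hdr {
    uint32_t size;   /* tag length in 32-bit words, header included */
    uint32_t tag;
};

static void append_initrd2(uint32_t *params, uint32_t start, uint32_t bytes)
{
    struct atag_hdr *t = (struct atag_hdr *)params;

    if (t->tag != ATAG_CORE) {       /* no valid list: make a dummy one */
        t->size = 2;                 /* header-only ATAG_CORE */
        t->tag  = ATAG_CORE;
        params[2] = 0;               /* zero-length terminator after it */
    }

    while (t->size != 0)             /* walk to the terminator */
        t = (struct atag_hdr *)((uint32_t *)t + t->size);

    t->size = 4;                     /* overwrite it with ATAG_INITRD2 */
    t->tag  = ATAG_INITRD2;
    ((uint32_t *)t)[2] = start;
    ((uint32_t *)t)[3] = bytes;
    ((uint32_t *)t)[4] = 0;          /* and re-terminate the list */
}
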
- */ - ldmia r13, {r5-r9} @ get size and addr of initrd - @ r5 = ATAG_CORE - @ r6 = ATAG_INITRD2 - @ r7 = initrd start - @ r8 = initrd end - @ r9 = param_struct address - - ldr r10, [r9, #4] @ get first tag - teq r10, r5 @ is it ATAG_CORE? -/* - * If we didn't find a valid tag list, create a dummy ATAG_CORE entry. - */ - movne r10, #0 @ terminator - movne r4, #2 @ Size of this entry (2 words) - stmiane r9, {r4, r5, r10} @ Size, ATAG_CORE, terminator - -/* - * find the end of the tag list, and then add an INITRD tag on the end. - * If there is already an INITRD tag, then we ignore it; the last INITRD - * tag takes precedence. - */ -taglist: ldr r10, [r9, #0] @ tag length - teq r10, #0 @ last tag (zero length)? - addne r9, r9, r10, lsl #2 - bne taglist - - mov r5, #4 @ Size of initrd tag (4 words) - stmia r9, {r5, r6, r7, r8, r10} - b kernel_start @ call kernel - -/* - * Move the block of memory length r6 from address r4 to address r5 - */ -move: ldmia r4!, {r7 - r10} @ move 32-bytes at a time - stmia r5!, {r7 - r10} - ldmia r4!, {r7 - r10} - stmia r5!, {r7 - r10} - subs r6, r6, #8 * 4 - bcs move - mov pc, lr - - .size _start, . - _start - - .align - - .type data,#object -data: .word initrd_start @ source initrd address - .word initrd_phys @ destination initrd address - .word initrd_size @ initrd size - - .word 0x54410001 @ r5 = ATAG_CORE - .word 0x54420005 @ r6 = ATAG_INITRD2 - .word initrd_phys @ r7 - .word initrd_size @ r8 - .word params_phys @ r9 - .size data, . - data diff --git a/arch/arm/boot/bootp/initrd.S b/arch/arm/boot/bootp/initrd.S deleted file mode 100644 index dd3d04971c42123581569003992286fe03e0e9ee..0000000000000000000000000000000000000000 --- a/arch/arm/boot/bootp/initrd.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .type initrd_start,#object - .globl initrd_start -initrd_start: - .incbin INITRD - .globl initrd_end -initrd_end: diff --git a/arch/arm/boot/bootp/kernel.S b/arch/arm/boot/bootp/kernel.S deleted file mode 100644 index dc6236c173d241267021eed2245434e2826ad077..0000000000000000000000000000000000000000 --- a/arch/arm/boot/bootp/kernel.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .globl kernel_start -kernel_start: - .incbin "arch/arm/boot/zImage" - .globl kernel_end -kernel_end: - .align 2 diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S deleted file mode 100644 index 88e2a88d324b2535a6610c1a914e81ef3a010b8b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/big-endian.S +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/big-endian.S - * - * Switch CPU into big endian mode. 
- * Author: Nicolas Pitre - */ - - .section ".start", #alloc, #execinstr - - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #(1 << 7) @ enable big endian mode - mcr p15, 0, r0, c1, c0, 0 @ write control reg - diff --git a/arch/arm/boot/compressed/debug.S b/arch/arm/boot/compressed/debug.S deleted file mode 100644 index 6bf2917a46214f09215e35debe7ddfdc5df4d5d3..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/debug.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#ifndef CONFIG_DEBUG_SEMIHOSTING - -#include CONFIG_DEBUG_LL_INCLUDE - -ENTRY(putc) - addruart r1, r2, r3 - waituart r3, r1 - senduart r0, r1 - busyuart r3, r1 - mov pc, lr -ENDPROC(putc) - -#else - -ENTRY(putc) - adr r1, 1f - ldmia r1, {r2, r3} - add r2, r2, r1 - ldr r1, [r2, r3] - strb r0, [r1] - mov r0, #0x03 @ SYS_WRITEC - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - mov pc, lr - .align 2 -1: .word _GLOBAL_OFFSET_TABLE_ - . - .word semi_writec_buf(GOT) -ENDPROC(putc) - - .bss - .global semi_writec_buf - .type semi_writec_buf, %object -semi_writec_buf: - .space 4 - .size semi_writec_buf, 4 - -#endif diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S deleted file mode 100644 index a5983588f96b8cf8847edf5ea74fe798c7f6c74b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/efi-header.S +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013-2017 Linaro Ltd - * Authors: Roy Franz - * Ard Biesheuvel - */ - -#include -#include - - .macro __nop -#ifdef CONFIG_EFI_STUB - @ This is almost but not quite a NOP, since it does clobber the - @ condition flags. But it is the best we can do for EFI, since - @ PE/COFF expects the magic string "MZ" at offset 0, while the - @ ARM/Linux boot protocol expects an executable instruction - @ there. - .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000 -#else - AR_CLASS( mov r0, r0 ) - M_CLASS( nop.w ) -#endif - .endm - - .macro __EFI_HEADER -#ifdef CONFIG_EFI_STUB - .set start_offset, __efi_start - start - .org start + 0x3c - @ - @ The PE header can be anywhere in the file, but for - @ simplicity we keep it together with the MSDOS header - @ The offset to the PE/COFF header needs to be at offset - @ 0x3C in the MSDOS header. - @ The only 2 fields of the MSDOS header that are used are this - @ PE/COFF offset, and the "MZ" bytes at offset 0x0. - @ - .long pe_header - start @ Offset to the PE header. 
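
The comment above is the entire contract of the MSDOS stub: bytes 0 and 1 spell "MZ", and the 32-bit word at offset 0x3c points at the PE/COFF header. A hypothetical loader-side sketch in C (PE fields are little-endian, and this assumes a little-endian host; bounds checks kept minimal):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const uint8_t *find_pe_header(const uint8_t *image, size_t len)
{
    uint32_t e_lfanew;

    if (len < 0x40 || image[0] != 'M' || image[1] != 'Z')
        return NULL;                          /* no MSDOS signature */

    memcpy(&e_lfanew, image + 0x3c, 4);       /* offset to PE header */
    if (e_lfanew > len - 4)
        return NULL;

    if (memcmp(image + e_lfanew, "PE\0\0", 4) != 0)
        return NULL;                          /* bad PE magic */

    return image + e_lfanew;
}
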
- -pe_header: - .long PE_MAGIC - -coff_header: - .short IMAGE_FILE_MACHINE_THUMB @ Machine - .short section_count @ NumberOfSections - .long 0 @ TimeDateStamp - .long 0 @ PointerToSymbolTable - .long 0 @ NumberOfSymbols - .short section_table - optional_header @ SizeOfOptionalHeader - .short IMAGE_FILE_32BIT_MACHINE | \ - IMAGE_FILE_DEBUG_STRIPPED | \ - IMAGE_FILE_EXECUTABLE_IMAGE | \ - IMAGE_FILE_LINE_NUMS_STRIPPED @ Characteristics - -#define __pecoff_code_size (__pecoff_data_start - __efi_start) - -optional_header: - .short PE_OPT_MAGIC_PE32 @ PE32 format - .byte 0x02 @ MajorLinkerVersion - .byte 0x14 @ MinorLinkerVersion - .long __pecoff_code_size @ SizeOfCode - .long __pecoff_data_size @ SizeOfInitializedData - .long 0 @ SizeOfUninitializedData - .long efi_stub_entry - start @ AddressOfEntryPoint - .long start_offset @ BaseOfCode - .long __pecoff_data_start - start @ BaseOfData - -extra_header_fields: - .long 0 @ ImageBase - .long SZ_4K @ SectionAlignment - .long SZ_512 @ FileAlignment - .short 0 @ MajorOsVersion - .short 0 @ MinorOsVersion - .short 0 @ MajorImageVersion - .short 0 @ MinorImageVersion - .short 0 @ MajorSubsystemVersion - .short 0 @ MinorSubsystemVersion - .long 0 @ Win32VersionValue - - .long __pecoff_end - start @ SizeOfImage - .long start_offset @ SizeOfHeaders - .long 0 @ CheckSum - .short IMAGE_SUBSYSTEM_EFI_APPLICATION @ Subsystem - .short 0 @ DllCharacteristics - .long 0 @ SizeOfStackReserve - .long 0 @ SizeOfStackCommit - .long 0 @ SizeOfHeapReserve - .long 0 @ SizeOfHeapCommit - .long 0 @ LoaderFlags - .long (section_table - .) / 8 @ NumberOfRvaAndSizes - - .quad 0 @ ExportTable - .quad 0 @ ImportTable - .quad 0 @ ResourceTable - .quad 0 @ ExceptionTable - .quad 0 @ CertificationTable - .quad 0 @ BaseRelocationTable - -section_table: - .ascii ".text\0\0\0" - .long __pecoff_code_size @ VirtualSize - .long __efi_start @ VirtualAddress - .long __pecoff_code_size @ SizeOfRawData - .long __efi_start @ PointerToRawData - .long 0 @ PointerToRelocations - .long 0 @ PointerToLineNumbers - .short 0 @ NumberOfRelocations - .short 0 @ NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE @ Characteristics - - .ascii ".data\0\0\0" - .long __pecoff_data_size @ VirtualSize - .long __pecoff_data_start - start @ VirtualAddress - .long __pecoff_data_rawsize @ SizeOfRawData - .long __pecoff_data_start - start @ PointerToRawData - .long 0 @ PointerToRelocations - .long 0 @ PointerToLineNumbers - .short 0 @ NumberOfRelocations - .short 0 @ NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_WRITE @ Characteristics - - .set section_count, (. - section_table) / 40 - - .align 12 -__efi_start: -#endif - .endm diff --git a/arch/arm/boot/compressed/head-sa1100.S b/arch/arm/boot/compressed/head-sa1100.S deleted file mode 100644 index 95abdd850fe35bcb2257b4449e0112cd9b478aa5..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-sa1100.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-sa1100.S - * - * Copyright (C) 1999 Nicolas Pitre - * - * SA1100 specific tweaks. This is merged into head.S by the linker. - * - */ - -#include -#include - - .section ".start", "ax" - .arch armv4 - -__SA1100_start: - - @ Preserve r8/r7 i.e. 
kernel entry values -#ifdef CONFIG_SA1100_COLLIE - mov r7, #MACH_TYPE_COLLIE -#endif -#ifdef CONFIG_SA1100_SIMPAD - @ UNTIL we've something like an open bootldr - mov r7, #MACH_TYPE_SIMPAD @should be 87 -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control reg - ands r0, r0, #0x0d - beq 99f - - @ Data cache might be active. - @ Be sure to flush kernel binary out of the cache, - @ whatever state it is, before it is turned off. - @ This is done by fetching through currently executed - @ memory to be sure we hit the same cache. - bic r2, pc, #0x1f - add r3, r2, #0x4000 @ 16 kb is quite enough... -1: ldr r0, [r2], #32 - teq r2, r3 - bne 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c7, c7, 0 @ flush I & D caches - - @ disabling MMU and caches - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #0x0d @ clear WB, DC, MMU - bic r0, r0, #0x1000 @ clear Icache - mcr p15, 0, r0, c1, c0, 0 -99: diff --git a/arch/arm/boot/compressed/head-sharpsl.S b/arch/arm/boot/compressed/head-sharpsl.S deleted file mode 100644 index 992e784500fa6ea4e8ef44843d8a410ba2376c3e..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-sharpsl.S +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-sharpsl.S - * - * Copyright (C) 2004-2005 Richard Purdie - * - * Sharp's bootloader doesn't pass any kind of machine ID - * so we have to figure out the machine for ourselves... - * - * Support for Poodle, Corgi (SL-C700), Shepherd (SL-C750) - * Husky (SL-C760), Tosa (SL-C6000), Spitz (SL-C3000), - * Akita (SL-C1000) and Borzoi (SL-C3100). - * - */ - -#include -#include - -#ifndef CONFIG_PXA_SHARPSL -#error What am I doing here... -#endif - - .section ".start", "ax" - -__SharpSL_start: - -/* Check for TC6393 - if found we have a Tosa */ - ldr r7, .TOSAID - mov r1, #0x10000000 @ Base address of TC6393 chip - mov r6, #0x03 - ldrh r3, [r1, #8] @ Load TC6393XB Revison: This is 0x0003 - cmp r6, r3 - beq .SHARPEND @ Success -> tosa - -/* Check for pxa270 - if found, branch */ - mrc p15, 0, r4, c0, c0 @ Get Processor ID - and r4, r4, #0xffffff00 - ldr r3, .PXA270ID - cmp r4, r3 - beq .PXA270 - -/* Check for w100 - if not found we have a Poodle */ - ldr r1, .W100ADDR @ Base address of w100 chip + regs offset - - mov r6, #0x31 @ Load Magic Init value - str r6, [r1, #0x280] @ to SCRATCH_UMSK - mov r5, #0x3000 -.W100LOOP: - subs r5, r5, #1 - bne .W100LOOP - mov r6, #0x30 @ Load 2nd Magic Init value - str r6, [r1, #0x280] @ to SCRATCH_UMSK - - ldr r6, [r1, #0] @ Load Chip ID - ldr r3, .W100ID - ldr r7, .POODLEID - cmp r6, r3 - bne .SHARPEND @ We have no w100 - Poodle - -/* Check for pxa250 - if found we have a Corgi */ - ldr r7, .CORGIID - ldr r3, .PXA255ID - cmp r4, r3 - blo .SHARPEND @ We have a PXA250 - Corgi - -/* Check for 64MiB flash - if found we have a Shepherd */ - bl get_flash_ids - ldr r7, .SHEPHERDID - cmp r3, #0x76 @ 64MiB flash - beq .SHARPEND @ We have Shepherd - -/* Must be a Husky */ - ldr r7, .HUSKYID @ Must be Husky - b .SHARPEND - -.PXA270: -/* Check for 16MiB flash - if found we have Spitz */ - bl get_flash_ids - ldr r7, .SPITZID - cmp r3, #0x73 @ 16MiB flash - beq .SHARPEND @ We have Spitz - -/* Check for a second SCOOP chip - if found we have Borzoi */ - ldr r1, .SCOOP2ADDR - ldr r7, .BORZOIID - mov r6, #0x0140 - strh r6, [r1] - ldrh r6, [r1] - cmp r6, #0x0140 - beq .SHARPEND @ We have Borzoi - -/* Must be Akita */ - ldr r7, .AKITAID - b .SHARPEND @ We have Borzoi - -.PXA255ID: - .word 0x69052d00 @ PXA255 Processor 
ID -.PXA270ID: - .word 0x69054100 @ PXA270 Processor ID -.W100ID: - .word 0x57411002 @ w100 Chip ID -.W100ADDR: - .word 0x08010000 @ w100 Chip ID Reg Address -.SCOOP2ADDR: - .word 0x08800040 -.POODLEID: - .word MACH_TYPE_POODLE -.CORGIID: - .word MACH_TYPE_CORGI -.SHEPHERDID: - .word MACH_TYPE_SHEPHERD -.HUSKYID: - .word MACH_TYPE_HUSKY -.TOSAID: - .word MACH_TYPE_TOSA -.SPITZID: - .word MACH_TYPE_SPITZ -.AKITAID: - .word MACH_TYPE_AKITA -.BORZOIID: - .word MACH_TYPE_BORZOI - -/* - * Return: r2 - NAND Manufacturer ID - * r3 - NAND Chip ID - * Corrupts: r1 - */ -get_flash_ids: - mov r1, #0x0c000000 @ Base address of NAND chip - ldrb r3, [r1, #24] @ Load FLASHCTL - bic r3, r3, #0x11 @ SET NCE - orr r3, r3, #0x0a @ SET CLR + FLWP - strb r3, [r1, #24] @ Save to FLASHCTL - mov r2, #0x90 @ Command "readid" - strb r2, [r1, #20] @ Save to FLASHIO - bic r3, r3, #2 @ CLR CLE - orr r3, r3, #4 @ SET ALE - strb r3, [r1, #24] @ Save to FLASHCTL - mov r2, #0 @ Address 0x00 - strb r2, [r1, #20] @ Save to FLASHIO - bic r3, r3, #4 @ CLR ALE - strb r3, [r1, #24] @ Save to FLASHCTL -.fids1: - ldrb r3, [r1, #24] @ Load FLASHCTL - tst r3, #32 @ Is chip ready? - beq .fids1 - ldrb r2, [r1, #20] @ NAND Manufacturer ID - ldrb r3, [r1, #20] @ NAND Chip ID - mov pc, lr - -.SHARPEND: diff --git a/arch/arm/boot/compressed/head-xscale.S b/arch/arm/boot/compressed/head-xscale.S deleted file mode 100644 index 20fa44d59f82db667c6572b78905afd3296536ce..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-xscale.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-xscale.S - * - * XScale specific tweaks. This is merged into head.S by the linker. - * - */ - -#include - - .section ".start", "ax" - -__XScale_start: - - @ Preserve r8/r7 i.e. kernel entry values - - @ Data cache might be active. - @ Be sure to flush kernel binary out of the cache, - @ whatever state it is, before it is turned off. - @ This is done by fetching through currently executed - @ memory to be sure we hit the same cache. - bic r2, pc, #0x1f - add r3, r2, #0x10000 @ 64 kb is quite enough... -1: ldr r0, [r2], #32 - teq r2, r3 - bne 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c7, c7, 0 @ flush I & D caches - - @ disabling MMU and caches - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #0x05 @ clear DC, MMU - bic r0, r0, #0x1000 @ clear Icache - mcr p15, 0, r0, c1, c0, 0 - diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S deleted file mode 100644 index cbe126297f5499507f9b51910d2ab4e5cc7a528c..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head.S +++ /dev/null @@ -1,1490 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/boot/compressed/head.S - * - * Copyright (C) 1996-2002 Russell King - * Copyright (C) 2004 Hyok S. Choi (MPU support) - */ -#include -#include -#include - -#include "efi-header.S" - - AR_CLASS( .arch armv7-a ) - M_CLASS( .arch armv7-m ) - -/* - * Debugging stuff - * - * Note that these macros must not contain any code which is not - * 100% relocatable. Any attempt to do so will result in a crash. - * Please select one of the following when turning on debugging. 
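
The get_flash_ids routine a little further up is the densest part of the SharpSL probe, so a C rendition may help: it drives the NAND controller's command and address latch lines by hand to issue READID (0x90), then reads the two ID bytes. The register offsets and bit meanings below are restated from the comments in that routine as assumptions, not verified hardware facts.

#include <stdint.h>

#define FLASHIO   20          /* data port, per the comments above    */
#define FLASHCTL  24          /* control port                         */
#define CTL_NCE   0x11        /* chip-enable bits, cleared to select  */
#define CTL_CLE   0x02        /* command latch enable                 */
#define CTL_FLWP  0x08        /* write protect                        */
#define CTL_ALE   0x04        /* address latch enable                 */
#define CTL_RDY   0x20        /* ready/busy flag                      */

static void nand_read_ids(volatile uint8_t *nand, uint8_t *maker, uint8_t *chip)
{
    uint8_t ctl = nand[FLASHCTL];

    ctl = (ctl & ~CTL_NCE) | CTL_CLE | CTL_FLWP;
    nand[FLASHCTL] = ctl;             /* select chip, command phase */
    nand[FLASHIO]  = 0x90;            /* READID command */

    ctl = (ctl & ~CTL_CLE) | CTL_ALE;
    nand[FLASHCTL] = ctl;             /* address phase */
    nand[FLASHIO]  = 0x00;            /* address byte 0x00 */

    nand[FLASHCTL] = ctl & ~CTL_ALE;  /* back to data phase */
    while (!(nand[FLASHCTL] & CTL_RDY))
        ;                             /* spin until the chip is ready */

    *maker = nand[FLASHIO];           /* manufacturer ID */
    *chip  = nand[FLASHIO];           /* device ID */
}
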
- */ -#ifdef DEBUG - -#if defined(CONFIG_DEBUG_ICEDCC) - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c0, c5, 0 - .endm -#elif defined(CONFIG_CPU_XSCALE) - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c8, c0, 0 - .endm -#else - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c1, c0, 0 - .endm -#endif - -#else - -#include CONFIG_DEBUG_LL_INCLUDE - - .macro writeb, ch, rb - senduart \ch, \rb - .endm - -#if defined(CONFIG_ARCH_SA1100) - .macro loadsp, rb, tmp1, tmp2 - mov \rb, #0x80000000 @ physical base address -#ifdef CONFIG_DEBUG_LL_SER3 - add \rb, \rb, #0x00050000 @ Ser3 -#else - add \rb, \rb, #0x00010000 @ Ser1 -#endif - .endm -#else - .macro loadsp, rb, tmp1, tmp2 - addruart \rb, \tmp1, \tmp2 - .endm -#endif -#endif -#endif - - .macro kputc,val - mov r0, \val - bl putc - .endm - - .macro kphex,val,len - mov r0, \val - mov r1, #\len - bl phex - .endm - - .macro debug_reloc_start -#ifdef DEBUG - kputc #'\n' - kphex r6, 8 /* processor id */ - kputc #':' - kphex r7, 8 /* architecture id */ -#ifdef CONFIG_CPU_CP15 - kputc #':' - mrc p15, 0, r0, c1, c0 - kphex r0, 8 /* control reg */ -#endif - kputc #'\n' - kphex r5, 8 /* decompressed kernel start */ - kputc #'-' - kphex r9, 8 /* decompressed kernel end */ - kputc #'>' - kphex r4, 8 /* kernel execution address */ - kputc #'\n' -#endif - .endm - - .macro debug_reloc_end -#ifdef DEBUG - kphex r5, 8 /* end of kernel */ - kputc #'\n' - mov r0, r4 - bl memdump /* dump 256 bytes at start of kernel */ -#endif - .endm - - /* - * Debug kernel copy by printing the memory addresses involved - */ - .macro dbgkc, begin, end, cbegin, cend -#ifdef DEBUG - kputc #'\n' - kputc #'C' - kputc #':' - kputc #'0' - kputc #'x' - kphex \begin, 8 /* Start of compressed kernel */ - kputc #'-' - kputc #'0' - kputc #'x' - kphex \end, 8 /* End of compressed kernel */ - kputc #'-' - kputc #'>' - kputc #'0' - kputc #'x' - kphex \cbegin, 8 /* Start of kernel copy */ - kputc #'-' - kputc #'0' - kputc #'x' - kphex \cend, 8 /* End of kernel copy */ - kputc #'\n' - kputc #'\r' -#endif - .endm - - .section ".start", #alloc, #execinstr -/* - * sort out different calling conventions - */ - .align - /* - * Always enter in ARM state for CPUs that support the ARM ISA. - * As of today (2014) that's exactly the members of the A and R - * classes. - */ - AR_CLASS( .arm ) -start: - .type start,#function - /* - * These 7 nops along with the 1 nop immediately below for - * !THUMB2 form 8 nops that make the compressed kernel bootable - * on legacy ARM systems that were assuming the kernel in a.out - * binary format. The boot loaders on these systems would - * jump 32 bytes into the image to skip the a.out header. - * with these 8 nops filling exactly 32 bytes, things still - * work as expected on these legacy systems. Thumb2 mode keeps - * 7 of the nops as it turns out that some boot loaders - * were patching the initial instructions of the kernel, i.e - * had started to exploit this "patch area". 
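
The kputc/kphex macros defined above funnel into the phex/putc routines near the end of this file. Their effect is easy to state in C: stream the nibbles most-significant first, producing the same fixed-width uppercase output. A sketch, with putchar standing in for the UART write:

#include <stdint.h>
#include <stdio.h>

static void phex(uint32_t val, int digits)
{
    for (int shift = (digits - 1) * 4; shift >= 0; shift -= 4) {
        unsigned nib = (val >> shift) & 0xf;
        putchar(nib < 10 ? '0' + nib : 'A' + nib - 10);
    }
}

/* e.g. the debug_reloc_start trace: phex(processor_id, 8); putchar(':'); */
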
- */ - .rept 7 - __nop - .endr -#ifndef CONFIG_THUMB2_KERNEL - __nop -#else - AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode - M_CLASS( nop.w ) @ M: already in Thumb2 mode - .thumb -#endif - W(b) 1f - - .word _magic_sig @ Magic numbers to help the loader - .word _magic_start @ absolute load/run zImage address - .word _magic_end @ zImage end address - .word 0x04030201 @ endianness flag - .word 0x45454545 @ another magic number to indicate - .word _magic_table @ additional data table - - __EFI_HEADER -1: - ARM_BE8( setend be ) @ go BE8 if compiled for BE8 - AR_CLASS( mrs r9, cpsr ) -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install @ get into SVC mode, reversibly -#endif - mov r7, r1 @ save architecture ID - mov r8, r2 @ save atags pointer - -#ifndef CONFIG_CPU_V7M - /* - * Booting from Angel - need to enter SVC mode and disable - * FIQs/IRQs (numeric definitions from angel arm.h source). - * We only do this if we were in user mode on entry. - */ - mrs r2, cpsr @ get current mode - tst r2, #3 @ not user? - bne not_angel - mov r0, #0x17 @ angel_SWIreason_EnterSVC - ARM( swi 0x123456 ) @ angel_SWI_ARM - THUMB( svc 0xab ) @ angel_SWI_THUMB -not_angel: - safe_svcmode_maskall r0 - msr spsr_cxsf, r9 @ Save the CPU boot mode in - @ SPSR -#endif - /* - * Note that some cache flushing and other stuff may - * be needed here - is there an Angel SWI call for this? - */ - - /* - * some architecture specific code can be inserted - * by the linker here, but it should preserve r7, r8, and r9. - */ - - .text - -#ifdef CONFIG_AUTO_ZRELADDR - /* - * Find the start of physical memory. As we are executing - * without the MMU on, we are in the physical address space. - * We just need to get rid of any offset by aligning the - * address. - * - * This alignment is a balance between the requirements of - * different platforms - we have chosen 128MB to allow - * platforms which align the start of their physical memory - * to 128MB to use this feature, while allowing the zImage - * to be placed within the first 128MB of memory on other - * platforms. Increasing the alignment means we place - * stricter alignment requirements on the start of physical - * memory, but relaxing it means that we break people who - * are already placing their zImage in (eg) the top 64MB - * of this range. - */ - mov r4, pc - and r4, r4, #0xf8000000 - /* Determine final kernel image address. */ - add r4, r4, #TEXT_OFFSET -#else - ldr r4, =zreladdr -#endif - - /* - * Set up a page table only if it won't overwrite ourself. - * That means r4 < pc || r4 - 16k page directory > &_end. - * Given that r4 > &_end is most unfrequent, we add a rough - * additional 1MB of room for a possible appended DTB. - */ - mov r0, pc - cmp r0, r4 - ldrcc r0, LC0+32 - addcc r0, r0, pc - cmpcc r4, r0 - orrcc r4, r4, #1 @ remember we skipped cache_on - blcs cache_on - -restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r6, r10, r11, r12} - ldr sp, [r0, #28] - - /* - * We might be running at a different address. We need - * to fix up various pointers. - */ - sub r0, r0, r1 @ calculate the delta offset - add r6, r6, r0 @ _edata - add r10, r10, r0 @ inflated kernel size location - - /* - * The kernel build system appends the size of the - * decompressed kernel at the end of the compressed data - * in little-endian form. 
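
The four ldrb/orr instructions that follow this comment reassemble that little-endian length one byte at a time, which keeps the code correct on big-endian (BE8) builds and safe for unaligned placement. The same operation in C:

#include <stdint.h>

static uint32_t read_le32(const uint8_t *p)
{
    /* byte loads are endian-neutral and carry no alignment requirement */
    return (uint32_t)p[0]
         | (uint32_t)p[1] << 8
         | (uint32_t)p[2] << 16
         | (uint32_t)p[3] << 24;
}
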
- */ - ldrb r9, [r10, #0] - ldrb lr, [r10, #1] - orr r9, r9, lr, lsl #8 - ldrb lr, [r10, #2] - ldrb r10, [r10, #3] - orr r9, r9, lr, lsl #16 - orr r9, r9, r10, lsl #24 - -#ifndef CONFIG_ZBOOT_ROM - /* malloc space is above the relocated stack (64k max) */ - add sp, sp, r0 - add r10, sp, #0x10000 -#else - /* - * With ZBOOT_ROM the bss/stack is non relocatable, - * but someone could still run this code from RAM, - * in which case our reference is _edata. - */ - mov r10, r6 -#endif - - mov r5, #0 @ init dtb size to 0 -#ifdef CONFIG_ARM_APPENDED_DTB -/* - * r0 = delta - * r2 = BSS start - * r3 = BSS end - * r4 = final kernel address (possibly with LSB set) - * r5 = appended dtb size (still unknown) - * r6 = _edata - * r7 = architecture ID - * r8 = atags/device tree pointer - * r9 = size of decompressed image - * r10 = end of this image, including bss/stack/malloc space if non XIP - * r11 = GOT start - * r12 = GOT end - * sp = stack pointer - * - * if there are device trees (dtb) appended to zImage, advance r10 so that the - * dtb data will get relocated along with the kernel if necessary. - */ - - ldr lr, [r6, #0] -#ifndef __ARMEB__ - ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian -#else - ldr r1, =0xd00dfeed -#endif - cmp lr, r1 - bne dtb_check_done @ not found - -#ifdef CONFIG_ARM_ATAG_DTB_COMPAT - /* - * OK... Let's do some funky business here. - * If we do have a DTB appended to zImage, and we do have - * an ATAG list around, we want the later to be translated - * and folded into the former here. No GOT fixup has occurred - * yet, but none of the code we're about to call uses any - * global variable. - */ - - /* Get the initial DTB size */ - ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif - /* 50% DTB growth should be good enough */ - add r5, r5, r5, lsr #1 - /* preserve 64-bit alignment */ - add r5, r5, #7 - bic r5, r5, #7 - /* clamp to 32KB min and 1MB max */ - cmp r5, #(1 << 15) - movlo r5, #(1 << 15) - cmp r5, #(1 << 20) - movhi r5, #(1 << 20) - /* temporarily relocate the stack past the DTB work space */ - add sp, sp, r5 - - stmfd sp!, {r0-r3, ip, lr} - mov r0, r8 - mov r1, r6 - mov r2, r5 - bl atags_to_fdt - - /* - * If returned value is 1, there is no ATAG at the location - * pointed by r8. Try the typical 0x100 offset from start - * of RAM and hope for the best. - */ - cmp r0, #1 - sub r0, r4, #TEXT_OFFSET - bic r0, r0, #1 - add r0, r0, #0x100 - mov r1, r6 - mov r2, r5 - bleq atags_to_fdt - - ldmfd sp!, {r0-r3, ip, lr} - sub sp, sp, r5 -#endif - - mov r8, r6 @ use the appended device tree - - /* - * Make sure that the DTB doesn't end up in the final - * kernel's .bss area. To do so, we adjust the decompressed - * kernel size to compensate if that .bss size is larger - * than the relocated code. - */ - ldr r5, =_kernel_bss_size - adr r1, wont_overwrite - sub r1, r6, r1 - subs r1, r5, r1 - addhi r9, r9, r1 - - /* Get the current DTB size */ - ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert r5 (dtb size) to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif - - /* preserve 64-bit alignment */ - add r5, r5, #7 - bic r5, r5, #7 - - /* relocate some pointers past the appended dtb */ - add r6, r6, r5 - add r10, r10, r5 - add sp, sp, r5 -dtb_check_done: -#endif - -/* - * Check to see if we will overwrite ourselves. 
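
To summarize the DTB sizing dance above in C: the FDT totalsize field is big-endian by specification, so it is byte-swapped on little-endian kernels, padded by 50% to leave room for the ATAG-to-FDT conversion, rounded up for 64-bit alignment, and clamped to the 32 KB to 1 MB window:

#include <stdint.h>

static uint32_t dtb_workspace(uint32_t be_totalsize, int little_endian_host)
{
    uint32_t size = be_totalsize;

    if (little_endian_host)          /* FDT header fields are big-endian */
        size = (size >> 24) | ((size >> 8) & 0x0000ff00) |
               ((size << 8) & 0x00ff0000) | (size << 24);

    size += size >> 1;               /* 50% growth should be good enough */
    size  = (size + 7) & ~7u;        /* preserve 64-bit alignment */

    if (size < (1u << 15)) size = 1u << 15;   /* clamp to 32 KB min */
    if (size > (1u << 20)) size = 1u << 20;   /* ... and 1 MB max   */
    return size;
}
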
- * r4 = final kernel address (possibly with LSB set) - * r9 = size of decompressed image - * r10 = end of this image, including bss/stack/malloc space if non XIP - * We basically want: - * r4 - 16k page directory >= r10 -> OK - * r4 + image length <= address of wont_overwrite -> OK - * Note: the possible LSB in r4 is harmless here. - */ - add r10, r10, #16384 - cmp r4, r10 - bhs wont_overwrite - add r10, r4, r9 - adr r9, wont_overwrite - cmp r10, r9 - bls wont_overwrite - -/* - * Relocate ourselves past the end of the decompressed kernel. - * r6 = _edata - * r10 = end of the decompressed kernel - * Because we always copy ahead, we need to do it from the end and go - * backward in case the source and destination overlap. - */ - /* - * Bump to the next 256-byte boundary with the size of - * the relocation code added. This avoids overwriting - * ourself when the offset is small. - */ - add r10, r10, #((reloc_code_end - restart + 256) & ~255) - bic r10, r10, #255 - - /* Get start of code we want to copy and align it down. */ - adr r5, restart - bic r5, r5, #31 - -/* Relocate the hyp vector base if necessary */ -#ifdef CONFIG_ARM_VIRT_EXT - mrs r0, spsr - and r0, r0, #MODE_MASK - cmp r0, #HYP_MODE - bne 1f - - /* - * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. - * Call __hyp_set_vectors with the new address so that we - * can HVC again after the copy. - */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 - sub r0, r0, r5 - add r0, r0, r10 - bl __hyp_set_vectors -1: -#endif - - sub r9, r6, r5 @ size to copy - add r9, r9, #31 @ rounded up to a multiple - bic r9, r9, #31 @ ... of 32 bytes - add r6, r9, r5 - add r9, r9, r10 - -#ifdef DEBUG - sub r10, r6, r5 - sub r10, r9, r10 - /* - * We are about to copy the kernel to a new memory area. - * The boundaries of the new memory area can be found in - * r10 and r9, whilst r5 and r6 contain the boundaries - * of the memory we are going to copy. - * Calling dbgkc will help with the printing of this - * information. - */ - dbgkc r5, r6, r10, r9 -#endif - -1: ldmdb r6!, {r0 - r3, r10 - r12, lr} - cmp r6, r5 - stmdb r9!, {r0 - r3, r10 - r12, lr} - bhi 1b - - /* Preserve offset to relocated code. */ - sub r6, r9, r6 - -#ifndef CONFIG_ZBOOT_ROM - /* cache_clean_flush may use the stack, so relocate it */ - add sp, sp, r6 -#endif - - bl cache_clean_flush - - badr r0, restart - add r0, r0, r6 - mov pc, r0 - -wont_overwrite: -/* - * If delta is zero, we are running at the address we were linked at. - * r0 = delta - * r2 = BSS start - * r3 = BSS end - * r4 = kernel execution address (possibly with LSB set) - * r5 = appended dtb size (0 if not present) - * r7 = architecture ID - * r8 = atags pointer - * r11 = GOT start - * r12 = GOT end - * sp = stack pointer - */ - orrs r1, r0, r5 - beq not_relocated - - add r11, r11, r0 - add r12, r12, r0 - -#ifndef CONFIG_ZBOOT_ROM - /* - * If we're running fully PIC === CONFIG_ZBOOT_ROM = n, - * we need to fix up pointers into the BSS region. - * Note that the stack pointer has already been fixed up. - */ - add r2, r2, r0 - add r3, r3, r0 - - /* - * Relocate all entries in the GOT table. 
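
Both GOT fixup loops just below perform the same core transform, restated here in C: every entry gets the load-address delta added, and, in the fully-PIC (!ZBOOT_ROM) variant, entries pointing into the already-shifted BSS are additionally bumped past the appended DTB. Addresses are modeled as plain integers for the sketch:

#include <stdint.h>

static void relocate_got(uint32_t *got, uint32_t *got_end,
                         uint32_t delta, uint32_t dtb_size,
                         uint32_t bss_start, uint32_t bss_end)
{
    for (; got < got_end; got++) {
        uint32_t entry = *got + delta;      /* fix up C references */

        if (entry >= bss_start && entry < bss_end)
            entry += dtb_size;              /* BSS moved past the DTB */

        *got = entry;
    }
}
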
- * Bump bss entries to _edata + dtb size - */ -1: ldr r1, [r11, #0] @ relocate entries in the GOT - add r1, r1, r0 @ This fixes up C references - cmp r1, r2 @ if entry >= bss_start && - cmphs r3, r1 @ bss_end > entry - addhi r1, r1, r5 @ entry += dtb size - str r1, [r11], #4 @ next entry - cmp r11, r12 - blo 1b - - /* bump our bss pointers too */ - add r2, r2, r5 - add r3, r3, r5 - -#else - - /* - * Relocate entries in the GOT table. We only relocate - * the entries that are outside the (relocated) BSS region. - */ -1: ldr r1, [r11, #0] @ relocate entries in the GOT - cmp r1, r2 @ entry < bss_start || - cmphs r3, r1 @ _end < entry - addlo r1, r1, r0 @ table. This fixes up the - str r1, [r11], #4 @ C references. - cmp r11, r12 - blo 1b -#endif - -not_relocated: mov r0, #0 -1: str r0, [r2], #4 @ clear bss - str r0, [r2], #4 - str r0, [r2], #4 - str r0, [r2], #4 - cmp r2, r3 - blo 1b - - /* - * Did we skip the cache setup earlier? - * That is indicated by the LSB in r4. - * Do it now if so. - */ - tst r4, #1 - bic r4, r4, #1 - blne cache_on - -/* - * The C runtime environment should now be setup sufficiently. - * Set up some pointers, and start decompressing. - * r4 = kernel execution address - * r7 = architecture ID - * r8 = atags pointer - */ - mov r0, r4 - mov r1, sp @ malloc space above stack - add r2, sp, #0x10000 @ 64k max - mov r3, r7 - bl decompress_kernel - bl cache_clean_flush - bl cache_off - -#ifdef CONFIG_ARM_VIRT_EXT - mrs r0, spsr @ Get saved CPU boot mode - and r0, r0, #MODE_MASK - cmp r0, #HYP_MODE @ if not booted in HYP mode... - bne __enter_kernel @ boot kernel directly - - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - - bl __hyp_set_vectors - __HVC(0) @ otherwise bounce to hyp mode - - b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . -#else - b __enter_kernel -#endif - - .align 2 - .type LC0, #object -LC0: .word LC0 @ r1 - .word __bss_start @ r2 - .word _end @ r3 - .word _edata @ r6 - .word input_data_end - 4 @ r10 (inflated size location) - .word _got_start @ r11 - .word _got_end @ ip - .word .L_user_stack_end @ sp - .word _end - restart + 16384 + 1024*1024 - .size LC0, . - LC0 - -#ifdef CONFIG_ARCH_RPC - .globl params -params: ldr r0, =0x10000100 @ params_phys for RPC - mov pc, lr - .ltorg - .align -#endif - -/* - * Turn on the cache. We need to setup some page tables so that we - * can have both the I and D caches on. - * - * We place the page tables 16k down from the kernel execution address, - * and we hope that nothing else is using it. If we're using it, we - * will go pop! - * - * On entry, - * r4 = kernel execution address - * r7 = architecture number - * r8 = atags pointer - * On exit, - * r0, r1, r2, r3, r9, r10, r12 corrupted - * This routine must preserve: - * r4, r7, r8 - */ - .align 5 -cache_on: mov r3, #8 @ cache_on function - b call_cache_fn - -/* - * Initialize the highest priority protection region, PR7 - * to cover all 32bit address and cacheable and bufferable. 
- */ -__armv4_mpu_cache_on: - mov r0, #0x3f @ 4G, the whole - mcr p15, 0, r0, c6, c7, 0 @ PR7 Area Setting - mcr p15, 0, r0, c6, c7, 1 - - mov r0, #0x80 @ PR7 - mcr p15, 0, r0, c2, c0, 0 @ D-cache on - mcr p15, 0, r0, c2, c0, 1 @ I-cache on - mcr p15, 0, r0, c3, c0, 0 @ write-buffer on - - mov r0, #0xc000 - mcr p15, 0, r0, c5, c0, 1 @ I-access permission - mcr p15, 0, r0, c5, c0, 0 @ D-access permission - - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c5, 0 @ flush(inval) I-Cache - mcr p15, 0, r0, c7, c6, 0 @ flush(inval) D-Cache - mrc p15, 0, r0, c1, c0, 0 @ read control reg - @ ...I .... ..D. WC.M - orr r0, r0, #0x002d @ .... .... ..1. 11.1 - orr r0, r0, #0x1000 @ ...1 .... .... .... - - mcr p15, 0, r0, c1, c0, 0 @ write control reg - - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ flush(inval) I-Cache - mcr p15, 0, r0, c7, c6, 0 @ flush(inval) D-Cache - mov pc, lr - -__armv3_mpu_cache_on: - mov r0, #0x3f @ 4G, the whole - mcr p15, 0, r0, c6, c7, 0 @ PR7 Area Setting - - mov r0, #0x80 @ PR7 - mcr p15, 0, r0, c2, c0, 0 @ cache on - mcr p15, 0, r0, c3, c0, 0 @ write-buffer on - - mov r0, #0xc000 - mcr p15, 0, r0, c5, c0, 0 @ access permission - - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - /* - * ?? ARMv3 MMU does not allow reading the control register, - * does this really work on ARMv3 MPU? - */ - mrc p15, 0, r0, c1, c0, 0 @ read control reg - @ .... .... .... WC.M - orr r0, r0, #0x000d @ .... .... .... 11.1 - /* ?? this overwrites the value constructed above? */ - mov r0, #0 - mcr p15, 0, r0, c1, c0, 0 @ write control reg - - /* ?? invalidate for the second time? */ - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - mov pc, lr - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH -#define CB_BITS 0x08 -#else -#define CB_BITS 0x0c -#endif - -__setup_mmu: sub r3, r4, #16384 @ Page directory size - bic r3, r3, #0xff @ Align the pointer - bic r3, r3, #0x3f00 -/* - * Initialise the page tables, turning on the cacheable and bufferable - * bits for the RAM area only. - */ - mov r0, r3 - mov r9, r0, lsr #18 - mov r9, r9, lsl #18 @ start of RAM - add r10, r9, #0x10000000 @ a reasonable RAM size - mov r1, #0x12 @ XN|U + section mapping - orr r1, r1, #3 << 10 @ AP=11 - add r2, r3, #16384 -1: cmp r1, r9 @ if virt > start of RAM - cmphs r10, r1 @ && end of RAM > virt - bic r1, r1, #0x1c @ clear XN|U + C + B - orrlo r1, r1, #0x10 @ Set XN|U for non-RAM - orrhs r1, r1, r6 @ set RAM section settings - str r1, [r0], #4 @ 1:1 mapping - add r1, r1, #1048576 - teq r0, r2 - bne 1b -/* - * If ever we are running from Flash, then we surely want the cache - * to be enabled also for our execution instance... We map 2MB of it - * so there is no map overlap problem for up to 1 MB compressed kernel. - * If the execution is in RAM then we would only be duplicating the above. 
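
A C sketch of the first-level table that __setup_mmu builds above (the extra 2 MB flash mapping continues just below): 4096 one-megabyte section descriptors mapping the address space 1:1, with the cacheable/bufferable bits set only inside an assumed 256 MB RAM window. The descriptor bits follow the constants above, slightly simplified; the real code folds the XN|U handling into the same running value.

#include <stdint.h>

#define SECT_SECTION 0x02u        /* 1 MB section descriptor           */
#define SECT_AP11    (3u << 10)   /* AP = 11: full read/write access   */
#define SECT_XN_U    0x10u        /* XN|U bits for non-RAM mappings    */

/* pgd must point at a 16 KB-aligned array of 4096 entries */
static void setup_identity_map(uint32_t *pgd, uint32_t ram_start,
                               uint32_t cb_bits /* e.g. 0x0c: C + B */)
{
    uint32_t ram_end = ram_start + 0x10000000;   /* "a reasonable RAM size" */

    for (uint32_t i = 0; i < 4096; i++) {
        uint32_t vaddr = i << 20;                /* one 1 MB section each */
        uint32_t desc  = vaddr | SECT_SECTION | SECT_AP11;

        if (vaddr >= ram_start && vaddr < ram_end)
            desc |= cb_bits;                     /* cache only the RAM window */
        else
            desc |= SECT_XN_U;

        pgd[i] = desc;                           /* 1:1 mapping */
    }
}
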
- */ - orr r1, r6, #0x04 @ ensure B is set for this - orr r1, r1, #3 << 10 - mov r2, pc - mov r2, r2, lsr #20 - orr r1, r1, r2, lsl #20 - add r0, r3, r2, lsl #2 - str r1, [r0], #4 - add r1, r1, #1048576 - str r1, [r0] - mov pc, lr -ENDPROC(__setup_mmu) - -@ Enable unaligned access on v6, to allow better code generation -@ for the decompressor C code: -__armv6_mmu_cache_on: - mrc p15, 0, r0, c1, c0, 0 @ read SCTLR - bic r0, r0, #2 @ A (no unaligned access fault) - orr r0, r0, #1 << 22 @ U (v6 unaligned access model) - mcr p15, 0, r0, c1, c0, 0 @ write SCTLR - b __armv4_mmu_cache_on - -__arm926ejs_mmu_cache_on: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ put dcache in WT mode - mcr p15, 7, r0, c15, c0, 0 -#endif - -__armv4_mmu_cache_on: - mov r12, lr -#ifdef CONFIG_MMU - mov r6, #CB_BITS | 0x12 @ U - bl __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement - orr r0, r0, #0x0030 - ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables - bl __common_mmu_cache_on - mov r0, #0 - mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs -#endif - mov pc, r12 - -__armv7_mmu_cache_on: - mov r12, lr -#ifdef CONFIG_MMU - mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0 - tst r11, #0xf @ VMSA - movne r6, #CB_BITS | 0x02 @ !XN - blne __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - tst r11, #0xf @ VMSA - mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #1 << 28 @ clear SCTLR.TRE - orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement - orr r0, r0, #0x003c @ write buffer - bic r0, r0, #2 @ A (no unaligned access fault) - orr r0, r0, #1 << 22 @ U (v6 unaligned access model) - @ (needed for ARM1176) -#ifdef CONFIG_MMU - ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables - mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg - orrne r0, r0, #1 @ MMU enabled - movne r1, #0xfffffffd @ domain 0 = client - bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 - mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer - mcrne p15, 0, r1, c3, c0, 0 @ load domain access control - mcrne p15, 0, r6, c2, c0, 2 @ load ttb control -#endif - mcr p15, 0, r0, c7, c5, 4 @ ISB - mcr p15, 0, r0, c1, c0, 0 @ load control register - mrc p15, 0, r0, c1, c0, 0 @ and read it back - mov r0, #0 - mcr p15, 0, r0, c7, c5, 4 @ ISB - mov pc, r12 - -__fa526_cache_on: - mov r12, lr - mov r6, #CB_BITS | 0x12 @ U - bl __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ Invalidate whole cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7, 0 @ flush UTLB - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #0x1000 @ I-cache enable - bl __common_mmu_cache_on - mov r0, #0 - mcr p15, 0, r0, c8, c7, 0 @ flush UTLB - mov pc, r12 - -__common_mmu_cache_on: -#ifndef CONFIG_THUMB2_KERNEL -#ifndef DEBUG - orr r0, r0, #0x000d @ Write buffer, mmu -#endif - mov r1, #-1 - mcr p15, 0, r3, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c3, c0, 0 @ load domain access control - b 1f - .align 5 @ cache line aligned -1: mcr p15, 0, r0, c1, c0, 0 @ load control register - mrc p15, 0, r0, c1, c0, 0 @ and read it back to - sub pc, lr, r0, lsr #32 @ properly flush pipeline -#endif - -#define PROC_ENTRY_SIZE (4*5) - -/* - * Here follow the relocatable cache support functions for the - * various processors. 
This is a generic hook for locating an
- * entry and jumping to an instruction at the specified offset
- * from the start of the block. Please note this is all position
- * independent code.
- *
- * r1 = corrupted
- * r2 = corrupted
- * r3 = block offset
- * r9 = corrupted
- * r12 = corrupted
- */
-
-call_cache_fn: adr r12, proc_types
-#ifdef CONFIG_CPU_CP15
- mrc p15, 0, r9, c0, c0 @ get processor ID
-#elif defined(CONFIG_CPU_V7M)
- /*
- * On v7-M the processor id is located in the V7M_SCB_CPUID
- * register, but as cache handling is IMPLEMENTATION DEFINED on
- * v7-M (if existent at all) we just return early here.
- * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
- * __armv7_mmu_cache_{on,off,flush}) would be selected which
- * use cp15 registers that are not implemented on v7-M.
- */
- bx lr
-#else
- ldr r9, =CONFIG_PROCESSOR_ID
-#endif
-1: ldr r1, [r12, #0] @ get value
- ldr r2, [r12, #4] @ get mask
- eor r1, r1, r9 @ (real ^ match)
- tst r1, r2 @ & mask
- ARM( addeq pc, r12, r3 ) @ call cache function
- THUMB( addeq r12, r3 )
- THUMB( moveq pc, r12 ) @ call cache function
- add r12, r12, #PROC_ENTRY_SIZE
- b 1b
-
-/*
- * Table for cache operations. This is basically:
- * - CPU ID match
- * - CPU ID mask
- * - 'cache on' method instruction
- * - 'cache off' method instruction
- * - 'cache flush' method instruction
- *
- * We match an entry using: ((real_id ^ match) & mask) == 0
- *
- * Writethrough caches generally only need 'on' and 'off'
- * methods. Writeback caches _must_ have the flush method
- * defined.
- */
- .align 2
- .type proc_types,#object
-proc_types:
- .word 0x41000000 @ old ARM ID
- .word 0xff00f000
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- .word 0x41007000 @ ARM7/710
- .word 0xfff8fe00
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- .word 0x41807200 @ ARM720T (writethrough)
- .word 0xffffff00
- W(b) __armv4_mmu_cache_on
- W(b) __armv4_mmu_cache_off
- mov pc, lr
- THUMB( nop )
-
- .word 0x41007400 @ ARM74x
- .word 0xff00ff00
- W(b) __armv3_mpu_cache_on
- W(b) __armv3_mpu_cache_off
- W(b) __armv3_mpu_cache_flush
-
- .word 0x41009400 @ ARM94x
- .word 0xff00ff00
- W(b) __armv4_mpu_cache_on
- W(b) __armv4_mpu_cache_off
- W(b) __armv4_mpu_cache_flush
-
- .word 0x41069260 @ ARM926EJ-S (v5TEJ)
- .word 0xff0ffff0
- W(b) __arm926ejs_mmu_cache_on
- W(b) __armv4_mmu_cache_off
- W(b) __armv5tej_mmu_cache_flush
-
- .word 0x00007000 @ ARM7 IDs
- .word 0x0000f000
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- @ Everything from here on will be the new ID system.
- - .word 0x4401a100 @ sa110 / sa1100 - .word 0xffffffe0 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x6901b110 @ sa1110 - .word 0xfffffff0 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x56056900 - .word 0xffffff00 @ PXA9xx - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x56158000 @ PXA168 - .word 0xfffff000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - - .word 0x56050000 @ Feroceon - .word 0xff0f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - -#ifdef CONFIG_CPU_FEROCEON_OLD_ID - /* this conflicts with the standard ARMv5TE entry */ - .long 0x41009260 @ Old Feroceon - .long 0xff00fff0 - b __armv4_mmu_cache_on - b __armv4_mmu_cache_off - b __armv5tej_mmu_cache_flush -#endif - - .word 0x66015261 @ FA526 - .word 0xff01fff1 - W(b) __fa526_cache_on - W(b) __armv4_mmu_cache_off - W(b) __fa526_cache_flush - - @ These match on the architecture ID - - .word 0x00020000 @ ARMv4T - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x00050000 @ ARMv5TE - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x00060000 @ ARMv5TEJ - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - - .word 0x0007b000 @ ARMv6 - .word 0x000ff000 - W(b) __armv6_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv6_mmu_cache_flush - - .word 0x000f0000 @ new CPU Id - .word 0x000f0000 - W(b) __armv7_mmu_cache_on - W(b) __armv7_mmu_cache_off - W(b) __armv7_mmu_cache_flush - - .word 0 @ unrecognised type - .word 0 - mov pc, lr - THUMB( nop ) - mov pc, lr - THUMB( nop ) - mov pc, lr - THUMB( nop ) - - .size proc_types, . - proc_types - - /* - * If you get a "non-constant expression in ".if" statement" - * error from the assembler on this line, check that you have - * not accidentally written a "b" instruction where you should - * have written W(b). - */ - .if (. - proc_types) % PROC_ENTRY_SIZE != 0 - .error "The size of one or more proc_types entries is wrong." - .endif - -/* - * Turn off the Cache and MMU. ARMv3 does not support - * reading the control register, but ARMv4 does. 
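
call_cache_fn and the proc_types table above amount to a small match/mask dispatch: compare the CPU ID against each entry under a mask, then jump through the selected slot. In C terms, with function pointers standing in for the fixed-offset branch slots (the zero/zero sentinel matches everything, like the final table entry above):

#include <stdint.h>

struct proc_type {
    uint32_t match;               /* expected CPU ID bits          */
    uint32_t mask;                /* which ID bits are significant */
    void (*cache_on)(void);
    void (*cache_off)(void);
    void (*cache_flush)(void);
};

static const struct proc_type *find_proc(const struct proc_type *table,
                                         uint32_t cpu_id)
{
    /* an entry matches when ((real_id ^ match) & mask) == 0; the
     * terminating {0, 0, ...} entry therefore matches any ID */
    while (((cpu_id ^ table->match) & table->mask) != 0)
        table++;
    return table;
}
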
- * - * On exit, - * r0, r1, r2, r3, r9, r12 corrupted - * This routine must preserve: - * r4, r7, r8 - */ - .align 5 -cache_off: mov r3, #12 @ cache_off function - b call_cache_fn - -__armv4_mpu_cache_off: - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0 @ turn MPU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c6, 0 @ flush D-Cache - mcr p15, 0, r0, c7, c5, 0 @ flush I-Cache - mov pc, lr - -__armv3_mpu_cache_off: - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0, 0 @ turn MPU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - mov pc, lr - -__armv4_mmu_cache_off: -#ifdef CONFIG_MMU - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0 @ turn MMU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4 - mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4 -#endif - mov pc, lr - -__armv7_mmu_cache_off: - mrc p15, 0, r0, c1, c0 -#ifdef CONFIG_MMU - bic r0, r0, #0x0005 -#else - bic r0, r0, #0x0004 -#endif - mcr p15, 0, r0, c1, c0 @ turn MMU and cache off - mov r12, lr - bl __armv7_mmu_cache_flush - mov r0, #0 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB -#endif - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC - mcr p15, 0, r0, c7, c10, 4 @ DSB - mcr p15, 0, r0, c7, c5, 4 @ ISB - mov pc, r12 - -/* - * Clean and flush the cache to maintain consistency. - * - * On exit, - * r1, r2, r3, r9, r10, r11, r12 corrupted - * This routine must preserve: - * r4, r6, r7, r8 - */ - .align 5 -cache_clean_flush: - mov r3, #16 - b call_cache_fn - -__armv4_mpu_cache_flush: - tst r4, #1 - movne pc, lr - mov r2, #1 - mov r3, #0 - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - mov r1, #7 << 5 @ 8 segments -1: orr r3, r1, #63 << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - - teq r2, #0 - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr - -__fa526_cache_flush: - tst r4, #1 - movne pc, lr - mov r1, #0 - mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache - mcr p15, 0, r1, c7, c5, 0 @ flush I cache - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mov pc, lr - -__armv6_mmu_cache_flush: - mov r1, #0 - tst r4, #1 - mcreq p15, 0, r1, c7, c14, 0 @ clean+invalidate D - mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB - mcreq p15, 0, r1, c7, c15, 0 @ clean+invalidate unified - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mov pc, lr - -__armv7_mmu_cache_flush: - tst r4, #1 - bne iflush - mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1 - tst r10, #0xf << 16 @ hierarchical cache (ARMv7) - mov r10, #0 - beq hierarchical - mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D - b iflush -hierarchical: - mcr p15, 0, r10, c7, c10, 5 @ DMB - stmfd sp!, {r0-r7, r9-r11} - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - mcr p15, 0, r10, c7, c5, 4 @ isb 
to sync the new cssr&csidr
- mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
- and r2, r1, #7 @ extract the length of the cache lines
- add r2, r2, #4 @ add 4 (line length offset)
- ldr r4, =0x3ff
- ands r4, r4, r1, lsr #3 @ find maximum number of the way size
- clz r5, r4 @ find bit position of way size increment
- ldr r7, =0x7fff
- ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
- mov r9, r4 @ create working copy of max way size
-loop3:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r9, r5 )
- THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- THUMB( lsl r6, r7, r2 )
- THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
- bge loop3
- subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
- add r10, r10, #2 @ increment cache number
- cmp r3, r10
- bgt loop1
-finished:
- ldmfd sp!, {r0-r7, r9-r11}
- mov r10, #0 @ switch back to cache level 0
- mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
-iflush:
- mcr p15, 0, r10, c7, c10, 4 @ DSB
- mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
- mcr p15, 0, r10, c7, c10, 4 @ DSB
- mcr p15, 0, r10, c7, c5, 4 @ ISB
- mov pc, lr
-
-__armv5tej_mmu_cache_flush:
- tst r4, #1
- movne pc, lr
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
- bne 1b
- mcr p15, 0, r0, c7, c5, 0 @ flush I cache
- mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
-
-__armv4_mmu_cache_flush:
- tst r4, #1
- movne pc, lr
- mov r2, #64*1024 @ default: 32K dcache size (*2)
- mov r11, #32 @ default: 32 byte line size
- mrc p15, 0, r3, c0, c0, 1 @ read cache type
- teq r3, r9 @ cache ID register present?
- beq no_cache_id
- mov r1, r3, lsr #18
- and r1, r1, #7
- mov r2, #1024
- mov r2, r2, lsl r1 @ base dcache size *2
- tst r3, #1 << 14 @ test M bit
- addne r2, r2, r2, lsr #1 @ +1/2 size if M == 1
- mov r3, r3, lsr #12
- and r3, r3, #3
- mov r11, #8
- mov r11, r11, lsl r3 @ cache line size in bytes
-no_cache_id:
- mov r1, pc
- bic r1, r1, #63 @ align to longest cache line
- add r2, r1, r2
-1:
- ARM( ldr r3, [r1], r11 ) @ s/w flush D cache
- THUMB( ldr r3, [r1] ) @ s/w flush D cache
- THUMB( add r1, r1, r11 )
- teq r1, r2
- bne 1b
-
- mcr p15, 0, r1, c7, c5, 0 @ flush I cache
- mcr p15, 0, r1, c7, c6, 0 @ flush D cache
- mcr p15, 0, r1, c7, c10, 4 @ drain WB
- mov pc, lr
-
-__armv3_mmu_cache_flush:
-__armv3_mpu_cache_flush:
- tst r4, #1
- movne pc, lr
- mov r1, #0
- mcr p15, 0, r1, c7, c0, 0 @ invalidate whole cache v3
- mov pc, lr
-
-/*
- * Various debugging routines for printing hex characters and
- * memory, which again must be relocatable.
- */
-#ifdef DEBUG
- .align 2
- .type phexbuf,#object
-phexbuf: .space 12
- .size phexbuf, .
- phexbuf - -@ phex corrupts {r0, r1, r2, r3} -phex: adr r3, phexbuf - mov r2, #0 - strb r2, [r3, r1] -1: subs r1, r1, #1 - movmi r0, r3 - bmi puts - and r2, r0, #15 - mov r0, r0, lsr #4 - cmp r2, #10 - addge r2, r2, #7 - add r2, r2, #'0' - strb r2, [r3, r1] - b 1b - -@ puts corrupts {r0, r1, r2, r3} -puts: loadsp r3, r2, r1 -1: ldrb r2, [r0], #1 - teq r2, #0 - moveq pc, lr -2: writeb r2, r3 - mov r1, #0x00020000 -3: subs r1, r1, #1 - bne 3b - teq r2, #'\n' - moveq r2, #'\r' - beq 2b - teq r0, #0 - bne 1b - mov pc, lr -@ putc corrupts {r0, r1, r2, r3} -putc: - mov r2, r0 - loadsp r3, r1, r0 - mov r0, #0 - b 2b - -@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr} -memdump: mov r12, r0 - mov r10, lr - mov r11, #0 -2: mov r0, r11, lsl #2 - add r0, r0, r12 - mov r1, #8 - bl phex - mov r0, #':' - bl putc -1: mov r0, #' ' - bl putc - ldr r0, [r12, r11, lsl #2] - mov r1, #8 - bl phex - and r0, r11, #7 - teq r0, #3 - moveq r0, #' ' - bleq putc - and r0, r11, #7 - add r11, r11, #1 - teq r0, #7 - bne 1b - mov r0, #'\n' - bl putc - cmp r11, #64 - blt 2b - mov pc, r10 -#endif - - .ltorg - -#ifdef CONFIG_ARM_VIRT_EXT -.align 5 -__hyp_reentry_vectors: - W(b) . @ reset - W(b) . @ undef - W(b) . @ svc - W(b) . @ pabort - W(b) . @ dabort - W(b) __enter_kernel @ hyp - W(b) . @ irq - W(b) . @ fiq -#endif /* CONFIG_ARM_VIRT_EXT */ - -__enter_kernel: - mov r0, #0 @ must be 0 - mov r1, r7 @ restore architecture number - mov r2, r8 @ restore atags pointer - ARM( mov pc, r4 ) @ call kernel - M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class - THUMB( bx r4 ) @ entry point is always ARM for A/R classes - -reloc_code_end: - -#ifdef CONFIG_EFI_STUB - .align 2 -_start: .long start - . - -ENTRY(efi_stub_entry) - @ allocate space on stack for passing current zImage address - @ and for the EFI stub to return of new entry point of - @ zImage, as EFI stub may copy the kernel. Pointer address - @ is passed in r2. r0 and r1 are passed through from the - @ EFI firmware to efi_entry - adr ip, _start - ldr r3, [ip] - add r3, r3, ip - stmfd sp!, {r3, lr} - mov r2, sp @ pass zImage address in r2 - bl efi_entry - - @ Check for error return from EFI stub. r0 has FDT address - @ or error code. - cmn r0, #1 - beq efi_load_fail - - @ Preserve return value of efi_entry() in r4 - mov r4, r0 - - @ our cache maintenance code relies on CP15 barrier instructions - @ but since we arrived here with the MMU and caches configured - @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. - @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in - @ the enable path will be executed on v7+ only. - mrc p15, 0, r1, c1, c0, 0 @ read SCTLR - tst r1, #(1 << 5) @ CP15BEN bit set? - bne 0f - orr r1, r1, #(1 << 5) @ CP15 barrier instructions - mcr p15, 0, r1, c1, c0, 0 @ write SCTLR - ARM( .inst 0xf57ff06f @ v7+ isb ) - THUMB( isb ) - -0: bl cache_clean_flush - bl cache_off - - @ Set parameters for booting zImage according to boot protocol - @ put FDT address in r2, it was returned by efi_entry() - @ r1 is the machine type, and r0 needs to be 0 - mov r0, #0 - mov r1, #0xFFFFFFFF - mov r2, r4 - - @ Branch to (possibly) relocated zImage that is in [sp] - ldr lr, [sp] - ldr ip, =start_offset - add lr, lr, ip - mov pc, lr @ no mode switch - -efi_load_fail: - @ Return EFI_LOAD_ERROR to EFI firmware on error. 
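
Looking back at the hierarchical loop1/loop2/loop3 flush in the previous hunk: for each data or unified level named in CLIDR it reads CCSIDR and issues a clean+invalidate for every set/way pair. The field arithmetic, extracted into C as a sketch (the cp15 write is abstracted as a callback, and __builtin_clz is the GCC/Clang counterpart of the clz instruction used above):

#include <stdint.h>

static void clean_dcache_level(uint32_t level, uint32_t ccsidr,
                               void (*dccisw)(uint32_t))
{
    uint32_t line_shift = (ccsidr & 7) + 4;          /* log2(line bytes) */
    uint32_t max_way    = (ccsidr >> 3) & 0x3ff;     /* ways - 1         */
    uint32_t max_set    = (ccsidr >> 13) & 0x7fff;   /* sets - 1         */
    unsigned way_shift  = max_way ? __builtin_clz(max_way) : 0;

    for (uint32_t way = 0; way <= max_way; way++)
        for (uint32_t set = 0; set <= max_set; set++)
            /* set/way operand: way, set and level each in their field */
            dccisw((way << way_shift) | (set << line_shift) | (level << 1));
}
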
- ldr r0, =0x80000001 - ldmfd sp!, {ip, pc} -ENDPROC(efi_stub_entry) -#endif - - .align - .section ".stack", "aw", %nobits -.L_user_stack: .space 4096 -.L_user_stack_end: diff --git a/arch/arm/boot/compressed/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S deleted file mode 100644 index 1ec8cb2898b1c368790d611371cbcf87701dc19b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/ll_char_wr.S +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ll_char_wr.S - * - * Copyright (C) 1995, 1996 Russell King. - * - * Speedups & 1bpp code (C) 1996 Philip Blundell & Russell King. - * - * 10-04-96 RMK Various cleanups & reduced register usage. - * 08-04-98 RMK Shifts re-ordered - */ - -@ Regs: [] = corruptible -@ {} = used -@ () = do not use - -#include -#include - .text - -LC0: .word LC0 - .word bytes_per_char_h - .word video_size_row - .word acorndata_8x8 - .word con_charconvtable - -/* - * r0 = ptr - * r1 = char - * r2 = white - */ -ENTRY(ll_write_char) - stmfd sp!, {r4 - r7, lr} -@ -@ Smashable regs: {r0 - r3}, [r4 - r7], (r8 - fp), [ip], (sp), [lr], (pc) -@ - /* - * calculate offset into character table - */ - mov r1, r1, lsl #3 - /* - * calculate offset required for each row. - */ - adr ip, LC0 - ldmia ip, {r3, r4, r5, r6, lr} - sub ip, ip, r3 - add r6, r6, ip - add lr, lr, ip - ldr r4, [r4, ip] - ldr r5, [r5, ip] - /* - * Go to resolution-dependent routine... - */ - cmp r4, #4 - blt Lrow1bpp - add r0, r0, r5, lsl #3 @ Move to bottom of character - orr r1, r1, #7 - ldrb r7, [r6, r1] - teq r4, #8 - beq Lrow8bpplp -@ -@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) -@ -Lrow4bpplp: - ldr r7, [lr, r7, lsl #2] - mul r7, r2, r7 - sub r1, r1, #1 @ avoid using r7 directly after - str r7, [r0, -r5]! - ldrb r7, [r6, r1] - ldr r7, [lr, r7, lsl #2] - mul r7, r2, r7 - tst r1, #7 @ avoid using r7 directly after - str r7, [r0, -r5]! 
- subne r1, r1, #1 - ldrbne r7, [r6, r1] - bne Lrow4bpplp - ldmfd sp!, {r4 - r7, pc} - -@ -@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) -@ -Lrow8bpplp: - mov ip, r7, lsr #4 - ldr ip, [lr, ip, lsl #2] - mul r4, r2, ip - and ip, r7, #15 @ avoid r4 - ldr ip, [lr, ip, lsl #2] @ avoid r4 - mul ip, r2, ip @ avoid r4 - sub r1, r1, #1 @ avoid ip - sub r0, r0, r5 @ avoid ip - stmia r0, {r4, ip} - ldrb r7, [r6, r1] - mov ip, r7, lsr #4 - ldr ip, [lr, ip, lsl #2] - mul r4, r2, ip - and ip, r7, #15 @ avoid r4 - ldr ip, [lr, ip, lsl #2] @ avoid r4 - mul ip, r2, ip @ avoid r4 - tst r1, #7 @ avoid ip - sub r0, r0, r5 @ avoid ip - stmia r0, {r4, ip} - subne r1, r1, #1 - ldrbne r7, [r6, r1] - bne Lrow8bpplp - ldmfd sp!, {r4 - r7, pc} - -@ -@ Smashable regs: {r0 - r3}, [r4], {r5, r6}, [r7], (r8 - fp), [ip], (sp), [lr], (pc) -@ -Lrow1bpp: - add r6, r6, r1 - ldmia r6, {r4, r7} - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - ldmfd sp!, {r4 - r7, pc} - - .bss -ENTRY(con_charconvtable) - .space 1024 diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S deleted file mode 100644 index 0284f84dcf38049f643c144d4f86dd76df88fabe..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/piggy.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy_data" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S deleted file mode 100644 index 51b078604978b9f4296dca4c58819304ef464997..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2000 Russell King - */ - -#ifdef CONFIG_CPU_ENDIAN_BE8 -#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \ - (((x) >> 8) & 0x0000ff00) | \ - (((x) << 8) & 0x00ff0000) | \ - (((x) << 24) & 0xff000000) ) -#else -#define ZIMAGE_MAGIC(x) (x) -#endif - -OUTPUT_ARCH(arm) -ENTRY(_start) -SECTIONS -{ - /DISCARD/ : { - *(.ARM.exidx*) - *(.ARM.extab*) - /* - * Discard any r/w data - this produces a link error if we have any, - * which is required for PIC decompression. Local data generates - * GOTOFF relocations, which prevents it being relocated independently - * of the text/got segments. - */ - *(.data) - } - - . = TEXT_START; - _text = .; - - .text : { - _start = .; - *(.start) - *(.text) - *(.text.*) - *(.fixup) - *(.gnu.warning) - *(.glue_7t) - *(.glue_7) - } - .table : ALIGN(4) { - _table_start = .; - LONG(ZIMAGE_MAGIC(4)) - LONG(ZIMAGE_MAGIC(0x5a534c4b)) - LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) - LONG(ZIMAGE_MAGIC(_kernel_bss_size)) - LONG(0) - _table_end = .; - } - .rodata : { - *(.rodata) - *(.rodata.*) - *(.data.rel.ro) - } - .piggydata : { - *(.piggydata) - __piggy_size_addr = . - 4; - } - - . = ALIGN(4); - _etext = .; - - .got.plt : { *(.got.plt) } - _got_start = .; - .got : { *(.got) } - _got_end = .; - - /* ensure the zImage file size is always a multiple of 64 bits */ - /* (without a dummy byte, ld just ignores the empty section) */ - .pad : { BYTE(0); . 
= ALIGN(8); } - -#ifdef CONFIG_EFI_STUB - .data : ALIGN(4096) { - __pecoff_data_start = .; - /* - * The EFI stub always executes from RAM, and runs strictly before the - * decompressor, so we can make an exception for its r/w data, and keep it - */ - *(.data.efistub) - __pecoff_data_end = .; - - /* - * PE/COFF mandates a file size which is a multiple of 512 bytes if the - * section size equals or exceeds 4 KB - */ - . = ALIGN(512); - } - __pecoff_data_rawsize = . - ADDR(.data); -#endif - - _edata = .; - - /* - * The image_end section appears after any additional loadable sections - * that the linker may decide to insert in the binary image. Having - * this symbol allows further debug in the near future. - */ - .image_end (NOLOAD) : { - /* - * EFI requires that the image is aligned to 512 bytes, and appended - * DTB requires that we know where the end of the image is. Ensure - * that both are satisfied by ensuring that there are no additional - * sections emitted into the decompressor image. - */ - _edata_real = .; - } - - _magic_sig = ZIMAGE_MAGIC(0x016f2818); - _magic_start = ZIMAGE_MAGIC(_start); - _magic_end = ZIMAGE_MAGIC(_edata); - _magic_table = ZIMAGE_MAGIC(_table_start - _start); - - . = BSS_START; - __bss_start = .; - .bss : { *(.bss) } - _end = .; - - . = ALIGN(8); /* the stack must be 64-bit aligned */ - .stack : { *(.stack) } - - PROVIDE(__pecoff_data_size = ALIGN(512) - ADDR(.data)); - PROVIDE(__pecoff_end = ALIGN(512)); - - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} -ASSERT(_edata_real == _edata, "error: zImage file size is incorrect"); diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh old mode 100755 new mode 100644 diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts deleted file mode 120000 index c2f22fc3381107322545a350fa5b9620ba8647af..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts new file mode 100644 index 0000000000000000000000000000000000000000..c2f22fc3381107322545a350fa5b9620ba8647af --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts @@ -0,0 +1 @@ +sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts deleted file mode 120000 index c2f22fc3381107322545a350fa5b9620ba8647af..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts new file mode 100644 index 0000000000000000000000000000000000000000..c2f22fc3381107322545a350fa5b9620ba8647af --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts @@ -0,0 +1 @@ +sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts deleted file mode 120000 index 4519fd791a8f9077bfb769c88027b0b0df47f627..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts 
+++ /dev/null @@ -1 +0,0 @@ -sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts new file mode 100644 index 0000000000000000000000000000000000000000..4519fd791a8f9077bfb769c88027b0b0df47f627 --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts @@ -0,0 +1 @@ +sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts deleted file mode 120000 index 4519fd791a8f9077bfb769c88027b0b0df47f627..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts new file mode 100644 index 0000000000000000000000000000000000000000..4519fd791a8f9077bfb769c88027b0b0df47f627 --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts @@ -0,0 +1 @@ +sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S deleted file mode 100644 index 291d969bc719cdd77c74c5f3e1735f7d0fe56a1d..0000000000000000000000000000000000000000 --- a/arch/arm/common/mcpm_head.S +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM - * - * Created by: Nicolas Pitre, March 2012 - * Copyright: (C) 2012-2013 Linaro Limited - * - * Refer to Documentation/arm/cluster-pm-race-avoidance.rst - * for details of the synchronisation algorithms used here. - */ - -#include -#include -#include - -#include "vlock.h" - -.if MCPM_SYNC_CLUSTER_CPUS -.error "cpus must be the first member of struct mcpm_sync_struct" -.endif - - .macro pr_dbg string -#if defined(CONFIG_DEBUG_LL) && defined(DEBUG) - b 1901f -1902: .asciz "CPU" -1903: .asciz " cluster" -1904: .asciz ": \string" - .align -1901: adr r0, 1902b - bl printascii - mov r0, r9 - bl printhex2 - adr r0, 1903b - bl printascii - mov r0, r10 - bl printhex2 - adr r0, 1904b - bl printascii -#endif - .endm - - .arm - .align - -ENTRY(mcpm_entry_point) - - ARM_BE8(setend be) - THUMB( badr r12, 1f ) - THUMB( bx r12 ) - THUMB( .thumb ) -1: - mrc p15, 0, r0, c0, c0, 5 @ MPIDR - ubfx r9, r0, #0, #8 @ r9 = cpu - ubfx r10, r0, #8, #8 @ r10 = cluster - mov r3, #MAX_CPUS_PER_CLUSTER - mla r4, r3, r10, r9 @ r4 = canonical CPU index - cmp r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS) - blo 2f - - /* We didn't expect this CPU. Try to cheaply make it quiet. */ -1: wfi - wfe - b 1b - -2: pr_dbg "kernel mcpm_entry_point\n" - - /* - * MMU is off so we need to get to various variables in a - * position independent way. 
- */ - adr r5, 3f - ldmia r5, {r0, r6, r7, r8, r11} - add r0, r5, r0 @ r0 = mcpm_entry_early_pokes - add r6, r5, r6 @ r6 = mcpm_entry_vectors - ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys - add r8, r5, r8 @ r8 = mcpm_sync - add r11, r5, r11 @ r11 = first_man_locks - - @ Perform an early poke, if any - add r0, r0, r4, lsl #3 - ldmia r0, {r0, r1} - teq r0, #0 - strne r1, [r0] - - mov r0, #MCPM_SYNC_CLUSTER_SIZE - mla r8, r0, r10, r8 @ r8 = sync cluster base - - @ Signal that this CPU is coming UP: - mov r0, #CPU_COMING_UP - mov r5, #MCPM_SYNC_CPU_SIZE - mla r5, r9, r5, r8 @ r5 = sync cpu address - strb r0, [r5] - - @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN - @ state, because there is at least one active CPU (this CPU). - - mov r0, #VLOCK_SIZE - mla r11, r0, r10, r11 @ r11 = cluster first man lock - mov r0, r11 - mov r1, r9 @ cpu - bl vlock_trylock @ implies DMB - - cmp r0, #0 @ failed to get the lock? - bne mcpm_setup_wait @ wait for cluster setup if so - - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_UP @ cluster already up? - bne mcpm_setup @ if not, set up the cluster - - @ Otherwise, release the first man lock and skip setup: - mov r0, r11 - bl vlock_unlock - b mcpm_setup_complete - -mcpm_setup: - @ Control dependency implies strb not observable before previous ldrb. - - @ Signal that the cluster is being brought up: - mov r0, #INBOUND_COMING_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] - dmb - - @ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this - @ point onwards will observe INBOUND_COMING_UP and abort. - - @ Wait for any previously-pending cluster teardown operations to abort - @ or complete: -mcpm_teardown_wait: - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_GOING_DOWN - bne first_man_setup - wfe - b mcpm_teardown_wait - -first_man_setup: - dmb - - @ If the outbound gave up before teardown started, skip cluster setup: - - cmp r0, #CLUSTER_UP - beq mcpm_setup_leave - - @ power_up_setup is now responsible for setting up the cluster: - - cmp r7, #0 - mov r0, #1 @ second (cluster) affinity level - blxne r7 @ Call power_up_setup if defined - dmb - - mov r0, #CLUSTER_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - dmb - -mcpm_setup_leave: - @ Leave the cluster setup critical section: - - mov r0, #INBOUND_NOT_COMING_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] - dsb st - sev - - mov r0, r11 - bl vlock_unlock @ implies DMB - b mcpm_setup_complete - - @ In the contended case, non-first men wait here for cluster setup - @ to complete: -mcpm_setup_wait: - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_UP - wfene - bne mcpm_setup_wait - dmb - -mcpm_setup_complete: - @ If a platform-specific CPU setup hook is needed, it is - @ called from here. - - cmp r7, #0 - mov r0, #0 @ first (CPU) affinity level - blxne r7 @ Call power_up_setup if defined - dmb - - @ Mark the CPU as up: - - mov r0, #CPU_UP - strb r0, [r5] - - @ Observability order of CPU_UP and opening of the gate does not matter. - -mcpm_entry_gated: - ldr r5, [r6, r4, lsl #2] @ r5 = CPU entry vector - cmp r5, #0 - wfeeq - beq mcpm_entry_gated - dmb - - pr_dbg "released\n" - bx r5 - - .align 2 - -3: .word mcpm_entry_early_pokes - . 
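The literal pool split across this point (.word symbol - . above, .word symbol - 3b below) stores link-time offsets rather than absolute addresses: with the MMU off, the code may execute at a physical address different from its link address, so the entry code rebuilds each pointer by adding the pool's own run-time address (obtained with adr) to the stored offset. The same computation in C, with names invented for illustration:

    /* pool[i] holds 'symbol - pool', so run-time address = pool base + pool[i] */
    static inline void *pic_resolve_sketch(const long *pool, int i)
    {
        return (char *)pool + pool[i];
    }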
- .word mcpm_entry_vectors - 3b - .word mcpm_power_up_setup_phys - 3b - .word mcpm_sync - 3b - .word first_man_locks - 3b - -ENDPROC(mcpm_entry_point) - - .bss - - .align CACHE_WRITEBACK_ORDER - .type first_man_locks, #object -first_man_locks: - .space VLOCK_SIZE * MAX_NR_CLUSTERS - .align CACHE_WRITEBACK_ORDER - - .type mcpm_entry_vectors, #object -ENTRY(mcpm_entry_vectors) - .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER - - .type mcpm_entry_early_pokes, #object -ENTRY(mcpm_entry_early_pokes) - .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER - - .type mcpm_power_up_setup_phys, #object -ENTRY(mcpm_power_up_setup_phys) - .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/common/secure_cntvoff.S b/arch/arm/common/secure_cntvoff.S deleted file mode 100644 index 53fc7bdb6c2e191440cce900818e2a6fffb687e5..0000000000000000000000000000000000000000 --- a/arch/arm/common/secure_cntvoff.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014 Renesas Electronics Corporation - * - * Initialization of CNTVOFF register from secure mode - * - */ - -#include -#include - -ENTRY(secure_cntvoff_init) - .arch armv7-a - /* - * CNTVOFF has to be initialized either from non-secure Hypervisor - * mode or secure Monitor mode with SCR.NS==1. If TrustZone is enabled - * then it should be handled by the secure code. The CPU must implement - * the virtualization extensions. - */ - cps #MON_MODE - mrc p15, 0, r1, c1, c1, 0 /* Get Secure Config */ - orr r0, r1, #1 - mcr p15, 0, r0, c1, c1, 0 /* Set Non Secure bit */ - isb - mov r0, #0 - mcrr p15, 4, r0, r0, c14 /* CNTVOFF = 0 */ - isb - mcr p15, 0, r1, c1, c1, 0 /* Set Secure bit */ - isb - cps #SVC_MODE - ret lr -ENDPROC(secure_cntvoff_init) diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S deleted file mode 100644 index f1c7fd44f1b10bc7f08787765e4e57f94e0789de..0000000000000000000000000000000000000000 --- a/arch/arm/common/vlock.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * vlock.S - simple voting lock implementation for ARM - * - * Created by: Dave Martin, 2012-08-16 - * Copyright: (C) 2012-2013 Linaro Limited - * - * This algorithm is described in more detail in - * Documentation/arm/vlocks.rst. - */ - -#include -#include "vlock.h" - -/* Select different code if voting flags can fit in a single word. */ -#if VLOCK_VOTING_SIZE > 4 -#define FEW(x...) -#define MANY(x...) x -#else -#define FEW(x...) x -#define MANY(x...) -#endif - -@ voting lock for first-man coordination - -.macro voting_begin rbase:req, rcpu:req, rscratch:req - mov \rscratch, #1 - strb \rscratch, [\rbase, \rcpu] - dmb -.endm - -.macro voting_end rbase:req, rcpu:req, rscratch:req - dmb - mov \rscratch, #0 - strb \rscratch, [\rbase, \rcpu] - dsb st - sev -.endm - -/* - * The vlock structure must reside in Strongly-Ordered or Device memory. - * This implementation deliberately eliminates most of the barriers which - * would be required for other memory types, and assumes that independent - * writes to neighbouring locations within a cacheline do not interfere - * with one another. - */ - -@ r0: lock structure base -@ r1: CPU ID (0-based index within cluster) -ENTRY(vlock_trylock) - add r1, r1, #VLOCK_VOTING_OFFSET - - voting_begin r0, r1, r2 - - ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held - cmp r2, #VLOCK_OWNER_NONE - bne trylock_fail @ fail if so - - @ Control dependency implies strb not observable before previous ldrb. 
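Before the vote submission below, it may help to see the whole trylock protocol in one place. A C rendering of the algorithm from Documentation/arm/vlocks.rst (the types, OWNER_NONE value, and barrier macros are illustrative stand-ins; the real code also biases CPU numbers by VLOCK_VOTING_OFFSET so that zero can mean "unowned"):

    #define DMB_SKETCH()      __sync_synchronize()  /* stands in for dmb */
    #define DSB_SEV_SKETCH()  __sync_synchronize()  /* stands in for dsb st; sev */
    #define OWNER_NONE_SKETCH 0

    /* Returns 0 if this CPU took the lock, nonzero otherwise. */
    static int vlock_trylock_sketch(volatile unsigned char *owner,
                                    volatile unsigned char *voting,
                                    unsigned ncpus, unsigned char me)
    {
        voting[me] = 1;                         /* voting_begin */
        DMB_SKETCH();
        if (*owner != OWNER_NONE_SKETCH) {      /* lock held: withdraw */
            voting[me] = 0;                     /* voting_end */
            DSB_SEV_SKETCH();
            return 1;
        }
        *owner = me;                            /* submit my vote */
        voting[me] = 0;                         /* voting_end */
        DSB_SEV_SKETCH();
        for (unsigned i = 0; i < ncpus; i++)    /* wait for the round to end */
            while (voting[i])
                ;                               /* wfe in the real code */
        DMB_SKETCH();
        return *owner != me;                    /* zero iff my vote stuck */
    }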
- - strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote - - voting_end r0, r1, r2 @ implies DMB - - @ Wait for the current round of voting to finish: - - MANY( mov r3, #VLOCK_VOTING_OFFSET ) -0: - MANY( ldr r2, [r0, r3] ) - FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] ) - cmp r2, #0 - wfene - bne 0b - MANY( add r3, r3, #4 ) - MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE ) - MANY( bne 0b ) - - @ Check who won: - - dmb - ldrb r2, [r0, #VLOCK_OWNER_OFFSET] - eor r0, r1, r2 @ zero if I won, else nonzero - bx lr - -trylock_fail: - voting_end r0, r1, r2 - mov r0, #1 @ nonzero indicates that I lost - bx lr -ENDPROC(vlock_trylock) - -@ r0: lock structure base -ENTRY(vlock_unlock) - dmb - mov r1, #VLOCK_OWNER_NONE - strb r1, [r0, #VLOCK_OWNER_OFFSET] - dsb st - sev - bx lr -ENDPROC(vlock_unlock) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S deleted file mode 100644 index 312428d83eedb2aacd5380bd1e1d196133668ee6..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-ce-core.S +++ /dev/null @@ -1,713 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a - .fpu crypto-neon-fp-armv8 - .align 3 - - .macro enc_round, state, key - aese.8 \state, \key - aesmc.8 \state, \state - .endm - - .macro dec_round, state, key - aesd.8 \state, \key - aesimc.8 \state, \state - .endm - - .macro enc_dround, key1, key2 - enc_round q0, \key1 - enc_round q0, \key2 - .endm - - .macro dec_dround, key1, key2 - dec_round q0, \key1 - dec_round q0, \key2 - .endm - - .macro enc_fround, key1, key2, key3 - enc_round q0, \key1 - aese.8 q0, \key2 - veor q0, q0, \key3 - .endm - - .macro dec_fround, key1, key2, key3 - dec_round q0, \key1 - aesd.8 q0, \key2 - veor q0, q0, \key3 - .endm - - .macro enc_dround_4x, key1, key2 - enc_round q0, \key1 - enc_round q1, \key1 - enc_round q2, \key1 - enc_round q3, \key1 - enc_round q0, \key2 - enc_round q1, \key2 - enc_round q2, \key2 - enc_round q3, \key2 - .endm - - .macro dec_dround_4x, key1, key2 - dec_round q0, \key1 - dec_round q1, \key1 - dec_round q2, \key1 - dec_round q3, \key1 - dec_round q0, \key2 - dec_round q1, \key2 - dec_round q2, \key2 - dec_round q3, \key2 - .endm - - .macro enc_fround_4x, key1, key2, key3 - enc_round q0, \key1 - enc_round q1, \key1 - enc_round q2, \key1 - enc_round q3, \key1 - aese.8 q0, \key2 - aese.8 q1, \key2 - aese.8 q2, \key2 - aese.8 q3, \key2 - veor q0, q0, \key3 - veor q1, q1, \key3 - veor q2, q2, \key3 - veor q3, q3, \key3 - .endm - - .macro dec_fround_4x, key1, key2, key3 - dec_round q0, \key1 - dec_round q1, \key1 - dec_round q2, \key1 - dec_round q3, \key1 - aesd.8 q0, \key2 - aesd.8 q1, \key2 - aesd.8 q2, \key2 - aesd.8 q3, \key2 - veor q0, q0, \key3 - veor q1, q1, \key3 - veor q2, q2, \key3 - veor q3, q3, \key3 - .endm - - .macro do_block, dround, fround - cmp r3, #12 @ which key size? - vld1.32 {q10-q11}, [ip]! - \dround q8, q9 - vld1.32 {q12-q13}, [ip]! - \dround q10, q11 - vld1.32 {q10-q11}, [ip]! - \dround q12, q13 - vld1.32 {q12-q13}, [ip]! - \dround q10, q11 - blo 0f @ AES-128: 10 rounds - vld1.32 {q10-q11}, [ip]! - \dround q12, q13 - beq 1f @ AES-192: 12 rounds - vld1.32 {q12-q13}, [ip] - \dround q10, q11 -0: \fround q12, q13, q14 - bx lr - -1: \fround q10, q11, q14 - bx lr - .endm - - /* - * Internal, non-AAPCS compliant functions that implement the core AES - * transforms. 
These should preserve all registers except q0 - q2 and ip - * Arguments: - * q0 : first in/output block - * q1 : second in/output block (_4x version only) - * q2 : third in/output block (_4x version only) - * q3 : fourth in/output block (_4x version only) - * q8 : first round key - * q9 : second round key - * q14 : final round key - * r2 : address of round key array - * r3 : number of rounds - */ - .align 6 -aes_encrypt: - add ip, r2, #32 @ 3rd round key -.Laes_encrypt_tweak: - do_block enc_dround, enc_fround -ENDPROC(aes_encrypt) - - .align 6 -aes_decrypt: - add ip, r2, #32 @ 3rd round key - do_block dec_dround, dec_fround -ENDPROC(aes_decrypt) - - .align 6 -aes_encrypt_4x: - add ip, r2, #32 @ 3rd round key - do_block enc_dround_4x, enc_fround_4x -ENDPROC(aes_encrypt_4x) - - .align 6 -aes_decrypt_4x: - add ip, r2, #32 @ 3rd round key - do_block dec_dround_4x, dec_fround_4x -ENDPROC(aes_decrypt_4x) - - .macro prepare_key, rk, rounds - add ip, \rk, \rounds, lsl #4 - vld1.32 {q8-q9}, [\rk] @ load first 2 round keys - vld1.32 {q14}, [ip] @ load last round key - .endm - - /* - * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks) - * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks) - */ -ENTRY(ce_aes_ecb_encrypt) - push {r4, lr} - ldr r4, [sp, #8] - prepare_key r2, r3 -.Lecbencloop4x: - subs r4, r4, #4 - bmi .Lecbenc1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - bl aes_encrypt_4x - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lecbencloop4x -.Lecbenc1x: - adds r4, r4, #4 - beq .Lecbencout -.Lecbencloop: - vld1.8 {q0}, [r1]! - bl aes_encrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lecbencloop -.Lecbencout: - pop {r4, pc} -ENDPROC(ce_aes_ecb_encrypt) - -ENTRY(ce_aes_ecb_decrypt) - push {r4, lr} - ldr r4, [sp, #8] - prepare_key r2, r3 -.Lecbdecloop4x: - subs r4, r4, #4 - bmi .Lecbdec1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - bl aes_decrypt_4x - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lecbdecloop4x -.Lecbdec1x: - adds r4, r4, #4 - beq .Lecbdecout -.Lecbdecloop: - vld1.8 {q0}, [r1]! - bl aes_decrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lecbdecloop -.Lecbdecout: - pop {r4, pc} -ENDPROC(ce_aes_ecb_decrypt) - - /* - * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 iv[]) - */ -ENTRY(ce_aes_cbc_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q0}, [r5] - prepare_key r2, r3 -.Lcbcencloop: - vld1.8 {q1}, [r1]! @ get next pt block - veor q0, q0, q1 @ ..and xor with iv - bl aes_encrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lcbcencloop - vst1.8 {q0}, [r5] - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_encrypt) - -ENTRY(ce_aes_cbc_decrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q15}, [r5] @ keep iv in q15 - prepare_key r2, r3 -.Lcbcdecloop4x: - subs r4, r4, #4 - bmi .Lcbcdec1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - vmov q4, q0 - vmov q5, q1 - vmov q6, q2 - vmov q7, q3 - bl aes_decrypt_4x - veor q0, q0, q15 - veor q1, q1, q4 - veor q2, q2, q5 - veor q3, q3, q6 - vmov q15, q7 - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lcbcdecloop4x -.Lcbcdec1x: - adds r4, r4, #4 - beq .Lcbcdecout - vmov q6, q14 @ preserve last round key -.Lcbcdecloop: - vld1.8 {q0}, [r1]! @ get next ct block - veor q14, q15, q6 @ combine prev ct with last key - vmov q15, q0 - bl aes_decrypt - vst1.8 {q0}, [r0]!
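Per block, the CBC paths around this point implement the textbook recurrences C_i = E(P_i ^ C_{i-1}) and P_i = D(C_i) ^ C_{i-1}, with the IV acting as C_0. A compact C restatement of the encrypt side (aes_enc_block is a stand-in for the Crypto Extensions transform, not a real kernel symbol):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t block[16]);   /* assumed helper: E_k in place */

    static void xor16(uint8_t *d, const uint8_t *s)
    {
        for (int i = 0; i < 16; i++)
            d[i] ^= s[i];
    }

    /* iv enters as C_0 and leaves holding the last ciphertext block */
    static void cbc_encrypt_sketch(uint8_t *out, const uint8_t *in,
                                   size_t blocks, uint8_t iv[16])
    {
        while (blocks--) {
            xor16(iv, in);           /* P_i ^ C_{i-1} */
            aes_enc_block(iv);       /* C_i */
            memcpy(out, iv, 16);
            in += 16;
            out += 16;
        }
    }

Decryption XORs after the block transform instead, which is why the 4x path above first saves the ciphertexts in q4-q7: each plaintext needs the previous ciphertext, not the previous plaintext.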
- subs r4, r4, #1 - bne .Lcbcdecloop -.Lcbcdecout: - vst1.8 {q15}, [r5] @ keep iv in q15 - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_decrypt) - - - /* - * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - */ - -ENTRY(ce_aes_cbc_cts_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - sub r4, r4, #16 - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - vld1.8 {q5}, [ip] - vld1.8 {q6}, [lr] - - add ip, r1, r4 - vld1.8 {q0}, [r1] @ overlapping loads - vld1.8 {q3}, [ip] - - vld1.8 {q1}, [r5] @ get iv - prepare_key r2, r3 - - veor q0, q0, q1 @ xor with iv - bl aes_encrypt - - vtbl.8 d4, {d0-d1}, d10 - vtbl.8 d5, {d0-d1}, d11 - vtbl.8 d2, {d6-d7}, d12 - vtbl.8 d3, {d6-d7}, d13 - - veor q0, q0, q1 - bl aes_encrypt - - add r4, r0, r4 - vst1.8 {q2}, [r4] @ overlapping stores - vst1.8 {q0}, [r0] - - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_cts_encrypt) - -ENTRY(ce_aes_cbc_cts_decrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - sub r4, r4, #16 - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - vld1.8 {q5}, [ip] - vld1.8 {q6}, [lr] - - add ip, r1, r4 - vld1.8 {q0}, [r1] @ overlapping loads - vld1.8 {q1}, [ip] - - vld1.8 {q3}, [r5] @ get iv - prepare_key r2, r3 - - bl aes_decrypt - - vtbl.8 d4, {d0-d1}, d10 - vtbl.8 d5, {d0-d1}, d11 - vtbx.8 d0, {d2-d3}, d12 - vtbx.8 d1, {d2-d3}, d13 - - veor q1, q1, q2 - bl aes_decrypt - veor q0, q0, q3 @ xor with iv - - add r4, r0, r4 - vst1.8 {q1}, [r4] @ overlapping stores - vst1.8 {q0}, [r0] - - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_cts_decrypt) - - - /* - * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 ctr[]) - */ -ENTRY(ce_aes_ctr_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q7}, [r5] @ load ctr - prepare_key r2, r3 - vmov r6, s31 @ keep swabbed ctr in r6 - rev r6, r6 - cmn r6, r4 @ 32 bit overflow? - bcs .Lctrloop -.Lctrloop4x: - subs r4, r4, #4 - bmi .Lctr1x - - /* - * NOTE: the sequence below has been carefully tweaked to avoid - * a silicon erratum that exists in Cortex-A57 (#1742098) and - * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs - * may produce an incorrect result if they take their input from a - * register of which a single 32-bit lane has been updated the last - * time it was modified. To work around this, the lanes of registers - * q0-q3 below are not manipulated individually, and the different - * counter values are prepared by successive manipulations of q7. - */ - add ip, r6, #1 - vmov q0, q7 - rev ip, ip - add lr, r6, #2 - vmov s31, ip @ set lane 3 of q1 via q7 - add ip, r6, #3 - rev lr, lr - vmov q1, q7 - vmov s31, lr @ set lane 3 of q2 via q7 - rev ip, ip - vmov q2, q7 - vmov s31, ip @ set lane 3 of q3 via q7 - add r6, r6, #4 - vmov q3, q7 - - vld1.8 {q4-q5}, [r1]! - vld1.8 {q6}, [r1]! - vld1.8 {q15}, [r1]! - bl aes_encrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q15 - rev ip, r6 - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! 
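The counter handling that continues below treats q7 as a 128-bit big-endian value; .Lctrcarry propagates an increment into the upper words only when the low 32 bits wrap. Byte-wise, the operation is simply the following (a C sketch; the assembly works in byte-reversed 32-bit words for speed):

    /* Increment a 128-bit big-endian counter in place. */
    static void be128_increment_sketch(unsigned char ctr[16])
    {
        for (int i = 15; i >= 0; i--)
            if (++ctr[i] != 0)   /* stop at the first byte that didn't wrap */
                break;
    }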
- vmov s31, ip - b .Lctrloop4x -.Lctr1x: - adds r4, r4, #4 - beq .Lctrout -.Lctrloop: - vmov q0, q7 - bl aes_encrypt - - adds r6, r6, #1 @ increment BE ctr - rev ip, r6 - vmov s31, ip - bcs .Lctrcarry - -.Lctrcarrydone: - subs r4, r4, #1 - bmi .Lctrtailblock @ blocks < 0 means tail block - vld1.8 {q3}, [r1]! - veor q3, q0, q3 - vst1.8 {q3}, [r0]! - bne .Lctrloop - -.Lctrout: - vst1.8 {q7}, [r5] @ return next CTR value - pop {r4-r6, pc} - -.Lctrtailblock: - vst1.8 {q0}, [r0, :64] @ return the key stream - b .Lctrout - -.Lctrcarry: - .irp sreg, s30, s29, s28 - vmov ip, \sreg @ load next word of ctr - rev ip, ip @ ... to handle the carry - adds ip, ip, #1 - rev ip, ip - vmov \sreg, ip - bcc .Lctrcarrydone - .endr - b .Lctrcarrydone -ENDPROC(ce_aes_ctr_encrypt) - - /* - * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int bytes, u8 iv[], u32 const rk2[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int bytes, u8 iv[], u32 const rk2[], int first) - */ - - .macro next_tweak, out, in, const, tmp - vshr.s64 \tmp, \in, #63 - vand \tmp, \tmp, \const - vadd.u64 \out, \in, \in - vext.8 \tmp, \tmp, \tmp, #8 - veor \out, \out, \tmp - .endm - -ce_aes_xts_init: - vmov.i32 d30, #0x87 @ compose tweak mask vector - vmovl.u32 q15, d30 - vshr.u64 d30, d31, #7 - - ldrd r4, r5, [sp, #16] @ load args - ldr r6, [sp, #28] - vld1.8 {q0}, [r5] @ load iv - teq r6, #1 @ start of a block? - bxne lr - - @ Encrypt the IV in q0 with the second AES key. This should only - @ be done at the start of a block. - ldr r6, [sp, #24] @ load AES key 2 - prepare_key r6, r3 - add ip, r6, #32 @ 3rd round key of key 2 - b .Laes_encrypt_tweak @ tail call -ENDPROC(ce_aes_xts_init) - -ENTRY(ce_aes_xts_encrypt) - push {r4-r6, lr} - - bl ce_aes_xts_init @ run shared prologue - prepare_key r2, r3 - vmov q4, q0 - - teq r6, #0 @ start of a block? - bne .Lxtsenc4x - -.Lxtsencloop4x: - next_tweak q4, q4, q15, q10 -.Lxtsenc4x: - subs r4, r4, #64 - bmi .Lxtsenc1x - vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks - vld1.8 {q2-q3}, [r1]! - next_tweak q5, q4, q15, q10 - veor q0, q0, q4 - next_tweak q6, q5, q15, q10 - veor q1, q1, q5 - next_tweak q7, q6, q15, q10 - veor q2, q2, q6 - veor q3, q3, q7 - bl aes_encrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q7 - vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks - vst1.8 {q2-q3}, [r0]! - vmov q4, q7 - teq r4, #0 - beq .Lxtsencret - b .Lxtsencloop4x -.Lxtsenc1x: - adds r4, r4, #64 - beq .Lxtsencout - subs r4, r4, #16 - bmi .LxtsencctsNx -.Lxtsencloop: - vld1.8 {q0}, [r1]! -.Lxtsencctsout: - veor q0, q0, q4 - bl aes_encrypt - veor q0, q0, q4 - teq r4, #0 - beq .Lxtsencout - subs r4, r4, #16 - next_tweak q4, q4, q15, q6 - bmi .Lxtsenccts - vst1.8 {q0}, [r0]! 
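The next_tweak macro used throughout this routine advances the XTS tweak one block position: T_{i+1} = T_i * x in GF(2^128), i.e. shift the 128-bit value left by one bit (little-endian convention) and, when a bit falls off the top, XOR the low byte with 0x87, the tail of the reduction polynomial. A byte-wise C equivalent (the NEON code achieves the same with a 64-bit shift pair and the mask vector kept in q15):

    /* Multiply the XTS tweak by x in GF(2^128), little-endian convention. */
    static void xts_next_tweak_sketch(unsigned char t[16])
    {
        unsigned carry = 0;
        for (int i = 0; i < 16; i++) {
            unsigned b = t[i];
            t[i] = (unsigned char)((b << 1) | carry);
            carry = b >> 7;
        }
        if (carry)
            t[0] ^= 0x87;
    }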
- b .Lxtsencloop -.Lxtsencout: - vst1.8 {q0}, [r0] -.Lxtsencret: - vst1.8 {q4}, [r5] - pop {r4-r6, pc} - -.LxtsencctsNx: - vmov q0, q3 - sub r0, r0, #16 -.Lxtsenccts: - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - - add r1, r1, r4 @ rewind input pointer - add r4, r4, #16 @ # bytes in final block - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - add r4, r0, r4 @ output address of final block - - vld1.8 {q1}, [r1] @ load final partial block - vld1.8 {q2}, [ip] - vld1.8 {q3}, [lr] - - vtbl.8 d4, {d0-d1}, d4 - vtbl.8 d5, {d0-d1}, d5 - vtbx.8 d0, {d2-d3}, d6 - vtbx.8 d1, {d2-d3}, d7 - - vst1.8 {q2}, [r4] @ overlapping stores - mov r4, #0 - b .Lxtsencctsout -ENDPROC(ce_aes_xts_encrypt) - - -ENTRY(ce_aes_xts_decrypt) - push {r4-r6, lr} - - bl ce_aes_xts_init @ run shared prologue - prepare_key r2, r3 - vmov q4, q0 - - /* subtract 16 bytes if we are doing CTS */ - tst r4, #0xf - subne r4, r4, #0x10 - - teq r6, #0 @ start of a block? - bne .Lxtsdec4x - -.Lxtsdecloop4x: - next_tweak q4, q4, q15, q10 -.Lxtsdec4x: - subs r4, r4, #64 - bmi .Lxtsdec1x - vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks - vld1.8 {q2-q3}, [r1]! - next_tweak q5, q4, q15, q10 - veor q0, q0, q4 - next_tweak q6, q5, q15, q10 - veor q1, q1, q5 - next_tweak q7, q6, q15, q10 - veor q2, q2, q6 - veor q3, q3, q7 - bl aes_decrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q7 - vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks - vst1.8 {q2-q3}, [r0]! - vmov q4, q7 - teq r4, #0 - beq .Lxtsdecout - b .Lxtsdecloop4x -.Lxtsdec1x: - adds r4, r4, #64 - beq .Lxtsdecout - subs r4, r4, #16 -.Lxtsdecloop: - vld1.8 {q0}, [r1]! - bmi .Lxtsdeccts -.Lxtsdecctsout: - veor q0, q0, q4 - bl aes_decrypt - veor q0, q0, q4 - vst1.8 {q0}, [r0]! - teq r4, #0 - beq .Lxtsdecout - subs r4, r4, #16 - next_tweak q4, q4, q15, q6 - b .Lxtsdecloop -.Lxtsdecout: - vst1.8 {q4}, [r5] - pop {r4-r6, pc} - -.Lxtsdeccts: - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - - add r1, r1, r4 @ rewind input pointer - add r4, r4, #16 @ # bytes in final block - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - add r4, r0, r4 @ output address of final block - - next_tweak q5, q4, q15, q6 - - vld1.8 {q1}, [r1] @ load final partial block - vld1.8 {q2}, [ip] - vld1.8 {q3}, [lr] - - veor q0, q0, q5 - bl aes_decrypt - veor q0, q0, q5 - - vtbl.8 d4, {d0-d1}, d4 - vtbl.8 d5, {d0-d1}, d5 - vtbx.8 d0, {d2-d3}, d6 - vtbx.8 d1, {d2-d3}, d7 - - vst1.8 {q2}, [r4] @ overlapping stores - mov r4, #0 - b .Lxtsdecctsout -ENDPROC(ce_aes_xts_decrypt) - - /* - * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the - * AES sbox substitution on each byte in - * 'input' - */ -ENTRY(ce_aes_sub) - vdup.32 q1, r0 - veor q0, q0, q0 - aese.8 q0, q1 - vmov r0, s0 - bx lr -ENDPROC(ce_aes_sub) - - /* - * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns - * operation on round key *src - */ -ENTRY(ce_aes_invert) - vld1.32 {q0}, [r1] - aesimc.8 q0, q0 - vst1.32 {q0}, [r0] - bx lr -ENDPROC(ce_aes_invert) - - .section ".rodata", "a" - .align 6 -.Lcts_permute_table: - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S deleted file mode 100644 index 
472e56d09eeae6d4f4fd4211960c49750bd4c649..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-cipher-core.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Scalar AES core transform - * - * Copyright (C) 2017 Linaro Ltd. - * Author: Ard Biesheuvel - */ - -#include -#include -#include - - .text - .align 5 - - rk .req r0 - rounds .req r1 - in .req r2 - out .req r3 - ttab .req ip - - t0 .req lr - t1 .req r2 - t2 .req r3 - - .macro __select, out, in, idx - .if __LINUX_ARM_ARCH__ < 7 - and \out, \in, #0xff << (8 * \idx) - .else - ubfx \out, \in, #(8 * \idx), #8 - .endif - .endm - - .macro __load, out, in, idx, sz, op - .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 - ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz] - .else - ldr\op \out, [ttab, \in, lsl #\sz] - .endif - .endm - - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr - __select \out0, \in0, 0 - __select t0, \in1, 1 - __load \out0, \out0, 0, \sz, \op - __load t0, t0, 1, \sz, \op - - .if \enc - __select \out1, \in1, 0 - __select t1, \in2, 1 - .else - __select \out1, \in3, 0 - __select t1, \in0, 1 - .endif - __load \out1, \out1, 0, \sz, \op - __select t2, \in2, 2 - __load t1, t1, 1, \sz, \op - __load t2, t2, 2, \sz, \op - - eor \out0, \out0, t0, ror #24 - - __select t0, \in3, 3 - .if \enc - __select \t3, \in3, 2 - __select \t4, \in0, 3 - .else - __select \t3, \in1, 2 - __select \t4, \in2, 3 - .endif - __load \t3, \t3, 2, \sz, \op - __load t0, t0, 3, \sz, \op - __load \t4, \t4, 3, \sz, \op - - .ifnb \oldcpsr - /* - * This is the final round and we're done with all data-dependent table - * lookups, so we can safely re-enable interrupts. - */ - restore_irqs \oldcpsr - .endif - - eor \out1, \out1, t1, ror #24 - eor \out0, \out0, t2, ror #16 - ldm rk!, {t1, t2} - eor \out1, \out1, \t3, ror #16 - eor \out0, \out0, t0, ror #8 - eor \out1, \out1, \t4, ror #8 - eor \out0, \out0, t1 - eor \out1, \out1, t2 - .endm - - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr - .endm - - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr - .endm - - .macro __rev, out, in - .if __LINUX_ARM_ARCH__ < 6 - lsl t0, \in, #24 - and t1, \in, #0xff00 - and t2, \in, #0xff0000 - orr \out, t0, \in, lsr #24 - orr \out, \out, t1, lsl #8 - orr \out, \out, t2, lsr #8 - .else - rev \out, \in - .endif - .endm - - .macro __adrl, out, sym, c - .if __LINUX_ARM_ARCH__ < 7 - ldr\c \out, =\sym - .else - movw\c \out, #:lower16:\sym - movt\c \out, #:upper16:\sym - .endif - .endm - - .macro do_crypt, round, ttab, ltab, bsz - push {r3-r11, lr} - - // Load keys first, to reduce latency in case they're not cached yet. - ldm rk!, {r8-r11} - - ldr r4, [in] - ldr r5, [in, #4] - ldr r6, [in, #8] - ldr r7, [in, #12] - -#ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 -#endif - - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - - __adrl ttab, \ttab - /* - * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into - * L1 cache, assuming cacheline size >= 32. This is a hardening measure - * intended to make cache-timing attacks more difficult. 
They may not - * be fully prevented, however; see the paper - * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf - * ("Cache-timing attacks on AES") for a discussion of the many - * difficulties involved in writing truly constant-time AES software. - */ - save_and_disable_irqs t0 - .set i, 0 - .rept 1024 / 128 - ldr r8, [ttab, #i + 0] - ldr r9, [ttab, #i + 32] - ldr r10, [ttab, #i + 64] - ldr r11, [ttab, #i + 96] - .set i, i + 128 - .endr - push {t0} // oldcpsr - - tst rounds, #2 - bne 1f - -0: \round r8, r9, r10, r11, r4, r5, r6, r7 - \round r4, r5, r6, r7, r8, r9, r10, r11 - -1: subs rounds, rounds, #4 - \round r8, r9, r10, r11, r4, r5, r6, r7 - bls 2f - \round r4, r5, r6, r7, r8, r9, r10, r11 - b 0b - -2: .ifb \ltab - add ttab, ttab, #1 - .else - __adrl ttab, \ltab - // Prefetch inverse S-box for final round; see explanation above - .set i, 0 - .rept 256 / 64 - ldr t0, [ttab, #i + 0] - ldr t1, [ttab, #i + 32] - .set i, i + 64 - .endr - .endif - - pop {rounds} // oldcpsr - \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds - -#ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 -#endif - - ldr out, [sp] - - str r4, [out] - str r5, [out, #4] - str r6, [out, #8] - str r7, [out, #12] - - pop {r3-r11, pc} - - .align 3 - .ltorg - .endm - -ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab,, 2 -ENDPROC(__aes_arm_encrypt) - - .align 5 -ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -ENDPROC(__aes_arm_decrypt) diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S deleted file mode 100644 index cfaed4e67535f5ef7453c76336b474cd1923868c..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-neonbs-core.S +++ /dev/null @@ -1,1026 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Bit sliced AES using NEON instructions - * - * Copyright (C) 2017 Linaro Ltd. 
- * Author: Ard Biesheuvel - */ - -/* - * The algorithm implemented here is described in detail by the paper - * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and - * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) - * - * This implementation is based primarily on the OpenSSL implementation - * for 32-bit ARM written by Andy Polyakov - */ - -#include -#include - - .text - .fpu neon - - rounds .req ip - bskey .req r4 - - q0l .req d0 - q0h .req d1 - q1l .req d2 - q1h .req d3 - q2l .req d4 - q2h .req d5 - q3l .req d6 - q3h .req d7 - q4l .req d8 - q4h .req d9 - q5l .req d10 - q5h .req d11 - q6l .req d12 - q6h .req d13 - q7l .req d14 - q7h .req d15 - q8l .req d16 - q8h .req d17 - q9l .req d18 - q9h .req d19 - q10l .req d20 - q10h .req d21 - q11l .req d22 - q11h .req d23 - q12l .req d24 - q12h .req d25 - q13l .req d26 - q13h .req d27 - q14l .req d28 - q14h .req d29 - q15l .req d30 - q15h .req d31 - - .macro __tbl, out, tbl, in, tmp - .ifc \out, \tbl - .ifb \tmp - .error __tbl needs temp register if out == tbl - .endif - vmov \tmp, \out - .endif - vtbl.8 \out\()l, {\tbl}, \in\()l - .ifc \out, \tbl - vtbl.8 \out\()h, {\tmp}, \in\()h - .else - vtbl.8 \out\()h, {\tbl}, \in\()h - .endif - .endm - - .macro __ldr, out, sym - vldr \out\()l, \sym - vldr \out\()h, \sym + 8 - .endm - - .macro __adr, reg, lbl - adr \reg, \lbl -THUMB( orr \reg, \reg, #1 ) - .endm - - .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - veor \b2, \b2, \b1 - veor \b5, \b5, \b6 - veor \b3, \b3, \b0 - veor \b6, \b6, \b2 - veor \b5, \b5, \b0 - veor \b6, \b6, \b3 - veor \b3, \b3, \b7 - veor \b7, \b7, \b5 - veor \b3, \b3, \b4 - veor \b4, \b4, \b5 - veor \b2, \b2, \b7 - veor \b3, \b3, \b1 - veor \b1, \b1, \b5 - .endm - - .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - veor \b0, \b0, \b6 - veor \b1, \b1, \b4 - veor \b4, \b4, \b6 - veor \b2, \b2, \b0 - veor \b6, \b6, \b1 - veor \b1, \b1, \b5 - veor \b5, \b5, \b3 - veor \b3, \b3, \b7 - veor \b7, \b7, \b5 - veor \b2, \b2, \b5 - veor \b4, \b4, \b7 - .endm - - .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 - veor \b1, \b1, \b7 - veor \b4, \b4, \b7 - veor \b7, \b7, \b5 - veor \b1, \b1, \b3 - veor \b2, \b2, \b5 - veor \b3, \b3, \b7 - veor \b6, \b6, \b1 - veor \b2, \b2, \b0 - veor \b5, \b5, \b3 - veor \b4, \b4, \b6 - veor \b0, \b0, \b6 - veor \b1, \b1, \b4 - .endm - - .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 - veor \b1, \b1, \b5 - veor \b2, \b2, \b7 - veor \b3, \b3, \b1 - veor \b4, \b4, \b5 - veor \b7, \b7, \b5 - veor \b3, \b3, \b4 - veor \b5, \b5, \b0 - veor \b3, \b3, \b7 - veor \b6, \b6, \b2 - veor \b2, \b2, \b1 - veor \b6, \b6, \b3 - veor \b3, \b3, \b0 - veor \b5, \b5, \b6 - .endm - - .macro mul_gf4, x0, x1, y0, y1, t0, t1 - veor \t0, \y0, \y1 - vand \t0, \t0, \x0 - veor \x0, \x0, \x1 - vand \t1, \x1, \y0 - vand \x0, \x0, \y1 - veor \x1, \t1, \t0 - veor \x0, \x0, \t1 - .endm - - .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 - veor \t0, \y0, \y1 - veor \t1, \y2, \y3 - vand \t0, \t0, \x0 - vand \t1, \t1, \x2 - veor \x0, \x0, \x1 - veor \x2, \x2, \x3 - vand \x1, \x1, \y0 - vand \x3, \x3, \y2 - vand \x0, \x0, \y1 - vand \x2, \x2, \y3 - veor \x1, \x1, \x0 - veor \x2, \x2, \x3 - veor \x0, \x0, \t0 - veor \x3, \x3, \t1 - .endm - - .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, y1, y2, y3, t0, t1, t2, t3 - veor \t0, \x0, \x2 - veor \t1, \x1, \x3 - mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 - veor \y0, \y0, \y2 - veor \y1, \y1, \y3 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 - veor \x0, \x0, \t0 - veor \x2, \x2, \t0 - 
veor \x1, \x1, \t1 - veor \x3, \x3, \t1 - veor \t0, \x4, \x6 - veor \t1, \x5, \x7 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 - veor \y0, \y0, \y2 - veor \y1, \y1, \y3 - mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 - veor \x4, \x4, \t0 - veor \x6, \x6, \t0 - veor \x5, \x5, \t1 - veor \x7, \x7, \t1 - .endm - - .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - veor \t3, \x4, \x6 - veor \t0, \x5, \x7 - veor \t1, \x1, \x3 - veor \s1, \x7, \x6 - veor \s0, \x0, \x2 - veor \s3, \t3, \t0 - vorr \t2, \t0, \t1 - vand \s2, \t3, \s0 - vorr \t3, \t3, \s0 - veor \s0, \s0, \t1 - vand \t0, \t0, \t1 - veor \t1, \x3, \x2 - vand \s3, \s3, \s0 - vand \s1, \s1, \t1 - veor \t1, \x4, \x5 - veor \s0, \x1, \x0 - veor \t3, \t3, \s1 - veor \t2, \t2, \s1 - vand \s1, \t1, \s0 - vorr \t1, \t1, \s0 - veor \t3, \t3, \s3 - veor \t0, \t0, \s1 - veor \t2, \t2, \s2 - veor \t1, \t1, \s3 - veor \t0, \t0, \s2 - vand \s0, \x7, \x3 - veor \t1, \t1, \s2 - vand \s1, \x6, \x2 - vand \s2, \x5, \x1 - vorr \s3, \x4, \x0 - veor \t3, \t3, \s0 - veor \t1, \t1, \s2 - veor \s0, \t0, \s3 - veor \t2, \t2, \s1 - vand \s2, \t3, \t1 - veor \s1, \t2, \s2 - veor \s3, \s0, \s2 - vbsl \s1, \t1, \s0 - vmvn \t0, \s0 - vbsl \s0, \s1, \s3 - vbsl \t0, \s1, \s3 - vbsl \s3, \t3, \t2 - veor \t3, \t3, \t2 - vand \s2, \s0, \s3 - veor \t1, \t1, \t0 - veor \s2, \s2, \t3 - mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - .endm - - .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 - inv_gf256 \b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \ - \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - out_bs_ch \b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3 - .endm - - .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - inv_in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 - inv_gf256 \b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \ - \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - inv_out_bs_ch \b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6 - .endm - - .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, mask - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \t0, \t0, \x0 - vld1.8 {\t2-\t3}, [bskey, :256]! - veor \t1, \t1, \x1 - __tbl \x0, \t0, \mask - veor \t2, \t2, \x2 - __tbl \x1, \t1, \mask - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \t3, \t3, \x3 - __tbl \x2, \t2, \mask - __tbl \x3, \t3, \mask - vld1.8 {\t2-\t3}, [bskey, :256]! 
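A note on the representation these macros operate on: after bitslicing, register q_k holds bit k of every byte of all eight blocks, so each veor/vand above acts on 128 bytes' worth of one bit position at once, and the S-box reduces to the GF(2^8) inversion circuit of inv_gf256 with no data-dependent table lookups. A toy byte-wide version of the transposition (the NEON code does the equivalent on 128-bit registers using the swapmove networks further down):

    /* Gather bit k of eight input bytes into output byte-plane k. */
    static void bitslice8_sketch(unsigned char plane[8],
                                 const unsigned char in[8])
    {
        for (int k = 0; k < 8; k++) {
            unsigned char p = 0;
            for (int j = 0; j < 8; j++)
                p |= (unsigned char)(((in[j] >> k) & 1) << j);
            plane[k] = p;
        }
    }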
- veor \t0, \t0, \x4 - veor \t1, \t1, \x5 - __tbl \x4, \t0, \mask - veor \t2, \t2, \x6 - __tbl \x5, \t1, \mask - veor \t3, \t3, \x7 - __tbl \x6, \t2, \mask - __tbl \x7, \t3, \mask - .endm - - .macro inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, mask - __tbl \x0, \x0, \mask, \t0 - __tbl \x1, \x1, \mask, \t1 - __tbl \x2, \x2, \mask, \t2 - __tbl \x3, \x3, \mask, \t3 - __tbl \x4, \x4, \mask, \t0 - __tbl \x5, \x5, \mask, \t1 - __tbl \x6, \x6, \mask, \t2 - __tbl \x7, \x7, \mask, \t3 - .endm - - .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7, inv - vext.8 \t0, \x0, \x0, #12 - vext.8 \t1, \x1, \x1, #12 - veor \x0, \x0, \t0 - vext.8 \t2, \x2, \x2, #12 - veor \x1, \x1, \t1 - vext.8 \t3, \x3, \x3, #12 - veor \x2, \x2, \t2 - vext.8 \t4, \x4, \x4, #12 - veor \x3, \x3, \t3 - vext.8 \t5, \x5, \x5, #12 - veor \x4, \x4, \t4 - vext.8 \t6, \x6, \x6, #12 - veor \x5, \x5, \t5 - vext.8 \t7, \x7, \x7, #12 - veor \x6, \x6, \t6 - veor \t1, \t1, \x0 - veor.8 \x7, \x7, \t7 - vext.8 \x0, \x0, \x0, #8 - veor \t2, \t2, \x1 - veor \t0, \t0, \x7 - veor \t1, \t1, \x7 - vext.8 \x1, \x1, \x1, #8 - veor \t5, \t5, \x4 - veor \x0, \x0, \t0 - veor \t6, \t6, \x5 - veor \x1, \x1, \t1 - vext.8 \t0, \x4, \x4, #8 - veor \t4, \t4, \x3 - vext.8 \t1, \x5, \x5, #8 - veor \t7, \t7, \x6 - vext.8 \x4, \x3, \x3, #8 - veor \t3, \t3, \x2 - vext.8 \x5, \x7, \x7, #8 - veor \t4, \t4, \x7 - vext.8 \x3, \x6, \x6, #8 - veor \t3, \t3, \x7 - vext.8 \x6, \x2, \x2, #8 - veor \x7, \t1, \t5 - .ifb \inv - veor \x2, \t0, \t4 - veor \x4, \x4, \t3 - veor \x5, \x5, \t7 - veor \x3, \x3, \t6 - veor \x6, \x6, \t2 - .else - veor \t3, \t3, \x4 - veor \x5, \x5, \t7 - veor \x2, \x3, \t6 - veor \x3, \t0, \t4 - veor \x4, \x6, \t2 - vmov \x6, \t3 - .endif - .endm - - .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7 - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \x0, \x0, \t0 - vld1.8 {\t2-\t3}, [bskey, :256]! - veor \x1, \x1, \t1 - vld1.8 {\t4-\t5}, [bskey, :256]! 
- veor \x2, \x2, \t2 - vld1.8 {\t6-\t7}, [bskey, :256] - sub bskey, bskey, #224 - veor \x3, \x3, \t3 - veor \x4, \x4, \t4 - veor \x5, \x5, \t5 - veor \x6, \x6, \t6 - veor \x7, \x7, \t7 - vext.8 \t0, \x0, \x0, #8 - vext.8 \t6, \x6, \x6, #8 - vext.8 \t7, \x7, \x7, #8 - veor \t0, \t0, \x0 - vext.8 \t1, \x1, \x1, #8 - veor \t6, \t6, \x6 - vext.8 \t2, \x2, \x2, #8 - veor \t7, \t7, \x7 - vext.8 \t3, \x3, \x3, #8 - veor \t1, \t1, \x1 - vext.8 \t4, \x4, \x4, #8 - veor \t2, \t2, \x2 - vext.8 \t5, \x5, \x5, #8 - veor \t3, \t3, \x3 - veor \t4, \t4, \x4 - veor \t5, \t5, \x5 - veor \x0, \x0, \t6 - veor \x1, \x1, \t6 - veor \x2, \x2, \t0 - veor \x4, \x4, \t2 - veor \x3, \x3, \t1 - veor \x1, \x1, \t7 - veor \x2, \x2, \t7 - veor \x4, \x4, \t6 - veor \x5, \x5, \t3 - veor \x3, \x3, \t6 - veor \x6, \x6, \t4 - veor \x4, \x4, \t7 - veor \x5, \x5, \t7 - veor \x7, \x7, \t5 - mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 - .endm - - .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 - vshr.u64 \t0, \b0, #\n - vshr.u64 \t1, \b1, #\n - veor \t0, \t0, \a0 - veor \t1, \t1, \a1 - vand \t0, \t0, \mask - vand \t1, \t1, \mask - veor \a0, \a0, \t0 - vshl.s64 \t0, \t0, #\n - veor \a1, \a1, \t1 - vshl.s64 \t1, \t1, #\n - veor \b0, \b0, \t0 - veor \b1, \b1, \t1 - .endm - - .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 - vmov.i8 \t0, #0x55 - vmov.i8 \t1, #0x33 - swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 - swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 - vmov.i8 \t0, #0x0f - swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 - swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 - swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 - swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 - .endm - - .align 4 -M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d - - /* - * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) - */ -ENTRY(aesbs_convert_key) - vld1.32 {q7}, [r1]! // load round 0 key - vld1.32 {q15}, [r1]! // load round 1 key - - vmov.i8 q8, #0x01 // bit masks - vmov.i8 q9, #0x02 - vmov.i8 q10, #0x04 - vmov.i8 q11, #0x08 - vmov.i8 q12, #0x10 - vmov.i8 q13, #0x20 - __ldr q14, M0 - - sub r2, r2, #1 - vst1.8 {q7}, [r0, :128]! // save round 0 key - -.Lkey_loop: - __tbl q7, q15, q14 - vmov.i8 q6, #0x40 - vmov.i8 q15, #0x80 - - vtst.8 q0, q7, q8 - vtst.8 q1, q7, q9 - vtst.8 q2, q7, q10 - vtst.8 q3, q7, q11 - vtst.8 q4, q7, q12 - vtst.8 q5, q7, q13 - vtst.8 q6, q7, q6 - vtst.8 q7, q7, q15 - vld1.32 {q15}, [r1]! // load next round key - vmvn q0, q0 - vmvn q1, q1 - vmvn q5, q5 - vmvn q6, q6 - - subs r2, r2, #1 - vst1.8 {q0-q1}, [r0, :256]! - vst1.8 {q2-q3}, [r0, :256]! - vst1.8 {q4-q5}, [r0, :256]! - vst1.8 {q6-q7}, [r0, :256]! - bne .Lkey_loop - - vmov.i8 q7, #0x63 // compose .L63 - veor q15, q15, q7 - vst1.8 {q15}, [r0, :128] - bx lr -ENDPROC(aesbs_convert_key) - - .align 4 -M0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 - -aesbs_encrypt8: - vld1.8 {q9}, [bskey, :128]! 
// round 0 key - __ldr q8, M0SR - - veor q10, q0, q9 // xor with round0 key - veor q11, q1, q9 - __tbl q0, q10, q8 - veor q12, q2, q9 - __tbl q1, q11, q8 - veor q13, q3, q9 - __tbl q2, q12, q8 - veor q14, q4, q9 - __tbl q3, q13, q8 - veor q15, q5, q9 - __tbl q4, q14, q8 - veor q10, q6, q9 - __tbl q5, q15, q8 - veor q11, q7, q9 - __tbl q6, q10, q8 - __tbl q7, q11, q8 - - bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 - - sub rounds, rounds, #1 - b .Lenc_sbox - - .align 5 -SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -SRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d - -.Lenc_last: - __ldr q12, SRM0 -.Lenc_loop: - shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 -.Lenc_sbox: - sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ - q13, q14, q15 - subs rounds, rounds, #1 - bcc .Lenc_done - - mix_cols q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \ - q13, q14, q15 - - beq .Lenc_last - __ldr q12, SR - b .Lenc_loop - -.Lenc_done: - vld1.8 {q12}, [bskey, :128] // last round key - - bitslice q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11 - - veor q0, q0, q12 - veor q1, q1, q12 - veor q4, q4, q12 - veor q6, q6, q12 - veor q3, q3, q12 - veor q7, q7, q12 - veor q2, q2, q12 - veor q5, q5, q12 - bx lr -ENDPROC(aesbs_encrypt8) - - .align 4 -M0ISR: .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 - -aesbs_decrypt8: - add bskey, bskey, rounds, lsl #7 - sub bskey, bskey, #112 - vld1.8 {q9}, [bskey, :128] // round 0 key - sub bskey, bskey, #128 - __ldr q8, M0ISR - - veor q10, q0, q9 // xor with round0 key - veor q11, q1, q9 - __tbl q0, q10, q8 - veor q12, q2, q9 - __tbl q1, q11, q8 - veor q13, q3, q9 - __tbl q2, q12, q8 - veor q14, q4, q9 - __tbl q3, q13, q8 - veor q15, q5, q9 - __tbl q4, q14, q8 - veor q10, q6, q9 - __tbl q5, q15, q8 - veor q11, q7, q9 - __tbl q6, q10, q8 - __tbl q7, q11, q8 - - bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 - - sub rounds, rounds, #1 - b .Ldec_sbox - - .align 5 -ISR: .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -ISRM0: .quad 0x01040b0e0205080f, 0x0306090c00070a0d - -.Ldec_last: - __ldr q12, ISRM0 -.Ldec_loop: - inv_shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 -.Ldec_sbox: - inv_sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ - q13, q14, q15 - subs rounds, rounds, #1 - bcc .Ldec_done - - inv_mix_cols q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \ - q13, q14, q15 - - beq .Ldec_last - __ldr q12, ISR - b .Ldec_loop - -.Ldec_done: - add bskey, bskey, #112 - vld1.8 {q12}, [bskey, :128] // last round key - - bitslice q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11 - - veor q0, q0, q12 - veor q1, q1, q12 - veor q6, q6, q12 - veor q4, q4, q12 - veor q2, q2, q12 - veor q7, q7, q12 - veor q3, q3, q12 - veor q5, q5, q12 - bx lr -ENDPROC(aesbs_decrypt8) - - /* - * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - push {r4-r6, lr} - ldr r5, [sp, #16] // number of blocks - -99: __adr ip, 0f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [r1]! - vld1.8 {q1}, [r1]! - vld1.8 {q2}, [r1]! - vld1.8 {q3}, [r1]! - vld1.8 {q4}, [r1]! - vld1.8 {q5}, [r1]! - vld1.8 {q6}, [r1]! - vld1.8 {q7}, [r1]! 
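The `sub ip, ip, lr, lsl #2` / `bxlt ip` pairs in __ecb_crypt compute a branch target inside the run of eight 4-byte load (and, later, store) instructions, so a partial batch of n = blocks & 7 executes only the last n of them; input slot i always pairs with the matching output register, so the untouched lanes merely carry garbage through the cipher. In C, the same shape is a switch that falls into the tail of the run (names illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Perform only the last n of 8 block loads, as the computed branch does. */
    static void load_tail_sketch(uint8_t blk[8][16], const uint8_t **in,
                                 unsigned n)
    {
        switch (8 - n) {                 /* skip the first 8 - n loads */
        case 0: memcpy(blk[0], *in, 16); *in += 16; /* fall through */
        case 1: memcpy(blk[1], *in, 16); *in += 16; /* fall through */
        case 2: memcpy(blk[2], *in, 16); *in += 16; /* fall through */
        case 3: memcpy(blk[3], *in, 16); *in += 16; /* fall through */
        case 4: memcpy(blk[4], *in, 16); *in += 16; /* fall through */
        case 5: memcpy(blk[5], *in, 16); *in += 16; /* fall through */
        case 6: memcpy(blk[6], *in, 16); *in += 16; /* fall through */
        case 7: memcpy(blk[7], *in, 16); *in += 16;
        }
    }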
- -0: mov bskey, r2 - mov rounds, r3 - bl \do8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vst1.8 {\o0}, [r0]! - vst1.8 {\o1}, [r0]! - vst1.8 {\o2}, [r0]! - vst1.8 {\o3}, [r0]! - vst1.8 {\o4}, [r0]! - vst1.8 {\o5}, [r0]! - vst1.8 {\o6}, [r0]! - vst1.8 {\o7}, [r0]! - -1: subs r5, r5, #8 - bgt 99b - - pop {r4-r6, pc} - .endm - - .align 4 -ENTRY(aesbs_ecb_encrypt) - __ecb_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 -ENDPROC(aesbs_ecb_encrypt) - - .align 4 -ENTRY(aesbs_ecb_decrypt) - __ecb_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 -ENDPROC(aesbs_ecb_decrypt) - - /* - * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[]) - */ - .align 4 -ENTRY(aesbs_cbc_decrypt) - mov ip, sp - push {r4-r6, lr} - ldm ip, {r5-r6} // load args 4-5 - -99: __adr ip, 0f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - mov lr, r1 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [lr]! - vld1.8 {q1}, [lr]! - vld1.8 {q2}, [lr]! - vld1.8 {q3}, [lr]! - vld1.8 {q4}, [lr]! - vld1.8 {q5}, [lr]! - vld1.8 {q6}, [lr]! - vld1.8 {q7}, [lr] - -0: mov bskey, r2 - mov rounds, r3 - bl aesbs_decrypt8 - - vld1.8 {q8}, [r6] - vmov q9, q8 - vmov q10, q8 - vmov q11, q8 - vmov q12, q8 - vmov q13, q8 - vmov q14, q8 - vmov q15, q8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q9}, [r1]! - vld1.8 {q10}, [r1]! - vld1.8 {q11}, [r1]! - vld1.8 {q12}, [r1]! - vld1.8 {q13}, [r1]! - vld1.8 {q14}, [r1]! - vld1.8 {q15}, [r1]! - W(nop) - -1: __adr ip, 2f - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor q0, q0, q8 - vst1.8 {q0}, [r0]! - veor q1, q1, q9 - vst1.8 {q1}, [r0]! - veor q6, q6, q10 - vst1.8 {q6}, [r0]! - veor q4, q4, q11 - vst1.8 {q4}, [r0]! - veor q2, q2, q12 - vst1.8 {q2}, [r0]! - veor q7, q7, q13 - vst1.8 {q7}, [r0]! - veor q3, q3, q14 - vst1.8 {q3}, [r0]! - veor q5, q5, q15 - vld1.8 {q8}, [r1]! // load next round's iv -2: vst1.8 {q5}, [r0]! - - subs r5, r5, #8 - vst1.8 {q8}, [r6] // store next round's iv - bgt 99b - - pop {r4-r6, pc} -ENDPROC(aesbs_cbc_decrypt) - - .macro next_ctr, q - vmov.32 \q\()h[1], r10 - adds r10, r10, #1 - vmov.32 \q\()h[0], r9 - adcs r9, r9, #0 - vmov.32 \q\()l[1], r8 - adcs r8, r8, #0 - vmov.32 \q\()l[0], r7 - adc r7, r7, #0 - vrev32.8 \q, \q - .endm - - /* - * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 ctr[], u8 final[]) - */ -ENTRY(aesbs_ctr_encrypt) - mov ip, sp - push {r4-r10, lr} - - ldm ip, {r5-r7} // load args 4-6 - teq r7, #0 - addne r5, r5, #1 // one extra block if final != 0 - - vld1.8 {q0}, [r6] // load counter - vrev32.8 q1, q0 - vmov r9, r10, d3 - vmov r7, r8, d2 - - adds r10, r10, #1 - adcs r9, r9, #0 - adcs r8, r8, #0 - adc r7, r7, #0 - -99: vmov q1, q0 - vmov q2, q0 - vmov q3, q0 - vmov q4, q0 - vmov q5, q0 - vmov q6, q0 - vmov q7, q0 - - __adr ip, 0f - sub lr, r5, #1 - and lr, lr, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #5 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - next_ctr q1 - next_ctr q2 - next_ctr q3 - next_ctr q4 - next_ctr q5 - next_ctr q6 - next_ctr q7 - -0: mov bskey, r2 - mov rounds, r3 - bl aesbs_encrypt8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - movgt r4, #0 - ldrle r4, [sp, #40] // load final in the last round - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q8}, [r1]! - vld1.8 {q9}, [r1]! - vld1.8 {q10}, [r1]! 
- vld1.8 {q11}, [r1]! - vld1.8 {q12}, [r1]! - vld1.8 {q13}, [r1]! - vld1.8 {q14}, [r1]! - teq r4, #0 // skip last block if 'final' -1: bne 2f - vld1.8 {q15}, [r1]! - -2: __adr ip, 3f - cmp r5, #8 - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor q0, q0, q8 - vst1.8 {q0}, [r0]! - veor q1, q1, q9 - vst1.8 {q1}, [r0]! - veor q4, q4, q10 - vst1.8 {q4}, [r0]! - veor q6, q6, q11 - vst1.8 {q6}, [r0]! - veor q3, q3, q12 - vst1.8 {q3}, [r0]! - veor q7, q7, q13 - vst1.8 {q7}, [r0]! - veor q2, q2, q14 - vst1.8 {q2}, [r0]! - teq r4, #0 // skip last block if 'final' - W(bne) 5f -3: veor q5, q5, q15 - vst1.8 {q5}, [r0]! - -4: next_ctr q0 - - subs r5, r5, #8 - bgt 99b - - vst1.8 {q0}, [r6] - pop {r4-r10, pc} - -5: vst1.8 {q5}, [r4] - b 4b -ENDPROC(aesbs_ctr_encrypt) - - .macro next_tweak, out, in, const, tmp - vshr.s64 \tmp, \in, #63 - vand \tmp, \tmp, \const - vadd.u64 \out, \in, \in - vext.8 \tmp, \tmp, \tmp, #8 - veor \out, \out, \tmp - .endm - - /* - * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int reorder_last_tweak) - * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int reorder_last_tweak) - */ -__xts_prepare8: - vld1.8 {q14}, [r7] // load iv - vmov.i32 d30, #0x87 // compose tweak mask vector - vmovl.u32 q15, d30 - vshr.u64 d30, d31, #7 - vmov q12, q14 - - __adr ip, 0f - and r4, r6, #7 - cmp r6, #8 - sub ip, ip, r4, lsl #5 - mov r4, sp - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [r1]! - next_tweak q12, q14, q15, q13 - veor q0, q0, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q1}, [r1]! - next_tweak q14, q12, q15, q13 - veor q1, q1, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q2}, [r1]! - next_tweak q12, q14, q15, q13 - veor q2, q2, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q3}, [r1]! - next_tweak q14, q12, q15, q13 - veor q3, q3, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q4}, [r1]! - next_tweak q12, q14, q15, q13 - veor q4, q4, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q5}, [r1]! - next_tweak q14, q12, q15, q13 - veor q5, q5, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q6}, [r1]! - next_tweak q12, q14, q15, q13 - veor q6, q6, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q7}, [r1]! - next_tweak q14, q12, q15, q13 -THUMB( itt le ) - W(cmple) r8, #0 - ble 1f -0: veor q7, q7, q12 - vst1.8 {q12}, [r4, :128] - - vst1.8 {q14}, [r7] // store next iv - bx lr - -1: vswp q12, q14 - b 0b -ENDPROC(__xts_prepare8) - - .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - push {r4-r8, lr} - mov r5, sp // preserve sp - ldrd r6, r7, [sp, #24] // get blocks and iv args - ldr r8, [sp, #32] // reorder final tweak? - rsb r8, r8, #1 - sub ip, sp, #128 // make room for 8x tweak - bic ip, ip, #0xf // align sp to 16 bytes - mov sp, ip - -99: bl __xts_prepare8 - - mov bskey, r2 - mov rounds, r3 - bl \do8 - - __adr ip, 0f - and lr, r6, #7 - cmp r6, #8 - sub ip, ip, lr, lsl #2 - mov r4, sp - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q8}, [r4, :128]! - vld1.8 {q9}, [r4, :128]! - vld1.8 {q10}, [r4, :128]! - vld1.8 {q11}, [r4, :128]! - vld1.8 {q12}, [r4, :128]! - vld1.8 {q13}, [r4, :128]! - vld1.8 {q14}, [r4, :128]! - vld1.8 {q15}, [r4, :128] - -0: __adr ip, 1f - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor \o0, \o0, q8 - vst1.8 {\o0}, [r0]! - veor \o1, \o1, q9 - vst1.8 {\o1}, [r0]! - veor \o2, \o2, q10 - vst1.8 {\o2}, [r0]! - veor \o3, \o3, q11 - vst1.8 {\o3}, [r0]! - veor \o4, \o4, q12 - vst1.8 {\o4}, [r0]! 
- veor \o5, \o5, q13 - vst1.8 {\o5}, [r0]! - veor \o6, \o6, q14 - vst1.8 {\o6}, [r0]! - veor \o7, \o7, q15 - vst1.8 {\o7}, [r0]! - -1: subs r6, r6, #8 - bgt 99b - - mov sp, r5 - pop {r4-r8, pc} - .endm - -ENTRY(aesbs_xts_encrypt) - __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 -ENDPROC(aesbs_xts_encrypt) - -ENTRY(aesbs_xts_decrypt) - __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 -ENDPROC(aesbs_xts_decrypt) diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S deleted file mode 100644 index eb22926d49127e894a060afc003871d743dfe36d..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/chacha-neon-core.S +++ /dev/null @@ -1,560 +0,0 @@ -/* - * ChaCha/XChaCha NEON helper functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - - /* - * NEON doesn't have a rotate instruction. The alternatives are, more or less: - * - * (a) vshl.u32 + vsri.u32 (needs temporary register) - * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register) - * (c) vrev32.16 (16-bit rotations only) - * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, - * needs index vector) - * - * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations, - * the only choices are (a) and (b). We use (a) since it takes two-thirds the - * cycles of (b) on both Cortex-A7 and Cortex-A53. - * - * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest - * and doesn't need a temporary register. - * - * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence - * is twice as fast as (a), even when doing (a) on multiple registers - * simultaneously to eliminate the stall between vshl and vsri. Also, it - * parallelizes better when temporary registers are scarce. - * - * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as - * (a), so the need to load the rotation table actually makes the vtbl method - * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it - * seems to be a good compromise to get a more significant speed boost on some - * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7. - */ - -#include - - .text - .fpu neon - .align 5 - -/* - * chacha_permute - permute one block - * - * Permute one 64-byte block where the state matrix is stored in the four NEON - * registers q0-q3. It performs matrix operations on four words in parallel, - * but requires shuffling to rearrange the words after each round. - * - * The round count is given in r3. 
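[Editorial note, not part of the deleted file: the comment above weighs the NEON rotate strategies for ChaCha's 16, 12, 8 and 7-bit rotations. As a cross-check for those sequences (vrev32.16 for 16, vshl+vsri for 12 and 7, vtbl for 8), here is a minimal scalar C sketch of the quarter round and double round that chacha_permute vectorizes one state row per instruction; the helper names are ours, not from this source.]

#include <stdint.h>

/* rotate left; n is always one of 16, 12, 8, 7 for ChaCha */
static inline uint32_t rol32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
{
	x[a] += x[b]; x[d] = rol32(x[d] ^ x[a], 16);
	x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 12);
	x[a] += x[b]; x[d] = rol32(x[d] ^ x[a], 8);
	x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 7);
}

/* one double round: a column round followed by a diagonal round */
static void double_round(uint32_t x[16])
{
	quarter_round(x, 0, 4,  8, 12);
	quarter_round(x, 1, 5,  9, 13);
	quarter_round(x, 2, 6, 10, 14);
	quarter_round(x, 3, 7, 11, 15);
	quarter_round(x, 0, 5, 10, 15);
	quarter_round(x, 1, 6, 11, 12);
	quarter_round(x, 2, 7,  8, 13);
	quarter_round(x, 3, 4,  9, 14);
}

[The vext.8 shuffles in the assembly stand in for the diagonal indexing: rotating rows 1-3 by one, two and three words turns the column round into the diagonal round on the same register layout.]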
- * - * Clobbers: r3, ip, q4-q5 - */ -chacha_permute: - - adr ip, .Lrol8_table - vld1.8 {d10}, [ip, :64] - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vrev32.16 q3, q3 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vtbl.8 d6, {d6}, d10 - vtbl.8 d7, {d7}, d10 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - vext.8 q1, q1, q1, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - vext.8 q3, q3, q3, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vrev32.16 q3, q3 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vtbl.8 d6, {d6}, d10 - vtbl.8 d7, {d7}, d10 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - vext.8 q1, q1, q1, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - vext.8 q3, q3, q3, #4 - - subs r3, r3, #2 - bne .Ldoubleround - - bx lr -ENDPROC(chacha_permute) - -ENTRY(chacha_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - // r3: nrounds - push {lr} - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 - - bl chacha_permute - - add ip, r2, #0x20 - vld1.8 {q4-q5}, [r2] - vld1.8 {q6-q7}, [ip] - - // o0 = i0 ^ (x0 + s0) - vadd.i32 q0, q0, q8 - veor q0, q0, q4 - - // o1 = i1 ^ (x1 + s1) - vadd.i32 q1, q1, q9 - veor q1, q1, q5 - - // o2 = i2 ^ (x2 + s2) - vadd.i32 q2, q2, q10 - veor q2, q2, q6 - - // o3 = i3 ^ (x3 + s3) - vadd.i32 q3, q3, q11 - veor q3, q3, q7 - - add ip, r1, #0x20 - vst1.8 {q0-q1}, [r1] - vst1.8 {q2-q3}, [ip] - - pop {pc} -ENDPROC(chacha_block_xor_neon) - -ENTRY(hchacha_block_neon) - // r0: Input state matrix, s - // r1: output (8 32-bit words) - // r2: nrounds - push {lr} - - vld1.32 {q0-q1}, [r0]! - vld1.32 {q2-q3}, [r0] - - mov r3, r2 - bl chacha_permute - - vst1.32 {q0}, [r1]! - vst1.32 {q3}, [r1] - - pop {pc} -ENDPROC(hchacha_block_neon) - - .align 4 -.Lctrinc: .word 0, 1, 2, 3 -.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 - - .align 5 -ENTRY(chacha_4block_xor_neon) - push {r4-r5} - mov r4, sp // preserve the stack pointer - sub ip, sp, #0x20 // allocate a 32 byte buffer - bic ip, ip, #0x1f // aligned to 32 bytes - mov sp, ip - - // r0: Input state matrix, s - // r1: 4 data blocks output, o - // r2: 4 data blocks input, i - // r3: nrounds - - // - // This function encrypts four consecutive ChaCha blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. The words are re-interleaved before the - // final addition of the original state and the XORing step. 
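[Editorial note, not part of the deleted file: the comment just above describes a transposed, "word-sliced" layout for the four-block path. A hedged C sketch of that layout (our own illustration, reusing rol32 from the sketch above): after the vdup.32 broadcasts, NEON register i holds state word i of all four blocks, so one vector add or rotate advances four blocks at once, and only the per-block counters in x12 differ.]

typedef struct { uint32_t lane[4]; } u32x4;	/* stands in for one q register */

/* x[i].lane[n] is state word i of block n: rounds need no shuffling */
static void quarter_round_x4(u32x4 x[16], int a, int b, int c, int d)
{
	for (int n = 0; n < 4; n++) {
		x[a].lane[n] += x[b].lane[n];
		x[d].lane[n] = rol32(x[d].lane[n] ^ x[a].lane[n], 16);
		x[c].lane[n] += x[d].lane[n];
		x[b].lane[n] = rol32(x[b].lane[n] ^ x[c].lane[n], 12);
		x[a].lane[n] += x[b].lane[n];
		x[d].lane[n] = rol32(x[d].lane[n] ^ x[a].lane[n], 8);
		x[c].lane[n] += x[d].lane[n];
		x[b].lane[n] = rol32(x[b].lane[n] ^ x[c].lane[n], 7);
	}
}

[The vzip/vswp sequences near the end of the function undo this transposition, so the keystream can be XORed against the input in normal block order.]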
- // - - // x0..15[0-3] = s0..15[0-3] - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - adr r5, .Lctrinc - vdup.32 q15, d7[1] - vdup.32 q14, d7[0] - vld1.32 {q4}, [r5, :128] - vdup.32 q13, d6[1] - vdup.32 q12, d6[0] - vdup.32 q11, d5[1] - vdup.32 q10, d5[0] - vadd.u32 q12, q12, q4 // x12 += counter values 0-3 - vdup.32 q9, d4[1] - vdup.32 q8, d4[0] - vdup.32 q7, d3[1] - vdup.32 q6, d3[0] - vdup.32 q5, d2[1] - vdup.32 q4, d2[0] - vdup.32 q3, d1[1] - vdup.32 q2, d1[0] - vdup.32 q1, d0[1] - vdup.32 q0, d0[0] - - adr ip, .Lrol8_table - b 1f - -.Ldoubleround4: - vld1.32 {q8-q9}, [sp, :256] -1: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - vrev32.16 q15, q15 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #12 - vshl.u32 q5, q9, #12 - vsri.u32 q4, q8, #20 - vsri.u32 q5, q9, #20 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #12 - vshl.u32 q7, q9, #12 - vsri.u32 q6, q8, #20 - vsri.u32 q7, q9, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - vld1.8 {d16}, [ip, :64] - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vtbl.8 d24, {d24}, d16 - vtbl.8 d25, {d25}, d16 - vtbl.8 d26, {d26}, d16 - vtbl.8 d27, {d27}, d16 - vtbl.8 d28, {d28}, d16 - vtbl.8 d29, {d29}, d16 - vtbl.8 d30, {d30}, d16 - vtbl.8 d31, {d31}, d16 - - vld1.32 {q8-q9}, [sp, :256] - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #7 - vshl.u32 q5, q9, #7 - vsri.u32 q4, q8, #25 - vsri.u32 q5, q9, #25 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #7 - vshl.u32 q7, q9, #7 - vsri.u32 q6, q8, #25 - vsri.u32 q7, q9, #25 - - vld1.32 {q8-q9}, [sp, :256] - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vrev32.16 q15, q15 - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 
q7, q8, #12 - vshl.u32 q4, q9, #12 - vsri.u32 q7, q8, #20 - vsri.u32 q4, q9, #20 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #12 - vshl.u32 q6, q9, #12 - vsri.u32 q5, q8, #20 - vsri.u32 q6, q9, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - vld1.8 {d16}, [ip, :64] - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vtbl.8 d30, {d30}, d16 - vtbl.8 d31, {d31}, d16 - vtbl.8 d24, {d24}, d16 - vtbl.8 d25, {d25}, d16 - vtbl.8 d26, {d26}, d16 - vtbl.8 d27, {d27}, d16 - vtbl.8 d28, {d28}, d16 - vtbl.8 d29, {d29}, d16 - - vld1.32 {q8-q9}, [sp, :256] - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #7 - vshl.u32 q4, q9, #7 - vsri.u32 q7, q8, #25 - vsri.u32 q4, q9, #25 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #7 - vshl.u32 q6, q9, #7 - vsri.u32 q5, q8, #25 - vsri.u32 q6, q9, #25 - - subs r3, r3, #2 - bne .Ldoubleround4 - - // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. - // x8..9[0-3] are on the stack. - - // Re-interleave the words in the first two rows of each block (x0..7). - // Also add the counter values 0-3 to x12[0-3]. - vld1.32 {q8}, [r5, :128] // load counter values 0-3 - vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) - vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) - vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) - vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7) - vadd.u32 q12, q8 // x12 += counter values 0-3 - vswp d1, d4 - vswp d3, d6 - vld1.32 {q8-q9}, [r0]! // load s0..7 - vswp d9, d12 - vswp d11, d14 - - // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1) - // after XORing the first 32 bytes. - vswp q1, q4 - - // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7) - - // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block) - vadd.u32 q0, q0, q8 - vadd.u32 q2, q2, q8 - vadd.u32 q4, q4, q8 - vadd.u32 q3, q3, q8 - - // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block) - vadd.u32 q1, q1, q9 - vadd.u32 q6, q6, q9 - vadd.u32 q5, q5, q9 - vadd.u32 q7, q7, q9 - - // XOR first 32 bytes using keystream from first two rows of first block - vld1.8 {q8-q9}, [r2]! - veor q8, q8, q0 - veor q9, q9, q1 - vst1.8 {q8-q9}, [r1]! - - // Re-interleave the words in the last two rows of each block (x8..15). 
- vld1.32 {q8-q9}, [sp, :256] - vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) - vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) - vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) - vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11) - vld1.32 {q0-q1}, [r0] // load s8..15 - vswp d25, d28 - vswp d27, d30 - vswp d17, d20 - vswp d19, d22 - - // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15) - - // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block) - vadd.u32 q8, q8, q0 - vadd.u32 q10, q10, q0 - vadd.u32 q9, q9, q0 - vadd.u32 q11, q11, q0 - - // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block) - vadd.u32 q12, q12, q1 - vadd.u32 q14, q14, q1 - vadd.u32 q13, q13, q1 - vadd.u32 q15, q15, q1 - - // XOR the rest of the data with the keystream - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q8 - veor q1, q1, q12 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q2 - veor q1, q1, q6 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q10 - veor q1, q1, q14 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q4 - veor q1, q1, q5 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q9 - veor q1, q1, q13 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q3 - veor q1, q1, q7 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2] - mov sp, r4 // restore original stack pointer - veor q0, q0, q11 - veor q1, q1, q15 - vst1.8 {q0-q1}, [r1] - - pop {r4-r5} - bx lr -ENDPROC(chacha_4block_xor_neon) diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S deleted file mode 100644 index 5cbd4a6fedad7cb3c99ed35295b77f554d967434..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/crc32-ce-core.S +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 - * calculation. 
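[Editorial note, not part of the deleted file: for reference, a bitwise model of the value the folded PMULL computation must equal — our sketch, using the usual reflected bit order for the polynomial stated in the next comment line; crc32c substitutes the Castagnoli polynomial 0x82F63B78.]

#include <stddef.h>
#include <stdint.h>

/* reflected CRC32, polynomial 0xEDB88320 (bit-reversed 0x04C11DB7) */
static uint32_t crc32_le_ref(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
	}
	return crc;
}

[The PMULL code reaches the same result by folding 64-byte stripes with the precomputed x^N mod P(x) constants listed below and finishing with a Barrett reduction.]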
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) - * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found - * at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2B: Instruction Set Reference, N-Z - * - * Authors: Gregory Prestas - * Alexander Boyko - */ - -#include -#include - - .text - .align 6 - .arch armv8-a - .arch_extension crc - .fpu crypto-neon-fp-armv8 - -.Lcrc32_constants: - /* - * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 - * #define CONSTANT_R1 0x154442bd4LL - * - * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 - * #define CONSTANT_R2 0x1c6e41596LL - */ - .quad 0x0000000154442bd4 - .quad 0x00000001c6e41596 - - /* - * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 - * #define CONSTANT_R3 0x1751997d0LL - * - * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e - * #define CONSTANT_R4 0x0ccaa009eLL - */ - .quad 0x00000001751997d0 - .quad 0x00000000ccaa009e - - /* - * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 - * #define CONSTANT_R5 0x163cd6124LL - */ - .quad 0x0000000163cd6124 - .quad 0x00000000FFFFFFFF - - /* - * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL - * - * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` - * = 0x1F7011641LL - * #define CONSTANT_RU 0x1F7011641LL - */ - .quad 0x00000001DB710641 - .quad 0x00000001F7011641 - -.Lcrc32c_constants: - .quad 0x00000000740eef02 - .quad 0x000000009e4addf8 - .quad 0x00000000f20c0dfe - .quad 0x000000014cd00bd6 - .quad 0x00000000dd45aab8 - .quad 0x00000000FFFFFFFF - .quad 0x0000000105ec76f0 - .quad 0x00000000dea713f1 - - dCONSTANTl .req d0 - dCONSTANTh .req d1 - qCONSTANT .req q0 - - BUF .req r0 - LEN .req r1 - CRC .req r2 - - qzr .req q9 - - /** - * Calculate crc32 - * BUF - buffer - * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63 - * CRC - initial crc32 - * return %eax crc32 - * uint crc32_pmull_le(unsigned char const *buffer, - * size_t len, uint crc32) - */ -ENTRY(crc32_pmull_le) - adr r3, .Lcrc32_constants - b 0f - -ENTRY(crc32c_pmull_le) - adr r3, .Lcrc32c_constants - -0: bic LEN, LEN, #15 - vld1.8 {q1-q2}, [BUF, :128]! - vld1.8 {q3-q4}, [BUF, :128]! - vmov.i8 qzr, #0 - vmov.i8 qCONSTANT, #0 - vmov.32 dCONSTANTl[0], CRC - veor.8 d2, d2, dCONSTANTl - sub LEN, LEN, #0x40 - cmp LEN, #0x40 - blt less_64 - - vld1.64 {qCONSTANT}, [r3] - -loop_64: /* 64 bytes Full cache line folding */ - sub LEN, LEN, #0x40 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q6, d5, dCONSTANTh - vmull.p64 q7, d7, dCONSTANTh - vmull.p64 q8, d9, dCONSTANTh - - vmull.p64 q1, d2, dCONSTANTl - vmull.p64 q2, d4, dCONSTANTl - vmull.p64 q3, d6, dCONSTANTl - vmull.p64 q4, d8, dCONSTANTl - - veor.8 q1, q1, q5 - vld1.8 {q5}, [BUF, :128]! - veor.8 q2, q2, q6 - vld1.8 {q6}, [BUF, :128]! - veor.8 q3, q3, q7 - vld1.8 {q7}, [BUF, :128]! - veor.8 q4, q4, q8 - vld1.8 {q8}, [BUF, :128]! - - veor.8 q1, q1, q5 - veor.8 q2, q2, q6 - veor.8 q3, q3, q7 - veor.8 q4, q4, q8 - - cmp LEN, #0x40 - bge loop_64 - -less_64: /* Folding cache line into 128bit */ - vldr dCONSTANTl, [r3, #16] - vldr dCONSTANTh, [r3, #24] - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q2 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q3 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q4 - - teq LEN, #0 - beq fold_64 - -loop_16: /* Folding rest buffer into 128bit */ - subs LEN, LEN, #0x10 - - vld1.8 {q2}, [BUF, :128]! 
- vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q2 - - bne loop_16 - -fold_64: - /* perform the last 64 bit fold, also adds 32 zeroes - * to the input stream */ - vmull.p64 q2, d2, dCONSTANTh - vext.8 q1, q1, qzr, #8 - veor.8 q1, q1, q2 - - /* final 32-bit fold */ - vldr dCONSTANTl, [r3, #32] - vldr d6, [r3, #40] - vmov.i8 d7, #0 - - vext.8 q2, q1, qzr, #4 - vand.8 d2, d2, d6 - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q2 - - /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ - vldr dCONSTANTl, [r3, #48] - vldr dCONSTANTh, [r3, #56] - - vand.8 q2, q1, q3 - vext.8 q2, qzr, q2, #8 - vmull.p64 q2, d5, dCONSTANTh - vand.8 q2, q2, q3 - vmull.p64 q2, d4, dCONSTANTl - veor.8 q1, q1, q2 - vmov r0, s5 - - bx lr -ENDPROC(crc32_pmull_le) -ENDPROC(crc32c_pmull_le) - - .macro __crc32, c - subs ip, r2, #8 - bmi .Ltail\c - - tst r1, #3 - bne .Lunaligned\c - - teq ip, #0 -.Laligned8\c: - ldrd r2, r3, [r1], #8 -ARM_BE8(rev r2, r2 ) -ARM_BE8(rev r3, r3 ) - crc32\c\()w r0, r0, r2 - crc32\c\()w r0, r0, r3 - bxeq lr - subs ip, ip, #8 - bpl .Laligned8\c - -.Ltail\c: - tst ip, #4 - beq 2f - ldr r3, [r1], #4 -ARM_BE8(rev r3, r3 ) - crc32\c\()w r0, r0, r3 - -2: tst ip, #2 - beq 1f - ldrh r3, [r1], #2 -ARM_BE8(rev16 r3, r3 ) - crc32\c\()h r0, r0, r3 - -1: tst ip, #1 - bxeq lr - ldrb r3, [r1] - crc32\c\()b r0, r0, r3 - bx lr - -.Lunaligned\c: - tst r1, #1 - beq 2f - ldrb r3, [r1], #1 - subs r2, r2, #1 - crc32\c\()b r0, r0, r3 - - tst r1, #2 - beq 0f -2: ldrh r3, [r1], #2 - subs r2, r2, #2 -ARM_BE8(rev16 r3, r3 ) - crc32\c\()h r0, r0, r3 - -0: subs ip, r2, #8 - bpl .Laligned8\c - b .Ltail\c - .endm - - .align 5 -ENTRY(crc32_armv8_le) - __crc32 -ENDPROC(crc32_armv8_le) - - .align 5 -ENTRY(crc32c_armv8_le) - __crc32 c -ENDPROC(crc32c_armv8_le) diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S deleted file mode 100644 index 86be258a803fa0b618f6d762d511143285b3a8fd..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ /dev/null @@ -1,381 +0,0 @@ -// -// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions -// -// Copyright (C) 2016 Linaro Ltd -// Copyright (C) 2019 Google LLC -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 2 as -// published by the Free Software Foundation. -// - -// Derived from the x86 version: -// -// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -// -// Copyright (c) 2013, Intel Corporation -// -// Authors: -// Erdinc Ozturk -// Vinodh Gopal -// James Guilford -// Tim Chen -// -// This software is available to you under a choice of one of two -// licenses. You may choose to be licensed under the terms of the GNU -// General Public License (GPL) Version 2, available from the file -// COPYING in the main directory of this source tree, or the -// OpenIB.org BSD license below: -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. 
-// -// * Neither the name of the Intel Corporation nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// -// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Reference paper titled "Fast CRC Computation for Generic -// Polynomials Using PCLMULQDQ Instruction" -// URL: http://www.intel.com/content/dam/www/public/us/en/documents -// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -// - -#include -#include - -#ifdef CONFIG_CPU_ENDIAN_BE8 -#define CPU_LE(code...) -#else -#define CPU_LE(code...) code -#endif - - .text - .arch armv7-a - .fpu crypto-neon-fp-armv8 - - init_crc .req r0 - buf .req r1 - len .req r2 - - fold_consts_ptr .req ip - - q0l .req d0 - q0h .req d1 - q1l .req d2 - q1h .req d3 - q2l .req d4 - q2h .req d5 - q3l .req d6 - q3h .req d7 - q4l .req d8 - q4h .req d9 - q5l .req d10 - q5h .req d11 - q6l .req d12 - q6h .req d13 - q7l .req d14 - q7h .req d15 - q8l .req d16 - q8h .req d17 - q9l .req d18 - q9h .req d19 - q10l .req d20 - q10h .req d21 - q11l .req d22 - q11h .req d23 - q12l .req d24 - q12h .req d25 - - FOLD_CONSTS .req q10 - FOLD_CONST_L .req q10l - FOLD_CONST_H .req q10h - - // Fold reg1, reg2 into the next 32 data bytes, storing the result back - // into reg1, reg2. - .macro fold_32_bytes, reg1, reg2 - vld1.64 {q11-q12}, [buf]! - - vmull.p64 q8, \reg1\()h, FOLD_CONST_H - vmull.p64 \reg1, \reg1\()l, FOLD_CONST_L - vmull.p64 q9, \reg2\()h, FOLD_CONST_H - vmull.p64 \reg2, \reg2\()l, FOLD_CONST_L - -CPU_LE( vrev64.8 q11, q11 ) -CPU_LE( vrev64.8 q12, q12 ) - vswp q11l, q11h - vswp q12l, q12h - - veor.8 \reg1, \reg1, q8 - veor.8 \reg2, \reg2, q9 - veor.8 \reg1, \reg1, q11 - veor.8 \reg2, \reg2, q12 - .endm - - // Fold src_reg into dst_reg, optionally loading the next fold constants - .macro fold_16_bytes, src_reg, dst_reg, load_next_consts - vmull.p64 q8, \src_reg\()l, FOLD_CONST_L - vmull.p64 \src_reg, \src_reg\()h, FOLD_CONST_H - .ifnb \load_next_consts - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - .endif - veor.8 \dst_reg, \dst_reg, q8 - veor.8 \dst_reg, \dst_reg, \src_reg - .endm - - .macro __adrl, out, sym - movw \out, #:lower16:\sym - movt \out, #:upper16:\sym - .endm - -// -// u16 crc_t10dif_pmull(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. -// -ENTRY(crc_t10dif_pmull) - - // For sizes less than 256 bytes, we can't fold 128 bytes at a time. - cmp len, #256 - blt .Lless_than_256_bytes - - __adrl fold_consts_ptr, .Lfold_across_128_bytes_consts - - // Load the first 128 data bytes. Byte swapping is necessary to make - // the bit order match the polynomial coefficient order. - vld1.64 {q0-q1}, [buf]! - vld1.64 {q2-q3}, [buf]! - vld1.64 {q4-q5}, [buf]! - vld1.64 {q6-q7}, [buf]! 
-CPU_LE( vrev64.8 q0, q0 ) -CPU_LE( vrev64.8 q1, q1 ) -CPU_LE( vrev64.8 q2, q2 ) -CPU_LE( vrev64.8 q3, q3 ) -CPU_LE( vrev64.8 q4, q4 ) -CPU_LE( vrev64.8 q5, q5 ) -CPU_LE( vrev64.8 q6, q6 ) -CPU_LE( vrev64.8 q7, q7 ) - vswp q0l, q0h - vswp q1l, q1h - vswp q2l, q2h - vswp q3l, q3h - vswp q4l, q4h - vswp q5l, q5h - vswp q6l, q6h - vswp q7l, q7h - - // XOR the first 16 data *bits* with the initial CRC value. - vmov.i8 q8h, #0 - vmov.u16 q8h[3], init_crc - veor q0h, q0h, q8h - - // Load the constants for folding across 128 bytes. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - // Subtract 128 for the 128 data bytes just consumed. Subtract another - // 128 to simplify the termination condition of the following loop. - sub len, len, #256 - - // While >= 128 data bytes remain (not counting q0-q7), fold the 128 - // bytes q0-q7 into them, storing the result back into q0-q7. -.Lfold_128_bytes_loop: - fold_32_bytes q0, q1 - fold_32_bytes q2, q3 - fold_32_bytes q4, q5 - fold_32_bytes q6, q7 - subs len, len, #128 - bge .Lfold_128_bytes_loop - - // Now fold the 112 bytes in q0-q6 into the 16 bytes in q7. - - // Fold across 64 bytes. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - fold_16_bytes q0, q4 - fold_16_bytes q1, q5 - fold_16_bytes q2, q6 - fold_16_bytes q3, q7, 1 - // Fold across 32 bytes. - fold_16_bytes q4, q6 - fold_16_bytes q5, q7, 1 - // Fold across 16 bytes. - fold_16_bytes q6, q7 - - // Add 128 to get the correct number of data bytes remaining in 0...127 - // (not counting q7), following the previous extra subtraction by 128. - // Then subtract 16 to simplify the termination condition of the - // following loop. - adds len, len, #(128-16) - - // While >= 16 data bytes remain (not counting q7), fold the 16 bytes q7 - // into them, storing the result back into q7. - blt .Lfold_16_bytes_loop_done -.Lfold_16_bytes_loop: - vmull.p64 q8, q7l, FOLD_CONST_L - vmull.p64 q7, q7h, FOLD_CONST_H - veor.8 q7, q7, q8 - vld1.64 {q0}, [buf]! -CPU_LE( vrev64.8 q0, q0 ) - vswp q0l, q0h - veor.8 q7, q7, q0 - subs len, len, #16 - bge .Lfold_16_bytes_loop - -.Lfold_16_bytes_loop_done: - // Add 16 to get the correct number of data bytes remaining in 0...15 - // (not counting q7), following the previous extra subtraction by 16. - adds len, len, #16 - beq .Lreduce_final_16_bytes - -.Lhandle_partial_segment: - // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first - // 16 bytes are in q7 and the rest are the remaining data in 'buf'. To - // do this without needing a fold constant for each possible 'len', - // redivide the bytes into a first chunk of 'len' bytes and a second - // chunk of 16 bytes, then fold the first chunk into the second. - - // q0 = last 16 original data bytes - add buf, buf, len - sub buf, buf, #16 - vld1.64 {q0}, [buf] -CPU_LE( vrev64.8 q0, q0 ) - vswp q0l, q0h - - // q1 = high order part of second chunk: q7 left-shifted by 'len' bytes. - __adrl r3, .Lbyteshift_table + 16 - sub r3, r3, len - vld1.8 {q2}, [r3] - vtbl.8 q1l, {q7l-q7h}, q2l - vtbl.8 q1h, {q7l-q7h}, q2h - - // q3 = first chunk: q7 right-shifted by '16-len' bytes. - vmov.i8 q3, #0x80 - veor.8 q2, q2, q3 - vtbl.8 q3l, {q7l-q7h}, q2l - vtbl.8 q3h, {q7l-q7h}, q2h - - // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes. - vshr.s8 q2, q2, #7 - - // q2 = second chunk: 'len' bytes from q0 (low-order bytes), - // then '16-len' bytes from q1 (high-order bytes). - vbsl.8 q2, q1, q0 - - // Fold the first chunk into the second chunk, storing the result in q7. 
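[Editorial note, not part of the deleted file, before the fold sequence below: as a point of reference for all of this folding, a scalar model of the CRC the folded path must reproduce — our sketch; CRC-T10DIF is a plain, non-reflected 16-bit CRC over the generator 0x18bb7 listed in the constants section.]

#include <stddef.h>
#include <stdint.h>

/* CRC-T10DIF: 16-bit, G(x) = x^16+x^15+x^11+x^9+x^8+x^7+x^5+x^4+x^2+x+1 */
static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= (uint16_t)(*p++) << 8;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7 : crc << 1;
	}
	return crc;
}

[Each 128-bit fold above is equivalent to advancing this loop over a whole stripe at once: multiply the running remainder by x^N mod G(x) with a carry-less multiply, then XOR in the next data.]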
- vmull.p64 q0, q3l, FOLD_CONST_L - vmull.p64 q7, q3h, FOLD_CONST_H - veor.8 q7, q7, q0 - veor.8 q7, q7, q2 - -.Lreduce_final_16_bytes: - // Reduce the 128-bit value M(x), stored in q7, to the final 16-bit CRC. - - // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - // Fold the high 64 bits into the low 64 bits, while also multiplying by - // x^64. This produces a 128-bit value congruent to x^64 * M(x) and - // whose low 48 bits are 0. - vmull.p64 q0, q7h, FOLD_CONST_H // high bits * x^48 * (x^80 mod G(x)) - veor.8 q0h, q0h, q7l // + low bits * x^64 - - // Fold the high 32 bits into the low 96 bits. This produces a 96-bit - // value congruent to x^64 * M(x) and whose low 48 bits are 0. - vmov.i8 q1, #0 - vmov s4, s3 // extract high 32 bits - vmov s3, s5 // zero high 32 bits - vmull.p64 q1, q1l, FOLD_CONST_L // high 32 bits * x^48 * (x^48 mod G(x)) - veor.8 q0, q0, q1 // + low bits - - // Load G(x) and floor(x^48 / G(x)). - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128] - - // Use Barrett reduction to compute the final CRC value. - vmull.p64 q1, q0h, FOLD_CONST_H // high 32 bits * floor(x^48 / G(x)) - vshr.u64 q1l, q1l, #32 // /= x^32 - vmull.p64 q1, q1l, FOLD_CONST_L // *= G(x) - vshr.u64 q0l, q0l, #48 - veor.8 q0l, q0l, q1l // + low 16 nonzero bits - // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of q0. - - vmov.u16 r0, q0l[0] - bx lr - -.Lless_than_256_bytes: - // Checksumming a buffer of length 16...255 bytes - - __adrl fold_consts_ptr, .Lfold_across_16_bytes_consts - - // Load the first 16 data bytes. - vld1.64 {q7}, [buf]! -CPU_LE( vrev64.8 q7, q7 ) - vswp q7l, q7h - - // XOR the first 16 data *bits* with the initial CRC value. - vmov.i8 q0h, #0 - vmov.u16 q0h[3], init_crc - veor.8 q7h, q7h, q0h - - // Load the fold-across-16-bytes constants. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - cmp len, #16 - beq .Lreduce_final_16_bytes // len == 16 - subs len, len, #32 - addlt len, len, #16 - blt .Lhandle_partial_segment // 17 <= len <= 31 - b .Lfold_16_bytes_loop // 32 <= len <= 255 -ENDPROC(crc_t10dif_pmull) - - .section ".rodata", "a" - .align 4 - -// Fold constants precomputed from the polynomial 0x18bb7 -// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 -.Lfold_across_128_bytes_consts: - .quad 0x0000000000006123 // x^(8*128) mod G(x) - .quad 0x0000000000002295 // x^(8*128+64) mod G(x) -// .Lfold_across_64_bytes_consts: - .quad 0x0000000000001069 // x^(4*128) mod G(x) - .quad 0x000000000000dd31 // x^(4*128+64) mod G(x) -// .Lfold_across_32_bytes_consts: - .quad 0x000000000000857d // x^(2*128) mod G(x) - .quad 0x0000000000007acc // x^(2*128+64) mod G(x) -.Lfold_across_16_bytes_consts: - .quad 0x000000000000a010 // x^(1*128) mod G(x) - .quad 0x0000000000001faa // x^(1*128+64) mod G(x) -// .Lfinal_fold_consts: - .quad 0x1368000000000000 // x^48 * (x^48 mod G(x)) - .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x)) -// .Lbarrett_reduction_consts: - .quad 0x0000000000018bb7 // G(x) - .quad 0x00000001f65a57f8 // floor(x^48 / G(x)) - -// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - -// len] is the index vector to shift left by 'len' bytes, and is also {0x80, -// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes. 
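[Editorial note, not part of the deleted file: the same trick in C terms — our illustration; vtbl16 and the shift helpers are invented names. It rests on vtbl.8 returning zero for any index byte that falls outside the 16-byte table, and on XORing the indices with 0x80 to flip a left shift into the complementary right shift. The byteshift_table array mirrors the .Lbyteshift_table data that follows.]

#include <stdint.h>

/* emulate a 16-byte vtbl.8 lookup: out-of-range index bytes give 0 */
static void vtbl16(uint8_t out[16], const uint8_t tbl[16],
		   const uint8_t idx[16])
{
	for (int i = 0; i < 16; i++)
		out[i] = idx[i] < 16 ? tbl[idx[i]] : 0;
}

static const uint8_t byteshift_table[32] = {
	0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00,
};

/* out = in shifted toward higher indices by len bytes (len in 1..15) */
static void shift_left(uint8_t out[16], const uint8_t in[16], int len)
{
	vtbl16(out, in, &byteshift_table[16 - len]);
}

/* out = in shifted toward lower indices by 16 - len bytes */
static void shift_right(uint8_t out[16], const uint8_t in[16], int len)
{
	uint8_t idx[16];

	for (int i = 0; i < 16; i++)
		idx[i] = byteshift_table[16 - len + i] ^ 0x80;
	vtbl16(out, in, idx);
}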
-.Lbyteshift_table: - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S deleted file mode 100644 index c47fe81abcb0189cdb809a959c96dbdfe2fa74bd..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/ghash-ce-core.S +++ /dev/null @@ -1,337 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions. - * - * Copyright (C) 2015 - 2017 Linaro Ltd. - */ - -#include -#include - - SHASH .req q0 - T1 .req q1 - XL .req q2 - XM .req q3 - XH .req q4 - IN1 .req q4 - - SHASH_L .req d0 - SHASH_H .req d1 - T1_L .req d2 - T1_H .req d3 - XL_L .req d4 - XL_H .req d5 - XM_L .req d6 - XM_H .req d7 - XH_L .req d8 - - t0l .req d10 - t0h .req d11 - t1l .req d12 - t1h .req d13 - t2l .req d14 - t2h .req d15 - t3l .req d16 - t3h .req d17 - t4l .req d18 - t4h .req d19 - - t0q .req q5 - t1q .req q6 - t2q .req q7 - t3q .req q8 - t4q .req q9 - T2 .req q9 - - s1l .req d20 - s1h .req d21 - s2l .req d22 - s2h .req d23 - s3l .req d24 - s3h .req d25 - s4l .req d26 - s4h .req d27 - - MASK .req d28 - SHASH2_p8 .req d28 - - k16 .req d29 - k32 .req d30 - k48 .req d31 - SHASH2_p64 .req d31 - - HH .req q10 - HH3 .req q11 - HH4 .req q12 - HH34 .req q13 - - HH_L .req d20 - HH_H .req d21 - HH3_L .req d22 - HH3_H .req d23 - HH4_L .req d24 - HH4_H .req d25 - HH34_L .req d26 - HH34_H .req d27 - SHASH2_H .req d29 - - XL2 .req q5 - XM2 .req q6 - XH2 .req q7 - T3 .req q8 - - XL2_L .req d10 - XL2_H .req d11 - XM2_L .req d12 - XM2_H .req d13 - T3_L .req d16 - T3_H .req d17 - - .text - .fpu crypto-neon-fp-armv8 - - .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 - vmull.p64 \rd, \rn, \rm - .endm - - /* - * This implementation of 64x64 -> 128 bit polynomial multiplication - * using vmull.p8 instructions (8x8 -> 16) is taken from the paper - * "Fast Software Polynomial Multiplication on ARM Processors Using - * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and - * Ricardo Dahab (https://hal.inria.fr/hal-01506572) - * - * It has been slightly tweaked for in-order performance, and to allow - * 'rq' to overlap with 'ad' or 'bd'. 
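[Editorial note, not part of the deleted file, for orientation before the macro itself: both __pmull_p64 and __pmull_p8 compute a 64x64 -> 128-bit carry-less product — one vmull.p64 where available, eight 8x8 vmull.p8 partial products otherwise. A scalar model, our sketch with hypothetical names:]

#include <stdint.h>

struct u128 { uint64_t lo, hi; };

/* carry-less (polynomial) multiply: XOR instead of add, no carries */
static struct u128 clmul64(uint64_t a, uint64_t b)
{
	struct u128 r = { 0, 0 };

	for (int i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			r.lo ^= a << i;
			if (i)
				r.hi ^= a >> (64 - i);
		}
	}
	return r;
}

[GHASH then reduces the 128-bit product modulo x^128 + x^7 + x^2 + x + 1, which is what the __pmull_reduce_* macros below implement; the 0xe1-based MASK constant encodes that reduction polynomial in bit-reflected form.]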
- */ - .macro __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l - vext.8 t0l, \ad, \ad, #1 @ A1 - .ifc \b1, t4l - vext.8 t4l, \bd, \bd, #1 @ B1 - .endif - vmull.p8 t0q, t0l, \bd @ F = A1*B - vext.8 t1l, \ad, \ad, #2 @ A2 - vmull.p8 t4q, \ad, \b1 @ E = A*B1 - .ifc \b2, t3l - vext.8 t3l, \bd, \bd, #2 @ B2 - .endif - vmull.p8 t1q, t1l, \bd @ H = A2*B - vext.8 t2l, \ad, \ad, #3 @ A3 - vmull.p8 t3q, \ad, \b2 @ G = A*B2 - veor t0q, t0q, t4q @ L = E + F - .ifc \b3, t4l - vext.8 t4l, \bd, \bd, #3 @ B3 - .endif - vmull.p8 t2q, t2l, \bd @ J = A3*B - veor t0l, t0l, t0h @ t0 = (L) (P0 + P1) << 8 - veor t1q, t1q, t3q @ M = G + H - .ifc \b4, t3l - vext.8 t3l, \bd, \bd, #4 @ B4 - .endif - vmull.p8 t4q, \ad, \b3 @ I = A*B3 - veor t1l, t1l, t1h @ t1 = (M) (P2 + P3) << 16 - vmull.p8 t3q, \ad, \b4 @ K = A*B4 - vand t0h, t0h, k48 - vand t1h, t1h, k32 - veor t2q, t2q, t4q @ N = I + J - veor t0l, t0l, t0h - veor t1l, t1l, t1h - veor t2l, t2l, t2h @ t2 = (N) (P4 + P5) << 24 - vand t2h, t2h, k16 - veor t3l, t3l, t3h @ t3 = (K) (P6 + P7) << 32 - vmov.i64 t3h, #0 - vext.8 t0q, t0q, t0q, #15 - veor t2l, t2l, t2h - vext.8 t1q, t1q, t1q, #14 - vmull.p8 \rq, \ad, \bd @ D = A*B - vext.8 t2q, t2q, t2q, #13 - vext.8 t3q, t3q, t3q, #12 - veor t0q, t0q, t1q - veor t2q, t2q, t3q - veor \rq, \rq, t0q - veor \rq, \rq, t2q - .endm - - // - // PMULL (64x64->128) based reduction for CPUs that can do - // it in a single instruction. - // - .macro __pmull_reduce_p64 - vmull.p64 T1, XL_L, MASK - - veor XH_L, XH_L, XM_H - vext.8 T1, T1, T1, #8 - veor XL_H, XL_H, XM_L - veor T1, T1, XL - - vmull.p64 XL, T1_H, MASK - .endm - - // - // Alternative reduction for CPUs that lack support for the - // 64x64->128 PMULL instruction - // - .macro __pmull_reduce_p8 - veor XL_H, XL_H, XM_L - veor XH_L, XH_L, XM_H - - vshl.i64 T1, XL, #57 - vshl.i64 T2, XL, #62 - veor T1, T1, T2 - vshl.i64 T2, XL, #63 - veor T1, T1, T2 - veor XL_H, XL_H, T1_L - veor XH_L, XH_L, T1_H - - vshr.u64 T1, XL, #1 - veor XH, XH, XL - veor XL, XL, T1 - vshr.u64 T1, T1, #6 - vshr.u64 XL, XL, #1 - .endm - - .macro ghash_update, pn - vld1.64 {XL}, [r1] - - /* do the head block first, if supplied */ - ldr ip, [sp] - teq ip, #0 - beq 0f - vld1.64 {T1}, [ip] - teq r0, #0 - b 3f - -0: .ifc \pn, p64 - tst r0, #3 // skip until #blocks is a - bne 2f // round multiple of 4 - - vld1.8 {XL2-XM2}, [r2]! -1: vld1.8 {T3-T2}, [r2]! - vrev64.8 XL2, XL2 - vrev64.8 XM2, XM2 - - subs r0, r0, #4 - - vext.8 T1, XL2, XL2, #8 - veor XL2_H, XL2_H, XL_L - veor XL, XL, T1 - - vrev64.8 T3, T3 - vrev64.8 T1, T2 - - vmull.p64 XH, HH4_H, XL_H // a1 * b1 - veor XL2_H, XL2_H, XL_H - vmull.p64 XL, HH4_L, XL_L // a0 * b0 - vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0) - - vmull.p64 XH2, HH3_H, XM2_L // a1 * b1 - veor XM2_L, XM2_L, XM2_H - vmull.p64 XL2, HH3_L, XM2_H // a0 * b0 - vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - vmull.p64 XH2, HH_H, T3_L // a1 * b1 - veor T3_L, T3_L, T3_H - vmull.p64 XL2, HH_L, T3_H // a0 * b0 - vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - vmull.p64 XH2, SHASH_H, T1_L // a1 * b1 - veor T1_L, T1_L, T1_H - vmull.p64 XL2, SHASH_L, T1_H // a0 * b0 - vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - beq 4f - - vld1.8 {XL2-XM2}, [r2]! - - veor T1, XL, XH - veor XM, XM, T1 - - __pmull_reduce_p64 - - veor T1, T1, XH - veor XL, XL, T1 - - b 1b - .endif - -2: vld1.64 {T1}, [r2]! 
- subs r0, r0, #1 - -3: /* multiply XL by SHASH in GF(2^128) */ -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev64.8 T1, T1 -#endif - vext.8 IN1, T1, T1, #8 - veor T1_L, T1_L, XL_H - veor XL, XL, IN1 - - __pmull_\pn XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h @ a1 * b1 - veor T1, T1, XL - __pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0 - __pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0) - -4: veor T1, XL, XH - veor XM, XM, T1 - - __pmull_reduce_\pn - - veor T1, T1, XH - veor XL, XL, T1 - - bne 0b - - vst1.64 {XL}, [r1] - bx lr - .endm - - /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) - */ -ENTRY(pmull_ghash_update_p64) - vld1.64 {SHASH}, [r3]! - vld1.64 {HH}, [r3]! - vld1.64 {HH3-HH4}, [r3] - - veor SHASH2_p64, SHASH_L, SHASH_H - veor SHASH2_H, HH_L, HH_H - veor HH34_L, HH3_L, HH3_H - veor HH34_H, HH4_L, HH4_H - - vmov.i8 MASK, #0xe1 - vshl.u64 MASK, MASK, #57 - - ghash_update p64 -ENDPROC(pmull_ghash_update_p64) - -ENTRY(pmull_ghash_update_p8) - vld1.64 {SHASH}, [r3] - veor SHASH2_p8, SHASH_L, SHASH_H - - vext.8 s1l, SHASH_L, SHASH_L, #1 - vext.8 s2l, SHASH_L, SHASH_L, #2 - vext.8 s3l, SHASH_L, SHASH_L, #3 - vext.8 s4l, SHASH_L, SHASH_L, #4 - vext.8 s1h, SHASH_H, SHASH_H, #1 - vext.8 s2h, SHASH_H, SHASH_H, #2 - vext.8 s3h, SHASH_H, SHASH_H, #3 - vext.8 s4h, SHASH_H, SHASH_H, #4 - - vmov.i64 k16, #0xffff - vmov.i64 k32, #0xffffffff - vmov.i64 k48, #0xffffffffffff - - ghash_update p8 -ENDPROC(pmull_ghash_update_p8) diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S deleted file mode 100644 index 434d80ab531c2a600fbcffc89c21e6a8ad5ef284..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/nh-neon-core.S +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NH - ε-almost-universal hash function, NEON accelerated version - * - * Copyright 2018 Google LLC - * - * Author: Eric Biggers - */ - -#include - - .text - .fpu neon - - KEY .req r0 - MESSAGE .req r1 - MESSAGE_LEN .req r2 - HASH .req r3 - - PASS0_SUMS .req q0 - PASS0_SUM_A .req d0 - PASS0_SUM_B .req d1 - PASS1_SUMS .req q1 - PASS1_SUM_A .req d2 - PASS1_SUM_B .req d3 - PASS2_SUMS .req q2 - PASS2_SUM_A .req d4 - PASS2_SUM_B .req d5 - PASS3_SUMS .req q3 - PASS3_SUM_A .req d6 - PASS3_SUM_B .req d7 - K0 .req q4 - K1 .req q5 - K2 .req q6 - K3 .req q7 - T0 .req q8 - T0_L .req d16 - T0_H .req d17 - T1 .req q9 - T1_L .req d18 - T1_H .req d19 - T2 .req q10 - T2_L .req d20 - T2_H .req d21 - T3 .req q11 - T3_L .req d22 - T3_H .req d23 - -.macro _nh_stride k0, k1, k2, k3 - - // Load next message stride - vld1.8 {T3}, [MESSAGE]! - - // Load next key stride - vld1.32 {\k3}, [KEY]! - - // Add message words to key words - vadd.u32 T0, T3, \k0 - vadd.u32 T1, T3, \k1 - vadd.u32 T2, T3, \k2 - vadd.u32 T3, T3, \k3 - - // Multiply 32x32 => 64 and accumulate - vmlal.u32 PASS0_SUMS, T0_L, T0_H - vmlal.u32 PASS1_SUMS, T1_L, T1_H - vmlal.u32 PASS2_SUMS, T2_L, T2_H - vmlal.u32 PASS3_SUMS, T3_L, T3_H -.endm - -/* - * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) - * - * It's guaranteed that message_len % 16 == 0. - */ -ENTRY(nh_neon) - - vld1.32 {K0,K1}, [KEY]! - vmov.u64 PASS0_SUMS, #0 - vmov.u64 PASS1_SUMS, #0 - vld1.32 {K2}, [KEY]! 
- vmov.u64 PASS2_SUMS, #0 - vmov.u64 PASS3_SUMS, #0 - - subs MESSAGE_LEN, MESSAGE_LEN, #64 - blt .Lloop4_done -.Lloop4: - _nh_stride K0, K1, K2, K3 - _nh_stride K1, K2, K3, K0 - _nh_stride K2, K3, K0, K1 - _nh_stride K3, K0, K1, K2 - subs MESSAGE_LEN, MESSAGE_LEN, #64 - bge .Lloop4 - -.Lloop4_done: - ands MESSAGE_LEN, MESSAGE_LEN, #63 - beq .Ldone - _nh_stride K0, K1, K2, K3 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K1, K2, K3, K0 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K2, K3, K0, K1 - -.Ldone: - // Sum the accumulators for each pass, then store the sums to 'hash' - vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B - vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B - vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B - vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B - vst1.8 {T0-T1}, [HASH] - bx lr -ENDPROC(nh_neon) diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S deleted file mode 100644 index f82cd8cf5a093f5bb79c911a45513864dcdbe271..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-armv4-large.S +++ /dev/null @@ -1,507 +0,0 @@ -#define __ARM_ARCH__ __LINUX_ARM_ARCH__ -@ SPDX-License-Identifier: GPL-2.0 - -@ This code is taken from the OpenSSL project but the author (Andy Polyakov) -@ has relicensed it under the GPLv2. Therefore this program is free software; -@ you can redistribute it and/or modify it under the terms of the GNU General -@ Public License version 2 as published by the Free Software Foundation. -@ -@ The original headers, including the original license headers, are -@ included below for completeness. - -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ sha1_block procedure for ARMv4. -@ -@ January 2007. - -@ Size/performance trade-off -@ ==================================================================== -@ impl size in bytes comp cycles[*] measured performance -@ ==================================================================== -@ thumb 304 3212 4420 -@ armv4-small 392/+29% 1958/+64% 2250/+96% -@ armv4-compact 740/+89% 1552/+26% 1840/+22% -@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] -@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% -@ ==================================================================== -@ thumb = same as 'small' but in Thumb instructions[**] and -@ with recurring code in two private functions; -@ small = detached Xload/update, loops are folded; -@ compact = detached Xload/update, 5x unroll; -@ large = interleaved Xload/update, 5x unroll; -@ full unroll = interleaved Xload/update, full unroll, estimated[!]; -@ -@ [*] Manually counted instructions in "grand" loop body. Measured -@ performance is affected by prologue and epilogue overhead, -@ i-cache availability, branch penalties, etc. -@ [**] While each Thumb instruction is twice smaller, they are not as -@ diverse as ARM ones: e.g., there are only two arithmetic -@ instructions with 3 arguments, no [fixed] rotate, addressing -@ modes are limited. As result it takes more instructions to do -@ the same job in Thumb, therefore the code is never twice as -@ small and always slower. -@ [***] which is also ~35% better than compiler generated code. 
Dual- -@ issue Cortex A8 core was measured to process input block in -@ ~990 cycles. - -@ August 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 13% improvement on -@ Cortex A8 core and in absolute terms ~870 cycles per input block -@ [or 13.6 cycles per byte]. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 10% -@ improvement on Cortex A8 core and 12.2 cycles per byte. - -#include - -.text - -.align 2 -ENTRY(sha1_block_data_order) - stmdb sp!,{r4-r12,lr} - add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 - ldmia r0,{r3,r4,r5,r6,r7} -.Lloop: - ldr r8,.LK_00_19 - mov r14,sp - sub sp,sp,#15*4 - mov r5,r5,ror#30 - mov r6,r6,ror#30 - mov r7,r7,ror#30 @ [6] -.L_00_15: -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r6,r8,r6,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r4,r5 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r6,r8,r6,ror#2 @ E+=K_00_19 - eor r10,r4,r5 @ F_xx_xx - add r6,r6,r7,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r3,r10,ror#2 - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r6,r6,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r5,r8,r5,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r3,r4 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r5,r8,r5,ror#2 @ E+=K_00_19 - eor r10,r3,r4 @ F_xx_xx - add r5,r5,r6,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r7,r10,ror#2 - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r5,r5,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r4,r8,r4,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r7,r3 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r4,r8,r4,ror#2 @ E+=K_00_19 - eor r10,r7,r3 @ F_xx_xx - add r4,r4,r5,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r6,r10,ror#2 - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! 
- add r4,r4,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r3,r8,r3,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r6,r7 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r3,r8,r3,ror#2 @ E+=K_00_19 - eor r10,r6,r7 @ F_xx_xx - add r3,r3,r4,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r5,r10,ror#2 - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r3,r3,r10 @ E+=F_00_19(B,C,D) - cmp r14,sp - bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#25*4 -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - add r6,r6,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - add r5,r5,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - add r4,r4,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - add r3,r3,r10 @ E+=F_00_19(B,C,D) - - ldr r8,.LK_20_39 @ [+15+16*4] - cmn sp,#0 @ [+3], clear carry to denote 20_39 -.L_20_39_or_60_79: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- eor r10,r4,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_20_39(B,C,D) - ARM( teq r14,sp ) @ preserve carry - THUMB( mov r11,sp ) - THUMB( teq r14,r11 ) @ preserve carry - bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] - bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes - - ldr r8,.LK_40_59 - sub sp,sp,#20*4 @ [+2] -.L_40_59: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r4,r10,ror#2 @ F_xx_xx - and r11,r5,r6 @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_40_59(B,C,D) - add r7,r7,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - and r11,r4,r5 @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_40_59(B,C,D) - add r6,r6,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- and r10,r7,r10,ror#2 @ F_xx_xx - and r11,r3,r4 @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_40_59(B,C,D) - add r5,r5,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - and r11,r7,r3 @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_40_59(B,C,D) - add r4,r4,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - and r11,r6,r7 @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_40_59(B,C,D) - add r3,r3,r11,ror#2 - cmp r14,sp - bne .L_40_59 @ [+((12+5)*5+2)*4] - - ldr r8,.LK_60_79 - sub sp,sp,#20*4 - cmp sp,#0 @ set carry to denote 60_79 - b .L_20_39_or_60_79 @ [+4], spare 300 bytes -.L_done: - add sp,sp,#80*4 @ "deallocate" stack frame - ldmia r0,{r8,r9,r10,r11,r12} - add r3,r8,r3 - add r4,r9,r4 - add r5,r10,r5,ror#2 - add r6,r11,r6,ror#2 - add r7,r12,r7,ror#2 - stmia r0,{r3,r4,r5,r6,r7} - teq r1,r2 - bne .Lloop @ [+18], total 1307 - - ldmia sp!,{r4-r12,pc} -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -ENDPROC(sha1_block_data_order) -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S deleted file mode 100644 index 28d816a6a530777af2b1b4f32052afbbb7a28728..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ /dev/null @@ -1,634 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function - * - * Copyright © 2013-2014 Jussi Kivilinna - */ - -#include -#include - -.syntax unified -.fpu neon - -.text - - -/* Context structure */ - -#define state_h0 0 -#define state_h1 4 -#define state_h2 8 -#define state_h3 12 -#define state_h4 16 - - -/* Constants */ - -#define K1 0x5A827999 -#define K2 0x6ED9EBA1 -#define K3 0x8F1BBCDC -#define K4 0xCA62C1D6 -.align 4 -.LK_VEC: -.LK1: .long K1, K1, K1, K1 -.LK2: .long K2, K2, K2, K2 -.LK3: .long K3, K3, K3, K3 -.LK4: .long K4, K4, K4, K4 - - -/* Register macros */ - -#define RSTATE r0 -#define RDATA r1 -#define RNBLKS r2 -#define ROLDSTACK r3 -#define RWK lr - -#define _a r4 -#define _b r5 -#define _c r6 -#define _d r7 -#define _e r8 - -#define RT0 r9 -#define RT1 r10 -#define RT2 r11 -#define RT3 r12 - -#define W0 q0 -#define W1 q7 -#define W2 q2 -#define W3 q3 -#define W4 q4 -#define W5 q6 -#define W6 q5 -#define W7 q1 - -#define tmp0 q8 -#define tmp1 q9 -#define tmp2 q10 -#define tmp3 q11 - -#define qK1 q12 -#define qK2 q13 -#define qK3 q14 -#define qK4 q15 - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define ARM_LE(code...) -#else -#define ARM_LE(code...) code -#endif - -/* Round function macros. 
*/ - -#define WK_offs(i) (((i) & 15) * 4) - -#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - bic RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - and RT1, c, b; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add RT0, RT0, RT3; \ - add e, e, RT1; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; - -#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - eor RT0, RT0, c; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT3; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; \ - -#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, b, c; \ - and RT1, b, c; \ - add e, e, a, ror #(32 - 5); \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - and RT0, RT0, d; \ - add RT1, RT1, RT3; \ - add e, e, RT0; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT1; - -#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define R(a,b,c,d,e,f,i) \ - _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define dummy(...) - - -/* Input expansion macros. 
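
The _R_F1, _R_F2/_R_F4 and _R_F3 macros above are the three SHA-1 boolean round functions: Ch ("bic RT0,d,b" plus "and RT1,c,b"), parity, and Maj split into two disjoint terms so they can be summed with add. A minimal C sketch of the round they implement — illustrative only, none of these names come from the kernel source:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

static uint32_t f1(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) | (~b & d);		/* Ch, rounds 0-19 */
}

static uint32_t f2(uint32_t b, uint32_t c, uint32_t d)
{
	return b ^ c ^ d;			/* parity, rounds 20-39 and 60-79 */
}

static uint32_t f3(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) + ((b ^ c) & d);		/* Maj, as two disjoint terms */
}

/* One SHA-1 round. The assembly never shuffles data like this: it
 * rotates the register roles across successive _R() invocations
 * instead, and folds the rol(b,30) into later "ror #2" operands. */
static void sha1_round(uint32_t v[5], uint32_t w, uint32_t k,
		       uint32_t (*f)(uint32_t, uint32_t, uint32_t))
{
	uint32_t t = rol32(v[0], 5) + f(v[1], v[2], v[3]) + v[4] + w + k;

	v[4] = v[3];
	v[3] = v[2];
	v[2] = rol32(v[1], 30);
	v[1] = v[0];
	v[0] = t;
}
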
*/ - -/********* Precalc macros for rounds 0-15 *************************************/ - -#define W_PRECALC_00_15() \ - add RWK, sp, #(WK_offs(0)); \ - \ - vld1.32 {W0, W7}, [RDATA]!; \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - vld1.32 {W6, W5}, [RDATA]!; \ - vadd.u32 tmp0, W0, curK; \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - vadd.u32 tmp1, W7, curK; \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - vadd.u32 tmp2, W6, curK; \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - vadd.u32 tmp3, W5, curK; \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - -#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W0, W7}, [RDATA]!; \ - -#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(0)); \ - -#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - -#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W6, W5}, [RDATA]!; \ - -#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W0, curK; \ - -#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - -#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - -#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp1, W7, curK; \ - -#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - -#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp2, W6, curK; \ - -#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - -#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp3, W5, curK; \ - -#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - - -/********* Precalc macros for rounds 16-31 ************************************/ - -#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0; \ - vext.8 W, W_m16, W_m12, #8; \ - -#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i)); \ - vext.8 tmp0, W_m04, tmp0, #4; \ - -#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W_m16; \ - veor.32 W, W, W_m08; \ - -#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp1, tmp1; \ - veor W, W, tmp0; \ - -#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp0, W, #1; \ - -#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp1, tmp1, W, #(16-12); \ - vshr.u32 W, W, #31; \ - -#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr tmp0, tmp0, W; \ - vshr.u32 W, tmp1, #30; \ - -#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, tmp1, #2; \ - -#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W; \ - -#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0, tmp1; \ - -#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - 
vst1.32 {tmp0}, [RWK]; - - -/********* Precalc macros for rounds 32-79 ************************************/ - -#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m28; \ - -#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp0, W_m08, W_m04, #8; \ - -#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m16; \ - -#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0; \ - -#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i&~3)); \ - -#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, W, #2; \ - -#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshr.u32 tmp0, W, #30; \ - -#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr W, tmp0, tmp1; \ - -#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0}, [RWK]; - - -/* - * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. - * - * unsigned int - * sha1_transform_neon (void *ctx, const unsigned char *data, - * unsigned int nblks) - */ -.align 3 -ENTRY(sha1_transform_neon) - /* input: - * r0: ctx, CTX - * r1: data (64*nblks bytes) - * r2: nblks - */ - - cmp RNBLKS, #0; - beq .Ldo_nothing; - - push {r4-r12, lr}; - /*vpush {q4-q7};*/ - - adr RT3, .LK_VEC; - - mov ROLDSTACK, sp; - - /* Align stack. */ - sub RT0, sp, #(16*4); - and RT0, #(~(16-1)); - mov sp, RT0; - - vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ - - /* Get the values of the chaining variables. */ - ldm RSTATE, {_a-_e}; - - vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ - -#undef curK -#define curK qK1 - /* Precalc 0-15. */ - W_PRECALC_00_15(); - -.Loop: - /* Transform 0-15 + Precalc 16-31. 
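
The WPRECALC_16_31_* and WPRECALC_32_79_* steps above interleave four-lane NEON computation of the SHA-1 message schedule with the scalar rounds. The recurrence being vectorised, as a C sketch with invented names:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

/* Expand one 16-word block into the 80-word SHA-1 schedule. */
static void sha1_schedule(const uint32_t blk[16], uint32_t w[80])
{
	int i;

	for (i = 0; i < 16; i++)
		w[i] = blk[i];		/* after byte-swapping on little-endian */
	for (i = 16; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
}

From round 32 onward the equivalent form w[i] = rol32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2) is used instead — that is what the three veor steps against W_m28, W_m16 and the W_m08/W_m04 pair plus the vshl/vshr/vorr rotate-by-2 compute — because it has no dependency inside a four-word vector; the 16-31 steps still carry the w[i-3] dependency, hence their extra vext/shift fixup dance.
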
*/ - _R( _a, _b, _c, _d, _e, F1, 0, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 1, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 2, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 3, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, - W4, W5, W6, W7, W0, _, _, _ ); - -#undef curK -#define curK qK2 - _R( _b, _c, _d, _e, _a, F1, 4, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 5, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 6, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 7, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, - W3, W4, W5, W6, W7, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F1, 8, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 9, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 10, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 11, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, - W2, W3, W4, W5, W6, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F1, 12, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 13, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 14, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 15, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, - W1, W2, W3, W4, W5, _, _, _ ); - - /* Transform 16-63 + Precalc 32-79. 
*/ - _R( _e, _a, _b, _c, _d, F1, 16, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _d, _e, _a, _b, _c, F1, 17, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _c, _d, _e, _a, _b, F1, 18, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F1, 19, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _a, _b, _c, _d, _e, F2, 20, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _e, _a, _b, _c, _d, F2, 21, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _d, _e, _a, _b, _c, F2, 22, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F2, 23, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - -#undef curK -#define curK qK3 - _R( _b, _c, _d, _e, _a, F2, 24, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _a, _b, _c, _d, _e, F2, 25, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _e, _a, _b, _c, _d, F2, 26, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F2, 27, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - - _R( _c, _d, _e, _a, _b, F2, 28, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _b, _c, _d, _e, _a, F2, 29, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _a, _b, _c, _d, _e, F2, 30, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F2, 31, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - - _R( _d, _e, _a, _b, _c, F2, 32, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _c, _d, _e, _a, _b, F2, 33, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _b, _c, _d, _e, _a, F2, 34, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _a, _b, _c, _d, _e, F2, 35, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - - _R( _e, _a, _b, _c, _d, F2, 36, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _d, _e, _a, _b, _c, F2, 37, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _c, _d, _e, _a, _b, F2, 38, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _b, _c, _d, _e, _a, F2, 39, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - - _R( _a, _b, _c, _d, _e, F3, 40, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _e, _a, _b, _c, _d, F3, 41, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _d, _e, _a, _b, _c, F3, 42, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _c, _d, _e, _a, _b, F3, 43, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - -#undef curK -#define curK qK4 - _R( _b, _c, _d, 
_e, _a, F3, 44, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _a, _b, _c, _d, _e, F3, 45, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _e, _a, _b, _c, _d, F3, 46, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _d, _e, _a, _b, _c, F3, 47, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - - _R( _c, _d, _e, _a, _b, F3, 48, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F3, 49, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _a, _b, _c, _d, _e, F3, 50, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _e, _a, _b, _c, _d, F3, 51, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _d, _e, _a, _b, _c, F3, 52, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F3, 53, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _b, _c, _d, _e, _a, F3, 54, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _a, _b, _c, _d, _e, F3, 55, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - - _R( _e, _a, _b, _c, _d, F3, 56, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F3, 57, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _c, _d, _e, _a, _b, F3, 58, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _b, _c, _d, _e, _a, F3, 59, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - - subs RNBLKS, #1; - - _R( _a, _b, _c, _d, _e, F4, 60, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F4, 61, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _d, _e, _a, _b, _c, F4, 62, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _c, _d, _e, _a, _b, F4, 63, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - - beq .Lend; - - /* Transform 64-79 + Precalc 0-15 of next block. 
*/ -#undef curK -#define curK qK1 - _R( _b, _c, _d, _e, _a, F4, 64, - WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 65, - WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 66, - WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 67, - WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F4, 68, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 69, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 70, - WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 71, - WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F4, 72, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 73, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 74, - WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 75, - WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _e, _a, _b, _c, _d, F4, 76, - WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 77, - WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 78, - WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 79, - WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - add _e, RT0; - stm RSTATE, {_a-_e}; - - b .Loop; - -.Lend: - /* Transform 64-79 */ - R( _b, _c, _d, _e, _a, F4, 64 ); - R( _a, _b, _c, _d, _e, F4, 65 ); - R( _e, _a, _b, _c, _d, F4, 66 ); - R( _d, _e, _a, _b, _c, F4, 67 ); - R( _c, _d, _e, _a, _b, F4, 68 ); - R( _b, _c, _d, _e, _a, F4, 69 ); - R( _a, _b, _c, _d, _e, F4, 70 ); - R( _e, _a, _b, _c, _d, F4, 71 ); - R( _d, _e, _a, _b, _c, F4, 72 ); - R( _c, _d, _e, _a, _b, F4, 73 ); - R( _b, _c, _d, _e, _a, F4, 74 ); - R( _a, _b, _c, _d, _e, F4, 75 ); - R( _e, _a, _b, _c, _d, F4, 76 ); - R( _d, _e, _a, _b, _c, F4, 77 ); - R( _c, _d, _e, _a, _b, F4, 78 ); - R( _b, _c, _d, _e, _a, F4, 79 ); - - mov sp, ROLDSTACK; - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - /*vpop {q4-q7};*/ - add _e, RT0; - stm RSTATE, {_a-_e}; - - pop {r4-r12, pc}; - -.Ldo_nothing: - bx lr -ENDPROC(sha1_transform_neon) diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S deleted file mode 100644 index 49a74a441aec78e4749a207b741ea8b514b4bde7..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-ce-core.S +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd. 
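
Both exit paths above end with the same Davies-Meyer feed-forward — the "ldm RSTATE, {RT0-RT3} ... stm RSTATE, {_a-_e}" sequences — which in C terms is simply (sketch, names invented):

#include <stdint.h>

/* Fold the final working variables back into the chaining state. */
static void sha1_feed_forward(uint32_t state[5], const uint32_t abcde[5])
{
	int i;

	for (i = 0; i < 5; i++)
		state[i] += abcde[i];
}
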
- * Author: Ard Biesheuvel - */ - -#include -#include - - .text - .fpu crypto-neon-fp-armv8 - - k0 .req q0 - k1 .req q1 - k2 .req q2 - k3 .req q3 - - ta0 .req q4 - ta1 .req q5 - tb0 .req q5 - tb1 .req q4 - - dga .req q6 - dgb .req q7 - dgbs .req s28 - - dg0 .req q12 - dg1a0 .req q13 - dg1a1 .req q14 - dg1b0 .req q14 - dg1b1 .req q13 - - .macro add_only, op, ev, rc, s0, dg1 - .ifnb \s0 - vadd.u32 tb\ev, q\s0, \rc - .endif - sha1h.32 dg1b\ev, dg0 - .ifb \dg1 - sha1\op\().32 dg0, dg1a\ev, ta\ev - .else - sha1\op\().32 dg0, \dg1, ta\ev - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0.32 q\s0, q\s1, q\s2 - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1.32 q\s0, q\s3 - .endm - - .align 6 -.Lsha1_rcon: - .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 - .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 - .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc - .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 - - /* - * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - * int blocks); - */ -ENTRY(sha1_ce_transform) - /* load round constants */ - adr ip, .Lsha1_rcon - vld1.32 {k0-k1}, [ip, :128]! - vld1.32 {k2-k3}, [ip, :128] - - /* load state */ - vld1.32 {dga}, [r0] - vldr dgbs, [r0, #16] - - /* load input */ -0: vld1.32 {q8-q9}, [r1]! - vld1.32 {q10-q11}, [r1]! - subs r2, r2, #1 - -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev32.8 q8, q8 - vrev32.8 q9, q9 - vrev32.8 q10, q10 - vrev32.8 q11, q11 -#endif - - vadd.u32 ta0, q8, k0 - vmov dg0, dga - - add_update c, 0, k0, 8, 9, 10, 11, dgb - add_update c, 1, k0, 9, 10, 11, 8 - add_update c, 0, k0, 10, 11, 8, 9 - add_update c, 1, k0, 11, 8, 9, 10 - add_update c, 0, k1, 8, 9, 10, 11 - - add_update p, 1, k1, 9, 10, 11, 8 - add_update p, 0, k1, 10, 11, 8, 9 - add_update p, 1, k1, 11, 8, 9, 10 - add_update p, 0, k1, 8, 9, 10, 11 - add_update p, 1, k2, 9, 10, 11, 8 - - add_update m, 0, k2, 10, 11, 8, 9 - add_update m, 1, k2, 11, 8, 9, 10 - add_update m, 0, k2, 8, 9, 10, 11 - add_update m, 1, k2, 9, 10, 11, 8 - add_update m, 0, k3, 10, 11, 8, 9 - - add_update p, 1, k3, 11, 8, 9, 10 - add_only p, 0, k3, 9 - add_only p, 1, k3, 10 - add_only p, 0, k3, 11 - add_only p, 1 - - /* update state */ - vadd.u32 dga, dga, dg0 - vadd.u32 dgb, dgb, dg1a0 - bne 0b - - /* store new state */ - vst1.32 {dga}, [r0] - vstr dgbs, [r0, #16] - bx lr -ENDPROC(sha1_ce_transform) diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S deleted file mode 100644 index 4ad517577e230ddd04d7891f35b5ec7e7196931e..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha2-ce-core.S +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd. - * Author: Ard Biesheuvel - */ - -#include -#include - - .text - .fpu crypto-neon-fp-armv8 - - k0 .req q7 - k1 .req q8 - rk .req r3 - - ta0 .req q9 - ta1 .req q10 - tb0 .req q10 - tb1 .req q9 - - dga .req q11 - dgb .req q12 - - dg0 .req q13 - dg1 .req q14 - dg2 .req q15 - - .macro add_only, ev, s0 - vmov dg2, dg0 - .ifnb \s0 - vld1.32 {k\ev}, [rk, :128]! 
- .endif - sha256h.32 dg0, dg1, tb\ev - sha256h2.32 dg1, dg2, tb\ev - .ifnb \s0 - vadd.u32 ta\ev, q\s0, k\ev - .endif - .endm - - .macro add_update, ev, s0, s1, s2, s3 - sha256su0.32 q\s0, q\s1 - add_only \ev, \s1 - sha256su1.32 q\s0, q\s2, q\s3 - .endm - - .align 6 -.Lsha256_rcon: - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - - /* - * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, - int blocks); - */ -ENTRY(sha2_ce_transform) - /* load state */ - vld1.32 {dga-dgb}, [r0] - - /* load input */ -0: vld1.32 {q0-q1}, [r1]! - vld1.32 {q2-q3}, [r1]! - subs r2, r2, #1 - -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev32.8 q0, q0 - vrev32.8 q1, q1 - vrev32.8 q2, q2 - vrev32.8 q3, q3 -#endif - - /* load first round constant */ - adr rk, .Lsha256_rcon - vld1.32 {k0}, [rk, :128]! - - vadd.u32 ta0, q0, k0 - vmov dg0, dga - vmov dg1, dgb - - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - - add_only 1, 1 - add_only 0, 2 - add_only 1, 3 - add_only 0 - - /* update state */ - vadd.u32 dga, dga, dg0 - vadd.u32 dgb, dgb, dg1 - bne 0b - - /* store new state */ - vst1.32 {dga-dgb}, [r0] - bx lr -ENDPROC(sha2_ce_transform) diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S deleted file mode 100644 index dfc6bfa430121673015fb927349b161c94798217..0000000000000000000000000000000000000000 --- a/arch/arm/include/asm/entry-macro-multi.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - -/* - * Interrupt handling. 
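
Each sha256h/sha256h2 pair above retires four SHA-256 rounds on the (abcd)/(efgh) register halves, while sha256su0/sha256su1 compute the schedule update w[i] = s1(w[i-2]) + w[i-7] + s0(w[i-15]) + w[i-16]. One round of the underlying compression, as an illustrative C sketch:

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round over state s[8] = {a,b,c,d,e,f,g,h}. */
static void sha256_round(uint32_t s[8], uint32_t w, uint32_t k)
{
	uint32_t e = s[4], a = s[0];
	uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t ch  = (e & s[5]) ^ (~e & s[6]);
	uint32_t t1  = s[7] + S1 + ch + k + w;
	uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t maj = (a & s[1]) ^ (a & s[2]) ^ (s[1] & s[2]);

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + S0 + maj;
}
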
Preserves r7, r8, r9 - */ - .macro arch_irq_handler_default - get_irqnr_preamble r6, lr -1: get_irqnr_and_base r0, r2, r6, lr - movne r1, sp - @ - @ routine called with r0 = irq number, r1 = struct pt_regs * - @ - badrne lr, 1b - bne asm_do_IRQ - -#ifdef CONFIG_SMP - /* - * XXX - * - * this macro assumes that irqstat (r2) and base (r6) are - * preserved from get_irqnr_and_base above - */ - ALT_SMP(test_for_ipi r0, r2, r6, lr) - ALT_UP_B(9997f) - movne r1, sp - badrne lr, 1b - bne do_IPI -#endif -9997: - .endm - - .macro arch_irq_handler, symbol_name - .align 5 - .global \symbol_name -\symbol_name: - mov r8, lr - arch_irq_handler_default - ret r8 - .endm diff --git a/arch/arm/include/asm/hardware/entry-macro-iomd.S b/arch/arm/include/asm/hardware/entry-macro-iomd.S deleted file mode 100644 index f7692731e514359a6a8fb66cb229444b9cf9fabe..0000000000000000000000000000000000000000 --- a/arch/arm/include/asm/hardware/entry-macro-iomd.S +++ /dev/null @@ -1,131 +0,0 @@ -/* - * arch/arm/include/asm/hardware/entry-macro-iomd.S - * - * Low-level IRQ helper macros for IOC/IOMD based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -/* IOC / IOMD based hardware */ -#include - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldrb \irqstat, [\base, #IOMD_IRQREQB] @ get high priority first - ldr \tmp, =irq_prio_h - teq \irqstat, #0 -#ifdef IOMD_BASE - ldrbeq \irqstat, [\base, #IOMD_DMAREQ] @ get dma - addeq \tmp, \tmp, #256 @ irq_prio_h table size - teqeq \irqstat, #0 - bne 2406f -#endif - ldrbeq \irqstat, [\base, #IOMD_IRQREQA] @ get low priority - addeq \tmp, \tmp, #256 @ irq_prio_d table size - teqeq \irqstat, #0 -#ifdef IOMD_IRQREQC - ldrbeq \irqstat, [\base, #IOMD_IRQREQC] - addeq \tmp, \tmp, #256 @ irq_prio_l table size - teqeq \irqstat, #0 -#endif -#ifdef IOMD_IRQREQD - ldrbeq \irqstat, [\base, #IOMD_IRQREQD] - addeq \tmp, \tmp, #256 @ irq_prio_lc table size - teqeq \irqstat, #0 -#endif -2406: ldrbne \irqnr, [\tmp, \irqstat] @ get IRQ number - .endm - -/* - * Interrupt table (incorporates priority). Please note that we - * rely on the order of these tables (see above code). 
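
get_irqnr_and_base above resolves priorities without any bit-scanning loop: each request byte read from the IOC/IOMD indexes a 256-entry table whose entries are precomputed highest-priority IRQ numbers. In rough C terms (names invented):

#include <stdint.h>

/* 256-entry table in the style of irq_prio_h below: table[status]
 * holds the IRQ number of the highest-priority bit set in status. */
extern const uint8_t irq_prio_h[256];

static int iomd_decode(uint8_t irqreq)
{
	if (irqreq == 0)
		return -1;	/* nothing pending in this bank, try the next */
	return irq_prio_h[irqreq];
}
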
- */ - .align 5 -irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 12, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 -#ifdef IOMD_BASE -irq_prio_d: .byte 0,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 20,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 23,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 23,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 -#endif -irq_prio_l: .byte 0, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 4, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -#ifdef IOMD_IRQREQC -irq_prio_lc: .byte 24,24,25,24,26,26,26,26,27,27,27,27,27,27,27,27 - .byte 28,24,25,24,26,26,26,26,27,27,27,27,27,27,27,27 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 30,30,30,30,30,30,30,30,27,27,27,27,27,27,27,27 - .byte 30,30,30,30,30,30,30,30,27,27,27,27,27,27,27,27 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 
31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 -#endif -#ifdef IOMD_IRQREQD -irq_prio_ld: .byte 40,40,41,40,42,42,42,42,43,43,43,43,43,43,43,43 - .byte 44,40,41,40,42,42,42,42,43,43,43,43,43,43,43,43 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 46,46,46,46,46,46,46,46,43,43,43,43,43,43,43,43 - .byte 46,46,46,46,46,46,46,46,43,43,43,43,43,43,43,43 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 -#endif - diff --git a/arch/arm/include/debug/8250.S b/arch/arm/include/debug/8250.S deleted file mode 100644 index e4a036f082c29c722e69f0cfbb0b9230c0653b56..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/8250.S +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/8250.S - * - * Copyright (C) 1994-2013 Russell King - */ -#include - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - -#ifdef CONFIG_DEBUG_UART_8250_WORD - .macro store, rd, rx:vararg - ARM_BE8(rev \rd, \rd) - str \rd, \rx - ARM_BE8(rev \rd, \rd) - .endm - - .macro load, rd, rx:vararg - ldr \rd, \rx - ARM_BE8(rev \rd, \rd) - .endm -#else - .macro store, rd, rx:vararg - strb \rd, \rx - .endm - - .macro load, rd, rx:vararg - ldrb \rd, \rx - .endm -#endif - -#define UART_SHIFT CONFIG_DEBUG_UART_8250_SHIFT - - .macro senduart,rd,rx - store \rd, [\rx, #UART_TX << UART_SHIFT] - .endm - - .macro busyuart,rd,rx -1002: load \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE - teq \rd, #UART_LSR_TEMT | UART_LSR_THRE - bne 1002b - .endm - - .macro waituart,rd,rx -#ifdef CONFIG_DEBUG_UART_8250_FLOW_CONTROL -1001: load \rd, [\rx, #UART_MSR << UART_SHIFT] - tst \rd, #UART_MSR_CTS - beq 1001b -#endif - .endm diff --git a/arch/arm/include/debug/asm9260.S b/arch/arm/include/debug/asm9260.S deleted file mode 100644 index 0da1eb6253318dcd18c4c81eb896e865712041c1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/asm9260.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - * Modified for ASM9260 by Oleksij Remepl - */ - - .macro addruart, rp, rv, tmp - ldr \rp, = CONFIG_DEBUG_UART_PHYS - ldr \rv, = CONFIG_DEBUG_UART_VIRT - .endm - - .macro waituart,rd,rx - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #0x50] @ TXDATA - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #0x60] @ STAT - tst \rd, #1 << 27 @ TXEMPTY - beq 1002b @ wait until transmit done - .endm diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S deleted file mode 100644 index 6c91cbaaa20be8a7a943703ae062a54c88279c8b..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/at91.S +++ /dev/null @@ -1,33 +0,0 @@ -/* 
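
The 8250 senduart/busyuart pair above is classic polled 16550 output: write THR, then spin until LSR reports both THRE and TEMT. A C rendering assuming a byte-mapped UART; the register names and bits are standard serial_reg.h values, the function itself is invented:

#include <stdint.h>

#define UART_TX		0	/* transmit holding register */
#define UART_LSR	5	/* line status register */
#define UART_LSR_THRE	0x20	/* THR empty */
#define UART_LSR_TEMT	0x40	/* transmitter empty */

static void debug8250_putc(volatile uint8_t *base, unsigned int shift, char c)
{
	base[UART_TX << shift] = (uint8_t)c;	/* senduart */
	while ((base[UART_LSR << shift] &	/* busyuart: spin until */
		(UART_LSR_TEMT | UART_LSR_THRE)) !=	/* FIFO and shifter */
	       (UART_LSR_TEMT | UART_LSR_THRE))		/* both drain */
		;
}
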
SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2003-2005 SAN People - * - * Debugging macro include header -*/ - -#define AT91_DBGU_SR (0x14) /* Status Register */ -#define AT91_DBGU_THR (0x1c) /* Transmitter Holding Register */ -#define AT91_DBGU_TXRDY (1 << 1) /* Transmitter Ready */ -#define AT91_DBGU_TXEMPTY (1 << 9) /* Transmitter Empty */ - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS @ System peripherals (phys address) - ldr \rv, =CONFIG_DEBUG_UART_VIRT @ System peripherals (virt address) - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #(AT91_DBGU_THR)] @ Write to Transmitter Holding Register - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register - tst \rd, #AT91_DBGU_TXRDY @ DBGU_TXRDY = 1 when ready to transmit - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register - tst \rd, #AT91_DBGU_TXEMPTY @ DBGU_TXEMPTY = 1 when transmission complete - beq 1001b - .endm - diff --git a/arch/arm/include/debug/bcm63xx.S b/arch/arm/include/debug/bcm63xx.S deleted file mode 100644 index 06a8962273960078c3749663396a0550a9d158db..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/bcm63xx.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Broadcom BCM63xx low-level UART debug - * - * Copyright (C) 2014 Broadcom Corporation - */ - -#include - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - - .macro senduart, rd, rx - /* word access do not work */ - strb \rd, [\rx, #UART_FIFO_REG] - .endm - - .macro waituart, rd, rx -1001: ldr \rd, [\rx, #UART_IR_REG] - tst \rd, #(1 << UART_IR_TXEMPTY) - beq 1001b - .endm - - .macro busyuart, rd, rx -1002: ldr \rd, [\rx, #UART_IR_REG] - tst \rd, #(1 << UART_IR_TXTRESH) - beq 1002b - .endm diff --git a/arch/arm/include/debug/brcmstb.S b/arch/arm/include/debug/brcmstb.S deleted file mode 100644 index bf8702ee8f86dff04e7f7d905b64c155c03ec912..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/brcmstb.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2016 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ -#include -#include - -/* Physical register offset and virtual register offset */ -#define REG_PHYS_BASE 0xf0000000 -#define REG_PHYS_BASE_V7 0x08000000 -#define REG_VIRT_BASE 0xfc000000 -#define REG_PHYS_ADDR(x) ((x) + REG_PHYS_BASE) -#define REG_PHYS_ADDR_V7(x) ((x) + REG_PHYS_BASE_V7) - -/* Product id can be read from here */ -#define SUN_TOP_CTRL_BASE REG_PHYS_ADDR(0x404000) -#define SUN_TOP_CTRL_BASE_V7 REG_PHYS_ADDR_V7(0x404000) - -#define UARTA_3390 REG_PHYS_ADDR(0x40a900) -#define UARTA_7250 REG_PHYS_ADDR(0x40b400) -#define UARTA_7255 REG_PHYS_ADDR(0x40c000) -#define UARTA_7260 UARTA_7255 -#define UARTA_7268 UARTA_7255 -#define UARTA_7271 UARTA_7268 -#define UARTA_7278 REG_PHYS_ADDR_V7(0x40c000) -#define UARTA_7364 REG_PHYS_ADDR(0x40b000) -#define UARTA_7366 UARTA_7364 -#define UARTA_74371 REG_PHYS_ADDR(0x406b00) -#define UARTA_7439 REG_PHYS_ADDR(0x40a900) -#define UARTA_7445 REG_PHYS_ADDR(0x40ab00) - -#define UART_SHIFT 2 - -#define checkuart(rp, rv, family_id, family) \ - /* Load family id */ \ - ldr rp, =family_id ; \ - /* Compare SUN_TOP_CTRL value against it */ \ - cmp rp, rv ; \ - /* Passed test, load address */ \ - ldreq rp, =UARTA_##family ; \ - /* Jump to save UART address */ \ - beq 91f - - .macro addruart, rp, rv, tmp - adr \rp, 99f @ actual addr of 99f - ldr \rv, [\rp] @ linked addr is stored there - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ linked brcmstb_uart_config - sub \tmp, \rp, \rv @ actual brcmstb_uart_config - ldr \rp, [\tmp] @ Load brcmstb_uart_config - cmp \rp, #1 @ needs initialization? - bne 100f @ no; go load the addresses - mov \rv, #0 @ yes; record init is done - str \rv, [\tmp] - - /* Check for V7 memory map if B53 */ - mrc p15, 0, \rv, c0, c0, 0 @ get Main ID register - ldr \rp, =ARM_CPU_PART_MASK - and \rv, \rv, \rp - ldr \rp, =ARM_CPU_PART_BRAHMA_B53 @ check for B53 CPU - cmp \rv, \rp - bne 10f - - /* if PERIPHBASE doesn't overlap REG_PHYS_BASE use V7 map */ - mrc p15, 1, \rv, c15, c3, 0 @ get PERIPHBASE from CBAR - ands \rv, \rv, #REG_PHYS_BASE - ldreq \rp, =SUN_TOP_CTRL_BASE_V7 - - /* Check SUN_TOP_CTRL base */ -10: ldrne \rp, =SUN_TOP_CTRL_BASE @ load SUN_TOP_CTRL PA - ldr \rv, [\rp, #0] @ get register contents -ARM_BE8( rev \rv, \rv ) - and \rv, \rv, #0xffffff00 @ strip revision bits [7:0] - - /* Chip specific detection starts here */ -20: checkuart(\rp, \rv, 0x33900000, 3390) -21: checkuart(\rp, \rv, 0x72500000, 7250) -22: checkuart(\rp, \rv, 0x72550000, 7255) -23: checkuart(\rp, \rv, 0x72600000, 7260) -24: checkuart(\rp, \rv, 0x72680000, 7268) -25: checkuart(\rp, \rv, 0x72710000, 7271) -26: checkuart(\rp, \rv, 0x72780000, 7278) -27: checkuart(\rp, \rv, 0x73640000, 7364) -28: checkuart(\rp, \rv, 0x73660000, 7366) -29: checkuart(\rp, \rv, 0x07437100, 74371) -30: checkuart(\rp, \rv, 0x74390000, 7439) -31: checkuart(\rp, \rv, 0x74450000, 7445) - - /* No valid UART found */ -90: mov \rp, #0 - /* fall through */ - - /* Record whichever UART we chose */ -91: str \rp, [\tmp, #4] @ Store in brcmstb_uart_phys - cmp \rp, #0 @ Valid UART address? - bne 92f @ Yes, go process it - str \rp, [\tmp, #8] @ Store 0 in brcmstb_uart_virt - b 100f @ Done -92: and \rv, \rp, #0xffffff @ offset within 16MB section - add \rv, \rv, #REG_VIRT_BASE - str \rv, [\tmp, #8] @ Store in brcmstb_uart_virt - b 100f - - .align -99: .word . 
- .word brcmstb_uart_config - .ltorg - - /* Load previously selected UART address */ -100: ldr \rp, [\tmp, #4] @ Load brcmstb_uart_phys - ldr \rv, [\tmp, #8] @ Load brcmstb_uart_virt - .endm - - .macro store, rd, rx:vararg -ARM_BE8( rev \rd, \rd ) - str \rd, \rx - .endm - - .macro load, rd, rx:vararg - ldr \rd, \rx -ARM_BE8( rev \rd, \rd ) - .endm - - .macro senduart,rd,rx - store \rd, [\rx, #UART_TX << UART_SHIFT] - .endm - - .macro busyuart,rd,rx -1002: load \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE - teq \rd, #UART_LSR_TEMT | UART_LSR_THRE - bne 1002b - .endm - - .macro waituart,rd,rx - .endm - -/* - * Storage for the state maintained by the macros above. - * - * In the kernel proper, this data is located in arch/arm/mach-bcm/brcmstb.c. - * That's because this header is included from multiple files, and we only - * want a single copy of the data. In particular, the UART probing code above - * assumes it's running using physical addresses. This is true when this file - * is included from head.o, but not when included from debug.o. So we need - * to share the probe results between the two copies, rather than having - * to re-run the probing again later. - * - * In the decompressor, we put the symbol/storage right here, since common.c - * isn't included in the decompressor build. This symbol gets put in .text - * even though it's really data, since .data is discarded from the - * decompressor. Luckily, .text is writeable in the decompressor, unless - * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. - */ -#if defined(ZIMAGE) -brcmstb_uart_config: - /* Debug UART initialization required */ - .word 1 - /* Debug UART physical address */ - .word 0 - /* Debug UART virtual address */ - .word 0 -#endif diff --git a/arch/arm/include/debug/clps711x.S b/arch/arm/include/debug/clps711x.S deleted file mode 100644 index 774a67ac3877dd6baebb2dc8eefc96938feef191..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/clps711x.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2014 Alexander Shiyan - */ - -#ifndef CONFIG_DEBUG_CLPS711X_UART2 -#define CLPS711X_UART_PADDR (0x80000000 + 0x0000) -#define CLPS711X_UART_VADDR (0xfeff4000 + 0x0000) -#else -#define CLPS711X_UART_PADDR (0x80000000 + 0x1000) -#define CLPS711X_UART_VADDR (0xfeff4000 + 0x1000) -#endif - -#define SYSFLG (0x0140) -#define SYSFLG_UBUSY (1 << 11) -#define UARTDR (0x0480) - - .macro addruart, rp, rv, tmp - ldr \rv, =CLPS711X_UART_VADDR - ldr \rp, =CLPS711X_UART_PADDR - .endm - - .macro waituart,rd,rx - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #UARTDR] - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #SYSFLG] - tst \rd, #SYSFLG_UBUSY - bne 1001b - .endm diff --git a/arch/arm/include/debug/dc21285.S b/arch/arm/include/debug/dc21285.S deleted file mode 100644 index d7e8c71706abd9293174e8727b6bc2c2865893bf..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/dc21285.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-footbridge/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -#include - /* For EBSA285 debugging */ - .equ dc21285_high, ARMCSR_BASE & 0xff000000 - .equ dc21285_low, ARMCSR_BASE & 0x00ffffff - - .macro addruart, rp, rv, tmp - .if dc21285_low - mov \rp, #dc21285_low - .else - mov \rp, #0 - 
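
The "99: .word ." idiom in the brcmstb addruart above (omap2plus uses the same trick further down) stores a word's own link-time address in the word itself; subtracting that from the run-time address gives the relocation delta, so the macro can locate its config storage whether it executes from physical or virtual addresses. A hypothetical C equivalent:

#include <stdint.h>

static void *reloc_fixup(const uintptr_t *anchor, uintptr_t linked_sym)
{
	uintptr_t delta = (uintptr_t)anchor - anchor[0];	/* run - link */

	return (void *)(linked_sym + delta);
}
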
.endif - orr \rv, \rp, #dc21285_high - orr \rp, \rp, #0x42000000 - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #0x160] @ UARTDR - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x178] @ UARTFLG - tst \rd, #1 << 3 - bne 1001b - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/digicolor.S b/arch/arm/include/debug/digicolor.S deleted file mode 100644 index 256f5f4da2759d4a1e4f72ef3c5f0b9ab3c90478..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/digicolor.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header for Conexant Digicolor USART - * - * Copyright (C) 2014 Paradox Innovation Ltd. -*/ - -#define UA0_STATUS 0x0742 -#define UA0_EMI_REC 0x0744 - -#define UA0_STATUS_TX_READY 0x40 - -#ifdef CONFIG_DEBUG_UART_PHYS - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, #UA0_EMI_REC] - .endm - - .macro waituart,rd,rx - .endm - - .macro busyuart,rd,rx -1001: ldrb \rd, [\rx, #UA0_STATUS] - tst \rd, #UA0_STATUS_TX_READY - beq 1001b - .endm diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S deleted file mode 100644 index 5ed5028306f4a605bff81df0782e5a8346773e45..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/efm32.S +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 Pengutronix - * Uwe Kleine-Koenig - */ - -#define UARTn_CMD 0x000c -#define UARTn_CMD_TXEN 0x0004 - -#define UARTn_STATUS 0x0010 -#define UARTn_STATUS_TXC 0x0020 -#define UARTn_STATUS_TXBL 0x0040 - -#define UARTn_TXDATA 0x0034 - - .macro addruart, rx, tmp, tmp2 - ldr \rx, =(CONFIG_DEBUG_UART_PHYS) - - /* - * enable TX. The driver might disable it to save energy. We - * don't care about disabling at the end as during debug power - * consumption isn't that important. - */ - ldr \tmp, =(UARTn_CMD_TXEN) - str \tmp, [\rx, #UARTn_CMD] - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #UARTn_TXDATA] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UARTn_STATUS] - tst \rd, #UARTn_STATUS_TXBL - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, UARTn_STATUS] - tst \rd, #UARTn_STATUS_TXC - bne 1001b - .endm diff --git a/arch/arm/include/debug/exynos.S b/arch/arm/include/debug/exynos.S deleted file mode 100644 index 74b56769f9cb3b6faf3f58940dbb3dbe2b6f40d9..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/exynos.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com - */ - -/* pull in the relevant register and map files. */ - -#define S3C_ADDR_BASE 0xF6000000 -#define S3C_VA_UART S3C_ADDR_BASE + 0x01000000 -#define EXYNOS4_PA_UART 0x13800000 -#define EXYNOS5_PA_UART 0x12C00000 - - /* note, for the boot process to work we have to keep the UART - * virtual address aligned to an 1MiB boundary for the L1 - * mapping the head code makes. We keep the UART virtual address - * aligned and add in the offset when we load the value here. 
- */ - - .macro addruart, rp, rv, tmp - mrc p15, 0, \tmp, c0, c0, 0 - and \tmp, \tmp, #0xf0 - teq \tmp, #0xf0 @@ A15 - beq 100f - mrc p15, 0, \tmp, c0, c0, 5 - and \tmp, \tmp, #0xf00 - teq \tmp, #0x100 @@ A15 + A7 but boot to A7 -100: ldreq \rp, =EXYNOS5_PA_UART - movne \rp, #EXYNOS4_PA_UART @@ EXYNOS4 - ldr \rv, =S3C_VA_UART -#if CONFIG_DEBUG_S3C_UART != 0 - add \rp, \rp, #(0x10000 * CONFIG_DEBUG_S3C_UART) - add \rv, \rv, #(0x10000 * CONFIG_DEBUG_S3C_UART) -#endif - .endm - -#define fifo_full fifo_full_s5pv210 -#define fifo_level fifo_level_s5pv210 - -#include diff --git a/arch/arm/include/debug/icedcc.S b/arch/arm/include/debug/icedcc.S deleted file mode 100644 index 74a0dd036a175edac05d931db5f132e9e18f5b53..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/icedcc.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/icedcc.S - * - * Copyright (C) 1994-1999 Russell King - */ - - @@ debug using ARM EmbeddedICE DCC channel - - .macro addruart, rp, rv, tmp - .endm - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c0, c5, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - bne 1001b -1002: - .endm - -#elif defined(CONFIG_CPU_XSCALE) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c8, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x10000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - bne 1001b -1002: - .endm - -#else - - .macro senduart, rd, rx - mcr p14, 0, \rd, c1, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - beq 1001b - - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - bne 1001b -1002: - .endm - -#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/include/debug/imx.S b/arch/arm/include/debug/imx.S deleted file mode 100644 index 1c1b9d1da4c8f5e28ece495f8c1d1c6bcce8807b..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/imx.S +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-imx/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - */ - -#include -#include "imx-uart.h" - -/* - * FIXME: This is a copy of IMX_IO_P2V in hardware.h, and needs to - * stay sync with that. It's hard to maintain, and should be fixed - * globally for multi-platform build to use a fixed virtual address - * for low-level debug uart port across platforms. 
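
The IMX_IO_P2V() macro defined just below folds several physical peripheral windows into one virtual region by masking and shifting address bits. A C transcription plus one worked value, using an arbitrary sample address:

#include <stdint.h>
#include <stdio.h>

static uint32_t imx_io_p2v(uint32_t x)	/* same arithmetic as the macro */
{
	return ((x & 0x80000000) >> 7) |
	       (0xf4000000 +
		((x & 0x50000000) >> 6) +
		((x & 0x0b000000) >> 4) +
		(x & 0x000fffff));
}

int main(void)
{
	printf("0x%08x\n", imx_io_p2v(0x02020000));	/* prints 0xf4220000 */
	return 0;
}
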
- */ -#define IMX_IO_P2V(x) ( \ - (((x) & 0x80000000) >> 7) | \ - (0xf4000000 + \ - (((x) & 0x50000000) >> 6) + \ - (((x) & 0x0b000000) >> 4) + \ - (((x) & 0x000fffff)))) - -#define UART_VADDR IMX_IO_P2V(UART_PADDR) - - .macro addruart, rp, rv, tmp - ldr \rp, =UART_PADDR @ physical - ldr \rv, =UART_VADDR @ virtual - .endm - - .macro senduart,rd,rx - ARM_BE8(rev \rd, \rd) - str \rd, [\rx, #0x40] @ TXDATA - .endm - - .macro waituart,rd,rx - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #0x98] @ SR2 - ARM_BE8(rev \rd, \rd) - tst \rd, #1 << 3 @ TXDC - beq 1002b @ wait until transmit done - .endm diff --git a/arch/arm/include/debug/meson.S b/arch/arm/include/debug/meson.S deleted file mode 100644 index 1e501a0054aea10ee77829ee6c9c32fcd6a64035..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/meson.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014 Carlo Caione - * Carlo Caione - */ - -#define MESON_AO_UART_WFIFO 0x0 -#define MESON_AO_UART_STATUS 0xc - -#define MESON_AO_UART_TX_FIFO_EMPTY (1 << 22) -#define MESON_AO_UART_TX_FIFO_FULL (1 << 21) - - .macro addruart, rp, rv, tmp - ldr \rp, =(CONFIG_DEBUG_UART_PHYS) @ physical - ldr \rv, =(CONFIG_DEBUG_UART_VIRT) @ virtual - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #MESON_AO_UART_WFIFO] - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #MESON_AO_UART_STATUS] - tst \rd, #MESON_AO_UART_TX_FIFO_EMPTY - beq 1002b - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #MESON_AO_UART_STATUS] - tst \rd, #MESON_AO_UART_TX_FIFO_FULL - bne 1001b - .endm diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S deleted file mode 100644 index 9405b71461daf1aebe3968835a6c2682159d86f3..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/msm.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * - * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * Author: Brian Swetland - */ - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - - .macro senduart, rd, rx -ARM_BE8(rev \rd, \rd ) - @ Write the 1 character to UARTDM_TF - str \rd, [\rx, #0x70] - .endm - - .macro waituart, rd, rx - @ check for TX_EMT in UARTDM_SR - ldr \rd, [\rx, #0x08] -ARM_BE8(rev \rd, \rd ) - tst \rd, #0x08 - bne 1002f - @ wait for TXREADY in UARTDM_ISR -1001: ldr \rd, [\rx, #0x14] -ARM_BE8(rev \rd, \rd ) - tst \rd, #0x80 - beq 1001b -1002: - @ Clear TX_READY by writing to the UARTDM_CR register - mov \rd, #0x300 -ARM_BE8(rev \rd, \rd ) - str \rd, [\rx, #0x10] - @ Write 0x1 to NCF register - mov \rd, #0x1 -ARM_BE8(rev \rd, \rd ) - str \rd, [\rx, #0x40] - @ UARTDM reg. 
Read to induce delay - ldr \rd, [\rx, #0x08] - .endm - - .macro busyuart, rd, rx - .endm diff --git a/arch/arm/include/debug/omap2plus.S b/arch/arm/include/debug/omap2plus.S deleted file mode 100644 index b5696a33ba0f524b26dbd41b4cbc18064b01e969..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/omap2plus.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -/* External port on Zoom2/3 */ -#define ZOOM_UART_BASE 0x10000000 -#define ZOOM_UART_VIRT 0xfa400000 - -#define OMAP_PORT_SHIFT 2 -#define ZOOM_PORT_SHIFT 1 - -#define UART_OFFSET(addr) ((addr) & 0x00ffffff) - - .pushsection .data - .align 2 -omap_uart_phys: .word 0 -omap_uart_virt: .word 0 -omap_uart_lsr: .word 0 - .popsection - - .macro addruart, rp, rv, tmp - - /* Use omap_uart_phys/virt if already configured */ -10: adr \rp, 99f @ get effective addr of 99f - ldr \rv, [\rp] @ get absolute addr of 99f - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ abs addr of omap_uart_phys - sub \tmp, \rp, \rv @ make it effective - ldr \rp, [\tmp, #0] @ omap_uart_phys - ldr \rv, [\tmp, #4] @ omap_uart_virt - cmp \rp, #0 @ is port configured? - cmpne \rv, #0 - bne 100f @ already configured - - /* Configure the UART offset from the phys/virt base */ -#ifdef CONFIG_DEBUG_ZOOM_UART - ldr \rp, =ZOOM_UART_BASE - str \rp, [\tmp, #0] @ omap_uart_phys - ldr \rp, =ZOOM_UART_VIRT - str \rp, [\tmp, #4] @ omap_uart_virt - mov \rp, #(UART_LSR << ZOOM_PORT_SHIFT) - str \rp, [\tmp, #8] @ omap_uart_lsr -#endif - b 10b - - .align -99: .word . - .word omap_uart_phys - .ltorg - -100: /* Pass the UART_LSR reg address */ - ldr \tmp, [\tmp, #8] @ omap_uart_lsr - add \rp, \rp, \tmp - add \rv, \rv, \tmp - .endm - - .macro senduart,rd,rx - orr \rd, \rd, \rx, lsl #24 @ preserve LSR reg offset - bic \rx, \rx, #0xff @ get base (THR) reg address - strb \rd, [\rx] @ send lower byte of rd - orr \rx, \rx, \rd, lsr #24 @ restore original rx (LSR) - bic \rd, \rd, #(0xff << 24) @ restore original rd - .endm - - .macro busyuart,rd,rx -1001: ldrb \rd, [\rx] @ rx contains UART_LSR address - and \rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - teq \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - bne 1001b - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/palmchip.S b/arch/arm/include/debug/palmchip.S deleted file mode 100644 index aed59332e487bc2c1d475a7cb687e036d9f9abbb..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/palmchip.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - -#undef UART_TX -#undef UART_LSR -#undef UART_MSR - -#define UART_TX 1 -#define UART_LSR 7 -#define UART_MSR 8 - -#include diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S deleted file mode 100644 index a2a553afe7b89e4409c7ba5b583d8011bb0ff0c3..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/pl01x.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/include/debug/pl01x.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ -#include - -#ifdef CONFIG_DEBUG_ZTE_ZX -#undef UART01x_DR -#undef UART01x_FR -#define UART01x_DR 0x04 -#define UART01x_FR 0x14 -#endif - -#ifdef CONFIG_DEBUG_UART_PHYS - .macro addruart, rp, rv, tmp - ldr \rp, 
=CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, #UART01x_DR] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - ARM_BE8( rev \rd, \rd ) - tst \rd, #UART01x_FR_TXFF - bne 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - ARM_BE8( rev \rd, \rd ) - tst \rd, #UART01x_FR_BUSY - bne 1001b - .endm diff --git a/arch/arm/include/debug/renesas-scif.S b/arch/arm/include/debug/renesas-scif.S deleted file mode 100644 index 25f06663a9a4e2c1b2f569b8bb10b5c503a78e83..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/renesas-scif.S +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Renesas SCIF(A) debugging macro include header - * - * Based on r8a7790.S - * - * Copyright (C) 2012-2013 Renesas Electronics Corporation - * Copyright (C) 1994-1999 Russell King - */ - -#define SCIF_PHYS CONFIG_DEBUG_UART_PHYS -#define SCIF_VIRT ((SCIF_PHYS & 0x00ffffff) | 0xfd000000) - -#if defined(CONFIG_DEBUG_R7S9210_SCIF2) || defined(CONFIG_DEBUG_R7S9210_SCIF4) -/* RZ/A2 SCIFA */ -#define FTDR 0x06 -#define FSR 0x08 -#elif CONFIG_DEBUG_UART_PHYS < 0xe6e00000 -/* SCIFA */ -#define FTDR 0x20 -#define FSR 0x14 -#else -/* SCIF */ -#define FTDR 0x0c -#define FSR 0x10 -#endif - -#define TDFE (1 << 5) -#define TEND (1 << 6) - - .macro addruart, rp, rv, tmp - ldr \rp, =SCIF_PHYS - ldr \rv, =SCIF_VIRT - .endm - - .macro waituart, rd, rx -1001: ldrh \rd, [\rx, #FSR] - tst \rd, #TDFE - beq 1001b - .endm - - .macro senduart, rd, rx - strb \rd, [\rx, #FTDR] - ldrh \rd, [\rx, #FSR] - bic \rd, \rd, #TEND - strh \rd, [\rx, #FSR] - .endm - - .macro busyuart, rd, rx -1001: ldrh \rd, [\rx, #FSR] - tst \rd, #TEND - beq 1001b - .endm diff --git a/arch/arm/include/debug/s3c24xx.S b/arch/arm/include/debug/s3c24xx.S deleted file mode 100644 index af873b5266778be070b0fde8cf3cbfd34debdad1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/s3c24xx.S +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-s3c2410/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Copyright (C) 2005 Simtec Electronics - * - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -#define S3C2410_UART1_OFF (0x4000) - - .macro addruart, rp, rv, tmp - ldr \rp, = CONFIG_DEBUG_UART_PHYS - ldr \rv, = CONFIG_DEBUG_UART_VIRT - .endm - - .macro fifo_full_s3c2410 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] - tst \rd, #S3C2410_UFSTAT_TXFULL - .endm - - .macro fifo_level_s3c2410 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] - and \rd, \rd, #S3C2410_UFSTAT_TXMASK - .endm - -/* Select the correct implementation depending on the configuration. The - * S3C2440 will get selected by default, as these are the most widely - * used variants of these -*/ - -#if defined(CONFIG_DEBUG_S3C2410_UART) -#define fifo_full fifo_full_s3c2410 -#define fifo_level fifo_level_s3c2410 -#endif - -/* include the reset of the code which will do the work */ - -#include diff --git a/arch/arm/include/debug/s5pv210.S b/arch/arm/include/debug/s5pv210.S deleted file mode 100644 index 820a1cfb059527c9346037d54f23df4a5569acfe..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/s5pv210.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com -*/ - -/* pull in the relevant register and map files. 
*/ - -#define S3C_ADDR_BASE 0xF6000000 -#define S3C_VA_UART S3C_ADDR_BASE + 0x01000000 -#define S5PV210_PA_UART 0xe2900000 - - /* note, for the boot process to work we have to keep the UART - * virtual address aligned to a 1MiB boundary for the L1 - * mapping the head code makes. We keep the UART virtual address - * aligned and add in the offset when we load the value here. - */ - - .macro addruart, rp, rv, tmp - ldr \rp, =S5PV210_PA_UART - ldr \rv, =S3C_VA_UART -#if CONFIG_DEBUG_S3C_UART != 0 - add \rp, \rp, #(0x400 * CONFIG_DEBUG_S3C_UART) - add \rv, \rv, #(0x400 * CONFIG_DEBUG_S3C_UART) -#endif - .endm - -#define fifo_full fifo_full_s5pv210 -#define fifo_level fifo_level_s5pv210 - -#include <debug/samsung.S> diff --git a/arch/arm/include/debug/sa1100.S b/arch/arm/include/debug/sa1100.S deleted file mode 100644 index 6109e6058e5b3eb80cd9e58a8e871e760cc4f245..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sa1100.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/include/debug/sa1100.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#define UTCR3 0x0c -#define UTDR 0x14 -#define UTSR1 0x20 -#define UTCR3_TXE 0x00000002 /* Transmit Enable */ -#define UTSR1_TBY 0x00000001 /* Transmitter BusY (read) */ -#define UTSR1_TNF 0x00000004 /* Transmit FIFO Not Full (read) */ - - .macro addruart, rp, rv, tmp - mrc p15, 0, \rp, c1, c0 - tst \rp, #1 @ MMU enabled? - moveq \rp, #0x80000000 @ physical base address - movne \rp, #0xf8000000 @ virtual address - - @ We probe for the active serial port here, coherently with - @ the comment in arch/arm/mach-sa1100/include/mach/uncompress.h. - @ We assume r1 can be clobbered. - - @ see if Ser3 is active - add \rp, \rp, #0x00050000 - ldr \rv, [\rp, #UTCR3] - tst \rv, #UTCR3_TXE - - @ if Ser3 is inactive, then try Ser1 - addeq \rp, \rp, #(0x00010000 - 0x00050000) - ldreq \rv, [\rp, #UTCR3] - tsteq \rv, #UTCR3_TXE - - @ if Ser1 is inactive, then try Ser2 - addeq \rp, \rp, #(0x00030000 - 0x00010000) - ldreq \rv, [\rp, #UTCR3] - tsteq \rv, #UTCR3_TXE - - @ clear top bits, and generate both phys and virt addresses - lsl \rp, \rp, #8 - lsr \rp, \rp, #8 - orr \rv, \rp, #0xf8000000 @ virtual - orr \rp, \rp, #0x80000000 @ physical - - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #UTDR] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UTSR1] - tst \rd, #UTSR1_TNF - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #UTSR1] - tst \rd, #UTSR1_TBY - bne 1001b - .endm diff --git a/arch/arm/include/debug/samsung.S b/arch/arm/include/debug/samsung.S deleted file mode 100644 index 69201d7fb48f6443d0ec8e5bb474764d947275b0..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/samsung.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2005, 2007 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - */ - -#include <linux/serial_s3c.h> - -/* The S5PV210/S5PC110 implementations are as below.
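The addruart macro above reduces to simple address arithmetic: both the physical and the virtual base advance by 0x400 per UART, while the virtual base itself stays 1MiB aligned so the early section mapping made by the head code still covers the chosen port. A C sketch of that computation, with port standing in for CONFIG_DEBUG_S3C_UART:

#include <stdint.h>

#define S5PV210_PA_UART 0xe2900000u
#define S3C_VA_UART     (0xF6000000u + 0x01000000u)

struct uart_addr { uint32_t phys, virt; };

static struct uart_addr s5pv210_debug_uart(unsigned int port)
{
        /* each UART register block is 0x400 bytes wide */
        struct uart_addr a = {
                .phys = S5PV210_PA_UART + 0x400u * port,
                .virt = S3C_VA_UART + 0x400u * port,
        };
        return a;
}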
*/ - - .macro fifo_level_s5pv210 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - and \rd, \rd, #S5PV210_UFSTAT_TXMASK - .endm - - .macro fifo_full_s5pv210 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S5PV210_UFSTAT_TXFULL - .endm - -/* The S3C2440 implementations are used by default as they are the - * most widely re-used */ - - .macro fifo_level_s3c2440 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - and \rd, \rd, #S3C2440_UFSTAT_TXMASK - .endm - -#ifndef fifo_level -#define fifo_level fifo_level_s3c2440 -#endif - - .macro fifo_full_s3c2440 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2440_UFSTAT_TXFULL - .endm - -#ifndef fifo_full -#define fifo_full fifo_full_s3c2440 -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, # S3C2410_UTXH] - .endm - - .macro busyuart, rd, rx - ldr \rd, [\rx, # S3C2410_UFCON] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UFCON_FIFOMODE @ fifo enabled? - beq 1001f @ - @ FIFO enabled... -1003: - fifo_full \rd, \rx - bne 1003b - b 1002f - -1001: - @ busy waiting for non fifo - ldr \rd, [\rx, # S3C2410_UTRSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UTRSTAT_TXFE - beq 1001b - -1002: @ exit busyuart - .endm - - .macro waituart,rd,rx - ldr \rd, [\rx, # S3C2410_UFCON] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UFCON_FIFOMODE @ fifo enabled? - beq 1001f @ - @ FIFO enabled... -1003: - fifo_level \rd, \rx - teq \rd, #0 - bne 1003b - b 1002f -1001: - @ idle waiting for non fifo - ldr \rd, [\rx, # S3C2410_UTRSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UTRSTAT_TXFE - beq 1001b - -1002: @ exit busyuart - .endm diff --git a/arch/arm/include/debug/sirf.S b/arch/arm/include/debug/sirf.S deleted file mode 100644 index e73e4de0a015312fe01d344972a7998b8c6bd4d8..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sirf.S +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-prima2/include/mach/debug-macro.S - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#define SIRF_LLUART_TXFIFO_STATUS 0x0114 -#define SIRF_LLUART_TXFIFO_DATA 0x0118 - -#define SIRF_LLUART_TXFIFO_FULL (1 << 5) - -#ifdef CONFIG_DEBUG_SIRFATLAS7_UART0 -#define SIRF_LLUART_TXFIFO_EMPTY (1 << 8) -#else -#define SIRF_LLUART_TXFIFO_EMPTY (1 << 6) -#endif - - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS @ physical - ldr \rv, =CONFIG_DEBUG_UART_VIRT @ virtual - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #SIRF_LLUART_TXFIFO_DATA] - .endm - - .macro busyuart,rd,rx - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #SIRF_LLUART_TXFIFO_STATUS] - tst \rd, #SIRF_LLUART_TXFIFO_EMPTY - beq 1001b - .endm - diff --git a/arch/arm/include/debug/sti.S b/arch/arm/include/debug/sti.S deleted file mode 100644 index 6b42c91f217d4019f392eac6d4eab53f210ea259..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sti.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/sti.S - * - * Debugging macro include header - * Copyright (C) 2013 STMicroelectronics (R&D) Limited. 
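The Samsung busyuart/waituart macros above branch on whether the UART FIFO is enabled: with the FIFO on they poll the FIFO status register, otherwise they poll the transmit-empty bit in UTRSTAT. The same decision in C; the accessors and the bit values here are assumed placeholders (the real ones come from the serial_s3c.h register definitions):

#include <stdint.h>

extern uint32_t rd_ufcon(void);      /* UFCON: FIFO control */
extern uint32_t rd_ufstat(void);     /* UFSTAT: FIFO status */
extern uint32_t rd_utrstat(void);    /* UTRSTAT: TX/RX status */

#define UFCON_FIFOMODE (1u << 0)     /* assumed bit positions */
#define UFSTAT_TXFULL  (1u << 14)
#define UTRSTAT_TXFE   (1u << 1)

/* busyuart logic: wait until another byte can be accepted */
static void wait_tx_ready(void)
{
        if (rd_ufcon() & UFCON_FIFOMODE) {
                while (rd_ufstat() & UFSTAT_TXFULL)     /* FIFO path */
                        ;
        } else {
                while (!(rd_utrstat() & UTRSTAT_TXFE))  /* non-FIFO path */
                        ;
        }
}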
- */ - -#define STIH41X_COMMS_BASE 0xfed00000 -#define STIH41X_ASC2_BASE (STIH41X_COMMS_BASE+0x32000) - -#define STIH41X_SBC_LPM_BASE 0xfe400000 -#define STIH41X_SBC_COMMS_BASE (STIH41X_SBC_LPM_BASE + 0x100000) -#define STIH41X_SBC_ASC1_BASE (STIH41X_SBC_COMMS_BASE + 0x31000) - - -#define VIRT_ADDRESS(x) (x - 0x1000000) - -#if IS_ENABLED(CONFIG_STIH41X_DEBUG_ASC2) -#define DEBUG_LL_UART_BASE STIH41X_ASC2_BASE -#endif - -#if IS_ENABLED(CONFIG_STIH41X_DEBUG_SBC_ASC1) -#define DEBUG_LL_UART_BASE STIH41X_SBC_ASC1_BASE -#endif - -#ifndef DEBUG_LL_UART_BASE -#error "DEBUG UART is not Configured" -#endif - -#define ASC_TX_BUF_OFF 0x04 -#define ASC_CTRL_OFF 0x0c -#define ASC_STA_OFF 0x14 - -#define ASC_STA_TX_FULL (1<<9) -#define ASC_STA_TX_EMPTY (1<<1) - - - .macro addruart, rp, rv, tmp - ldr \rp, =DEBUG_LL_UART_BASE @ physical base - ldr \rv, =VIRT_ADDRESS(DEBUG_LL_UART_BASE) @ virt base - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #ASC_TX_BUF_OFF] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #ASC_STA_OFF] - tst \rd, #ASC_STA_TX_FULL - bne 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #ASC_STA_OFF] - tst \rd, #ASC_STA_TX_EMPTY - beq 1001b - .endm diff --git a/arch/arm/include/debug/stm32.S b/arch/arm/include/debug/stm32.S deleted file mode 100644 index 1abb32f685fdbb8327d3392ccdd754fb00e35312..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/stm32.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) STMicroelectronics SA 2017 - All Rights Reserved - * Author: Gerald Baeza for STMicroelectronics. - */ - -#define STM32_UART_BASE 0x40011000 /* USART1 */ - -#ifdef CONFIG_STM32F4_DEBUG_UART -#define STM32_USART_SR_OFF 0x00 -#define STM32_USART_TDR_OFF 0x04 -#endif - -#ifdef CONFIG_STM32F7_DEBUG_UART -#define STM32_USART_SR_OFF 0x1C -#define STM32_USART_TDR_OFF 0x28 -#endif - -#define STM32_USART_TC (1 << 6) /* Tx complete */ -#define STM32_USART_TXE (1 << 7) /* Tx data reg empty */ - -.macro addruart, rp, rv, tmp - ldr \rp, =STM32_UART_BASE @ physical base - ldr \rv, =STM32_UART_BASE @ virt base /* NoMMU */ -.endm - -.macro senduart,rd,rx - strb \rd, [\rx, #STM32_USART_TDR_OFF] -.endm - -.macro waituart,rd,rx -1001: ldr \rd, [\rx, #(STM32_USART_SR_OFF)] @ Read Status Register - tst \rd, #STM32_USART_TXE @ TXE = 1 = tx empty - beq 1001b -.endm - -.macro busyuart,rd,rx -1001: ldr \rd, [\rx, #(STM32_USART_SR_OFF)] @ Read Status Register - tst \rd, #STM32_USART_TC @ TC = 1 = tx complete - beq 1001b -.endm diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S deleted file mode 100644 index 2148d0f8859194f1ee6928dfb8d995a2899bf134..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/tegra.S +++ /dev/null @@ -1,217 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2010,2011 Google, Inc. - * Copyright (C) 2011-2012 NVIDIA CORPORATION. All Rights Reserved. 
- * - * Author: - * Colin Cross - * Erik Gilling - * Doug Anderson - * Stephen Warren - * - * Portions based on mach-omap2's debug-macro.S - * Copyright (C) 1994-1999 Russell King - */ - -#include - -#define UART_SHIFT 2 - -/* Physical addresses */ -#define TEGRA_CLK_RESET_BASE 0x60006000 -#define TEGRA_APB_MISC_BASE 0x70000000 -#define TEGRA_UARTA_BASE 0x70006000 -#define TEGRA_UARTB_BASE 0x70006040 -#define TEGRA_UARTC_BASE 0x70006200 -#define TEGRA_UARTD_BASE 0x70006300 -#define TEGRA_UARTE_BASE 0x70006400 -#define TEGRA_PMC_BASE 0x7000e400 - -#define TEGRA_CLK_RST_DEVICES_L (TEGRA_CLK_RESET_BASE + 0x04) -#define TEGRA_CLK_RST_DEVICES_H (TEGRA_CLK_RESET_BASE + 0x08) -#define TEGRA_CLK_RST_DEVICES_U (TEGRA_CLK_RESET_BASE + 0x0c) -#define TEGRA_CLK_OUT_ENB_L (TEGRA_CLK_RESET_BASE + 0x10) -#define TEGRA_CLK_OUT_ENB_H (TEGRA_CLK_RESET_BASE + 0x14) -#define TEGRA_CLK_OUT_ENB_U (TEGRA_CLK_RESET_BASE + 0x18) -#define TEGRA_PMC_SCRATCH20 (TEGRA_PMC_BASE + 0xa0) -#define TEGRA_APB_MISC_GP_HIDREV (TEGRA_APB_MISC_BASE + 0x804) - -/* - * Must be section-aligned since a section mapping is used early on. - * Must not overlap with regions in mach-tegra/io.c:tegra_io_desc[]. - */ -#define UART_VIRTUAL_BASE 0xfe800000 - -#define checkuart(rp, rv, lhu, bit, uart) \ - /* Load address of CLK_RST register */ \ - ldr rp, =TEGRA_CLK_RST_DEVICES_##lhu ; \ - /* Load value from CLK_RST register */ \ - ldr rp, [rp, #0] ; \ - /* Test UART's reset bit */ \ - tst rp, #(1 << bit) ; \ - /* If set, can't use UART; jump to save no UART */ \ - bne 90f ; \ - /* Load address of CLK_OUT_ENB register */ \ - ldr rp, =TEGRA_CLK_OUT_ENB_##lhu ; \ - /* Load value from CLK_OUT_ENB register */ \ - ldr rp, [rp, #0] ; \ - /* Test UART's clock enable bit */ \ - tst rp, #(1 << bit) ; \ - /* If clear, can't use UART; jump to save no UART */ \ - beq 90f ; \ - /* Passed all tests, load address of UART registers */ \ - ldr rp, =TEGRA_UART##uart##_BASE ; \ - /* Jump to save UART address */ \ - b 91f - - .macro addruart, rp, rv, tmp - adr \rp, 99f @ actual addr of 99f - ldr \rv, [\rp] @ linked addr is stored there - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ linked tegra_uart_config - sub \tmp, \rp, \rv @ actual tegra_uart_config - ldr \rp, [\tmp] @ Load tegra_uart_config - cmp \rp, #1 @ needs initialization? - bne 100f @ no; go load the addresses - mov \rv, #0 @ yes; record init is done - str \rv, [\tmp] - -#ifdef CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA - /* Check ODMDATA */ -10: ldr \rp, =TEGRA_PMC_SCRATCH20 - ldr \rp, [\rp, #0] @ Load PMC_SCRATCH20 - lsr \rv, \rp, #18 @ 19:18 are console type - and \rv, \rv, #3 - cmp \rv, #2 @ 2 and 3 mean DCC, UART - beq 11f @ some boards swap the meaning - cmp \rv, #3 @ so accept either - bne 90f -11: lsr \rv, \rp, #15 @ 17:15 are UART ID - and \rv, #7 - cmp \rv, #0 @ UART 0? - beq 20f - cmp \rv, #1 @ UART 1? - beq 21f - cmp \rv, #2 @ UART 2? - beq 22f - cmp \rv, #3 @ UART 3? - beq 23f - cmp \rv, #4 @ UART 4? 
- beq 24f - b 90f @ invalid -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTA) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART A validity */ -20: checkuart(\rp, \rv, L, 6, A) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTB) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART B validity */ -21: checkuart(\rp, \rv, L, 7, B) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTC) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART C validity */ -22: checkuart(\rp, \rv, H, 23, C) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTD) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART D validity */ -23: checkuart(\rp, \rv, U, 1, D) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTE) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART E validity */ -24: - checkuart(\rp, \rv, U, 2, E) -#endif - - /* No valid UART found */ -90: mov \rp, #0 - /* fall through */ - - /* Record whichever UART we chose */ -91: str \rp, [\tmp, #4] @ Store in tegra_uart_phys - cmp \rp, #0 @ Valid UART address? - bne 92f @ Yes, go process it - str \rp, [\tmp, #8] @ Store 0 in tegra_uart_virt - b 100f @ Done -92: and \rv, \rp, #0xffffff @ offset within 1MB section - add \rv, \rv, #UART_VIRTUAL_BASE - str \rv, [\tmp, #8] @ Store in tegra_uart_virt - b 100f - - .align -99: .word . - .word tegra_uart_config - .ltorg - - /* Load previously selected UART address */ -100: ldr \rp, [\tmp, #4] @ Load tegra_uart_phys - ldr \rv, [\tmp, #8] @ Load tegra_uart_virt - .endm - -/* - * Code below is swiped from , but add an extra - * check to make sure that the UART address is actually valid. - */ - - .macro senduart, rd, rx - cmp \rx, #0 - strbne \rd, [\rx, #UART_TX << UART_SHIFT] -1001: - .endm - - .macro busyuart, rd, rx - cmp \rx, #0 - beq 1002f -1001: ldrb \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_THRE - teq \rd, #UART_LSR_THRE - bne 1001b -1002: - .endm - - .macro waituart, rd, rx -#ifdef FLOW_CONTROL - cmp \rx, #0 - beq 1002f -1001: ldrb \rd, [\rx, #UART_MSR << UART_SHIFT] - tst \rd, #UART_MSR_CTS - beq 1001b -1002: -#endif - .endm - -/* - * Storage for the state maintained by the macros above. - * - * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. - * That's because this header is included from multiple files, and we only - * want a single copy of the data. In particular, the UART probing code above - * assumes it's running using physical addresses. This is true when this file - * is included from head.o, but not when included from debug.o. So we need - * to share the probe results between the two copies, rather than having - * to re-run the probing again later. - * - * In the decompressor, we put the symbol/storage right here, since common.c - * isn't included in the decompressor build. This symbol gets put in .text - * even though it's really data, since .data is discarded from the - * decompressor. Luckily, .text is writeable in the decompressor, unless - * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. 
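The ODMDATA probe above packs two fields into PMC_SCRATCH20: bits 19:18 give the console type (2 and 3 are both accepted because some boards swap the DCC and UART encodings) and bits 17:15 give the UART index. Decoded in C for clarity:

#include <stdint.h>

/* Returns 0..4 for UART A..E, or -1 if ODMDATA selects no usable UART. */
static int tegra_odmdata_uart(uint32_t scratch20)
{
        uint32_t console = (scratch20 >> 18) & 0x3;
        uint32_t uart    = (scratch20 >> 15) & 0x7;

        if (console != 2 && console != 3)   /* not a DCC/UART console */
                return -1;
        if (uart > 4)                       /* only UART A..E exist */
                return -1;
        return (int)uart;
}

Each candidate is then still vetted by checkuart, which refuses any port whose clock is gated or which is held in reset.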
- */ -#if defined(ZIMAGE) -tegra_uart_config: - /* Debug UART initialization required */ - .word 1 - /* Debug UART physical address */ - .word 0 - /* Debug UART virtual address */ - .word 0 -#endif diff --git a/arch/arm/include/debug/ux500.S b/arch/arm/include/debug/ux500.S deleted file mode 100644 index c516900947bb4e39dc7d28c88e1a93d2d2f37ec1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/ux500.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2009 ST-Ericsson - */ - - -#if CONFIG_UX500_DEBUG_UART > 2 -#error Invalid Ux500 debug UART -#endif - -/* - * DEBUG_LL only works if only one SOC is built in. We don't use #else below - * in order to get "__UX500_UART redefined" warnings if more than one SOC is - * built, so that there's some hint during the build that something is wrong. - */ - -#ifdef CONFIG_UX500_SOC_DB8500 -#define U8500_UART0_PHYS_BASE (0x80120000) -#define U8500_UART1_PHYS_BASE (0x80121000) -#define U8500_UART2_PHYS_BASE (0x80007000) -#define __UX500_PHYS_UART(n) U8500_UART##n##_PHYS_BASE -#endif - -#if !defined(__UX500_PHYS_UART) -#error Unknown SOC -#endif - -#define UX500_PHYS_UART(n) __UX500_PHYS_UART(n) -#define UART_PHYS_BASE UX500_PHYS_UART(CONFIG_UX500_DEBUG_UART) -#define UART_VIRT_BASE (0xfff07000) - - .macro addruart, rp, rv, tmp - ldr \rp, =UART_PHYS_BASE @ no, physical address - ldr \rv, =UART_VIRT_BASE @ yes, virtual address - .endm - -#include <debug/pl01x.S> diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S deleted file mode 100644 index ccb22e9a86a35a4123fca489bbc2db1c1de7b8df..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vexpress.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-realview/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - */ - -#define DEBUG_LL_PHYS_BASE 0x10000000 -#define DEBUG_LL_UART_OFFSET 0x00009000 - -#define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 -#define DEBUG_LL_UART_OFFSET_RS1 0x00090000 - -#define DEBUG_LL_UART_PHYS_CRX 0xb0090000 - -#define DEBUG_LL_VIRT_BASE 0xf8000000 - -#if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT) - - .macro addruart,rp,rv,tmp - .arch armv7-a - - @ Make an educated guess regarding the memory map: - @ - the original A9 core tile (based on ARM Cortex-A9 r0p1) - @ should use UART at 0x10009000 - @ - all other (RS1 compliant) tiles use UART mapped - @ at 0x1c090000 - mrc p15, 0, \rp, c0, c0, 0 - movw \rv, #0xc091 - movt \rv, #0x410f - cmp \rp, \rv - - @ Original memory map - moveq \rp, #DEBUG_LL_UART_OFFSET - orreq \rv, \rp, #DEBUG_LL_VIRT_BASE - orreq \rp, \rp, #DEBUG_LL_PHYS_BASE - - @ RS1 memory map - movne \rp, #DEBUG_LL_UART_OFFSET_RS1 - orrne \rv, \rp, #DEBUG_LL_VIRT_BASE - orrne \rp, \rp, #DEBUG_LL_PHYS_BASE_RS1 - - .endm - -#include <debug/pl01x.S> -#endif diff --git a/arch/arm/include/debug/vf.S b/arch/arm/include/debug/vf.S deleted file mode 100644 index 854d9bd8277019e50de31cc6823278ef343afaf5..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vf.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2013 Freescale Semiconductor, Inc.
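The vexpress detection above is a one-comparison decision: read the MIDR and, if it identifies the original Cortex-A9 r0p1 core tile (0x410fc091), use the legacy memory map, otherwise assume an RS1-compliant tile. In C, with read_midr() standing in for the MRC access:

#include <stdint.h>

extern uint32_t read_midr(void);   /* placeholder for mrc p15,0,...,c0,c0,0 */

static uint32_t vexpress_uart_phys(void)
{
        if (read_midr() == 0x410fc091u)
                return 0x10000000u + 0x00009000u;  /* original A9 tile */
        return 0x1c000000u + 0x00090000u;          /* RS1 memory map */
}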
- */ - -#define VF_UART0_BASE_ADDR 0x40027000 -#define VF_UART1_BASE_ADDR 0x40028000 -#define VF_UART2_BASE_ADDR 0x40029000 -#define VF_UART3_BASE_ADDR 0x4002a000 -#define VF_UART_BASE_ADDR(n) VF_UART##n##_BASE_ADDR -#define VF_UART_BASE(n) VF_UART_BASE_ADDR(n) -#define VF_UART_PHYSICAL_BASE VF_UART_BASE(CONFIG_DEBUG_VF_UART_PORT) - -#define VF_UART_VIRTUAL_BASE 0xfe000000 - - .macro addruart, rp, rv, tmp - ldr \rp, =VF_UART_PHYSICAL_BASE @ physical - and \rv, \rp, #0xffffff @ offset within 16MB section - add \rv, \rv, #VF_UART_VIRTUAL_BASE - .endm - - .macro senduart, rd, rx - strb \rd, [\rx, #0x7] @ Data Register - .endm - - .macro busyuart, rd, rx -1001: ldrb \rd, [\rx, #0x4] @ Status Register 1 - tst \rd, #1 << 6 @ TC - beq 1001b @ wait until transmit done - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/vt8500.S b/arch/arm/include/debug/vt8500.S deleted file mode 100644 index 8dc1df2d91b859926978cb9313b30fa4235764c9..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vt8500.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2010 Alexey Charkov - * Moved from arch/arm/mach-vt8500/include/mach/debug-macro.S - * Minor changes for readability. - */ - -#define DEBUG_LL_PHYS_BASE 0xD8000000 -#define DEBUG_LL_VIRT_BASE 0xF8000000 -#define DEBUG_LL_UART_OFFSET 0x00200000 - -#if defined(CONFIG_DEBUG_VT8500_UART0) - .macro addruart, rp, rv, tmp - mov \rp, #DEBUG_LL_UART_OFFSET - orr \rv, \rp, #DEBUG_LL_VIRT_BASE - orr \rp, \rp, #DEBUG_LL_PHYS_BASE - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #0] - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x1c] - ands \rd, \rd, #0x2 - bne 1001b - .endm - - .macro waituart,rd,rx - .endm - -#endif diff --git a/arch/arm/include/debug/zynq.S b/arch/arm/include/debug/zynq.S deleted file mode 100644 index 58d77c972fd680684a2e0f1f8ac40bd19e0d221d..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/zynq.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2011 Xilinx - */ -#define UART_CR_OFFSET 0x00 /* Control Register [8:0] */ -#define UART_SR_OFFSET 0x2C /* Channel Status [11:0] */ -#define UART_FIFO_OFFSET 0x30 /* FIFO [15:0] or [7:0] */ - -#define UART_SR_TXFULL 0x00000010 /* TX FIFO full */ -#define UART_SR_TXEMPTY 0x00000008 /* TX FIFO empty */ - -#define UART0_PHYS 0xE0000000 -#define UART0_VIRT 0xF0800000 -#define UART1_PHYS 0xE0001000 -#define UART1_VIRT 0xF0801000 - -#if IS_ENABLED(CONFIG_DEBUG_ZYNQ_UART1) -# define LL_UART_PADDR UART1_PHYS -# define LL_UART_VADDR UART1_VIRT -#else -# define LL_UART_PADDR UART0_PHYS -# define LL_UART_VADDR UART0_VIRT -#endif - - .macro addruart, rp, rv, tmp - ldr \rp, =LL_UART_PADDR @ physical - ldr \rv, =LL_UART_VADDR @ virtual - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #UART_FIFO_OFFSET] @ TXDATA - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UART_SR_OFFSET] -ARM_BE8( rev \rd, \rd ) - tst \rd, #UART_SR_TXEMPTY - beq 1001b - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #UART_SR_OFFSET] @ get status register -ARM_BE8( rev \rd, \rd ) - tst \rd, #UART_SR_TXFULL @ - bne 1002b @ wait if FIFO is full - .endm diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S deleted file mode 100644 index e112072b579d424c3dfcaad914cc8c576b9590c8..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/debug.S +++ /dev/null @@ -1,154 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/debug.S - * - * Copyright (C) 1994-1999 Russell King - * - * 32-bit debugging code - */ -#include <linux/linkage.h> -#include <asm/assembler.h> - - .text - -/* - * Some debugging routines (useful if you've got MM problems and - * printk isn't working). For DEBUGGING ONLY!!! Do not leave - * references to these in a production kernel! - */ - -#if !defined(CONFIG_DEBUG_SEMIHOSTING) -#include CONFIG_DEBUG_LL_INCLUDE -#endif - -#ifdef CONFIG_MMU - .macro addruart_current, rx, tmp1, tmp2 - addruart \tmp1, \tmp2, \rx - mrc p15, 0, \rx, c1, c0 - tst \rx, #1 - moveq \rx, \tmp1 - movne \rx, \tmp2 - .endm - -#else /* !CONFIG_MMU */ - .macro addruart_current, rx, tmp1, tmp2 - addruart \rx, \tmp1, \tmp2 - .endm - -#endif /* CONFIG_MMU */ - -/* - * Useful debugging routines - */ -ENTRY(printhex8) - mov r1, #8 - b printhex -ENDPROC(printhex8) - -ENTRY(printhex4) - mov r1, #4 - b printhex -ENDPROC(printhex4) - -ENTRY(printhex2) - mov r1, #2 -printhex: adr r2, hexbuf_rel - ldr r3, [r2] - add r2, r2, r3 - add r3, r2, r1 - mov r1, #0 - strb r1, [r3] -1: and r1, r0, #15 - mov r0, r0, lsr #4 - cmp r1, #10 - addlt r1, r1, #'0' - addge r1, r1, #'a' - 10 - strb r1, [r3, #-1]! - teq r3, r2 - bne 1b - mov r0, r2 - b printascii -ENDPROC(printhex2) - - .pushsection .bss -hexbuf_addr: .space 16 - .popsection - .align -hexbuf_rel: .long hexbuf_addr - . - - .ltorg - -#ifndef CONFIG_DEBUG_SEMIHOSTING - -ENTRY(printascii) - addruart_current r3, r1, r2 -1: teq r0, #0 - ldrbne r1, [r0], #1 - teqne r1, #0 - reteq lr -2: teq r1, #'\n' - bne 3f - mov r1, #'\r' - waituart r2, r3 - senduart r1, r3 - busyuart r2, r3 - mov r1, #'\n' -3: waituart r2, r3 - senduart r1, r3 - busyuart r2, r3 - b 1b -ENDPROC(printascii) - -ENTRY(printch) - addruart_current r3, r1, r2 - mov r1, r0 - mov r0, #0 - b 2b -ENDPROC(printch) - -#ifdef CONFIG_MMU -ENTRY(debug_ll_addr) - addruart r2, r3, ip - str r2, [r0] - str r3, [r1] - ret lr -ENDPROC(debug_ll_addr) -#endif - -#else - -ENTRY(printascii) - mov r1, r0 - mov r0, #0x04 @ SYS_WRITE0 - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - ret lr -ENDPROC(printascii) - -ENTRY(printch) - adr r1, hexbuf_rel - ldr r2, [r1] - add r1, r1, r2 - strb r0, [r1] - mov r0, #0x03 @ SYS_WRITEC - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - ret lr -ENDPROC(printch) - -ENTRY(debug_ll_addr) - mov r2, #0 - str r2, [r0] - str r2, [r1] - ret lr -ENDPROC(debug_ll_addr) - -#endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S deleted file mode 100644 index b62d74a2c73a58f9fdf9d6bca7e7d473c65a5d28..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-armv.S +++ /dev/null @@ -1,1197 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-armv.S - * - * Copyright (C) 1996,1997,1998 Russell King. - * ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk) - * nommu support by Hyok S. Choi (hyok.choi@samsung.com) - * - * Low-level vector interface routines - * - * Note: there is a StrongARM bug in the STMIA rn, {regs}^ instruction - * that causes it to save wrong values... Be aware! - */ - -#include <linux/init.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/glue-df.h> -#include <asm/glue-pf.h> -#include <asm/vfpmacros.h> -#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER -#include <mach/entry-macro.S> -#endif -#include <asm/thread_notify.h> -#include <asm/unwind.h> -#include <asm/unistd.h> -#include <asm/tls.h> -#include <asm/system_info.h> -#include <asm/uaccess-asm.h> - -#include "entry-header.S" -#include <asm/entry-macro-multi.S> -#include <asm/probes.h> - -/* - * Interrupt handling.
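The printhex2/4/8 entry points in the debug.S hunk above share one loop: mask off the low nibble, turn it into an ASCII digit, store it at the back of the buffer, and shift right by four. The same conversion in C; print_hex(0x2a, 2, buf) leaves "2a" in buf:

static void print_hex(unsigned int val, int digits, char *buf)
{
        buf[digits] = '\0';
        while (digits--) {
                unsigned int nib = val & 15;
                buf[digits] = (char)(nib < 10 ? '0' + nib : 'a' + nib - 10);
                val >>= 4;
        }
}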
- */ - .macro irq_handler -#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER - ldr r1, =handle_arch_irq - mov r0, sp - badr lr, 9997f - ldr pc, [r1] -#else - arch_irq_handler_default -#endif -9997: - .endm - - .macro pabt_helper - @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 -#ifdef MULTI_PABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_PABT_FUNC] -#else - bl CPU_PABORT_HANDLER -#endif - .endm - - .macro dabt_helper - - @ - @ Call the processor-specific abort handler: - @ - @ r2 - pt_regs - @ r4 - aborted context pc - @ r5 - aborted context psr - @ - @ The abort handler must return the aborted address in r0, and - @ the fault status register in r1. r9 must be preserved. - @ -#ifdef MULTI_DABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_DABT_FUNC] -#else - bl CPU_DABORT_HANDLER -#endif - .endm - - .section .entry.text,"ax",%progbits - -/* - * Invalid mode handlers - */ - .macro inv_entry, reason - sub sp, sp, #PT_REGS_SIZE - ARM( stmib sp, {r1 - lr} ) - THUMB( stmia sp, {r0 - r12} ) - THUMB( str sp, [sp, #S_SP] ) - THUMB( str lr, [sp, #S_LR] ) - mov r1, #\reason - .endm - -__pabt_invalid: - inv_entry BAD_PREFETCH - b common_invalid -ENDPROC(__pabt_invalid) - -__dabt_invalid: - inv_entry BAD_DATA - b common_invalid -ENDPROC(__dabt_invalid) - -__irq_invalid: - inv_entry BAD_IRQ - b common_invalid -ENDPROC(__irq_invalid) - -__und_invalid: - inv_entry BAD_UNDEFINSTR - - @ - @ XXX fall through to common_invalid - @ - -@ -@ common_invalid - generic code for failed exception (re-entrant version of handlers) -@ -common_invalid: - zero_fp - - ldmia r0, {r4 - r6} - add r0, sp, #S_PC @ here for interlock avoidance - mov r7, #-1 @ "" "" "" "" - str r4, [sp] @ save preserved r0 - stmia r0, {r5 - r7} @ lr_, - @ cpsr_, "old_r0" - - mov r0, sp - b bad_mode -ENDPROC(__und_invalid) - -/* - * SVC mode handlers - */ - -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) -#define SPFIX(code...) code -#else -#define SPFIX(code...) -#endif - - .macro svc_entry, stack_hole=0, trace=1, uaccess=1 - UNWIND(.fnstart ) - UNWIND(.save {r0 - pc} ) - sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) -#ifdef CONFIG_THUMB2_KERNEL - SPFIX( str r0, [sp] ) @ temporarily saved - SPFIX( mov r0, sp ) - SPFIX( tst r0, #4 ) @ test original stack alignment - SPFIX( ldr r0, [sp] ) @ restored -#else - SPFIX( tst sp, #4 ) -#endif - SPFIX( subeq sp, sp, #4 ) - stmia sp, {r1 - r12} - - ldmia r0, {r3 - r5} - add r7, sp, #S_SP - 4 @ here for interlock avoidance - mov r6, #-1 @ "" "" "" "" - add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) - SPFIX( addeq r2, r2, #4 ) - str r3, [sp, #-4]! 
@ save the "real" r0 copied - @ from the exception stack - - mov r3, lr - - @ - @ We are now ready to fill in the remaining blanks on the stack: - @ - @ r2 - sp_svc - @ r3 - lr_svc - @ r4 - lr_, already fixed up for correct return/restart - @ r5 - spsr_ - @ r6 - orig_r0 (see pt_regs definition in ptrace.h) - @ - stmia r7, {r2 - r6} - - get_thread_info tsk - uaccess_entry tsk, r0, r1, r2, \uaccess - - .if \trace -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - .endif - .endm - - .align 5 -__dabt_svc: - svc_entry uaccess=0 - mov r2, sp - dabt_helper - THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__dabt_svc) - - .align 5 -__irq_svc: - svc_entry - irq_handler - -#ifdef CONFIG_PREEMPT - ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags - teq r8, #0 @ if preempt count != 0 - movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED - blne svc_preempt -#endif - - svc_exit r5, irq = 1 @ return from exception - UNWIND(.fnend ) -ENDPROC(__irq_svc) - - .ltorg - -#ifdef CONFIG_PREEMPT -svc_preempt: - mov r8, lr -1: bl preempt_schedule_irq @ irq en/disable is done inside - ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED - reteq r8 @ go again - b 1b -#endif - -__und_fault: - @ Correct the PC such that it is pointing at the instruction - @ which caused the fault. If the faulting instruction was ARM - @ the PC will be pointing at the next instruction, and have to - @ subtract 4. Otherwise, it is Thumb, and the PC will be - @ pointing at the second half of the Thumb instruction. We - @ have to subtract 2. - ldr r2, [r0, #S_PC] - sub r2, r2, r1 - str r2, [r0, #S_PC] - b do_undefinstr -ENDPROC(__und_fault) - - .align 5 -__und_svc: -#ifdef CONFIG_KPROBES - @ If a kprobe is about to simulate a "stmdb sp..." instruction, - @ it obviously needs free stack space which then will belong to - @ the saved context. - svc_entry MAX_STACK_SIZE -#else - svc_entry -#endif - - mov r1, #4 @ PC correction to apply - THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? - THUMB( movne r1, #2 ) @ if so, fix up PC correction - mov r0, sp @ struct pt_regs *regs - bl __und_fault - -__und_svc_finish: - get_thread_info tsk - ldr r5, [sp, #S_PSR] @ Get SVC cpsr - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__und_svc) - - .align 5 -__pabt_svc: - svc_entry - mov r2, sp @ regs - pabt_helper - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__pabt_svc) - - .align 5 -__fiq_svc: - svc_entry trace=0 - mov r0, sp @ struct pt_regs *regs - bl handle_fiq_as_nmi - svc_exit_via_fiq - UNWIND(.fnend ) -ENDPROC(__fiq_svc) - - .align 5 -.LCcralign: - .word cr_alignment -#ifdef MULTI_DABORT -.LCprocfns: - .word processor -#endif -.LCfp: - .word fp_enter - -/* - * Abort mode handlers - */ - -@ -@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode -@ and reuses the same macros. However in abort mode we must also -@ save/restore lr_abt and spsr_abt to make nested aborts safe. 
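The __und_fault fix-up above is easy to restate in C: the saved PC points past the instruction that trapped, by 4 bytes in ARM state or 2 in Thumb state, so the handler backs it up before reporting the fault. A sketch:

#include <stdint.h>

#define PSR_T_BIT (1u << 5)   /* Thumb state bit in the CPSR */

static uint32_t undef_insn_pc(uint32_t saved_pc, uint32_t cpsr)
{
        return saved_pc - ((cpsr & PSR_T_BIT) ? 2u : 4u);
}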
-@ - .align 5 -__fiq_abt: - svc_entry trace=0 - - ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - mov r1, lr @ Save lr_abt - mrs r2, spsr @ Save spsr_abt, abort is now safe - ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - stmfd sp!, {r1 - r2} - - add r0, sp, #8 @ struct pt_regs *regs - bl handle_fiq_as_nmi - - ldmfd sp!, {r1 - r2} - ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - mov lr, r1 @ Restore lr_abt, abort is unsafe - msr spsr_cxsf, r2 @ Restore spsr_abt - ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - - svc_exit_via_fiq - UNWIND(.fnend ) -ENDPROC(__fiq_abt) - -/* - * User mode handlers - * - * EABI note: sp_svc is always 64-bit aligned here, so should PT_REGS_SIZE - */ - -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (PT_REGS_SIZE & 7) -#error "sizeof(struct pt_regs) must be a multiple of 8" -#endif - - .macro usr_entry, trace=1, uaccess=1 - UNWIND(.fnstart ) - UNWIND(.cantunwind ) @ don't unwind the user space - sub sp, sp, #PT_REGS_SIZE - ARM( stmib sp, {r1 - r12} ) - THUMB( stmia sp, {r0 - r12} ) - - ATRAP( mrc p15, 0, r7, c1, c0, 0) - ATRAP( ldr r8, .LCcralign) - - ldmia r0, {r3 - r5} - add r0, sp, #S_PC @ here for interlock avoidance - mov r6, #-1 @ "" "" "" "" - - str r3, [sp] @ save the "real" r0 copied - @ from the exception stack - - ATRAP( ldr r8, [r8, #0]) - - @ - @ We are now ready to fill in the remaining blanks on the stack: - @ - @ r4 - lr_, already fixed up for correct return/restart - @ r5 - spsr_ - @ r6 - orig_r0 (see pt_regs definition in ptrace.h) - @ - @ Also, separately save sp_usr and lr_usr - @ - stmia r0, {r4 - r6} - ARM( stmdb r0, {sp, lr}^ ) - THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) - - .if \uaccess - uaccess_disable ip - .endif - - @ Enable the alignment trap while in kernel mode - ATRAP( teq r8, r7) - ATRAP( mcrne p15, 0, r8, c1, c0, 0) - - @ - @ Clear FP to mark the first stack frame - @ - zero_fp - - .if \trace -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - ct_user_exit save = 0 - .endif - .endm - - .macro kuser_cmpxchg_check -#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) -#ifndef CONFIG_MMU -#warning "NPTL on non MMU needs fixing" -#else - @ Make sure our user space atomic helper is restarted - @ if it was interrupted in a critical region. Here we - @ perform a quick test inline since it should be false - @ 99.9999% of the time. The rest is done out of line. - cmp r4, #TASK_SIZE - blhs kuser_cmpxchg64_fixup -#endif -#endif - .endm - - .align 5 -__dabt_usr: - usr_entry uaccess=0 - kuser_cmpxchg_check - mov r2, sp - dabt_helper - b ret_from_exception - UNWIND(.fnend ) -ENDPROC(__dabt_usr) - - .align 5 -__irq_usr: - usr_entry - kuser_cmpxchg_check - irq_handler - get_thread_info tsk - mov why, #0 - b ret_to_user_from_irq - UNWIND(.fnend ) -ENDPROC(__irq_usr) - - .ltorg - - .align 5 -__und_usr: - usr_entry uaccess=0 - - mov r2, r4 - mov r3, r5 - - @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the - @ faulting instruction depending on Thumb mode. 
- @ r3 = regs->ARM_cpsr - @ - @ The emulation code returns using r9 if it has emulated the - @ instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - badr r9, ret_from_exception - - @ IRQs must be enabled before attempting to read the instruction from - @ user space since that could cause a page/translation fault if the - @ page table was modified by another CPU. - enable_irq - - tst r3, #PSR_T_BIT @ Thumb mode? - bne __und_usr_thumb - sub r4, r2, #4 @ ARM instr at LR - 4 -1: ldrt r0, [r4] - ARM_BE8(rev r0, r0) @ little endian instruction - - uaccess_disable ip - - @ r0 = 32-bit ARM instruction which caused the exception - @ r2 = PC value for the following instruction (:= regs->ARM_pc) - @ r4 = PC value for the faulting instruction - @ lr = 32-bit undefined instruction function - badr lr, __und_usr_fault_32 - b call_fpe - -__und_usr_thumb: - @ Thumb instruction - sub r4, r2, #2 @ First half of thumb instr at LR - 2 -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 -/* - * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms - * can never be supported in a single kernel, this code is not applicable at - * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be - * made about .arch directives. - */ -#if __LINUX_ARM_ARCH__ < 7 -/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ -#define NEED_CPU_ARCHITECTURE - ldr r5, .LCcpu_architecture - ldr r5, [r5] - cmp r5, #CPU_ARCH_ARMv7 - blo __und_usr_fault_16 @ 16bit undefined instruction -/* - * The following code won't get run unless the running CPU really is v7, so - * coding round the lack of ldrht on older arches is pointless. Temporarily - * override the assembler target arch with the minimum required instead: - */ - .arch armv6t2 -#endif -2: ldrht r5, [r4] -ARM_BE8(rev16 r5, r5) @ little endian instruction - cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16_pan @ 16bit undefined instruction -3: ldrht r0, [r2] -ARM_BE8(rev16 r0, r0) @ little endian instruction - uaccess_disable ip - add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 - str r2, [sp, #S_PC] @ it's a 2x16bit instr, update - orr r0, r0, r5, lsl #16 - badr lr, __und_usr_fault_32 - @ r0 = the two 16-bit Thumb instructions which caused the exception - @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) - @ r4 = PC value for the first 16-bit Thumb instruction - @ lr = 32bit undefined instruction function - -#if __LINUX_ARM_ARCH__ < 7 -/* If the target arch was overridden, change it back: */ -#ifdef CONFIG_CPU_32v6K - .arch armv6k -#else - .arch armv6 -#endif -#endif /* __LINUX_ARM_ARCH__ < 7 */ -#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ - b __und_usr_fault_16 -#endif - UNWIND(.fnend) -ENDPROC(__und_usr) - -/* - * The out of line fixup for the ldrt instructions above. - */ - .pushsection .text.fixup, "ax" - .align 2 -4: str r4, [sp, #S_PC] @ retry current instruction - ret r9 - .popsection - .pushsection __ex_table,"a" - .long 1b, 4b -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 - .long 2b, 4b - .long 3b, 4b -#endif - .popsection - -/* - * Check whether the instruction is a co-processor instruction. - * If yes, we need to call the relevant co-processor handler. - * - * Note that we don't do a full check here for the co-processor - * instructions; all instructions with bit 27 set are well - * defined. 
The only instructions that should fault are the - * co-processor instructions. However, we have to watch out - * for the ARM6/ARM7 SWI bug. - * - * NEON is a special case that has to be handled here. Not all - * NEON instructions are co-processor instructions, so we have - * to make a special case of checking for them. Plus, there's - * five groups of them, so we have a table of mask/opcode pairs - * to check against, and if any match then we branch off into the - * NEON handler code. - * - * Emulators may wish to make use of the following registers: - * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) - * r2 = PC value to resume execution after successful emulation - * r9 = normal "successful" return address - * r10 = this threads thread_info structure - * lr = unrecognised instruction return address - * IRQs enabled, FIQs enabled. - */ - @ - @ Fall-through from Thumb-2 __und_usr - @ -#ifdef CONFIG_NEON - get_thread_info r10 @ get current thread - adr r6, .LCneon_thumb_opcodes - b 2f -#endif -call_fpe: - get_thread_info r10 @ get current thread -#ifdef CONFIG_NEON - adr r6, .LCneon_arm_opcodes -2: ldr r5, [r6], #4 @ mask value - ldr r7, [r6], #4 @ opcode bits matching in mask - cmp r5, #0 @ end mask? - beq 1f - and r8, r0, r5 - cmp r8, r7 @ NEON instruction? - bne 2b - mov r7, #1 - strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used - strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used - b do_vfp @ let VFP handler handle this -1: -#endif - tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 - tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 - reteq lr - and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) - mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] -#ifdef CONFIG_IWMMXT - @ Test if we need to give access to iWMMXt coprocessors - ldr r5, [r10, #TI_FLAGS] - rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only - movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1) - bcs iwmmxt_task_enable -#endif - ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) - THUMB( add pc, r8 ) - nop - - ret.w lr @ CP#0 - W(b) do_fpe @ CP#1 (FPE) - W(b) do_fpe @ CP#2 (FPE) - ret.w lr @ CP#3 -#ifdef CONFIG_CRUNCH - b crunch_task_enable @ CP#4 (MaverickCrunch) - b crunch_task_enable @ CP#5 (MaverickCrunch) - b crunch_task_enable @ CP#6 (MaverickCrunch) -#else - ret.w lr @ CP#4 - ret.w lr @ CP#5 - ret.w lr @ CP#6 -#endif - ret.w lr @ CP#7 - ret.w lr @ CP#8 - ret.w lr @ CP#9 -#ifdef CONFIG_VFP - W(b) do_vfp @ CP#10 (VFP) - W(b) do_vfp @ CP#11 (VFP) -#else - ret.w lr @ CP#10 (VFP) - ret.w lr @ CP#11 (VFP) -#endif - ret.w lr @ CP#12 - ret.w lr @ CP#13 - ret.w lr @ CP#14 (Debug) - ret.w lr @ CP#15 (Control) - -#ifdef NEED_CPU_ARCHITECTURE - .align 2 -.LCcpu_architecture: - .word __cpu_architecture -#endif - -#ifdef CONFIG_NEON - .align 6 - -.LCneon_arm_opcodes: - .word 0xfe000000 @ mask - .word 0xf2000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf4000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode - -.LCneon_thumb_opcodes: - .word 0xef000000 @ mask - .word 0xef000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf9000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode -#endif - -do_fpe: - ldr r4, .LCfp - add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr pc, [r4] @ Call FP module USR entry point - -/* - * The FP module is called with these registers set: - * r0 = instruction - * r2 = PC+4 - * r9 = normal "successful" return address - * r10 
= FP workspace - * lr = unrecognised FP instruction return address - */ - - .pushsection .data - .align 2 -ENTRY(fp_enter) - .word no_fp - .popsection - -ENTRY(no_fp) - ret lr -ENDPROC(no_fp) - -__und_usr_fault_32: - mov r1, #4 - b 1f -__und_usr_fault_16_pan: - uaccess_disable ip -__und_usr_fault_16: - mov r1, #2 -1: mov r0, sp - badr lr, ret_from_exception - b __und_fault -ENDPROC(__und_usr_fault_32) -ENDPROC(__und_usr_fault_16) - - .align 5 -__pabt_usr: - usr_entry - mov r2, sp @ regs - pabt_helper - UNWIND(.fnend ) - /* fall through */ -/* - * This is the return code to user mode for abort handlers - */ -ENTRY(ret_from_exception) - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - get_thread_info tsk - mov why, #0 - b ret_to_user - UNWIND(.fnend ) -ENDPROC(__pabt_usr) -ENDPROC(ret_from_exception) - - .align 5 -__fiq_usr: - usr_entry trace=0 - kuser_cmpxchg_check - mov r0, sp @ struct pt_regs *regs - bl handle_fiq_as_nmi - get_thread_info tsk - restore_user_regs fast = 0, offset = 0 - UNWIND(.fnend ) -ENDPROC(__fiq_usr) - -/* - * Register switch for ARMv3 and ARMv4 processors - * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info - * previous and next are guaranteed not to be the same. - */ -ENTRY(__switch_to) - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - add ip, r1, #TI_CPU_SAVE - ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack - THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack - THUMB( str sp, [ip], #4 ) - THUMB( str lr, [ip], #4 ) - ldr r4, [r2, #TI_TP_VALUE] - ldr r5, [r2, #TI_TP_VALUE + 4] -#ifdef CONFIG_CPU_USE_DOMAINS - mrc p15, 0, r6, c3, c0, 0 @ Get domain register - str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register - ldr r6, [r2, #TI_CPU_DOMAIN] -#endif - switch_tls r1, r4, r5, r3, r7 -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - ldr r7, [r2, #TI_TASK] - ldr r8, =__stack_chk_guard - .if (TSK_STACK_CANARY > IMM12_MASK) - add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK - .endif - ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] -#endif -#ifdef CONFIG_CPU_USE_DOMAINS - mcr p15, 0, r6, c3, c0, 0 @ Set domain register -#endif - mov r5, r0 - add r4, r2, #TI_CPU_SAVE - ldr r0, =thread_notify_head - mov r1, #THREAD_NOTIFY_SWITCH - bl atomic_notifier_call_chain -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - str r7, [r8] -#endif - THUMB( mov ip, r4 ) - mov r0, r5 - ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously - THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously - THUMB( ldr sp, [ip], #4 ) - THUMB( ldr pc, [ip] ) - UNWIND(.fnend ) -ENDPROC(__switch_to) - - __INIT - -/* - * User helpers. - * - * Each segment is 32-byte aligned and will be moved to the top of the high - * vector page. New segments (if ever needed) must be added in front of - * existing ones. This mechanism should be used only for things that are - * really small and justified, and not be abused freely. - * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. - */ - THUMB( .arm ) - - .macro usr_ret, reg -#ifdef CONFIG_ARM_THUMB - bx \reg -#else - ret \reg -#endif - .endm - - .macro kuser_pad, sym, size - .if (. - \sym) & 3 - .rept 4 - (. - \sym) & 3 - .byte 0 - .endr - .endif - .rept (\size - (. 
- \sym)) / 4 - .word 0xe7fddef1 - .endr - .endm - -#ifdef CONFIG_KUSER_HELPERS - .align 5 - .globl __kuser_helper_start -__kuser_helper_start: - -/* - * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular - * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point. - */ - -__kuser_cmpxchg64: @ 0xffff0f60 - -#if defined(CONFIG_CPU_32v6K) - - stmfd sp!, {r4, r5, r6, r7} - ldrd r4, r5, [r0] @ load old val - ldrd r6, r7, [r1] @ load new val - smp_dmb arm -1: ldrexd r0, r1, [r2] @ load current val - eors r3, r0, r4 @ compare with oldval (1) - eorseq r3, r1, r5 @ compare with oldval (2) - strexdeq r3, r6, r7, [r2] @ store newval if eq - teqeq r3, #1 @ success? - beq 1b @ if no then retry - smp_dmb arm - rsbs r0, r3, #0 @ set returned val and C flag - ldmfd sp!, {r4, r5, r6, r7} - usr_ret lr - -#elif !defined(CONFIG_SMP) - -#ifdef CONFIG_MMU - - /* - * The only thing that can break atomicity in this cmpxchg64 - * implementation is either an IRQ or a data abort exception - * causing another process/thread to be scheduled in the middle of - * the critical sequence. The same strategy as for cmpxchg is used. - */ - stmfd sp!, {r4, r5, r6, lr} - ldmia r0, {r4, r5} @ load old val - ldmia r1, {r6, lr} @ load new val -1: ldmia r2, {r0, r1} @ load current val - eors r3, r0, r4 @ compare with oldval (1) - eorseq r3, r1, r5 @ compare with oldval (2) -2: stmiaeq r2, {r6, lr} @ store newval if eq - rsbs r0, r3, #0 @ set return val and C flag - ldmfd sp!, {r4, r5, r6, pc} - - .text -kuser_cmpxchg64_fixup: - @ Called from kuser_cmpxchg_fixup. - @ r4 = address of interrupted insn (must be preserved). - @ sp = saved regs. r7 and r8 are clobbered. - @ 1b = first critical insn, 2b = last critical insn. - @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. - mov r7, #0xffff0fff - sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64))) - subs r8, r4, r7 - rsbscs r8, r8, #(2b - 1b) - strcs r7, [sp, #S_PC] -#if __LINUX_ARM_ARCH__ < 6 - bcc kuser_cmpxchg32_fixup -#endif - ret lr - .previous - -#else -#warning "NPTL on non MMU needs fixing" - mov r0, #-1 - adds r0, r0, #0 - usr_ret lr -#endif - -#else -#error "incoherent kernel configuration" -#endif - - kuser_pad __kuser_cmpxchg64, 64 - -__kuser_memory_barrier: @ 0xffff0fa0 - smp_dmb arm - usr_ret lr - - kuser_pad __kuser_memory_barrier, 32 - -__kuser_cmpxchg: @ 0xffff0fc0 - -#if __LINUX_ARM_ARCH__ < 6 - -#ifdef CONFIG_MMU - - /* - * The only thing that can break atomicity in this cmpxchg - * implementation is either an IRQ or a data abort exception - * causing another process/thread to be scheduled in the middle - * of the critical sequence. To prevent this, code is added to - * the IRQ and data abort exception handlers to set the pc back - * to the beginning of the critical section if it is found to be - * within that critical section (see kuser_cmpxchg_fixup). - */ -1: ldr r3, [r2] @ load current val - subs r3, r3, r0 @ compare with oldval -2: streq r1, [r2] @ store newval if eq - rsbs r0, r3, #0 @ set return val and C flag - usr_ret lr - - .text -kuser_cmpxchg32_fixup: - @ Called from kuser_cmpxchg_check macro. - @ r4 = address of interrupted insn (must be preserved). - @ sp = saved regs. r7 and r8 are clobbered. - @ 1b = first critical insn, 2b = last critical insn. - @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. 
- mov r7, #0xffff0fff - sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg))) - subs r8, r4, r7 - rsbscs r8, r8, #(2b - 1b) - strcs r7, [sp, #S_PC] - ret lr - .previous - -#else -#warning "NPTL on non MMU needs fixing" - mov r0, #-1 - adds r0, r0, #0 - usr_ret lr -#endif - -#else - - smp_dmb arm -1: ldrex r3, [r2] - subs r3, r3, r0 - strexeq r3, r1, [r2] - teqeq r3, #1 - beq 1b - rsbs r0, r3, #0 - /* beware -- each __kuser slot must be 8 instructions max */ - ALT_SMP(b __kuser_memory_barrier) - ALT_UP(usr_ret lr) - -#endif - - kuser_pad __kuser_cmpxchg, 32 - -__kuser_get_tls: @ 0xffff0fe0 - ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init - usr_ret lr - mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code - kuser_pad __kuser_get_tls, 16 - .rep 3 - .word 0 @ 0xffff0ff0 software TLS value, then - .endr @ pad up to __kuser_helper_version - -__kuser_helper_version: @ 0xffff0ffc - .word ((__kuser_helper_end - __kuser_helper_start) >> 5) - - .globl __kuser_helper_end -__kuser_helper_end: - -#endif - - THUMB( .thumb ) - -/* - * Vector stubs. - * - * This code is copied to 0xffff1000 so we can use branches in the - * vectors, rather than ldr's. Note that this code must not exceed - * a page size. - * - * Common stub entry macro: - * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - * - * SP points to a minimal amount of processor-private memory, the address - * of which is copied into r0 for the mode specific abort handler. - */ - .macro vector_stub, name, mode, correction=0 - .align 5 - -vector_\name: - .if \correction - sub lr, lr, #\correction - .endif - - @ - @ Save r0, lr_ (parent PC) and spsr_ - @ (parent CPSR) - @ - stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr - str lr, [sp, #8] @ save spsr - - @ - @ Prepare for SVC32 mode. IRQs remain disabled. 
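The __kuser_cmpxchg helpers above all implement the same user-visible semantics: atomically replace *ptr with newval only if it still contains oldval, retrying when a racing store steals the exclusive reservation. Functionally (ignoring the flag-based return convention) that is a compare-and-swap, sketched here with C11 atomics rather than raw LDREX/STREX:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool kuser_style_cmpxchg(_Atomic uint32_t *ptr,
                                uint32_t oldval, uint32_t newval)
{
        /* strong CAS: fails only if *ptr no longer equals oldval */
        return atomic_compare_exchange_strong(ptr, &oldval, newval);
}

The real helper additionally reports success in the carry flag and, on pre-v6 UP kernels, relies on the exception handlers rewinding the PC into the critical section rather than on exclusive monitors.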
- @ - mrs r0, cpsr - eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE) - msr spsr_cxsf, r0 - - @ - @ the branch table must immediately follow this code - @ - and lr, lr, #0x0f - THUMB( adr r0, 1f ) - THUMB( ldr lr, [r0, lr, lsl #2] ) - mov r0, sp - ARM( ldr lr, [pc, lr, lsl #2] ) - movs pc, lr @ branch to handler in SVC mode -ENDPROC(vector_\name) - - .align 2 - @ handler addresses follow this label -1: - .endm - - .section .stubs, "ax", %progbits - @ This must be the first word - .word vector_swi - -vector_rst: - ARM( swi SYS_ERROR0 ) - THUMB( svc #0 ) - THUMB( nop ) - b vector_und - -/* - * Interrupt dispatcher - */ - vector_stub irq, IRQ_MODE, 4 - - .long __irq_usr @ 0 (USR_26 / USR_32) - .long __irq_invalid @ 1 (FIQ_26 / FIQ_32) - .long __irq_invalid @ 2 (IRQ_26 / IRQ_32) - .long __irq_svc @ 3 (SVC_26 / SVC_32) - .long __irq_invalid @ 4 - .long __irq_invalid @ 5 - .long __irq_invalid @ 6 - .long __irq_invalid @ 7 - .long __irq_invalid @ 8 - .long __irq_invalid @ 9 - .long __irq_invalid @ a - .long __irq_invalid @ b - .long __irq_invalid @ c - .long __irq_invalid @ d - .long __irq_invalid @ e - .long __irq_invalid @ f - -/* - * Data abort dispatcher - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ - vector_stub dabt, ABT_MODE, 8 - - .long __dabt_usr @ 0 (USR_26 / USR_32) - .long __dabt_invalid @ 1 (FIQ_26 / FIQ_32) - .long __dabt_invalid @ 2 (IRQ_26 / IRQ_32) - .long __dabt_svc @ 3 (SVC_26 / SVC_32) - .long __dabt_invalid @ 4 - .long __dabt_invalid @ 5 - .long __dabt_invalid @ 6 - .long __dabt_invalid @ 7 - .long __dabt_invalid @ 8 - .long __dabt_invalid @ 9 - .long __dabt_invalid @ a - .long __dabt_invalid @ b - .long __dabt_invalid @ c - .long __dabt_invalid @ d - .long __dabt_invalid @ e - .long __dabt_invalid @ f - -/* - * Prefetch abort dispatcher - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ - vector_stub pabt, ABT_MODE, 4 - - .long __pabt_usr @ 0 (USR_26 / USR_32) - .long __pabt_invalid @ 1 (FIQ_26 / FIQ_32) - .long __pabt_invalid @ 2 (IRQ_26 / IRQ_32) - .long __pabt_svc @ 3 (SVC_26 / SVC_32) - .long __pabt_invalid @ 4 - .long __pabt_invalid @ 5 - .long __pabt_invalid @ 6 - .long __pabt_invalid @ 7 - .long __pabt_invalid @ 8 - .long __pabt_invalid @ 9 - .long __pabt_invalid @ a - .long __pabt_invalid @ b - .long __pabt_invalid @ c - .long __pabt_invalid @ d - .long __pabt_invalid @ e - .long __pabt_invalid @ f - -/* - * Undef instr entry dispatcher - * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ - vector_stub und, UND_MODE - - .long __und_usr @ 0 (USR_26 / USR_32) - .long __und_invalid @ 1 (FIQ_26 / FIQ_32) - .long __und_invalid @ 2 (IRQ_26 / IRQ_32) - .long __und_svc @ 3 (SVC_26 / SVC_32) - .long __und_invalid @ 4 - .long __und_invalid @ 5 - .long __und_invalid @ 6 - .long __und_invalid @ 7 - .long __und_invalid @ 8 - .long __und_invalid @ 9 - .long __und_invalid @ a - .long __und_invalid @ b - .long __und_invalid @ c - .long __und_invalid @ d - .long __und_invalid @ e - .long __und_invalid @ f - - .align 5 - -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit data mode). 
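Each vector_stub expansion above is followed by a 16-entry table indexed by the low four bits of the interrupted mode's SPSR; only the USR (0) and SVC (3) slots point at real handlers, and everything else lands in an __xxx_invalid stub. The dispatch pattern in C, with hypothetical dabt_* names standing in for the __dabt_usr/__dabt_svc/__dabt_invalid entry points:

typedef void (*vec_handler_t)(void);

extern void dabt_usr(void), dabt_svc(void), dabt_invalid(void);

static vec_handler_t dabt_table[16] = {
        [0] = dabt_usr,       /* USR_32 */
        [3] = dabt_svc,       /* SVC_32 */
        [1] = dabt_invalid,  [2] = dabt_invalid,  [4] = dabt_invalid,
        [5] = dabt_invalid,  [6] = dabt_invalid,  [7] = dabt_invalid,
        [8] = dabt_invalid,  [9] = dabt_invalid,  [10] = dabt_invalid,
        [11] = dabt_invalid, [12] = dabt_invalid, [13] = dabt_invalid,
        [14] = dabt_invalid, [15] = dabt_invalid,
};

static void dispatch_dabt(unsigned int spsr)
{
        dabt_table[spsr & 0x0f]();   /* "and lr, lr, #0x0f" in the stub */
}

The FIQ table below differs only in routing almost every mode to __fiq_svc.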
- */ - -vector_addrexcptn: - b vector_addrexcptn - -/*============================================================================= - * FIQ "NMI" handler - *----------------------------------------------------------------------------- - * Handle a FIQ using the SVC stack allowing FIQ to act like NMI on x86 - * systems. - */ - vector_stub fiq, FIQ_MODE, 4 - - .long __fiq_usr @ 0 (USR_26 / USR_32) - .long __fiq_svc @ 1 (FIQ_26 / FIQ_32) - .long __fiq_svc @ 2 (IRQ_26 / IRQ_32) - .long __fiq_svc @ 3 (SVC_26 / SVC_32) - .long __fiq_svc @ 4 - .long __fiq_svc @ 5 - .long __fiq_svc @ 6 - .long __fiq_abt @ 7 - .long __fiq_svc @ 8 - .long __fiq_svc @ 9 - .long __fiq_svc @ a - .long __fiq_svc @ b - .long __fiq_svc @ c - .long __fiq_svc @ d - .long __fiq_svc @ e - .long __fiq_svc @ f - - .globl vector_fiq - - .section .vectors, "ax", %progbits -.L__vectors_start: - W(b) vector_rst - W(b) vector_und - W(ldr) pc, .L__vectors_start + 0x1000 - W(b) vector_pabt - W(b) vector_dabt - W(b) vector_addrexcptn - W(b) vector_irq - W(b) vector_fiq - - .data - .align 2 - - .globl cr_alignment -cr_alignment: - .space 4 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S deleted file mode 100644 index 271cb8a1eba1eefe4469db9ae5a677385fa8c2cc..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-common.S +++ /dev/null @@ -1,460 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-common.S - * - * Copyright (C) 2000 Russell King - */ - -#include <asm/assembler.h> -#include <asm/unistd.h> -#include <asm/ftrace.h> -#include <asm/unwind.h> -#include <asm/memory.h> -#ifdef CONFIG_AEABI -#include <asm/unistd-oabi.h> -#endif - - .equ NR_syscalls, __NR_syscalls - -#ifdef CONFIG_NEED_RET_TO_USER -#include <mach/entry-macro.S> -#else - .macro arch_ret_to_user, tmp1, tmp2 - .endm -#endif - -#include "entry-header.S" - -saved_psr .req r8 -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) -saved_pc .req r9 -#define TRACE(x...) x -#else -saved_pc .req lr -#define TRACE(x...) -#endif - - .section .entry.text,"ax",%progbits - .align 5 -#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \ - IS_ENABLED(CONFIG_DEBUG_RSEQ)) -/* - * This is the fast syscall return path. We do as little as possible here, - * such as avoiding writing r0 to the stack. We only use this path if we - * have tracing, context tracking and rseq debug disabled - the overheads - * from those features make this path too inefficient. - */ -ret_fast_syscall: -__ret_fast_syscall: - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK - bne fast_work_pending - - - /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr - - restore_user_regs fast = 1, offset = S_OFF - UNWIND(.fnend ) -ENDPROC(ret_fast_syscall) - - /* Ok, we need to do extra processing, enter the slow path. */ -fast_work_pending: - str r0, [sp, #S_R0+S_OFF]! @ returned r0 - /* fall through to work_pending */ -#else -/* - * The "replacement" ret_fast_syscall for when tracing, context tracking, - * or rseq debug is enabled. As we will need to call out to some C functions, - * we save r0 first to avoid needing to save registers around each C function - * call. - */ -ret_fast_syscall: -__ret_fast_syscall: - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - str r0, [sp, #S_R0 + S_OFF]!
@ save returned r0 -#if IS_ENABLED(CONFIG_DEBUG_RSEQ) - /* do_rseq_syscall needs interrupts enabled. */ - mov r0, sp @ 'regs' - bl do_rseq_syscall -#endif - disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK - beq no_work_pending - UNWIND(.fnend ) -ENDPROC(ret_fast_syscall) - - /* Slower path - fall through to work_pending */ -#endif - - tst r1, #_TIF_SYSCALL_WORK - bne __sys_trace_return_nosave -slow_work_pending: - mov r0, sp @ 'regs' - mov r2, why @ 'syscall' - bl do_work_pending - cmp r0, #0 - beq no_work_pending - movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) - ldmia sp, {r0 - r6} @ have to reload r0 - r6 - b local_restart @ ... and off we go -ENDPROC(ret_fast_syscall) - -/* - * "slow" syscall return path. "why" tells us if this was a real syscall. - * IRQs may be enabled here, so always disable them. Note that we use the - * "notrace" version to avoid calling into the tracing code unnecessarily. - * do_work_pending() will update this state if necessary. - */ -ENTRY(ret_to_user) -ret_slow_syscall: -#if IS_ENABLED(CONFIG_DEBUG_RSEQ) - /* do_rseq_syscall needs interrupts enabled. */ - enable_irq_notrace @ enable interrupts - mov r0, sp @ 'regs' - bl do_rseq_syscall -#endif - disable_irq_notrace @ disable interrupts -ENTRY(ret_to_user_from_irq) - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] - tst r1, #_TIF_WORK_MASK - bne slow_work_pending -no_work_pending: - asm_trace_hardirqs_on save = 0 - - /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr - ct_user_enter save = 0 - - restore_user_regs fast = 0, offset = 0 -ENDPROC(ret_to_user_from_irq) -ENDPROC(ret_to_user) - -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cmp r5, #0 - movne r0, r4 - badrne lr, 1f - retne r5 -1: get_thread_info tsk - b ret_slow_syscall -ENDPROC(ret_from_fork) - -/*============================================================================= - * SWI handler - *----------------------------------------------------------------------------- - */ - - .align 5 -ENTRY(vector_swi) -#ifdef CONFIG_CPU_V7M - v7m_exception_entry -#else - sub sp, sp, #PT_REGS_SIZE - stmia sp, {r0 - r12} @ Calling r0 - r12 - ARM( add r8, sp, #S_PC ) - ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr - THUMB( mov r8, sp ) - THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr - mrs saved_psr, spsr @ called from non-FIQ mode, so ok. - TRACE( mov saved_pc, lr ) - str saved_pc, [sp, #S_PC] @ Save calling PC - str saved_psr, [sp, #S_PSR] @ Save CPSR - str r0, [sp, #S_OLD_R0] @ Save OLD_R0 -#endif - zero_fp - alignment_trap r10, ip, __cr_alignment - asm_trace_hardirqs_on save=0 - enable_irq_notrace - ct_user_exit save=0 - - /* - * Get the system call number. - */ - -#if defined(CONFIG_OABI_COMPAT) - - /* - * If we have CONFIG_OABI_COMPAT then we need to look at the swi - * value to determine if it is an EABI or an old ABI call. - */ -#ifdef CONFIG_ARM_THUMB - tst saved_psr, #PSR_T_BIT - movne r10, #0 @ no thumb OABI emulation - USER( ldreq r10, [saved_pc, #-4] ) @ get SWI instruction -#else - USER( ldr r10, [saved_pc, #-4] ) @ get SWI instruction -#endif - ARM_BE8(rev r10, r10) @ little endian instruction - -#elif defined(CONFIG_AEABI) - - /* - * Pure EABI user space always put syscall number into scno (r7). 
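A hedged C summary of the ABI cases above; syscall_nr() is a hypothetical helper, and 0x900000 is the legacy OABI __NR_SYSCALL_BASE value. With EABI the number simply arrives in r7; with the old ABI it is encoded in the low 24 bits of the SWI instruction, which the handler reads back from saved_pc - 4.

#define NR_SYSCALL_BASE_OABI	0x900000u	/* assumed legacy base */

static unsigned int syscall_nr(unsigned int r7, unsigned int swi_insn,
			       int is_eabi)
{
	if (is_eabi)
		return r7;		/* EABI: number lives in r7 */
	/* legacy: mask off the SWI opcode byte, then strip the OS base */
	return (swi_insn & 0x00ffffff) ^ NR_SYSCALL_BASE_OABI;
}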
- */
-#elif defined(CONFIG_ARM_THUMB)
-	/* Legacy ABI only, possibly thumb mode. */
-	tst	saved_psr, #PSR_T_BIT		@ this is SPSR from save_user_regs
-	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
- USER(	ldreq	scno, [saved_pc, #-4]	)
-
-#else
-	/* Legacy ABI only. */
- USER(	ldr	scno, [saved_pc, #-4]	)	@ get SWI instruction
-#endif
-
-	/* saved_psr and saved_pc are now dead */
-
-	uaccess_disable tbl
-
-	adr	tbl, sys_call_table		@ load syscall table pointer
-
-#if defined(CONFIG_OABI_COMPAT)
-	/*
-	 * If the swi argument is zero, this is an EABI call and we do nothing.
-	 *
-	 * If this is an old ABI call, get the syscall number into scno and
-	 * get the old ABI syscall table address.
-	 */
-	bics	r10, r10, #0xff000000
-	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
-	ldrne	tbl, =sys_oabi_call_table
-#elif !defined(CONFIG_AEABI)
-	bic	scno, scno, #0xff000000		@ mask off SWI op-code
-	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
-#endif
-	get_thread_info tsk
-	/*
-	 * Reload the registers that may have been corrupted on entry to
-	 * the syscall assembly (by tracing or context tracking).
-	 */
- TRACE(	ldmia	sp, {r0 - r3}	)
-
-local_restart:
-	ldr	r10, [tsk, #TI_FLAGS]		@ check for syscall tracing
-	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
-
-	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
-	bne	__sys_trace
-
-	invoke_syscall tbl, scno, r10, __ret_fast_syscall
-
-	add	r1, sp, #S_OFF
-2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
-	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
-	bcs	arm_syscall
-	mov	why, #0				@ no longer a real syscall
-	b	sys_ni_syscall			@ not private func
-
-#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
-	/*
-	 * We failed to handle a fault trying to access the page
-	 * containing the swi instruction, but we're not really in a
-	 * position to return -EFAULT. Instead, return back to the
-	 * instruction and re-enter the user fault handling path trying
-	 * to page it in. This will likely result in sending SEGV to the
-	 * current task.
-	 */
-9001:
-	sub	lr, saved_pc, #4
-	str	lr, [sp, #S_PC]
-	get_thread_info tsk
-	b	ret_fast_syscall
-#endif
-ENDPROC(vector_swi)
-
-	/*
-	 * This is the really slow path.  We're going to be doing
-	 * context switches, and waiting for our parent to respond.
-	 */
-__sys_trace:
-	mov	r1, scno
-	add	r0, sp, #S_OFF
-	bl	syscall_trace_enter
-	mov	scno, r0
-	invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1
-	cmp	scno, #-1			@ skip the syscall?
-	bne	2b
-	add	sp, sp, #S_OFF			@ restore stack
-
-__sys_trace_return_nosave:
-	enable_irq_notrace
-	mov	r0, sp
-	bl	syscall_trace_exit
-	b	ret_slow_syscall
-
-__sys_trace_return:
-	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
-	mov	r0, sp
-	bl	syscall_trace_exit
-	b	ret_slow_syscall
-
-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
-	.macro	syscall_table_start, sym
-	.equ	__sys_nr, 0
-	.type	\sym, #object
-ENTRY(\sym)
-	.endm
-
-	.macro	syscall, nr, func
-	.ifgt	__sys_nr - \nr
-	.error	"Duplicated/unordered system call entry"
-	.endif
-	.rept	\nr - __sys_nr
-	.long	sys_ni_syscall
-	.endr
-	.long	\func
-	.equ	__sys_nr, \nr + 1
-	.endm
-
-	.macro	syscall_table_end, sym
-	.ifgt	__sys_nr - __NR_syscalls
-	.error	"System call table too big"
-	.endif
-	.rept	__NR_syscalls - __sys_nr
-	.long	sys_ni_syscall
-	.endr
-	.size	\sym, . - \sym
-	.endm
-
-#define NATIVE(nr, func) syscall nr, func
-
-/*
- * This is the syscall table declaration for native ABI syscalls.
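A hedged C illustration of what the syscall/syscall_table_end macros above emit: every gap between consecutive syscall numbers is padded with sys_ni_syscall, so the finished table can be indexed directly by syscall number. build_table() is illustrative, not a kernel function.

#include <stddef.h>

long sys_ni_syscall(void);	/* the "not implemented" fallback */

struct syscall_entry {
	size_t nr;
	long (*fn)(void);
};

/* Fill every slot with the fallback, then patch in the real entries. */
static void build_table(long (*tbl[])(void), size_t nr_slots,
			const struct syscall_entry *calls, size_t n)
{
	for (size_t i = 0; i < nr_slots; i++)
		tbl[i] = sys_ni_syscall;
	for (size_t i = 0; i < n; i++)
		tbl[calls[i].nr] = calls[i].fn;
}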
- * With EABI a couple syscalls are obsolete and defined as sys_ni_syscall. - */ - syscall_table_start sys_call_table -#define COMPAT(nr, native, compat) syscall nr, native -#ifdef CONFIG_AEABI -#include -#else -#include -#endif -#undef COMPAT - syscall_table_end sys_call_table - -/*============================================================================ - * Special system call wrappers - */ -@ r0 = syscall number -@ r8 = syscall table -sys_syscall: - bic scno, r0, #__NR_OABI_SYSCALL_BASE - cmp scno, #__NR_syscall - __NR_SYSCALL_BASE - cmpne scno, #NR_syscalls @ check range -#ifdef CONFIG_CPU_SPECTRE - movhs scno, #0 - csdb -#endif - stmialo sp, {r5, r6} @ shuffle args - movlo r0, r1 - movlo r1, r2 - movlo r2, r3 - movlo r3, r4 - ldrlo pc, [tbl, scno, lsl #2] - b sys_ni_syscall -ENDPROC(sys_syscall) - -sys_sigreturn_wrapper: - add r0, sp, #S_OFF - mov why, #0 @ prevent syscall restart handling - b sys_sigreturn -ENDPROC(sys_sigreturn_wrapper) - -sys_rt_sigreturn_wrapper: - add r0, sp, #S_OFF - mov why, #0 @ prevent syscall restart handling - b sys_rt_sigreturn -ENDPROC(sys_rt_sigreturn_wrapper) - -sys_statfs64_wrapper: - teq r1, #88 - moveq r1, #84 - b sys_statfs64 -ENDPROC(sys_statfs64_wrapper) - -sys_fstatfs64_wrapper: - teq r1, #88 - moveq r1, #84 - b sys_fstatfs64 -ENDPROC(sys_fstatfs64_wrapper) - -/* - * Note: off_4k (r5) is always units of 4K. If we can't do the requested - * offset, we return EINVAL. - */ -sys_mmap2: - str r5, [sp, #4] - b sys_mmap_pgoff -ENDPROC(sys_mmap2) - -#ifdef CONFIG_OABI_COMPAT - -/* - * These are syscalls with argument register differences - */ - -sys_oabi_pread64: - stmia sp, {r3, r4} - b sys_pread64 -ENDPROC(sys_oabi_pread64) - -sys_oabi_pwrite64: - stmia sp, {r3, r4} - b sys_pwrite64 -ENDPROC(sys_oabi_pwrite64) - -sys_oabi_truncate64: - mov r3, r2 - mov r2, r1 - b sys_truncate64 -ENDPROC(sys_oabi_truncate64) - -sys_oabi_ftruncate64: - mov r3, r2 - mov r2, r1 - b sys_ftruncate64 -ENDPROC(sys_oabi_ftruncate64) - -sys_oabi_readahead: - str r3, [sp] - mov r3, r2 - mov r2, r1 - b sys_readahead -ENDPROC(sys_oabi_readahead) - -/* - * Let's declare a second syscall table for old ABI binaries - * using the compatibility syscall entries. - */ - syscall_table_start sys_oabi_call_table -#define COMPAT(nr, native, compat) syscall nr, compat -#include - syscall_table_end sys_oabi_call_table - -#endif - diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S deleted file mode 100644 index a74289ebc803699955155b4f31bd387c8f23b9bd..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-ftrace.S +++ /dev/null @@ -1,272 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#include -#include -#include - -#include "entry-header.S" - -/* - * When compiling with -pg, gcc inserts a call to the mcount routine at the - * start of every function. In mcount, apart from the function's address (in - * lr), we need to get hold of the function's caller's address. - * - * Newer GCCs (4.4+) solve this problem by using a version of mcount with call - * sites like: - * - * push {lr} - * bl __gnu_mcount_nc - * - * With these compilers, frame pointers are not necessary. - * - * mcount can be thought of as a function called in the middle of a subroutine - * call. As such, it needs to be transparent for both the caller and the - * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. 
This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) - * - * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" - * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). - */ - -.macro mcount_adjust_addr rd, rn - bic \rd, \rn, #1 @ clear the Thumb bit if present - sub \rd, \rd, #MCOUNT_INSN_SIZE -.endm - -.macro __mcount suffix - mcount_enter - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne 1f - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr r1, =ftrace_graph_return - ldr r2, [r1] - cmp r0, r2 - bne ftrace_graph_caller\suffix - - ldr r1, =ftrace_graph_entry - ldr r2, [r1] - ldr r0, =ftrace_graph_entry_stub - cmp r0, r2 - bne ftrace_graph_caller\suffix -#endif - - mcount_exit - -1: mcount_get_lr r1 @ lr of instrumented func - mcount_adjust_addr r0, lr @ instrumented function - badr lr, 2f - mov pc, r2 -2: mcount_exit -.endm - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - -.macro __ftrace_regs_caller - - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, - @ OLD_R0 will overwrite previous LR - - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism - - str lr, [sp, #0] @ store LR instead of PC - - ldr lr, [sp, #8] @ get previous LR - - str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} - - @ stack content at this point: - @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | - - mov r3, sp @ struct pt_regs* - - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current - @ function tracing op - - ldr r1, [sp, #S_LR] @ lr of instrumented func - - ldr lr, [sp, #S_PC] @ get LR - - mcount_adjust_addr r0, lr @ instrumented function - - .globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_regs_call -ftrace_graph_regs_call: - mov r0, r0 -#endif - - @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return -.endm - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.macro __ftrace_graph_regs_caller - - sub r0, fp, #4 @ lr of instrumented routine (parent) - - @ called from __ftrace_regs_caller - ldr r1, [sp, #S_PC] @ instrumented routine (func) - mcount_adjust_addr r1, r1 - - mov r2, fp @ frame pointer - bl prepare_ftrace_return - - @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return - -.endm -#endif -#endif - -.macro __ftrace_caller suffix - mcount_enter - - mcount_get_lr r1 @ lr of instrumented func - mcount_adjust_addr r0, lr @ instrumented function - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current - @ function tracing op - mov r3, #0 @ regs is NULL -#endif - - .globl ftrace_call\suffix -ftrace_call\suffix: - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_call\suffix -ftrace_graph_call\suffix: - mov r0, r0 -#endif - - mcount_exit -.endm - -.macro __ftrace_graph_caller - sub r0, fp, #4 @ &lr of instrumented routine (&parent) -#ifdef CONFIG_DYNAMIC_FTRACE - @ called from __ftrace_caller, saved in mcount_enter - ldr r1, [sp, #16] @ instrumented routine (func) - mcount_adjust_addr r1, r1 
-#else - @ called from __mcount, untouched in lr - mcount_adjust_addr r1, lr @ instrumented routine (func) -#endif - mov r2, fp @ frame pointer - bl prepare_ftrace_return - mcount_exit -.endm - -/* - * __gnu_mcount_nc - */ - -.macro mcount_enter -/* - * This pad compensates for the push {lr} at the call site. Note that we are - * unable to unwind through a function which does not otherwise save its lr. - */ - UNWIND(.pad #4) - stmdb sp!, {r0-r3, lr} - UNWIND(.save {r0-r3, lr}) -.endm - -.macro mcount_get_lr reg - ldr \reg, [sp, #20] -.endm - -.macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip -.endm - -ENTRY(__gnu_mcount_nc) -UNWIND(.fnstart) -#ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip -#else - __mcount -#endif -UNWIND(.fnend) -ENDPROC(__gnu_mcount_nc) - -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(ftrace_caller) -UNWIND(.fnstart) - __ftrace_caller -UNWIND(.fnend) -ENDPROC(ftrace_caller) - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) -UNWIND(.fnstart) - __ftrace_regs_caller -UNWIND(.fnend) -ENDPROC(ftrace_regs_caller) -#endif - -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) -UNWIND(.fnstart) - __ftrace_graph_caller -UNWIND(.fnend) -ENDPROC(ftrace_graph_caller) - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_graph_regs_caller) -UNWIND(.fnstart) - __ftrace_graph_regs_caller -UNWIND(.fnend) -ENDPROC(ftrace_graph_regs_caller) -#endif -#endif - -.purgem mcount_enter -.purgem mcount_get_lr -.purgem mcount_exit - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: - stmdb sp!, {r0-r3} - mov r0, fp @ frame pointer - bl ftrace_return_to_handler - mov lr, r0 @ r0 has real ret addr - ldmia sp!, {r0-r3} - ret lr -#endif - -ENTRY(ftrace_stub) -.Lftrace_stub: - ret lr -ENDPROC(ftrace_stub) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S deleted file mode 100644 index 40db0f9188b69e9e4323405c8c44e9a3fe153890..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-header.S +++ /dev/null @@ -1,417 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#include -#include -#include -#include -#include -#include - -@ Bad Abort numbers -@ ----------------- -@ -#define BAD_PREFETCH 0 -#define BAD_DATA 1 -#define BAD_ADDREXCPTN 2 -#define BAD_IRQ 3 -#define BAD_UNDEFINSTR 4 - -@ -@ Most of the stack format comes from struct pt_regs, but with -@ the addition of 8 bytes for storing syscall args 5 and 6. -@ This _must_ remain a multiple of 8 for EABI. -@ -#define S_OFF 8 - -/* - * The SWI code relies on the fact that R0 is at the bottom of the stack - * (due to slow/fast restore user regs). - */ -#if S_R0 != 0 -#error "Please fix" -#endif - - .macro zero_fp -#ifdef CONFIG_FRAME_POINTER - mov fp, #0 -#endif - .endm - -#ifdef CONFIG_ALIGNMENT_TRAP -#define ATRAP(x...) x -#else -#define ATRAP(x...) -#endif - - .macro alignment_trap, rtmp1, rtmp2, label -#ifdef CONFIG_ALIGNMENT_TRAP - mrc p15, 0, \rtmp2, c1, c0, 0 - ldr \rtmp1, \label - ldr \rtmp1, [\rtmp1] - teq \rtmp1, \rtmp2 - mcrne p15, 0, \rtmp1, c1, c0, 0 -#endif - .endm - -#ifdef CONFIG_CPU_V7M -/* - * ARMv7-M exception entry/exit macros. - * - * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are - * automatically saved on the current stack (32 words) before - * switching to the exception stack (SP_main). - * - * If exception is taken while in user mode, SP_main is - * empty. Otherwise, SP_main is aligned to 64 bit automatically - * (CCR.STKALIGN set). 
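A hedged sketch of the hardware-stacked ARMv7-M exception frame referred to above: eight words (32 bytes) pushed automatically by the core before the handler runs, lowest address first.

struct v7m_hw_frame {
	unsigned long r0, r1, r2, r3;
	unsigned long r12;
	unsigned long lr;	/* R14 at the point of the exception */
	unsigned long ret_addr;	/* ReturnAddress() */
	unsigned long xpsr;
};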
- * - * Linux assumes that the interrupts are disabled when entering an - * exception handler and it may BUG if this is not the case. Interrupts - * are disabled during entry and reenabled in the exit macro. - * - * v7m_exception_slow_exit is used when returning from SVC or PendSV. - * When returning to kernel mode, we don't return from exception. - */ - .macro v7m_exception_entry - @ determine the location of the registers saved by the core during - @ exception entry. Depending on the mode the cpu was in when the - @ exception happend that is either on the main or the process stack. - @ Bit 2 of EXC_RETURN stored in the lr register specifies which stack - @ was used. - tst lr, #EXC_RET_STACK_MASK - mrsne r12, psp - moveq r12, sp - - @ we cannot rely on r0-r3 and r12 matching the value saved in the - @ exception frame because of tail-chaining. So these have to be - @ reloaded. - ldmia r12!, {r0-r3} - - @ Linux expects to have irqs off. Do it here before taking stack space - cpsid i - - sub sp, #PT_REGS_SIZE-S_IP - stmdb sp!, {r0-r11} - - @ load saved r12, lr, return address and xPSR. - @ r0-r7 are used for signals and never touched from now on. Clobbering - @ r8-r12 is OK. - mov r9, r12 - ldmia r9!, {r8, r10-r12} - - @ calculate the original stack pointer value. - @ r9 currently points to the memory location just above the auto saved - @ xPSR. - @ The cpu might automatically 8-byte align the stack. Bit 9 - @ of the saved xPSR specifies if stack aligning took place. In this case - @ another 32-bit value is included in the stack. - - tst r12, V7M_xPSR_FRAMEPTRALIGN - addne r9, r9, #4 - - @ store saved r12 using str to have a register to hold the base for stm - str r8, [sp, #S_IP] - add r8, sp, #S_SP - @ store r13-r15, xPSR - stmia r8!, {r9-r12} - @ store old_r0 - str r0, [r8] - .endm - - /* - * PENDSV and SVCALL are configured to have the same exception - * priorities. As a kernel thread runs at SVCALL execution priority it - * can never be preempted and so we will never have to return to a - * kernel thread here. - */ - .macro v7m_exception_slow_exit ret_r0 - cpsid i - ldr lr, =exc_ret - ldr lr, [lr] - - @ read original r12, sp, lr, pc and xPSR - add r12, sp, #S_IP - ldmia r12, {r1-r5} - - @ an exception frame is always 8-byte aligned. To tell the hardware if - @ the sp to be restored is aligned or not set bit 9 of the saved xPSR - @ accordingly. - tst r2, #4 - subne r2, r2, #4 - orrne r5, V7M_xPSR_FRAMEPTRALIGN - biceq r5, V7M_xPSR_FRAMEPTRALIGN - - @ ensure bit 0 is cleared in the PC, otherwise behaviour is - @ unpredictable - bic r4, #1 - - @ write basic exception frame - stmdb r2!, {r1, r3-r5} - ldmia sp, {r1, r3-r5} - .if \ret_r0 - stmdb r2!, {r0, r3-r5} - .else - stmdb r2!, {r1, r3-r5} - .endif - - @ restore process sp - msr psp, r2 - - @ restore original r4-r11 - ldmia sp!, {r0-r11} - - @ restore main sp - add sp, sp, #PT_REGS_SIZE-S_IP - - cpsie i - bx lr - .endm -#endif /* CONFIG_CPU_V7M */ - - @ - @ Store/load the USER SP and LR registers by switching to the SYS - @ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not - @ available. 
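A minimal C model of the alignment fix-up in v7m_exception_entry above, assuming the usual bit-9 meaning of the saved xPSR; original_sp() is illustrative only. If the bit is set, the core inserted one pad word to 8-byte-align the frame, so the pre-exception SP sits one word higher than the end of the frame.

#define XPSR_FRAMEPTRALIGN (1UL << 9)	/* V7M_xPSR_FRAMEPTRALIGN */

static unsigned long original_sp(unsigned long frame_end, unsigned long xpsr)
{
	return frame_end + ((xpsr & XPSR_FRAMEPTRALIGN) ? 4 : 0);
}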
Should only be called from SVC mode - @ - .macro store_user_sp_lr, rd, rtemp, offset = 0 - mrs \rtemp, cpsr - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch to the SYS mode - - str sp, [\rd, #\offset] @ save sp_usr - str lr, [\rd, #\offset + 4] @ save lr_usr - - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch back to the SVC mode - .endm - - .macro load_user_sp_lr, rd, rtemp, offset = 0 - mrs \rtemp, cpsr - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch to the SYS mode - - ldr sp, [\rd, #\offset] @ load sp_usr - ldr lr, [\rd, #\offset + 4] @ load lr_usr - - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch back to the SVC mode - .endm - - - .macro svc_exit, rpsr, irq = 0 - .if \irq != 0 - @ IRQs already off -#ifdef CONFIG_TRACE_IRQFLAGS - @ The parent context IRQs must have been enabled to get here in - @ the first place, so there's no point checking the PSR I bit. - bl trace_hardirqs_on -#endif - .else - @ IRQs off again before pulling preserved data off the stack - disable_irq_notrace -#ifdef CONFIG_TRACE_IRQFLAGS - tst \rpsr, #PSR_I_BIT - bleq trace_hardirqs_on - tst \rpsr, #PSR_I_BIT - blne trace_hardirqs_off -#endif - .endif - uaccess_exit tsk, r0, r1 - -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode SVC restore - msr spsr_cxsf, \rpsr -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) - @ We must avoid clrex due to Cortex-A15 erratum #830321 - sub r0, sp, #4 @ uninhabited address - strex r1, r2, [r0] @ clear the exclusive monitor -#endif - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -#else - @ Thumb mode SVC restore - ldr lr, [sp, #S_SP] @ top of the stack - ldrd r0, r1, [sp, #S_LR] @ calling lr and pc - - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r2, r1, [sp, #S_LR] @ clear the exclusive monitor - - stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context - ldmia sp, {r0 - r12} - mov sp, lr - ldr lr, [sp], #4 - rfeia sp! -#endif - .endm - - @ - @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit - @ - @ This macro acts in a similar manner to svc_exit but switches to FIQ - @ mode to restore the final part of the register state. - @ - @ We cannot use the normal svc_exit procedure because that would - @ clobber spsr_svc (FIQ could be delivered during the first few - @ instructions of vector_swi meaning its contents have not been - @ saved anywhere). - @ - @ Note that, unlike svc_exit, this macro also does not allow a caller - @ supplied rpsr. This is because the FIQ exceptions are not re-entrant - @ and the handlers cannot call into the scheduler (meaning the value - @ on the stack remains correct). 
- @ - .macro svc_exit_via_fiq - uaccess_exit tsk, r0, r1 -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode restore - mov r0, sp - ldmib r0, {r1 - r14} @ abort is deadly from here onward (it will - @ clobber state restored below) - msr cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT - add r8, r0, #S_PC - ldr r9, [r0, #S_PSR] - msr spsr_cxsf, r9 - ldr r0, [r0, #S_R0] - ldmia r8, {pc}^ -#else - @ Thumb mode restore - add r0, sp, #S_R2 - ldr lr, [sp, #S_LR] - ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will - @ clobber state restored below) - ldmia r0, {r2 - r12} - mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT - msr cpsr_c, r1 - sub r0, #S_R2 - add r8, r0, #S_PC - ldmia r0, {r0 - r1} - rfeia r8 -#endif - .endm - - - .macro restore_user_regs, fast = 0, offset = 0 - uaccess_enable r1, isb=0 -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode restore - mov r2, sp - ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr - ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #PSR_I_BIT | 0x0f - bne 1f - msr spsr_cxsf, r1 @ save in spsr_svc -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r1, r2, [r2] @ clear the exclusive monitor -#endif - .if \fast - ldmdb r2, {r1 - lr}^ @ get calling r1 - lr - .else - ldmdb r2, {r0 - lr}^ @ get calling r0 - lr - .endif - mov r0, r0 @ ARMv5T and earlier require a nop - @ after ldm {}^ - add sp, sp, #\offset + PT_REGS_SIZE - movs pc, lr @ return & move spsr_svc into cpsr -1: bug "Returning to usermode but unexpected PSR bits set?", \@ -#elif defined(CONFIG_CPU_V7M) - @ V7M restore. - @ Note that we don't need to do clrex here as clearing the local - @ monitor is part of the exception entry and exit sequence. - .if \offset - add sp, #\offset - .endif - v7m_exception_slow_exit ret_r0 = \fast -#else - @ Thumb mode restore - mov r2, sp - load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr - ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr - ldr lr, [sp, #\offset + S_PC] @ get pc - add sp, sp, #\offset + S_SP - tst r1, #PSR_I_BIT | 0x0f - bne 1f - msr spsr_cxsf, r1 @ save in spsr_svc - - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r1, r2, [sp] @ clear the exclusive monitor - - .if \fast - ldmdb sp, {r1 - r12} @ get calling r1 - r12 - .else - ldmdb sp, {r0 - r12} @ get calling r0 - r12 - .endif - add sp, sp, #PT_REGS_SIZE - S_SP - movs pc, lr @ return & move spsr_svc into cpsr -1: bug "Returning to usermode but unexpected PSR bits set?", \@ -#endif /* !CONFIG_THUMB2_KERNEL */ - .endm - -/* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. 
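The invoke_syscall macro below bounds the syscall number before the table load. A hedged C model of that Spectre-v1 clamp (do_syscall() is hypothetical): out-of-range numbers are forced to 0 so even speculative execution cannot index past the table, while architecturally the out-of-range case still falls through to the not-implemented path.

static long do_syscall(unsigned int nr, long (*const tbl[])(void),
		       unsigned int nr_syscalls)
{
	unsigned int idx = nr >= nr_syscalls ? 0 : nr;	/* movcs \tmp, #0 */

	/* the csdb speculation barrier sits here in the assembly */
	if (nr < nr_syscalls)
		return tbl[idx]();	/* ldrcc pc, [\table, \tmp, lsl #2] */
	return -38;			/* ENOSYS, the sys_ni_syscall path */
}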
- */ - .macro ct_user_exit, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING - .if \save - stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_exit - ldmia sp!, {r0-r3, ip, lr} - .else - bl context_tracking_user_exit - .endif -#endif - .endm - - .macro ct_user_enter, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING - .if \save - stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_enter - ldmia sp!, {r0-r3, ip, lr} - .else - bl context_tracking_user_enter - .endif -#endif - .endm - - .macro invoke_syscall, table, nr, tmp, ret, reload=0 -#ifdef CONFIG_CPU_SPECTRE - mov \tmp, \nr - cmp \tmp, #NR_syscalls @ check upper syscall limit - movcs \tmp, #0 - csdb - badr lr, \ret @ return address - .if \reload - add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmiacc r1, {r0 - r6} @ reload r0-r6 - stmiacc sp, {r4, r5} @ update stack arguments - .endif - ldrcc pc, [\table, \tmp, lsl #2] @ call sys_* routine -#else - cmp \nr, #NR_syscalls @ check upper syscall limit - badr lr, \ret @ return address - .if \reload - add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmiacc r1, {r0 - r6} @ reload r0-r6 - stmiacc sp, {r4, r5} @ update stack arguments - .endif - ldrcc pc, [\table, \nr, lsl #2] @ call sys_* routine -#endif - .endm - -/* - * These are the registers used in the syscall handler, and allow us to - * have in theory up to 7 arguments to a function - r0 to r6. - * - * r7 is reserved for the system call number for thumb mode. - * - * Note that tbl == why is intentional. - * - * We must set at least "tsk" and "why" when calling ret_with_reschedule. - */ -scno .req r7 @ syscall number -tbl .req r8 @ syscall table pointer -why .req r8 @ Linux syscall (!= 0) -tsk .req r9 @ current thread_info diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S deleted file mode 100644 index de1f20624be152f232cb32bdda391249500a1199..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-v7m.S +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-v7m.S - * - * Copyright (C) 2008 ARM Ltd. - * - * Low-level vector interface routines for the ARMv7-M architecture - */ -#include -#include -#include -#include - -#include "entry-header.S" - -#ifdef CONFIG_TRACE_IRQFLAGS -#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation" -#endif - -__invalid_entry: - v7m_exception_entry -#ifdef CONFIG_PRINTK - adr r0, strerr - mrs r1, ipsr - mov r2, lr - bl printk -#endif - mov r0, sp - bl show_regs -1: b 1b -ENDPROC(__invalid_entry) - -strerr: .asciz "\nUnhandled exception: IPSR = %08lx LR = %08lx\n" - - .align 2 -__irq_entry: - v7m_exception_entry - - @ - @ Invoke the IRQ handler - @ - mrs r0, ipsr - ldr r1, =V7M_xPSR_EXCEPTIONNO - and r0, r1 - sub r0, #16 - mov r1, sp - stmdb sp!, {lr} - @ routine called with r0 = irq number, r1 = struct pt_regs * - bl nvic_handle_irq - - pop {lr} - @ - @ Check for any pending work if returning to user - @ - ldr r1, =BASEADDR_V7M_SCB - ldr r0, [r1, V7M_SCB_ICSR] - tst r0, V7M_SCB_ICSR_RETTOBASE - beq 2f - - get_thread_info tsk - ldr r2, [tsk, #TI_FLAGS] - tst r2, #_TIF_WORK_MASK - beq 2f @ no work pending - mov r0, #V7M_SCB_ICSR_PENDSVSET - str r0, [r1, V7M_SCB_ICSR] @ raise PendSV - -2: - @ registers r0-r3 and r12 are automatically restored on exception - @ return. r4-r7 were not clobbered in v7m_exception_entry so for - @ correctness they don't need to be restored. So only r8-r11 must be - @ restored here. The easiest way to do so is to restore r0-r7, too. 
-	ldmia	sp!, {r0-r11}
-	add	sp, #PT_REGS_SIZE-S_IP
-	cpsie	i
-	bx	lr
-ENDPROC(__irq_entry)
-
-__pendsv_entry:
-	v7m_exception_entry
-
-	ldr	r1, =BASEADDR_V7M_SCB
-	mov	r0, #V7M_SCB_ICSR_PENDSVCLR
-	str	r0, [r1, V7M_SCB_ICSR]	@ clear PendSV
-
-	@ execute the pending work, including reschedule
-	get_thread_info tsk
-	mov	why, #0
-	b	ret_to_user_from_irq
-ENDPROC(__pendsv_entry)
-
-/*
- * Register switch for ARMv7-M processors.
- * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
- * previous and next are guaranteed not to be the same.
- */
-ENTRY(__switch_to)
-	.fnstart
-	.cantunwind
-	add	ip, r1, #TI_CPU_SAVE
-	stmia	ip!, {r4 - r11}		@ Store most regs on stack
-	str	sp, [ip], #4
-	str	lr, [ip], #4
-	mov	r5, r0
-	add	r4, r2, #TI_CPU_SAVE
-	ldr	r0, =thread_notify_head
-	mov	r1, #THREAD_NOTIFY_SWITCH
-	bl	atomic_notifier_call_chain
-	mov	ip, r4
-	mov	r0, r5
-	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
-	ldr	sp, [ip]
-	ldr	pc, [ip, #4]!
-	.fnend
-ENDPROC(__switch_to)
-
-	.data
-#if CONFIG_CPU_V7M_NUM_IRQ <= 112
-	.align	9
-#else
-	.align	10
-#endif
-
-/*
- * Vector table (natural alignment needs to be ensured)
- */
-ENTRY(vector_table)
-	.long	0			@ 0 - Reset stack pointer
-	.long	__invalid_entry		@ 1 - Reset
-	.long	__invalid_entry		@ 2 - NMI
-	.long	__invalid_entry		@ 3 - HardFault
-	.long	__invalid_entry		@ 4 - MemManage
-	.long	__invalid_entry		@ 5 - BusFault
-	.long	__invalid_entry		@ 6 - UsageFault
-	.long	__invalid_entry		@ 7 - Reserved
-	.long	__invalid_entry		@ 8 - Reserved
-	.long	__invalid_entry		@ 9 - Reserved
-	.long	__invalid_entry		@ 10 - Reserved
-	.long	vector_swi		@ 11 - SVCall
-	.long	__invalid_entry		@ 12 - Debug Monitor
-	.long	__invalid_entry		@ 13 - Reserved
-	.long	__pendsv_entry		@ 14 - PendSV
-	.long	__invalid_entry		@ 15 - SysTick
-	.rept	CONFIG_CPU_V7M_NUM_IRQ
-	.long	__irq_entry		@ External Interrupts
-	.endr
-	.align	2
-	.globl	exc_ret
-exc_ret:
-	.space	4
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
deleted file mode 100644
index 8dd26e1a9bd69051a1548d99c157cad2e65ba81b..0000000000000000000000000000000000000000
--- a/arch/arm/kernel/fiqasm.S
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * linux/arch/arm/kernel/fiqasm.S
- *
- * Derived from code originally in linux/arch/arm/kernel/fiq.c:
- *
- * Copyright (C) 1998 Russell King
- * Copyright (C) 1998, 1999 Phil Blundell
- * Copyright (C) 2011, Linaro Limited
- *
- * FIQ support written by Philip Blundell, 1998.
- *
- * FIQ support re-written by Russell King to be more generic
- *
- * v7/Thumb-2 compatibility modifications by Linaro Limited, 2011.
- */
-
-#include
-#include
-
-/*
- * Taking an interrupt in FIQ mode is death, so both these functions
- * disable irqs for the duration.
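A hedged sketch of the seven-word register block that __set_fiq_regs/__get_fiq_regs below copy in and out: FIQ mode banks its own r8-r12, sp and lr, so the helpers briefly switch into FIQ mode (with both IRQ and FIQ masked) and move exactly these words.

struct fiq_banked_regs {
	unsigned long r8, r9, r10, r11, r12;
	unsigned long sp;	/* sp_fiq */
	unsigned long lr;	/* lr_fiq */
};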
- */ - -ENTRY(__set_fiq_regs) - mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE - mrs r1, cpsr - msr cpsr_c, r2 @ select FIQ mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ldmia r0!, {r8 - r12} - ldr sp, [r0], #4 - ldr lr, [r0] - msr cpsr_c, r1 @ return to SVC mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ret lr -ENDPROC(__set_fiq_regs) - -ENTRY(__get_fiq_regs) - mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE - mrs r1, cpsr - msr cpsr_c, r2 @ select FIQ mode - mov r0, r0 @ avoid hazard prior to ARMv4 - stmia r0!, {r8 - r12} - str sp, [r0], #4 - str lr, [r0] - msr cpsr_c, r1 @ return to SVC mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ret lr -ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S deleted file mode 100644 index 4a3982812a401f1259909df4e1d05ead3f29dd9c..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head-common.S +++ /dev/null @@ -1,243 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head-common.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include - -#define ATAG_CORE 0x54410001 -#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) -#define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2) - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define OF_DT_MAGIC 0xd00dfeed -#else -#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */ -#endif - -/* - * Exception handling. Something went wrong and we can't proceed. We - * ought to tell the user, but since we don't have any guarantee that - * we're even running on the right architecture, we do virtually nothing. - * - * If CONFIG_DEBUG_LL is set we try to print out something about the error - * and hope for the best (useful if bootloader fails to pass a proper - * machine ID for example). - */ - __HEAD - -/* Determine validity of the r2 atags pointer. The heuristic requires - * that the pointer be aligned, in the first 16k of physical RAM and - * that the ATAG_CORE marker is first and present. If CONFIG_OF_FLATTREE - * is selected, then it will also accept a dtb pointer. Future revisions - * of this function may be more lenient with the physical address and - * may also be able to move the ATAGS block if necessary. - * - * Returns: - * r2 either valid atags pointer, valid dtb pointer, or zero - * r5, r6 corrupted - */ -__vet_atags: - tst r2, #0x3 @ aligned? - bne 1f - - ldr r5, [r2, #0] -#ifdef CONFIG_OF_FLATTREE - ldr r6, =OF_DT_MAGIC @ is it a DTB? - cmp r5, r6 - beq 2f -#endif - cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE? - cmpne r5, #ATAG_CORE_SIZE_EMPTY - bne 1f - ldr r5, [r2, #4] - ldr r6, =ATAG_CORE - cmp r5, r6 - bne 1f - -2: ret lr @ atag/dtb pointer is ok - -1: mov r2, #0 - ret lr -ENDPROC(__vet_atags) - -/* - * The following fragment of code is executed with the MMU on in MMU mode, - * and uses absolute addresses; this is not position independent. 
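A hedged C rendering of the __vet_atags heuristic above; vet_atags() is illustrative, and for simplicity it checks the DTB magic in both byte orders where the real code compiles in a single endian-appropriate value.

#define ATAG_CORE_TAG		0x54410001u
#define ATAG_CORE_WORDS		5u	/* (2*4 + 3*4) >> 2 */
#define ATAG_CORE_WORDS_EMPTY	2u	/* (2*4) >> 2 */
#define FDT_MAGIC		0xd00dfeedu

static const unsigned int *vet_atags(const unsigned int *p)
{
	if ((unsigned long)p & 3)
		return 0;			/* must be 4-byte aligned */
	if (p[0] == FDT_MAGIC || p[0] == __builtin_bswap32(FDT_MAGIC))
		return p;			/* looks like a DTB */
	if ((p[0] == ATAG_CORE_WORDS || p[0] == ATAG_CORE_WORDS_EMPTY) &&
	    p[1] == ATAG_CORE_TAG)
		return p;			/* ATAG_CORE is first: ok */
	return 0;				/* reject; caller zeroes r2 */
}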
- * - * r0 = cp#15 control register (exc_ret for M-class) - * r1 = machine ID - * r2 = atags/dtb pointer - * r9 = processor ID - */ - __INIT -__mmap_switched: - - mov r7, r1 - mov r8, r2 - mov r10, r0 - - adr r4, __mmap_switched_data - mov fp, #0 - -#if defined(CONFIG_XIP_DEFLATED_DATA) - ARM( ldr sp, [r4], #4 ) - THUMB( ldr sp, [r4] ) - THUMB( add r4, #4 ) - bl __inflate_kernel_data @ decompress .data to RAM - teq r0, #0 - bne __error -#elif defined(CONFIG_XIP_KERNEL) - ARM( ldmia r4!, {r0, r1, r2, sp} ) - THUMB( ldmia r4!, {r0, r1, r2, r3} ) - THUMB( mov sp, r3 ) - sub r2, r2, r1 - bl memcpy @ copy .data to RAM -#endif - - ARM( ldmia r4!, {r0, r1, sp} ) - THUMB( ldmia r4!, {r0, r1, r3} ) - THUMB( mov sp, r3 ) - sub r2, r1, r0 - mov r1, #0 - bl memset @ clear .bss - - ldmia r4, {r0, r1, r2, r3} - str r9, [r0] @ Save processor ID - str r7, [r1] @ Save machine type - str r8, [r2] @ Save atags pointer - cmp r3, #0 - strne r10, [r3] @ Save control register values - mov lr, #0 - b start_kernel -ENDPROC(__mmap_switched) - - .align 2 - .type __mmap_switched_data, %object -__mmap_switched_data: -#ifdef CONFIG_XIP_KERNEL -#ifndef CONFIG_XIP_DEFLATED_DATA - .long _sdata @ r0 - .long __data_loc @ r1 - .long _edata_loc @ r2 -#endif - .long __bss_stop @ sp (temporary stack in .bss) -#endif - - .long __bss_start @ r0 - .long __bss_stop @ r1 - .long init_thread_union + THREAD_START_SP @ sp - - .long processor_id @ r0 - .long __machine_arch_type @ r1 - .long __atags_pointer @ r2 -#ifdef CONFIG_CPU_CP15 - .long cr_alignment @ r3 -#else -M_CLASS(.long exc_ret) @ r3 -AR_CLASS(.long 0) @ r3 -#endif - .size __mmap_switched_data, . - __mmap_switched_data - - __FINIT - .text - -/* - * This provides a C-API version of __lookup_processor_type - */ -ENTRY(lookup_processor_type) - stmfd sp!, {r4 - r6, r9, lr} - mov r9, r0 - bl __lookup_processor_type - mov r0, r5 - ldmfd sp!, {r4 - r6, r9, pc} -ENDPROC(lookup_processor_type) - -/* - * Read processor ID register (CP#15, CR0), and look up in the linker-built - * supported processor list. Note that we can't use the absolute addresses - * for the __proc_info lists since we aren't running with the MMU on - * (and therefore, we are not in the correct address space). We have to - * calculate the offset. - * - * r9 = cpuid - * Returns: - * r3, r4, r6 corrupted - * r5 = proc_info pointer in physical address space - * r9 = cpuid (preserved) - */ -__lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space -1: ldmia r5, {r3, r4} @ value, mask - and r4, r4, r9 @ mask wanted bits - teq r3, r4 - beq 2f - add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list) - cmp r5, r6 - blo 1b - mov r5, #0 @ unknown processor -2: ret lr -ENDPROC(__lookup_processor_type) - -/* - * Look in for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . 
- __lookup_processor_type_data - -__error_lpae: -#ifdef CONFIG_DEBUG_LL - adr r0, str_lpae - bl printascii - b __error -str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n" -#else - b __error -#endif - .align -ENDPROC(__error_lpae) - -__error_p: -#ifdef CONFIG_DEBUG_LL - adr r0, str_p1 - bl printascii - mov r0, r9 - bl printhex8 - adr r0, str_p2 - bl printascii - b __error -str_p1: .asciz "\nError: unrecognized/unsupported processor variant (0x" -str_p2: .asciz ").\n" - .align -#endif -ENDPROC(__error_p) - -__error: -#ifdef CONFIG_ARCH_RPC -/* - * Turn the screen red on a error - RiscPC only. - */ - mov r0, #0x02000000 - mov r3, #0x11 - orr r3, r3, r3, lsl #8 - orr r3, r3, r3, lsl #16 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 -#endif -1: mov r0, r0 - b 1b -ENDPROC(__error) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S deleted file mode 100644 index 0fc814bbc34b171e43e55a57b081220b15fe70a2..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head-nommu.S +++ /dev/null @@ -1,535 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head-nommu.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (C) 2003-2006 Hyok S. Choi - * - * Common kernel startup code (non-paged MM) - */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Kernel startup entry point. - * --------------------------- - * - * This is normally called from the decompressor code. The requirements - * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr. - * - * See linux/arch/arm/tools/mach-types for the complete list of machine - * numbers for r1. - * - */ - - __HEAD - -#ifdef CONFIG_CPU_THUMBONLY - .thumb -ENTRY(stext) -#else - .arm -ENTRY(stext) - - THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -#endif - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install -#endif - @ ensure svc mode and all interrupts masked - safe_svcmode_maskall r9 - @ and irqs disabled -#if defined(CONFIG_CPU_CP15) - mrc p15, 0, r9, c0, c0 @ get processor id -#elif defined(CONFIG_CPU_V7M) - ldr r9, =BASEADDR_V7M_SCB - ldr r9, [r9, V7M_SCB_CPUID] -#else - ldr r9, =CONFIG_PROCESSOR_ID -#endif - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor (r5=0)? - beq __error_p @ yes, error 'p' - -#ifdef CONFIG_ARM_MPU - bl __setup_mpu -#endif - - badr lr, 1f @ return (PIC) address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: ldr lr, =__mmap_switched - b __after_proc_init -ENDPROC(stext) - -#ifdef CONFIG_SMP - .text -ENTRY(secondary_startup) - /* - * Common entry point for secondary CPUs. - * - * Ensure that we're in SVC mode, and IRQs are disabled. Lookup - * the processor type - there is no need to check the machine type - * as it has already been validated by the primary processor. - */ -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r9 - -#ifndef CONFIG_CPU_CP15 - ldr r9, =CONFIG_PROCESSOR_ID -#else - mrc p15, 0, r9, c0, c0 @ get processor id -#endif - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor? 
- beq __error_p @ yes, error 'p' - - ldr r7, __secondary_data - -#ifdef CONFIG_ARM_MPU - bl __secondary_setup_mpu @ Initialize the MPU -#endif - - badr lr, 1f @ return (PIC) address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: bl __after_proc_init - ldr sp, [r7, #12] @ set up the stack pointer - mov fp, #0 - b secondary_start_kernel -ENDPROC(secondary_startup) - - .type __secondary_data, %object -__secondary_data: - .long secondary_data -#endif /* CONFIG_SMP */ - -/* - * Set the Control Register and Read the process ID. - */ - .text -__after_proc_init: -M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) -M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) -#ifdef CONFIG_ARM_MPU -M_CLASS(ldr r3, [r12, 0x50]) -AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 - and r3, r3, #(MMFR0_PMSA) @ PMSA field - teq r3, #(MMFR0_PMSAv7) @ PMSA v7 - beq 1f - teq r3, #(MMFR0_PMSAv8) @ PMSA v8 - /* - * Memory region attributes for PMSAv8: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * NORMAL 001 11111111 - */ - ldreq r3, =PMSAv8_MAIR(0x00, PMSAv8_RGN_DEVICE_nGnRnE) | \ - PMSAv8_MAIR(0xff, PMSAv8_RGN_NORMAL) -AR_CLASS(mcreq p15, 0, r3, c10, c2, 0) @ MAIR 0 -M_CLASS(streq r3, [r12, #PMSAv8_MAIR0]) - moveq r3, #0 -AR_CLASS(mcreq p15, 0, r3, c10, c2, 1) @ MAIR 1 -M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) - -1: -#endif -#ifdef CONFIG_CPU_CP15 - /* - * CP15 system control register value returned in r0 from - * the CPU init function. - */ - -#ifdef CONFIG_ARM_MPU - biceq r0, r0, #CR_BR @ Disable the 'default mem-map' - orreq r0, r0, #CR_M @ Set SCTRL.M (MPU on) -#endif -#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 - orr r0, r0, #CR_A -#else - bic r0, r0, #CR_A -#endif -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CR_C -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #CR_Z -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #CR_I -#endif - mcr p15, 0, r0, c1, c0, 0 @ write control reg - instr_sync -#elif defined (CONFIG_CPU_V7M) -#ifdef CONFIG_ARM_MPU - ldreq r3, [r12, MPU_CTRL] - biceq r3, #MPU_CTRL_PRIVDEFENA - orreq r3, #MPU_CTRL_ENABLE - streq r3, [r12, MPU_CTRL] - isb -#endif - /* For V7M systems we want to modify the CCR similarly to the SCTLR */ -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #V7M_SCB_CCR_DC -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #V7M_SCB_CCR_BP -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #V7M_SCB_CCR_IC -#endif - str r0, [r12, V7M_SCB_CCR] - /* Pass exc_ret to __mmap_switched */ - mov r0, r10 -#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ - ret lr -ENDPROC(__after_proc_init) - .ltorg - -#ifdef CONFIG_ARM_MPU - - -#ifndef CONFIG_CPU_V7M -/* Set which MPU region should be programmed */ -.macro set_region_nr tmp, rgnr, unused - mov \tmp, \rgnr @ Use static region numbers - mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR -.endm - -/* Setup a single MPU region, either D or I side (D-side for unified) */ -.macro setup_region bar, acr, sr, side = PMSAv7_DATA_SIDE, unused - mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR - mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR - mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR -.endm -#else -.macro set_region_nr tmp, rgnr, base - mov \tmp, \rgnr - str \tmp, [\base, #PMSAv7_RNR] -.endm - -.macro setup_region bar, acr, sr, unused, base - lsl \acr, \acr, #16 - orr \acr, \acr, \sr - str \bar, [\base, #PMSAv7_RBAR] - str \acr, [\base, #PMSAv7_RASR] -.endm - -#endif -/* - * Setup the MPU and initial MPU Regions. 
We create the following regions: - * Region 0: Use this for probing the MPU details, so leave disabled. - * Region 1: Background region - covers the whole of RAM as strongly ordered - * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 - * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page - * - * r6: Value to be written to DRSR (and IRSR if required) for PMSAv7_RAM_REGION -*/ - __HEAD - -ENTRY(__setup_mpu) - - /* Probe for v7 PMSA compliance */ -M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) -M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) - -AR_CLASS(mrc p15, 0, r0, c0, c1, 4) @ Read ID_MMFR0 -M_CLASS(ldr r0, [r12, 0x50]) - and r0, r0, #(MMFR0_PMSA) @ PMSA field - teq r0, #(MMFR0_PMSAv7) @ PMSA v7 - beq __setup_pmsa_v7 - teq r0, #(MMFR0_PMSAv8) @ PMSA v8 - beq __setup_pmsa_v8 - - ret lr -ENDPROC(__setup_mpu) - -ENTRY(__setup_pmsa_v7) - /* Calculate the size of a region covering just the kernel */ - ldr r5, =PLAT_PHYS_OFFSET @ Region start: PHYS_OFFSET - ldr r6, =(_end) @ Cover whole kernel - sub r6, r6, r5 @ Minimum size of region to map - clz r6, r6 @ Region size must be 2^N... - rsb r6, r6, #31 @ ...so round up region size - lsl r6, r6, #PMSAv7_RSR_SZ @ Put size in right field - orr r6, r6, #(1 << PMSAv7_RSR_EN) @ Set region enabled bit - - /* Determine whether the D/I-side memory map is unified. We set the - * flags here and continue to use them for the rest of this function */ -AR_CLASS(mrc p15, 0, r0, c0, c0, 4) @ MPUIR -M_CLASS(ldr r0, [r12, #MPU_TYPE]) - ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU - bxeq lr - tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified - - /* Setup second region first to free up r6 */ - set_region_nr r0, #PMSAv7_RAM_REGION, r12 - isb - /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ - ldr r0, =PLAT_PHYS_OFFSET @ RAM starts at PHYS_OFFSET - ldr r5,=(PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL) - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ PHYS_OFFSET, shared, enabled - beq 1f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12 @ PHYS_OFFSET, shared, enabled -1: isb - - /* First/background region */ - set_region_nr r0, #PMSAv7_BG_REGION, r12 - isb - /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ - mov r0, #0 @ BG region starts at 0x0 - ldr r5,=(PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0NA) - mov r6, #PMSAv7_RSR_ALL_MEM @ 4GB region, enabled - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ 0x0, BG region, enabled - beq 2f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE r12 @ 0x0, BG region, enabled -2: isb - -#ifdef CONFIG_XIP_KERNEL - set_region_nr r0, #PMSAv7_ROM_REGION, r12 - isb - - ldr r5,=(PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL) - - ldr r0, =CONFIG_XIP_PHYS_ADDR @ ROM start - ldr r6, =(_exiprom) @ ROM end - sub r6, r6, r0 @ Minimum size of region to map - clz r6, r6 @ Region size must be 2^N... 
- rsb r6, r6, #31 @ ...so round up region size - lsl r6, r6, #PMSAv7_RSR_SZ @ Put size in right field - orr r6, r6, #(1 << PMSAv7_RSR_EN) @ Set region enabled bit - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled - beq 3f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled -3: isb -#endif - ret lr -ENDPROC(__setup_pmsa_v7) - -ENTRY(__setup_pmsa_v8) - mov r0, #0 -AR_CLASS(mcr p15, 0, r0, c6, c2, 1) @ PRSEL -M_CLASS(str r0, [r12, #PMSAv8_RNR]) - isb - -#ifdef CONFIG_XIP_KERNEL - ldr r5, =CONFIG_XIP_PHYS_ADDR @ ROM start - ldr r6, =(_exiprom) @ ROM end - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c8, 0) @ PRBAR0 -AR_CLASS(mcr p15, 0, r6, c6, c8, 1) @ PRLAR0 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(0)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(0)]) -#endif - - ldr r5, =KERNEL_START - ldr r6, =KERNEL_END - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c8, 4) @ PRBAR1 -AR_CLASS(mcr p15, 0, r6, c6, c8, 5) @ PRLAR1 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(1)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(1)]) - - /* Setup Background: 0x0 - min(KERNEL_START, XIP_PHYS_ADDR) */ -#ifdef CONFIG_XIP_KERNEL - ldr r6, =KERNEL_START - ldr r5, =CONFIG_XIP_PHYS_ADDR - cmp r6, r5 - movcs r6, r5 -#else - ldr r6, =KERNEL_START -#endif - cmp r6, #0 - beq 1f - - mov r5, #0 - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c9, 0) @ PRBAR2 -AR_CLASS(mcr p15, 0, r6, c6, c9, 1) @ PRLAR2 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(2)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(2)]) - -1: - /* Setup Background: max(KERNEL_END, _exiprom) - 0xffffffff */ -#ifdef CONFIG_XIP_KERNEL - ldr r5, =KERNEL_END - ldr r6, =(_exiprom) - cmp r5, r6 - movcc r5, r6 -#else - ldr r5, =KERNEL_END -#endif - mov r6, #0xffffffff - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c9, 4) @ PRBAR3 -AR_CLASS(mcr p15, 0, r6, c6, c9, 5) @ PRLAR3 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(3)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(3)]) - -#ifdef CONFIG_XIP_KERNEL - /* Setup Background: min(_exiprom, KERNEL_END) - max(KERNEL_START, XIP_PHYS_ADDR) */ - ldr r5, =(_exiprom) - ldr r6, =KERNEL_END - cmp r5, r6 - movcs r5, r6 - - ldr r6, =KERNEL_START - ldr r0, =CONFIG_XIP_PHYS_ADDR - cmp r6, r0 - movcc r6, r0 - - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -#ifdef CONFIG_CPU_V7M - /* There is no alias for n == 4 */ - mov r0, #4 - str r0, [r12, #PMSAv8_RNR] @ PRSEL - isb - - str r5, [r12, #PMSAv8_RBAR_A(0)] - str r6, [r12, #PMSAv8_RLAR_A(0)] -#else - mcr p15, 0, r5, c6, c10, 0 @ PRBAR4 - mcr p15, 0, r6, c6, c10, 1 @ PRLAR4 -#endif -#endif - ret lr -ENDPROC(__setup_pmsa_v8) - -#ifdef CONFIG_SMP -/* - * r6: pointer at mpu_rgn_info - */ - - .text 
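A hedged model of the clz/rsb/lsl/orr sizing sequence used above: the PMSAv7 DRSR size field (bits [5:1]) encodes a region of 2^(SZ+1) bytes, so taking SZ = 31 - clz(span) always yields a power-of-two region at least as large as the span. pmsav7_drsr() is illustrative only; span must be non-zero.

static unsigned int pmsav7_drsr(unsigned int span_bytes)
{
	unsigned int sz = 31u - (unsigned int)__builtin_clz(span_bytes);

	return (sz << 1) | 1u;		/* size field plus enable bit */
}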
-ENTRY(__secondary_setup_mpu) - /* Use MPU region info supplied by __cpu_up */ - ldr r6, [r7] @ get secondary_data.mpu_rgn_info - - /* Probe for v7 PMSA compliance */ - mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 - and r0, r0, #(MMFR0_PMSA) @ PMSA field - teq r0, #(MMFR0_PMSAv7) @ PMSA v7 - beq __secondary_setup_pmsa_v7 - teq r0, #(MMFR0_PMSAv8) @ PMSA v8 - beq __secondary_setup_pmsa_v8 - b __error_p -ENDPROC(__secondary_setup_mpu) - -/* - * r6: pointer at mpu_rgn_info - */ -ENTRY(__secondary_setup_pmsa_v7) - /* Determine whether the D/I-side memory map is unified. We set the - * flags here and continue to use them for the rest of this function */ - mrc p15, 0, r0, c0, c0, 4 @ MPUIR - ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU - beq __error_p - - ldr r4, [r6, #MPU_RNG_INFO_USED] - mov r5, #MPU_RNG_SIZE - add r3, r6, #MPU_RNG_INFO_RNGS - mla r3, r4, r5, r3 - -1: - tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified - sub r3, r3, #MPU_RNG_SIZE - sub r4, r4, #1 - - set_region_nr r0, r4 - isb - - ldr r0, [r3, #MPU_RGN_DRBAR] - ldr r6, [r3, #MPU_RGN_DRSR] - ldr r5, [r3, #MPU_RGN_DRACR] - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE - beq 2f - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE -2: isb - - mrc p15, 0, r0, c0, c0, 4 @ Reevaluate the MPUIR - cmp r4, #0 - bgt 1b - - ret lr -ENDPROC(__secondary_setup_pmsa_v7) - -ENTRY(__secondary_setup_pmsa_v8) - ldr r4, [r6, #MPU_RNG_INFO_USED] -#ifndef CONFIG_XIP_KERNEL - add r4, r4, #1 -#endif - mov r5, #MPU_RNG_SIZE - add r3, r6, #MPU_RNG_INFO_RNGS - mla r3, r4, r5, r3 - -1: - sub r3, r3, #MPU_RNG_SIZE - sub r4, r4, #1 - - mcr p15, 0, r4, c6, c2, 1 @ PRSEL - isb - - ldr r5, [r3, #MPU_RGN_PRBAR] - ldr r6, [r3, #MPU_RGN_PRLAR] - - mcr p15, 0, r5, c6, c3, 0 @ PRBAR - mcr p15, 0, r6, c6, c3, 1 @ PRLAR - - cmp r4, #0 - bgt 1b - - ret lr -ENDPROC(__secondary_setup_pmsa_v8) -#endif /* CONFIG_SMP */ -#endif /* CONFIG_ARM_MPU */ -#include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S deleted file mode 100644 index f1cdc1f369575c368a3ea012375c19b52cc47bb9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head.S +++ /dev/null @@ -1,727 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - * - * Kernel startup code for all 32-bit CPUs - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING) -#include CONFIG_DEBUG_LL_INCLUDE -#endif - -/* - * swapper_pg_dir is the virtual address of the initial page table. - * We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must - * make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect - * the least significant 16 bits to be 0x8000, but we could probably - * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000. - */ -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000 -#error KERNEL_RAM_VADDR must start at 0xXXXX8000 -#endif - -#ifdef CONFIG_ARM_LPAE - /* LPAE requires an additional page for the PGD */ -#define PG_DIR_SIZE 0x5000 -#define PMD_ORDER 3 -#else -#define PG_DIR_SIZE 0x4000 -#define PMD_ORDER 2 -#endif - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE - - .macro pgtbl, rd, phys - add \rd, \phys, #TEXT_OFFSET - sub \rd, \rd, #PG_DIR_SIZE - .endm - -/* - * Kernel startup entry point. 
- * --------------------------- - * - * This is normally called from the decompressor code. The requirements - * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr, r2 = atags or dtb pointer. - * - * This code is mostly position independent, so if you link the kernel at - * 0xc0008000, you call this at __pa(0xc0008000). - * - * See linux/arch/arm/tools/mach-types for the complete list of machine - * numbers for r1. - * - * We're trying to keep crap to a minimum; DO NOT add any machine specific - * crap here - that's what the boot loader (or in extreme, well justified - * circumstances, zImage) is for. - */ - .arm - - __HEAD -ENTRY(stext) - ARM_BE8(setend be ) @ ensure we are in BE8 mode - - THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install -#endif - @ ensure svc mode and all interrupts masked - safe_svcmode_maskall r9 - - mrc p15, 0, r9, c0, c0 @ get processor id - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor (r5=0)? - THUMB( it eq ) @ force fixup-able long branch encoding - beq __error_p @ yes, error 'p' - -#ifdef CONFIG_ARM_LPAE - mrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0 - and r3, r3, #0xf @ extract VMSA support - cmp r3, #5 @ long-descriptor translation table format? - THUMB( it lo ) @ force fixup-able long branch encoding - blo __error_lpae @ only classic page table format -#endif - -#ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET -#else - ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case -#endif - - /* - * r1 = machine no, r2 = atags or dtb, - * r8 = phys_offset, r9 = cpuid, r10 = procinfo - */ - bl __vet_atags -#ifdef CONFIG_SMP_ON_UP - bl __fixup_smp -#endif -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - bl __fixup_pv_table -#endif - bl __create_page_tables - - /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm/mm/proc-*.S for details. r10 = base of - * xxx_proc_info structure selected by __lookup_processor_type - * above. - * - * The processor init function will be called with: - * r1 - machine type - * r2 - boot data (atags/dt) pointer - * r4 - translation table base (low word) - * r5 - translation table base (high word, if LPAE) - * r8 - translation table base 1 (pfn if LPAE) - * r9 - cpuid - * r13 - virtual address for __enable_mmu -> __turn_mmu_on - * - * On return, the CPU will be ready for the MMU to be turned on, - * r0 will hold the CPU control register value, r1, r2, r4, and - * r9 will be preserved. r5 will also be preserved if LPAE. - */ - ldr r13, =__mmap_switched @ address to jump to after - @ mmu has been enabled - badr lr, 1f @ return (PIC) address -#ifdef CONFIG_ARM_LPAE - mov r5, #0 @ high TTBR0 - mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn -#else - mov r8, r4 @ set TTBR1 to swapper_pg_dir -#endif - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: b __enable_mmu -ENDPROC(stext) - .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif - -/* - * Setup the initial page tables. We only setup the barest - * amount which are required to get the kernel running, which - * generally means mapping in the kernel code. 
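A hedged arithmetic check of the page-table placement described at the top of this file, using the classic 3G/1G split as assumed example values (the EX_* names are illustrative, not kernel symbols):

#define EX_PAGE_OFFSET	0xc0000000UL
#define EX_TEXT_OFFSET	0x00008000UL
#define EX_PG_DIR_SIZE	0x4000UL	/* non-LPAE: 16 KiB */

#define EX_KERNEL_RAM_VADDR (EX_PAGE_OFFSET + EX_TEXT_OFFSET)	 /* 0xc0008000 */
#define EX_SWAPPER_PG_DIR   (EX_KERNEL_RAM_VADDR - EX_PG_DIR_SIZE) /* 0xc0004000 */

_Static_assert((EX_KERNEL_RAM_VADDR & 0xffffUL) == 0x8000UL,
	       "KERNEL_RAM_VADDR must end in 0x8000");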
- * - * r8 = phys_offset, r9 = cpuid, r10 = procinfo - * - * Returns: - * r0, r3, r5-r7 corrupted - * r4 = physical page table address - */ -__create_page_tables: - pgtbl r4, r8 @ page table address - - /* - * Clear the swapper page table - */ - mov r0, r4 - mov r3, #0 - add r6, r0, #PG_DIR_SIZE -1: str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - teq r0, r6 - bne 1b - -#ifdef CONFIG_ARM_LPAE - /* - * Build the PGD table (first level) to point to the PMD table. A PGD - * entry is 64-bit wide. - */ - mov r0, r4 - add r3, r4, #0x1000 @ first PMD table address - orr r3, r3, #3 @ PGD block type - mov r6, #4 @ PTRS_PER_PGD - mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER -1: -#ifdef CONFIG_CPU_ENDIAN_BE8 - str r7, [r0], #4 @ set top PGD entry bits - str r3, [r0], #4 @ set bottom PGD entry bits -#else - str r3, [r0], #4 @ set bottom PGD entry bits - str r7, [r0], #4 @ set top PGD entry bits -#endif - add r3, r3, #0x1000 @ next PMD table - subs r6, r6, #1 - bne 1b - - add r4, r4, #0x1000 @ point to the PMD tables -#ifdef CONFIG_CPU_ENDIAN_BE8 - add r4, r4, #4 @ we only write the bottom word -#endif -#endif - - ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags - - /* - * Create identity mapping to cater for __enable_mmu. - * This identity mapping will be removed by paging_init(). - */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end - mov r5, r5, lsr #SECTION_SHIFT - mov r6, r6, lsr #SECTION_SHIFT - -1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base - str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping - cmp r5, r6 - addlo r5, r5, #1 @ next section - blo 1b - - /* - * Map our RAM from the start to the end of the kernel .bss section. - */ - add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) - ldr r6, =(_end - 1) - orr r3, r8, r7 - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: str r3, [r0], #1 << PMD_ORDER - add r3, r3, #1 << SECTION_SHIFT - cmp r0, r6 - bls 1b - -#ifdef CONFIG_XIP_KERNEL - /* - * Map the kernel image separately as it is not located in RAM. - */ -#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) - mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! - ldr r6, =(_edata_loc - 1) - add r0, r0, #1 << PMD_ORDER - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: cmp r0, r6 - add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER - bls 1b -#endif - - /* - * Then map boot params address in r2 if specified. - * We map 2 sections in case the ATAGs/DTB crosses a section boundary. - */ - mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orrne r6, r7, r0 - strne r6, [r3], #1 << PMD_ORDER - addne r6, r6, #1 << SECTION_SHIFT - strne r6, [r3] - -#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) - sub r4, r4, #4 @ Fixup page table pointer - @ for 64-bit descriptors -#endif - -#ifdef CONFIG_DEBUG_LL -#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) - /* - * Map in IO space for serial debugging. - * This allows debug messages to be output - * via a serial console before paging_init. 
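The identity-mapping and RAM-mapping loops above each write one first-level entry per 1 MiB section (classic, non-LPAE format, where PMD_ORDER is 2 and an entry is a single 32-bit word). An illustrative C equivalent, not the kernel's actual code:

#include <stdint.h>

#define SECTION_SHIFT   20                      /* 1 MiB sections */
#define SECTION_SIZE    (1u << SECTION_SHIFT)

/* Write a section mapping for [virt, virt+size) -> phys with the given
 * mm_mmuflags, one 32-bit first-level entry per 1 MiB. */
static void map_section_range(uint32_t *pgd, uint32_t virt, uint32_t phys,
                              uint32_t size, uint32_t mmuflags)
{
        uint32_t end = virt + size;

        for (; virt < end; virt += SECTION_SIZE, phys += SECTION_SIZE)
                pgd[virt >> SECTION_SHIFT] =
                        (phys & ~(SECTION_SIZE - 1)) | mmuflags;
}

/* e.g. the identity map over __turn_mmu_on: virtual == physical */
/* map_section_range(pgd, mmu_on_phys, mmu_on_phys, SECTION_SIZE, flags); */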
- */ - addruart r7, r3, r0 - - mov r3, r3, lsr #SECTION_SHIFT - mov r3, r3, lsl #PMD_ORDER - - add r0, r4, r3 - mov r3, r7, lsr #SECTION_SHIFT - ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags - orr r3, r7, r3, lsl #SECTION_SHIFT -#ifdef CONFIG_ARM_LPAE - mov r7, #1 << (54 - 32) @ XN -#ifdef CONFIG_CPU_ENDIAN_BE8 - str r7, [r0], #4 - str r3, [r0], #4 -#else - str r3, [r0], #4 - str r7, [r0], #4 -#endif -#else - orr r3, r3, #PMD_SECT_XN - str r3, [r0], #4 -#endif - -#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */ - /* we don't need any serial debugging mappings */ - ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags -#endif - -#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS) - /* - * If we're using the NetWinder or CATS, we also need to map - * in the 16550-type serial port for the debug messages - */ - add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER) - orr r3, r7, #0x7c000000 - str r3, [r0] -#endif -#ifdef CONFIG_ARCH_RPC - /* - * Map in screen at 0x02000000 & SCREEN2_BASE - * Similar reasons here - for debug. This is - * only for Acorn RiscPC architectures. - */ - add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER) - orr r3, r7, #0x02000000 - str r3, [r0] - add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0] -#endif -#endif -#ifdef CONFIG_ARM_LPAE - sub r4, r4, #0x1000 @ point to the PGD table -#endif - ret lr -ENDPROC(__create_page_tables) - .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end - -#if defined(CONFIG_SMP) - .text - .arm -ENTRY(secondary_startup_arm) - THUMB( badr r9, 1f ) @ Kernel is entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -ENTRY(secondary_startup) - /* - * Common entry point for secondary CPUs. - * - * Ensure that we're in SVC mode, and IRQs are disabled. Lookup - * the processor type - there is no need to check the machine type - * as it has already been validated by the primary processor. - */ - - ARM_BE8(setend be) @ ensure we are in BE8 mode - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r9 - - mrc p15, 0, r9, c0, c0 @ get processor id - bl __lookup_processor_type - movs r10, r5 @ invalid processor? - moveq r0, #'p' @ yes, error 'p' - THUMB( it eq ) @ force fixup-able long branch encoding - beq __error_p - - /* - * Use the page tables supplied from __cpu_up. - */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr - ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir -ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: -ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps -ARM_BE8(eor r4, r4, r5) @ without using a temp reg. - ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir - badr lr, __enable_mmu @ return address - mov r13, r12 @ __secondary_switched address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 @ initialise processor - @ (return control reg) - ret r12 -ENDPROC(secondary_startup) -ENDPROC(secondary_startup_arm) - - /* - * r6 = &secondary_data - */ -ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack - mov fp, #0 - b secondary_start_kernel -ENDPROC(__secondary_switched) - - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched -#endif /* defined(CONFIG_SMP) */ - - - -/* - * Setup common bits before finally enabling the MMU. 
Essentially - * this is just loading the page table pointer and domain access - * registers. All these registers need to be preserved by the - * processor setup function (or set in the case of r0) - * - * r0 = cp#15 control register - * r1 = machine ID - * r2 = atags or dtb pointer - * r4 = TTBR pointer (low word) - * r5 = TTBR pointer (high word if LPAE) - * r9 = processor ID - * r13 = *virtual* address to jump to upon completion - */ -__enable_mmu: -#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 - orr r0, r0, #CR_A -#else - bic r0, r0, #CR_A -#endif -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CR_C -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #CR_Z -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #CR_I -#endif -#ifdef CONFIG_ARM_LPAE - mcrr p15, 0, r4, r5, c2 @ load TTBR0 -#else - mov r5, #DACR_INIT - mcr p15, 0, r5, c3, c0, 0 @ load domain access register - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer -#endif - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible - * memory space. You will not be able to trace execution through this. - * If you have an enquiry about this, *please* check the linux-arm-kernel - * mailing list archives BEFORE sending another post to the list. - * - * r0 = cp#15 control register - * r1 = machine ID - * r2 = atags or dtb pointer - * r9 = processor ID - * r13 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(__turn_mmu_on) - mov r0, r0 - instr_sync - mcr p15, 0, r0, c1, c0, 0 @ write control reg - mrc p15, 0, r3, c0, c0, 0 @ read id reg - instr_sync - mov r3, r3 - mov r3, r13 - ret r3 -__turn_mmu_on_end: -ENDPROC(__turn_mmu_on) - .popsection - - -#ifdef CONFIG_SMP_ON_UP - __HEAD -__fixup_smp: - and r3, r9, #0x000f0000 @ architecture version - teq r3, #0x000f0000 @ CPU ID supported? - bne __fixup_smp_on_up @ no, assume UP - - bic r3, r9, #0x00ff0000 - bic r3, r3, #0x0000000f @ mask 0xff00fff0 - mov r4, #0x41000000 - orr r4, r4, #0x0000b000 - orr r4, r4, #0x00000020 @ val 0x4100b020 - teq r3, r4 @ ARM 11MPCore? - reteq lr @ yes, assume SMP - - mrc p15, 0, r0, c0, c0, 5 @ read MPIDR - and r0, r0, #0xc0000000 @ multiprocessing extensions and - teq r0, #0x80000000 @ not part of a uniprocessor system? - bne __fixup_smp_on_up @ no, assume UP - - @ Core indicates it is SMP. Check for Aegis SOC where a single - @ Cortex-A9 CPU is present but SMP operations fault. - mov r4, #0x41000000 - orr r4, r4, #0x0000c000 - orr r4, r4, #0x00000090 - teq r3, r4 @ Check for ARM Cortex-A9 - retne lr @ Not ARM Cortex-A9, - - @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the - @ below address check will need to be #ifdef'd or equivalent - @ for the Aegis platform. - mrc p15, 4, r0, c15, c0 @ get SCU base address - teq r0, #0x0 @ '0' on actual UP A9 hardware - beq __fixup_smp_on_up @ So its an A9 UP - ldr r0, [r0, #4] @ read SCU Config -ARM_BE8(rev r0, r0) @ byteswap if big endian - and r0, r0, #0x3 @ number of CPUs - teq r0, #0x0 @ is 1? - retne lr - -__fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 - b __do_fixup_smp_on_up -ENDPROC(__fixup_smp) - - .align -1: .word . 
- .word __smpalt_begin - .word __smpalt_end - - .pushsection .data - .align 2 - .globl smp_on_up -smp_on_up: - ALT_SMP(.long 1) - ALT_UP(.long 0) - .popsection -#endif - - .text -__do_fixup_smp_on_up: - cmp r4, r5 - reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) -#ifdef __ARMEB__ - THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. -#endif - THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. - THUMB( strh r6, [r0] ) - b __do_fixup_smp_on_up -ENDPROC(__do_fixup_smp_on_up) - -ENTRY(fixup_smp) - stmfd sp!, {r4 - r6, lr} - mov r4, r0 - add r5, r0, r1 - mov r3, #0 - bl __do_fixup_smp_on_up - ldmfd sp!, {r4 - r6, pc} -ENDPROC(fixup_smp) - -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . 
- .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - -#include "head-common.S" diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S deleted file mode 100644 index 6607fa817bba9a5510ac0a0c2de88342a80dfdb9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/hyp-stub.S +++ /dev/null @@ -1,271 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2012 Linaro Limited. - */ - -#include -#include -#include -#include -#include - -#ifndef ZIMAGE -/* - * For the kernel proper, we need to find out the CPU boot mode long after - * boot, so we need to store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -.data - .align 2 -ENTRY(__boot_cpu_mode) - .long 0 -.text - - /* - * Save the primary CPU boot mode. Requires 3 scratch registers. - */ - .macro store_primary_cpu_mode reg1, reg2, reg3 - mrs \reg1, cpsr - and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] - .endm - - /* - * Compare the current mode with the one saved on the primary CPU. - * If they don't match, record that fact. The Z bit indicates - * if there's a match or not. 
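__fixup_a_pv_table above rewrites the immediate field of every `add/sub rd, rn, #imm` stub listed in the pv table. Because __pv_offset is verified to be 16 MiB aligned, its significant bits always fit the 8-bit immediate rotated into bits 31..24. A hedged sketch of the ARM (non-Thumb) patch step, not the kernel's exact code:

#include <stdint.h>

/* Patch one ARM-encoded 'add/sub rd, rn, #imm' stub so its rotated
 * immediate encodes pv_offset bits 31..24; the 16 MiB alignment check
 * above guarantees the low 24 bits of pv_offset are zero. */
static uint32_t patch_pv_stub(uint32_t insn, uint32_t pv_offset)
{
        insn &= ~0x000000ffu;           /* clear the imm8 field */
        insn |= pv_offset >> 24;        /* rotation field already selects <<24 */
        return insn;
}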
- * Requires 3 additional scratch registers.
- */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-	adr	\reg2, .L__boot_cpu_mode_offset
-	ldr	\reg3, [\reg2]
-	ldr	\reg1, [\reg2, \reg3]
-	cmp	\mode, \reg1		@ matches primary CPU boot mode?
-	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
-	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
-	.endm
-
-#else	/* ZIMAGE */
-
-	.macro	store_primary_cpu_mode reg1:req, reg2:req, reg3:req
-	.endm
-
-/*
- * The zImage loader only runs on one CPU, so we don't bother with multi-CPU
- * consistency checking:
- */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-	cmp	\mode, \mode
-	.endm
-
-#endif /* ZIMAGE */
-
-/*
- * Hypervisor stub installation functions.
- *
- * These must be called with the MMU and D-cache off.
- * They are not ABI compliant and are only intended to be called from the kernel
- * entry points in head.S.
- */
-@ Call this from the primary CPU
-ENTRY(__hyp_stub_install)
-	store_primary_cpu_mode	r4, r5, r6
-ENDPROC(__hyp_stub_install)
-
-	@ fall through...
-
-@ Secondary CPUs should call here
-ENTRY(__hyp_stub_install_secondary)
-	mrs	r4, cpsr
-	and	r4, r4, #MODE_MASK
-
-	/*
-	 * If the secondary has booted with a different mode, give up
-	 * immediately.
-	 */
-	compare_cpu_mode_with_primary	r4, r5, r6, r7
-	retne	lr
-
-	/*
-	 * Once we have given up on one CPU, we do not try to install the
-	 * stub hypervisor on the remaining ones: because the saved boot mode
-	 * is modified, it can't compare equal to the CPSR mode field any
-	 * more.
-	 *
-	 * Otherwise...
-	 */
-
-	cmp	r4, #HYP_MODE
-	retne	lr			@ give up if the CPU is not in HYP mode
-
-/*
- * Configure HSCTLR to set correct exception endianness/instruction set
- * state etc.
- * Turn off all traps
- * Eventually, CPU-specific code might be needed -- assume not for now
- *
- * This code relies on the "eret" instruction to synchronize the
- * various coprocessor accesses. This is done when we switch to SVC
- * (see safe_svcmode_maskall).
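Once every CPU has run this install path, generic kernel code can decide whether HYP mode is actually usable. A sketch of that predicate; the MODE_MASK and HYP_MODE values follow the ARM CPSR encoding, while the BOOT_CPU_MODE_MISMATCH value shown is an assumption standing in for the header definition:

#include <stdbool.h>

#define MODE_MASK               0x1f            /* CPSR mode field */
#define HYP_MODE                0x1a            /* ARMv7 HYP mode encoding */
#define BOOT_CPU_MODE_MISMATCH  0x80000000u     /* assumed flag bit */

extern unsigned int __boot_cpu_mode;    /* written by store_primary_cpu_mode */

/* True only if the boot CPU entered in HYP mode and no secondary CPU
 * recorded a mode mismatch via compare_cpu_mode_with_primary. */
static bool hyp_mode_usable(void)
{
        return (__boot_cpu_mode & MODE_MASK) == HYP_MODE &&
               !(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
}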
- */ - @ Now install the hypervisor stub: - W(adr) r7, __hyp_stub_vectors - mcr p15, 4, r7, c12, c0, 0 @ set hypervisor vector base (HVBAR) - - @ Disable all traps, so we don't get any nasty surprise - mov r7, #0 - mcr p15, 4, r7, c1, c1, 0 @ HCR - mcr p15, 4, r7, c1, c1, 2 @ HCPTR - mcr p15, 4, r7, c1, c1, 3 @ HSTR - -THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE -ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE - mcr p15, 4, r7, c1, c0, 0 @ HSCTLR - - mrc p15, 4, r7, c1, c1, 1 @ HDCR - and r7, #0x1f @ Preserve HPMN - mcr p15, 4, r7, c1, c1, 1 @ HDCR - - @ Make sure NS-SVC is initialised appropriately - mrc p15, 0, r7, c1, c0, 0 @ SCTLR - orr r7, #(1 << 5) @ CP15 barriers enabled - bic r7, #(3 << 7) @ Clear SED/ITD for v8 (RES0 for v7) - bic r7, #(3 << 19) @ WXN and UWXN disabled - mcr p15, 0, r7, c1, c0, 0 @ SCTLR - - mrc p15, 0, r7, c0, c0, 0 @ MIDR - mcr p15, 4, r7, c0, c0, 0 @ VPIDR - - mrc p15, 0, r7, c0, c0, 5 @ MPIDR - mcr p15, 4, r7, c0, c0, 5 @ VMPIDR - -#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) - @ make CNTP_* and CNTPCT accessible from PL1 - mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - ubfx r7, r7, #16, #4 - teq r7, #0 - beq 1f - mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL - orr r7, r7, #3 @ PL1PCEN | PL1PCTEN - mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL - mov r7, #0 - mcrr p15, 4, r7, r7, c14 @ CNTVOFF - - @ Disable virtual timer in case it was counting - mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL - bic r7, #1 @ Clear ENABLE - mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL -1: -#endif - -#ifdef CONFIG_ARM_GIC_V3 - @ Check whether GICv3 system registers are available - mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - ubfx r7, r7, #28, #4 - teq r7, #0 - beq 2f - - @ Enable system register accesses - mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE - orr r7, r7, #(ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE) - mcr p15, 4, r7, c12, c9, 5 @ ICC_HSRE - isb - - @ SRE bit could be forced to 0 by firmware. - @ Check whether it sticks before accessing any other sysreg - mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE - tst r7, #ICC_SRE_EL2_SRE - beq 2f - mov r7, #0 - mcr p15, 4, r7, c12, c11, 0 @ ICH_HCR -2: -#endif - - bx lr @ The boot CPU mode is left in r4. -ENDPROC(__hyp_stub_install_secondary) - -__hyp_stub_do_trap: - teq r0, #HVC_SET_VECTORS - bne 1f - mcr p15, 4, r1, c12, c0, 0 @ set HVBAR - b __hyp_stub_exit - -1: teq r0, #HVC_SOFT_RESTART - bne 1f - bx r1 - -1: teq r0, #HVC_RESET_VECTORS - beq __hyp_stub_exit - - ldr r0, =HVC_STUB_ERR - __ERET - -__hyp_stub_exit: - mov r0, #0 - __ERET -ENDPROC(__hyp_stub_do_trap) - -/* - * __hyp_set_vectors: Call this after boot to set the initial hypervisor - * vectors as part of hypervisor installation. On an SMP system, this should - * be called on each CPU. - * - * r0 must be the physical address of the new vector table (which must lie in - * the bottom 4GB of physical address space. - * - * r0 must be 32-byte aligned. - * - * Before calling this, you must check that the stub hypervisor is installed - * everywhere, by waiting for any secondary CPUs to be brought up and then - * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true. - * - * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or - * something else went wrong... in such cases, trying to install a new - * hypervisor is unlikely to work as desired. - * - * When you call into your shiny new hypervisor, sp_hyp will contain junk, - * so you will need to set that to something sensible at the new hypervisor's - * initialisation entry point. 
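A hypothetical C caller honouring the contract spelled out above; the helper name and error convention are made up for illustration:

extern void __hyp_set_vectors(unsigned long phys_vector_base);

/* Install new HYP vectors once the stub is known to be present everywhere.
 * The vector base must be a physical address below 4 GB and 32-byte
 * aligned, per the comment above. */
static int install_my_hypervisor(unsigned long vectors_phys)
{
        if (vectors_phys & 31)          /* must be 32-byte aligned */
                return -1;
        __hyp_set_vectors(vectors_phys);
        return 0;                       /* sp_hyp still needs initialising */
}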
- */ -ENTRY(__hyp_set_vectors) - mov r1, r0 - mov r0, #HVC_SET_VECTORS - __HVC(0) - ret lr -ENDPROC(__hyp_set_vectors) - -ENTRY(__hyp_soft_restart) - mov r1, r0 - mov r0, #HVC_SOFT_RESTART - __HVC(0) - ret lr -ENDPROC(__hyp_soft_restart) - -ENTRY(__hyp_reset_vectors) - mov r0, #HVC_RESET_VECTORS - __HVC(0) - ret lr -ENDPROC(__hyp_reset_vectors) - -#ifndef ZIMAGE -.align 2 -.L__boot_cpu_mode_offset: - .long __boot_cpu_mode - . -#endif - -.align 5 -ENTRY(__hyp_stub_vectors) -__hyp_stub_reset: W(b) . -__hyp_stub_und: W(b) . -__hyp_stub_svc: W(b) . -__hyp_stub_pabort: W(b) . -__hyp_stub_dabort: W(b) . -__hyp_stub_trap: W(b) __hyp_stub_do_trap -__hyp_stub_irq: W(b) . -__hyp_stub_fiq: W(b) . -ENDPROC(__hyp_stub_vectors) - diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S deleted file mode 100644 index 0dcae787b004d61cfe92bf4548b8dface3e8d01e..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/iwmmxt.S +++ /dev/null @@ -1,370 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/iwmmxt.S - * - * XScale iWMMXt (Concan) context switching and handling - * - * Initial code: - * Copyright (c) 2003, Intel Corporation - * - * Full lazy switching support, optimizations and more, by Nicolas Pitre -* Copyright (c) 2003-2004, MontaVista Software, Inc. - */ - -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) -#define PJ4(code...) code -#define XSC(code...) -#elif defined(CONFIG_CPU_MOHAWK) || \ - defined(CONFIG_CPU_XSC3) || \ - defined(CONFIG_CPU_XSCALE) -#define PJ4(code...) -#define XSC(code...) code -#else -#error "Unsupported iWMMXt architecture" -#endif - -#define MMX_WR0 (0x00) -#define MMX_WR1 (0x08) -#define MMX_WR2 (0x10) -#define MMX_WR3 (0x18) -#define MMX_WR4 (0x20) -#define MMX_WR5 (0x28) -#define MMX_WR6 (0x30) -#define MMX_WR7 (0x38) -#define MMX_WR8 (0x40) -#define MMX_WR9 (0x48) -#define MMX_WR10 (0x50) -#define MMX_WR11 (0x58) -#define MMX_WR12 (0x60) -#define MMX_WR13 (0x68) -#define MMX_WR14 (0x70) -#define MMX_WR15 (0x78) -#define MMX_WCSSF (0x80) -#define MMX_WCASF (0x84) -#define MMX_WCGR0 (0x88) -#define MMX_WCGR1 (0x8C) -#define MMX_WCGR2 (0x90) -#define MMX_WCGR3 (0x94) - -#define MMX_SIZE (0x98) - - .text - .arm - -/* - * Lazy switching of Concan coprocessor context - * - * r10 = struct thread_info pointer - * r9 = ret_from_exception - * lr = undefined instr exit - * - * called from prefetch exception handler with interrupts enabled - */ - -ENTRY(iwmmxt_task_enable) - inc_preempt_count r10, r3 - - XSC(mrc p15, 0, r2, c15, c1, 0) - PJ4(mrc p15, 0, r2, c1, c0, 2) - @ CP0 and CP1 accessible? - XSC(tst r2, #0x3) - PJ4(tst r2, #0xf) - bne 4f @ if so no business here - @ enable access to CP0 and CP1 - XSC(orr r2, r2, #0x3) - XSC(mcr p15, 0, r2, c15, c1, 0) - PJ4(orr r2, r2, #0xf) - PJ4(mcr p15, 0, r2, c1, c0, 2) - - ldr r3, =concan_owner - add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area - ldr r2, [sp, #60] @ current task pc value - ldr r1, [r3] @ get current Concan owner - str r0, [r3] @ this task now owns Concan regs - sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] - - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - bl concan_save - -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -4: dec_preempt_count r10, r3 - ret r9 @ normal exit from exception - -concan_save: - - teq r1, #0 @ test for last ownership - beq concan_load @ no owner, skip save - - tmrc r2, wCon - - @ CUP? 
wCx - tst r2, #0x1 - beq 1f - -concan_dump: - - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] - -1: @ MUP? wRn - tst r2, #0x2 - beq 2f - - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] - -2: teq r0, #0 @ anything to load? - reteq lr @ if not, return - -concan_load: - - @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] - - @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] - - @ clear CUP/MUP (only if r1 != 0) - teq r1, #0 - mov r2, #0 - reteq lr - - tmcr wCon, r2 - ret lr - -ENDPROC(iwmmxt_task_enable) - -/* - * Back up Concan regs to save area and disable access to them - * (mainly for gdb or sleep mode usage) - * - * r0 = struct thread_info pointer of target task or NULL for any - */ - -ENTRY(iwmmxt_task_disable) - - stmfd sp!, {r4, lr} - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r1, [r3] @ get current Concan owner - teq r1, #0 @ any current owner? - beq 1f @ no: quit - teq r0, #0 @ any owner? - teqne r1, r2 @ or specified one? - bne 1f @ no: quit - - @ enable access to CP0 and CP1 - XSC(mrc p15, 0, r4, c15, c1, 0) - XSC(orr r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(mrc p15, 0, r4, c1, c0, 2) - PJ4(orr r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) - - mov r0, #0 @ nothing to load - str r0, [r3] @ no more current owner - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - bl concan_save - - @ disable access to CP0 and CP1 - XSC(bic r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(bic r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) - - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - -1: msr cpsr_c, ip @ restore interrupt mode - ldmfd sp!, {r4, pc} - -ENDPROC(iwmmxt_task_disable) - -/* - * Copy Concan state to given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to store Concan state - * - * this is called mainly in the creation of signal stack frames - */ - -ENTRY(iwmmxt_task_copy) - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r3, [r3] @ get current Concan owner - teq r2, r3 @ does this task own it... 
- beq 1f - - @ current Concan values are in the task save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r1 - mov r1, r2 - mov r2, #MMX_SIZE - b memcpy - -1: @ this task owns Concan regs -- grab a copy from there - mov r0, #0 @ nothing to load - mov r2, #3 @ save all regs - mov r3, lr @ preserve return address - bl concan_dump - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -ENDPROC(iwmmxt_task_copy) - -/* - * Restore Concan state from given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to get Concan state from - * - * this is used to restore Concan state when unwinding a signal stack frame - */ - -ENTRY(iwmmxt_task_restore) - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r3, [r3] @ get current Concan owner - bic r2, r2, #0x7 @ 64-bit alignment - teq r2, r3 @ does this task own it... - beq 1f - - @ this task doesn't own Concan regs -- use its save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r2 - mov r2, #MMX_SIZE - b memcpy - -1: @ this task owns Concan regs -- load them directly - mov r0, r1 - mov r1, #0 @ don't clear CUP/MUP - mov r3, lr @ preserve return address - bl concan_load - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -ENDPROC(iwmmxt_task_restore) - -/* - * Concan handling on task switch - * - * r0 = next thread_info pointer - * - * Called only from the iwmmxt notifier with task preemption disabled. - */ -ENTRY(iwmmxt_task_switch) - - XSC(mrc p15, 0, r1, c15, c1, 0) - PJ4(mrc p15, 0, r1, c1, c0, 2) - @ CP0 and CP1 accessible? - XSC(tst r1, #0x3) - PJ4(tst r1, #0xf) - bne 1f @ yes: block them for next task - - ldr r2, =concan_owner - add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area - ldr r2, [r2] @ get current Concan owner - teq r2, r3 @ next task owns it? - retne lr @ no: leave Concan disabled - -1: @ flip Concan access - XSC(eor r1, r1, #0x3) - XSC(mcr p15, 0, r1, c15, c1, 0) - PJ4(eor r1, r1, #0xf) - PJ4(mcr p15, 0, r1, c1, c0, 2) - - mrc p15, 0, r1, c2, c0, 0 - sub pc, lr, r1, lsr #32 @ cpwait and return - -ENDPROC(iwmmxt_task_switch) - -/* - * Remove Concan ownership of given task - * - * r0 = struct thread_info pointer - */ -ENTRY(iwmmxt_task_release) - - mrs r2, cpsr - orr ip, r2, #PSR_I_BIT @ disable interrupts - msr cpsr_c, ip - ldr r3, =concan_owner - add r0, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r1, [r3] @ get current Concan owner - eors r0, r0, r1 @ if equal... - streq r0, [r3] @ then clear ownership - msr cpsr_c, r2 @ restore interrupts - ret lr - -ENDPROC(iwmmxt_task_release) - - .data - .align 2 -concan_owner: - .word 0 - diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S deleted file mode 100644 index 5e15b5912cb05f74e65446ed84503f2ab4fd3d1e..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/relocate_kernel.S +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * relocate_kernel.S - put the kernel image in place to boot - */ - -#include -#include -#include -#include - - .align 3 /* not needed for this code, but keeps fncpy() happy */ - -ENTRY(relocate_new_kernel) - - adr r7, relocate_new_kernel_end - ldr r0, [r7, #KEXEC_INDIR_PAGE] - ldr r1, [r7, #KEXEC_START_ADDR] - - /* - * If there is no indirection page (we are doing crashdumps) - * skip any relocation. 
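The relocation loop that follows walks a kexec indirection list whose entry tags match the generic kexec IND_* flags. An illustrative C version of the same walk:

#include <stdint.h>
#include <string.h>

#define IND_DESTINATION 0x1     /* entry names the next destination page */
#define IND_INDIRECTION 0x2     /* entry points at the next indirection page */
#define IND_DONE        0x4     /* end of the list */
#define IND_SOURCE      0x8     /* entry is a source page to copy */

/* Walk the indirection list, copying each 4 KiB source page to the
 * current destination; a well-formed list sets a destination first. */
static void relocate(uint32_t *ind)
{
        uint8_t *dest = 0;
        uint32_t entry;

        while (!((entry = *ind++) & IND_DONE)) {
                if (entry & IND_DESTINATION)
                        dest = (uint8_t *)(uintptr_t)(entry & ~(uint32_t)IND_DESTINATION);
                else if (entry & IND_INDIRECTION)
                        ind = (uint32_t *)(uintptr_t)(entry & ~(uint32_t)IND_INDIRECTION);
                else if (entry & IND_SOURCE) {
                        memcpy(dest,
                               (void *)(uintptr_t)(entry & ~(uint32_t)IND_SOURCE),
                               4096);   /* 1024 words, as in the asm loop */
                        dest += 4096;
                }
        }
}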
- */
-	cmp	r0, #0
-	beq	2f
-
-0:	/* top, read another word for the indirection page */
-	ldr	r3, [r0],#4
-
-	/* Is it a destination page. Put destination address to r4 */
-	tst	r3,#1,0
-	beq	1f
-	bic	r4,r3,#1
-	b	0b
-1:
-	/* Is it an indirection page */
-	tst	r3,#2,0
-	beq	1f
-	bic	r0,r3,#2
-	b	0b
-1:
-
-	/* are we done ? */
-	tst	r3,#4,0
-	beq	1f
-	b	2f
-
-1:
-	/* is it source ? */
-	tst	r3,#8,0
-	beq	0b
-	bic	r3,r3,#8
-	mov	r6,#1024
-9:
-	ldr	r5,[r3],#4
-	str	r5,[r4],#4
-	subs	r6,r6,#1
-	bne	9b
-	b	0b
-
-2:
-	/* Jump to relocated kernel */
-	mov	lr, r1
-	mov	r0, #0
-	ldr	r1, [r7, #KEXEC_MACH_TYPE]
-	ldr	r2, [r7, #KEXEC_R2]
-	ARM(	ret	lr	)
-	THUMB(	bx	lr	)
-
-ENDPROC(relocate_new_kernel)
-
-	.align	3
-relocate_new_kernel_end:
-
-	.globl relocate_new_kernel_size
-relocate_new_kernel_size:
-	.long	relocate_new_kernel_end - relocate_new_kernel
-
-
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
deleted file mode 100644
index 7540ec51d16cdceb9f3ddf5f60b3293d373e9249..0000000000000000000000000000000000000000
--- a/arch/arm/kernel/sigreturn_codes.S
+++ /dev/null
@@ -1,140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * sigreturn_codes.S - code snippets for sigreturn syscalls
- *
- * Created by: Victor Kamensky, 2013-08-13
- * Copyright: (C) 2013 Linaro Limited
- */
-
-#include
-#include
-#include
-
-/*
- * For ARM syscalls, we encode the syscall number into the instruction.
- * With EABI, the syscall number has to be loaded into r7. As a result,
- * the ARM syscall sequence snippet is a mov plus an svc in .arm encoding.
- *
- * For Thumb syscalls, we pass the syscall number via r7. We therefore
- * need two 16-bit instructions in .thumb encoding.
- *
- * Please note that the sigreturn_codes snippets are not executed in
- * place. Instead, they are copied by the kernel into appropriate places.
- * Code inside arch/arm/kernel/signal.c is very sensitive to the layout
- * of these code snippets.
- */
-
-/*
- * In the CPU_THUMBONLY case, kernel ARM opcodes are not allowed.
- * Note that in this case the code skips those instructions, but it
- * uses the .org directive to keep the correct layout of the
- * sigreturn_codes array.
- */
-#ifndef CONFIG_CPU_THUMBONLY
-#define ARM_OK(code...) code
-#else
-#define ARM_OK(code...)
-#endif
-
-	.macro arm_slot n
-	.org	sigreturn_codes + 12 * (\n)
-ARM_OK(	.arm	)
-	.endm
-
-	.macro thumb_slot n
-	.org	sigreturn_codes + 12 * (\n) + 8
-	.thumb
-	.endm
-
-	.macro arm_fdpic_slot n
-	.org	sigreturn_codes + 24 + 20 * (\n)
-ARM_OK(	.arm	)
-	.endm
-
-	.macro thumb_fdpic_slot n
-	.org	sigreturn_codes + 24 + 20 * (\n) + 12
-	.thumb
-	.endm
-
-
-#if __LINUX_ARM_ARCH__ <= 4
-	/*
-	 * Note we manually set the minimally required arch that supports
-	 * the required Thumb opcodes for early arch versions. It is OK
-	 * for this file to be used in combination with other
-	 * lower arch variants, since these code snippets are only
-	 * used as input data.
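The .org-based slot macros above pin each snippet at a fixed offset: ARM slot n starts at byte 12*n with its Thumb twin at 12*n + 8, and the FDPIC bounce slots start at byte 24 with a 20-byte stride. A hypothetical helper showing the resulting indexing (not the kernel's actual interface, which lives in signal.c):

#include <stddef.h>

extern const unsigned long sigreturn_codes[];

/* Return the address of the (sig)return snippet for a given slot:
 * slot 0 = sigreturn, slot 1 = rt_sigreturn; thumb selects the
 * Thumb twin 8 bytes into the 12-byte slot. Illustrative only. */
static const void *sigreturn_snippet(unsigned int slot, int thumb)
{
        size_t off = 12 * slot + (thumb ? 8 : 0);

        return (const char *)sigreturn_codes + off;
}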
- */ - .arch armv4t -#endif - - .section .rodata - .global sigreturn_codes - .type sigreturn_codes, #object - - .align - -sigreturn_codes: - - /* ARM sigreturn syscall code snippet */ - arm_slot 0 -ARM_OK( mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) ) -ARM_OK( swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) - - /* Thumb sigreturn syscall code snippet */ - thumb_slot 0 - movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) - swi #0 - - /* ARM sigreturn_rt syscall code snippet */ - arm_slot 1 -ARM_OK( mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) ) -ARM_OK( swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) - - /* Thumb sigreturn_rt syscall code snippet */ - thumb_slot 1 - movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) - swi #0 - - /* ARM sigreturn restorer FDPIC bounce code snippet */ - arm_fdpic_slot 0 -ARM_OK( ldr r3, [sp, #SIGFRAME_RC3_OFFSET] ) -ARM_OK( ldmia r3, {r3, r9} ) -#ifdef CONFIG_ARM_THUMB -ARM_OK( bx r3 ) -#else -ARM_OK( ret r3 ) -#endif - - /* Thumb sigreturn restorer FDPIC bounce code snippet */ - thumb_fdpic_slot 0 - ldr r3, [sp, #SIGFRAME_RC3_OFFSET] - ldmia r3, {r2, r3} - mov r9, r3 - bx r2 - - /* ARM sigreturn_rt restorer FDPIC bounce code snippet */ - arm_fdpic_slot 1 -ARM_OK( ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] ) -ARM_OK( ldmia r3, {r3, r9} ) -#ifdef CONFIG_ARM_THUMB -ARM_OK( bx r3 ) -#else -ARM_OK( ret r3 ) -#endif - - /* Thumb sigreturn_rt restorer FDPIC bounce code snippet */ - thumb_fdpic_slot 1 - ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] - ldmia r3, {r2, r3} - mov r9, r3 - bx r2 - - /* - * Note on additional space: setup_return in signal.c - * always copies the same number of words regardless whether - * it is thumb case or not, so we need one additional padding - * word after the last entry. - */ - .space 4 - - .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S deleted file mode 100644 index 5dc8b80bb69383643eddec5ba62164e0458b4512..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/sleep.S +++ /dev/null @@ -1,190 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include -#include - .text - -/* - * Implementation of MPIDR hash algorithm through shifting - * and OR'ing. 
- * - * @dst: register containing hash result - * @rs0: register containing affinity level 0 bit shift - * @rs1: register containing affinity level 1 bit shift - * @rs2: register containing affinity level 2 bit shift - * @mpidr: register containing MPIDR value - * @mask: register containing MPIDR mask - * - * Pseudo C-code: - * - *u32 dst; - * - *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { - * u32 aff0, aff1, aff2; - * u32 mpidr_masked = mpidr & mask; - * aff0 = mpidr_masked & 0xff; - * aff1 = mpidr_masked & 0xff00; - * aff2 = mpidr_masked & 0xff0000; - * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); - *} - * Input registers: rs0, rs1, rs2, mpidr, mask - * Output register: dst - * Note: input and output registers must be disjoint register sets - (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) - */ - .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask - and \mpidr, \mpidr, \mask @ mask out MPIDR bits - and \dst, \mpidr, #0xff @ mask=aff0 - ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 - THUMB( lsr \dst, \dst, \rs0 ) - and \mask, \mpidr, #0xff00 @ mask = aff1 - ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) - THUMB( lsr \mask, \mask, \rs1 ) - THUMB( orr \dst, \dst, \mask ) - and \mask, \mpidr, #0xff0000 @ mask = aff2 - ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) - THUMB( lsr \mask, \mask, \rs2 ) - THUMB( orr \dst, \dst, \mask ) - .endm - -/* - * Save CPU state for a suspend. This saves the CPU general purpose - * registers, and allocates space on the kernel stack to save the CPU - * specific registers and some other data for resume. - * r0 = suspend function arg0 - * r1 = suspend function - * r2 = MPIDR value the resuming CPU will use - */ -ENTRY(__cpu_suspend) - stmfd sp!, {r4 - r11, lr} -#ifdef MULTI_CPU - ldr r10, =processor - ldr r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state -#else - ldr r4, =cpu_suspend_size -#endif - mov r5, sp @ current virtual SP - add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn - sub sp, sp, r4 @ allocate CPU state on stack - ldr r3, =sleep_save_sp - stmfd sp!, {r0, r1} @ save suspend func arg and pointer - ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) - ALT_UP_B(1f) - /* This ldmia relies on the memory layout of the mpidr_hash struct */ - ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts - compute_mpidr_hash r0, r6, r7, r8, r2, r1 - add r3, r3, r0, lsl #2 -1: mov r2, r5 @ virtual SP - mov r1, r4 @ size of save block - add r0, sp, #8 @ pointer to save block - bl __cpu_suspend_save - badr lr, cpu_suspend_abort - ldmfd sp!, {r0, pc} @ call suspend fn -ENDPROC(__cpu_suspend) - .ltorg - -cpu_suspend_abort: - ldmia sp!, {r1 - r3} @ pop phys pgd, virt SP, phys resume fn - teq r0, #0 - moveq r0, #1 @ force non-zero value - mov sp, r2 - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_suspend_abort) - -/* - * r0 = control register value - */ - .align 5 - .pushsection .idmap.text,"ax" -ENTRY(cpu_resume_mmu) - ldr r3, =cpu_resume_after_mmu - instr_sync - mcr p15, 0, r0, c1, c0, 0 @ turn on MMU, I-cache, etc - mrc p15, 0, r0, c0, c0, 0 @ read id reg - instr_sync - mov r0, r0 - mov r0, r0 - ret r3 @ jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: - bl cpu_init @ restore the und/abt/irq banked regs - mov r0, #0 @ return zero on success - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_resume_after_mmu) - - .text - .align - -#ifdef CONFIG_MCPM - .arm -THUMB( .thumb ) -ENTRY(cpu_resume_no_hyp) -ARM_BE8(setend be) @ ensure we are in BE 
mode - b no_hyp -#endif - -#ifdef CONFIG_MMU - .arm -ENTRY(cpu_resume_arm) - THUMB( badr r9, 1f ) @ Kernel is entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -#endif - -ENTRY(cpu_resume) -ARM_BE8(setend be) @ ensure we are in BE mode -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r1 -no_hyp: - mov r1, #0 - ALT_SMP(mrc p15, 0, r0, c0, c0, 5) - ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address - /* - * This ldmia relies on the memory layout of the mpidr_hash - * struct mpidr_hash. - */ - ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts - compute_mpidr_hash r1, r4, r5, r6, r0, r3 -1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] - ldr r0, [r0, r1, lsl #2] - - @ load phys pgd, stack, resume fn - ARM( ldmia r0!, {r1, sp, pc} ) -THUMB( ldmia r0!, {r1, r2, r3} ) -THUMB( mov sp, r2 ) -THUMB( bx r3 ) -ENDPROC(cpu_resume) - -#ifdef CONFIG_MMU -ENDPROC(cpu_resume_arm) -#endif -#ifdef CONFIG_MCPM -ENDPROC(cpu_resume_no_hyp) -#endif - - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - - .data - .align 2 - .type sleep_save_sp, #object -ENTRY(sleep_save_sp) - .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S deleted file mode 100644 index 00664c78facab321049bf7e7dfcdb380d15986d9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/smccc-call.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2015, Linaro Limited - */ -#include - -#include -#include -#include - - /* - * Wrap c macros in asm macros to delay expansion until after the - * SMCCC asm macro is expanded. 
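For context, a sketch of how a caller reaches the wrappers defined below, via the arm_smccc_smc() convenience macro from linux/arm-smccc.h; the function ID here is made up for the example:

#include <linux/arm-smccc.h>

/* Issue an SMC with seven zero arguments; results come back in
 * res.a0..a3, filled in by the stm in the SMCCC macro below. */
static unsigned long probe_firmware(void)
{
        struct arm_smccc_res res;

        arm_smccc_smc(0x8000ff00 /* hypothetical function ID */,
                      0, 0, 0, 0, 0, 0, 0, &res);
        return res.a0;
}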
- */ - .macro SMCCC_SMC - __SMC(0) - .endm - - .macro SMCCC_HVC - __HVC(0) - .endm - - .macro SMCCC instr -UNWIND( .fnstart) - mov r12, sp - push {r4-r7} -UNWIND( .save {r4-r7}) - ldm r12, {r4-r7} - \instr - pop {r4-r7} - ldr r12, [sp, #(4 * 4)] - stm r12, {r0-r3} - bx lr -UNWIND( .fnend) - .endm - -/* - * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_smc) - SMCCC SMCCC_SMC -ENDPROC(__arm_smccc_smc) - -/* - * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_hvc) - SMCCC SMCCC_HVC -ENDPROC(__arm_smccc_hvc) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S deleted file mode 100644 index 8c74037ade22958688e4766e4ef762a6cad514d9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -/* No __ro_after_init data in the .rodata section - which will always be ro */ -#define RO_AFTER_INIT_DATA - -#include - -#include -#include -#include -#include -#include -#include - -#include "vmlinux.lds.h" - -OUTPUT_ARCH(arm) -ENTRY(stext) - -#ifndef __ARMEB__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - * - * unwind exit sections must be discarded before the rest of the - * unwind sections get included. - */ - /DISCARD/ : { - ARM_DISCARD - *(.alt.smp.init) - *(.pv_table) - } - - . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); - _xiprom = .; /* XIP ROM area to be mapped */ - - .head.text : { - _text = .; - HEAD_TEXT - } - - .text : { /* Real text segment */ - _stext = .; /* Text and read-only data */ - ARM_TEXT - } - - RO_DATA(PAGE_SIZE) - - . = ALIGN(4); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) - __stop___ex_table = .; - } - -#ifdef CONFIG_ARM_UNWIND - ARM_UNWIND_SECTIONS -#endif - - NOTES - - _etext = .; /* End of text and rodata section */ - - ARM_VECTORS - INIT_TEXT_SECTION(8) - .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) - } - .init.proc.info : { - ARM_CPU_DISCARD(PROC_INFO) - } - .init.arch.info : { - __arch_info_begin = .; - *(.arch.info.init) - __arch_info_end = .; - } - .init.tagtable : { - __tagtable_begin = .; - *(.taglist.init) - __tagtable_end = .; - } - .init.rodata : { - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - INIT_RAM_FS - } - -#ifdef CONFIG_ARM_MPU - . = ALIGN(SZ_128K); -#endif - _exiprom = .; /* End of XIP ROM area */ - -/* - * From this point, stuff is considered writable and will be copied to RAM - */ - __data_loc = ALIGN(4); /* location in file */ - . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */ -#undef LOAD_OFFSET -#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc) - - . 
= ALIGN(THREAD_SIZE); - _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { - *(.data..ro_after_init) - } - _edata = .; - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - ARM_EXIT_KEEP(EXIT_DATA) - } -#ifdef CONFIG_SMP - PERCPU_SECTION(L1_CACHE_BYTES) -#endif - -#ifdef CONFIG_HAVE_TCM - ARM_TCM -#endif - - /* - * End of copied data. We need a dummy section to get its LMA. - * Also located before final ALIGN() as trailing padding is not stored - * in the resulting binary file and useless to copy. - */ - .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } - _edata_loc = LOADADDR(.data.endmark); - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - BSS_SECTION(0, 0, 8) -#ifdef CONFIG_ARM_MPU - . = ALIGN(PMSAv8_MINALIGN); -#endif - _end = .; - - STABS_DEBUG -} - -/* - * These must never be empty - * If you have to comment these two assert statements out, your - * binutils is too old (for other reasons as well) - */ -ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") -ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") - -/* - * The HYP init code can't be more than a page long, - * and should not cross a page boundary. - * The above comment applies as well. - */ -ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, - "HYP init code too big or misaligned") - -#ifdef CONFIG_XIP_DEFLATED_DATA -/* - * The .bss is used as a stack area for __inflate_kernel_data() whose stack - * frame is 9568 bytes. Make sure it has extra room left. - */ -ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") -#endif - -#ifdef CONFIG_ARM_MPU -/* - * Due to PMSAv7 restriction on base address and size we have to - * enforce minimal alignment restrictions. It was seen that weaker - * alignment restriction on _xiprom will likely force XIP address - * space spawns multiple MPU regions thus it is likely we run in - * situation when we are reprogramming MPU region we run on with - * something which doesn't cover reprogramming code itself, so as soon - * as we update MPU settings we'd immediately try to execute straight - * from background region which is XN. - * It seem that alignment in 1M should suit most users. 
- * _exiprom is aligned as 1/8 of 1M so can be covered by subregion - * disable - */ -ASSERT(!(_xiprom & (SZ_1M - 1)), "XIP start address may cause MPU programming issues") -ASSERT(!(_exiprom & (SZ_128K - 1)), "XIP end address may cause MPU programming issues") -#endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S deleted file mode 100644 index 23150c0f0f4d4f81ec816798ab0415a3ac00d68b..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/vmlinux.lds.S +++ /dev/null @@ -1,183 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -#ifdef CONFIG_XIP_KERNEL -#include "vmlinux-xip.lds.S" -#else - -#include -#include -#include -#include -#include -#include -#include - -#include "vmlinux.lds.h" - -OUTPUT_ARCH(arm) -ENTRY(stext) - -#ifndef __ARMEB__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - * - * unwind exit sections must be discarded before the rest of the - * unwind sections get included. - */ - /DISCARD/ : { - ARM_DISCARD -#ifndef CONFIG_SMP_ON_UP - *(.alt.smp.init) -#endif - } - - . = PAGE_OFFSET + TEXT_OFFSET; - .head.text : { - _text = .; - HEAD_TEXT - } - -#ifdef CONFIG_STRICT_KERNEL_RWX - . = ALIGN(1< -*/ - -#include -#include -#include -#include - - .arch_extension virt - - .text - .pushsection .hyp.text, "ax" - -#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR) - -/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */ -ENTRY(__guest_enter) - @ Save host registers - add r1, r1, #(USR_REGS_OFFSET + S_R4) - stm r1!, {r4-r12} - str lr, [r1, #4] @ Skip SP_usr (already saved) - - @ Restore guest registers - add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) - ldr lr, [r0, #S_LR] - ldm r0, {r0-r12} - - clrex - eret -ENDPROC(__guest_enter) - -ENTRY(__guest_exit) - /* - * return convention: - * guest r0, r1, r2 saved on the stack - * r0: vcpu pointer - * r1: exception code - */ - - add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3) - stm r2!, {r3-r12} - str lr, [r2, #4] - add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) - pop {r3, r4, r5} @ r0, r1, r2 - stm r2, {r3-r5} - - ldr r0, [r0, #VCPU_HOST_CTXT] - add r0, r0, #(USR_REGS_OFFSET + S_R4) - ldm r0!, {r4-r12} - ldr lr, [r0, #4] - - mov r0, r1 - mrs r1, SPSR - mrs r2, ELR_hyp - mrc p15, 4, r3, c5, c2, 0 @ HSR - - /* - * Force loads and stores to complete before unmasking aborts - * and forcing the delivery of the exception. This gives us a - * single instruction window, which the handler will try to - * match. - */ - dsb sy - cpsie a - - .global abort_guest_exit_start -abort_guest_exit_start: - - isb - - .global abort_guest_exit_end -abort_guest_exit_end: - - /* - * If we took an abort, r0[31] will be set, and cmp will set - * the N bit in PSTATE. - */ - cmp r0, #0 - msrmi SPSR_cxsf, r1 - msrmi ELR_hyp, r2 - mcrmi p15, 4, r3, c5, c2, 0 @ HSR - - bx lr -ENDPROC(__guest_exit) - -/* - * If VFPv3 support is not available, then we will not switch the VFP - * registers; however cp10 and cp11 accesses will still trap and fallback - * to the regular coprocessor emulation code, which currently will - * inject an undefined exception to the guest. 
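A hedged sketch of the lazy switch that __vfp_guest_restore (below) implements: VFP state is left untouched at guest entry and only swapped once the guest's first cp10/cp11 access traps. The struct name and the C framing are illustrative:

struct vfp_ctx;                         /* stands in for struct vfp_hard_struct */
extern void __vfp_save_state(struct vfp_ctx *);
extern void __vfp_restore_state(struct vfp_ctx *);

/* Called from the trap path, after the HCPTR cp10/cp11 trap bits have
 * been cleared so VFP accesses no longer fault. */
static void vfp_switch_to_guest(struct vfp_ctx *host, struct vfp_ctx *guest)
{
        __vfp_save_state(host);         /* park the host's VFP/NEON state */
        __vfp_restore_state(guest);     /* then load the guest's */
}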
- */ -#ifdef CONFIG_VFPv3 -ENTRY(__vfp_guest_restore) - push {r3, r4, lr} - - @ NEON/VFP used. Turn on VFP access. - mrc p15, 4, r1, c1, c1, 2 @ HCPTR - bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - mcr p15, 4, r1, c1, c1, 2 @ HCPTR - isb - - @ Switch VFP/NEON hardware state to the guest's - mov r4, r0 - ldr r0, [r0, #VCPU_HOST_CTXT] - add r0, r0, #CPU_CTXT_VFP - bl __vfp_save_state - add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP) - bl __vfp_restore_state - - pop {r3, r4, lr} - pop {r0, r1, r2} - clrex - eret -ENDPROC(__vfp_guest_restore) -#endif - - .popsection - diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S deleted file mode 100644 index fe3d7811a908c73de535eef289cbae2cd1a9cc02..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ /dev/null @@ -1,295 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - - .arch_extension virt - - .text - .pushsection .hyp.text, "ax" - -.macro load_vcpu reg - mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR -.endm - -/******************************************************************** - * Hypervisor exception vector and handlers - * - * - * The KVM/ARM Hypervisor ABI is defined as follows: - * - * Entry to Hyp mode from the host kernel will happen _only_ when an HVC - * instruction is issued since all traps are disabled when running the host - * kernel as per the Hyp-mode initialization at boot time. - * - * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc - * below) when the HVC instruction is called from SVC mode (i.e. a guest or the - * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC - * instructions are called from within Hyp-mode. - * - * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): - * Switching to Hyp mode is done through a simple HVC #0 instruction. The - * exception vector code will check that the HVC comes from VMID==0. - * - r0 contains a pointer to a HYP function - * - r1, r2, and r3 contain arguments to the above function. - * - The HYP function will be called with its arguments in r0, r1 and r2. - * On HYP function return, we return directly to SVC. - * - * Note that the above is used to execute code in Hyp-mode from a host-kernel - * point of view, and is a different concept from performing a world-switch and - * executing guest code SVC mode (with a VMID != 0). - */ - - .align 5 -__kvm_hyp_vector: - .global __kvm_hyp_vector - - @ Hyp-mode exception vector - W(b) hyp_reset - W(b) hyp_undef - W(b) hyp_svc - W(b) hyp_pabt - W(b) hyp_dabt - W(b) hyp_hvc - W(b) hyp_irq - W(b) hyp_fiq - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - .align 5 -__kvm_hyp_vector_ic_inv: - .global __kvm_hyp_vector_ic_inv - - /* - * We encode the exception entry in the bottom 3 bits of - * SP, and we have to guarantee to be 8 bytes aligned. - */ - W(add) sp, sp, #1 /* Reset 7 */ - W(add) sp, sp, #1 /* Undef 6 */ - W(add) sp, sp, #1 /* Syscall 5 */ - W(add) sp, sp, #1 /* Prefetch abort 4 */ - W(add) sp, sp, #1 /* Data abort 3 */ - W(add) sp, sp, #1 /* HVC 2 */ - W(add) sp, sp, #1 /* IRQ 1 */ - W(nop) /* FIQ 0 */ - - mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ - isb - - b decode_vectors - - .align 5 -__kvm_hyp_vector_bp_inv: - .global __kvm_hyp_vector_bp_inv - - /* - * We encode the exception entry in the bottom 3 bits of - * SP, and we have to guarantee to be 8 bytes aligned. 
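The add-to-SP slots above and below implement a branchless way of recording which vector fired: an exception enters the table at slot k and falls through the remaining `add sp, sp, #1` instructions, so on an 8-byte-aligned stack the low three bits of SP encode the exception. A small sketch of the decode, matching the vect_br values used later:

/* Vector numbers as encoded by the fall-through adds (FIQ executes only
 * the final nop, Reset executes all seven adds). */
enum hyp_vec {
        VEC_FIQ = 0, VEC_IRQ, VEC_HVC, VEC_DABT,
        VEC_PABT, VEC_SVC, VEC_UNDEF, VEC_RESET
};

static enum hyp_vec decode_entry(unsigned long sp)
{
        return (enum hyp_vec)(sp & 7);  /* low bits added by the fall-through */
}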
- */ - W(add) sp, sp, #1 /* Reset 7 */ - W(add) sp, sp, #1 /* Undef 6 */ - W(add) sp, sp, #1 /* Syscall 5 */ - W(add) sp, sp, #1 /* Prefetch abort 4 */ - W(add) sp, sp, #1 /* Data abort 3 */ - W(add) sp, sp, #1 /* HVC 2 */ - W(add) sp, sp, #1 /* IRQ 1 */ - W(nop) /* FIQ 0 */ - - mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ - isb - -decode_vectors: - -#ifdef CONFIG_THUMB2_KERNEL - /* - * Yet another silly hack: Use VPIDR as a temp register. - * Thumb2 is really a pain, as SP cannot be used with most - * of the bitwise instructions. The vect_br macro ensures - * things gets cleaned-up. - */ - mcr p15, 4, r0, c0, c0, 0 /* VPIDR */ - mov r0, sp - and r0, r0, #7 - sub sp, sp, r0 - push {r1, r2} - mov r1, r0 - mrc p15, 4, r0, c0, c0, 0 /* VPIDR */ - mrc p15, 0, r2, c0, c0, 0 /* MIDR */ - mcr p15, 4, r2, c0, c0, 0 /* VPIDR */ -#endif - -.macro vect_br val, targ -ARM( eor sp, sp, #\val ) -ARM( tst sp, #7 ) -ARM( eorne sp, sp, #\val ) - -THUMB( cmp r1, #\val ) -THUMB( popeq {r1, r2} ) - - beq \targ -.endm - - vect_br 0, hyp_fiq - vect_br 1, hyp_irq - vect_br 2, hyp_hvc - vect_br 3, hyp_dabt - vect_br 4, hyp_pabt - vect_br 5, hyp_svc - vect_br 6, hyp_undef - vect_br 7, hyp_reset -#endif - -.macro invalid_vector label, cause - .align -\label: mov r0, #\cause - b __hyp_panic -.endm - - invalid_vector hyp_reset ARM_EXCEPTION_RESET - invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED - invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE - invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT - invalid_vector hyp_fiq ARM_EXCEPTION_FIQ - -ENTRY(__hyp_do_panic) - mrs lr, cpsr - bic lr, lr, #MODE_MASK - orr lr, lr, #SVC_MODE -THUMB( orr lr, lr, #PSR_T_BIT ) - msr spsr_cxsf, lr - ldr lr, =panic - msr ELR_hyp, lr - ldr lr, =__kvm_call_hyp - clrex - eret -ENDPROC(__hyp_do_panic) - -hyp_hvc: - /* - * Getting here is either because of a trap from a guest, - * or from executing HVC from the host kernel, which means - * "do something in Hyp mode". - */ - push {r0, r1, r2} - - @ Check syndrome register - mrc p15, 4, r1, c5, c2, 0 @ HSR - lsr r0, r1, #HSR_EC_SHIFT - cmp r0, #HSR_EC_HVC - bne guest_trap @ Not HVC instr. - - /* - * Let's check if the HVC came from VMID 0 and allow simple - * switch to Hyp mode - */ - mrrc p15, 6, r0, r2, c2 - lsr r2, r2, #16 - and r2, r2, #0xff - cmp r2, #0 - bne guest_hvc_trap @ Guest called HVC - - /* - * Getting here means host called HVC, we shift parameters and branch - * to Hyp function. - */ - pop {r0, r1, r2} - - /* - * Check if we have a kernel function, which is guaranteed to be - * bigger than the maximum hyp stub hypercall - */ - cmp r0, #HVC_STUB_HCALL_NR - bhs 1f - - /* - * Not a kernel function, treat it as a stub hypercall. - * Compute the physical address for __kvm_handle_stub_hvc - * (as the code lives in the idmaped page) and branch there. - * We hijack ip (r12) as a tmp register. - */ - push {r1} - ldr r1, =kimage_voffset - ldr r1, [r1] - ldr ip, =__kvm_handle_stub_hvc - sub ip, ip, r1 - pop {r1} - - bx ip - -1: - /* - * Pushing r2 here is just a way of keeping the stack aligned to - * 8 bytes on any path that can trigger a HYP exception. Here, - * we may well be about to jump into the guest, and the guest - * exit would otherwise be badly decoded by our fancy - * "decode-exception-without-a-branch" code... 
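The host-call path dispatched just below follows the Hyp-ABI described earlier: HVC #0 with r0 holding a pointer to a HYP function and r1-r3 its arguments, which the stub shifts down into r0-r2 before branching. A C rendering of that calling convention; illustrative only, since the real transition is the HVC trap, not a direct call:

typedef unsigned long (*hyp_fn_t)(unsigned long, unsigned long, unsigned long);

/* Host-side view of "call a function in HYP mode": the arguments the
 * caller puts in r1..r3 arrive in the HYP function as r0..r2. */
static unsigned long call_hyp(hyp_fn_t fn, unsigned long a,
                              unsigned long b, unsigned long c)
{
        return fn(a, b, c);             /* stands in for HVC #0 */
}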
- */ - push {r2, lr} - - mov lr, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - -THUMB( orr lr, #1) - blx lr @ Call the HYP function - - pop {r2, lr} - eret - -guest_hvc_trap: - movw r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - ldr r0, [sp] @ Guest's r0 - teq r0, r2 - bne guest_trap - add sp, sp, #12 - @ Returns: - @ r0 = 0 - @ r1 = HSR value (perfectly predictable) - @ r2 = ARM_SMCCC_ARCH_WORKAROUND_1 - mov r0, #0 - eret - -guest_trap: - load_vcpu r0 @ Load VCPU pointer to r0 - -#ifdef CONFIG_VFPv3 - @ Check for a VFP access - lsr r1, r1, #HSR_EC_SHIFT - cmp r1, #HSR_EC_CP_0_13 - beq __vfp_guest_restore -#endif - - mov r1, #ARM_EXCEPTION_HVC - b __guest_exit - -hyp_irq: - push {r0, r1, r2} - mov r1, #ARM_EXCEPTION_IRQ - load_vcpu r0 @ Load VCPU pointer to r0 - b __guest_exit - -hyp_dabt: - push {r0, r1} - mrs r0, ELR_hyp - ldr r1, =abort_guest_exit_start -THUMB( add r1, r1, #1) - cmp r0, r1 - ldrne r1, =abort_guest_exit_end -THUMB( addne r1, r1, #1) - cmpne r0, r1 - pop {r0, r1} - bne __hyp_panic - - orr r0, r0, #(1 << ARM_EXIT_WITH_ABORT_BIT) - eret - - .ltorg - - .popsection diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S deleted file mode 100644 index 675a52348d8dc11d7a5e724aca7441af1fef8282..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/hyp/vfp.S +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* void __vfp_save_state(struct vfp_hard_struct *vfp); */ -ENTRY(__vfp_save_state) - push {r4, r5} - VFPFMRX r1, FPEXC - - @ Make sure VFP is *really* enabled so we can touch the registers. - orr r5, r1, #FPEXC_EN - tst r5, #FPEXC_EX @ Check for VFP Subarchitecture - bic r5, r5, #FPEXC_EX @ FPEXC_EX disable - VFPFMXR FPEXC, r5 - isb - - VFPFMRX r2, FPSCR - beq 1f - - @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so - @ we only need to save them if FPEXC_EX is set.
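In C terms, the conditional save below mirrors this sketch (field names follow struct vfp_hard_struct; the hw struct is a hypothetical stand-in for the coprocessor registers that VFPFMRX would read):

#include <stdint.h>
#include <stdio.h>

#define FPEXC_EN   (1u << 30)  /* VFP enable */
#define FPEXC_EX   (1u << 31)  /* exceptional state pending */
#define FPEXC_FP2V (1u << 28)  /* FPINST2 valid */

struct vfp_hw_model   { uint32_t fpexc, fpscr, fpinst, fpinst2; };
struct vfp_ctxt_model { uint32_t fpexc, fpscr, fpinst, fpinst2; };

static void vfp_save_model(struct vfp_ctxt_model *c,
                           const struct vfp_hw_model *hw)
{
    c->fpexc = hw->fpexc;                /* always saved */
    c->fpscr = hw->fpscr;
    if (hw->fpexc & FPEXC_EX) {          /* sub-architecture state live? */
        c->fpinst = hw->fpinst;
        if (hw->fpexc & FPEXC_FP2V)      /* FPINST2 only if marked valid */
            c->fpinst2 = hw->fpinst2;
    }
}

int main(void)
{
    struct vfp_hw_model hw = { FPEXC_EN, 0x03000000u, 0, 0 };
    struct vfp_ctxt_model c = { 0, 0, 0, 0 };
    vfp_save_model(&c, &hw);             /* EX clear: FPINST* untouched */
    printf("fpexc=%#x fpscr=%#x\n", (unsigned)c.fpexc, (unsigned)c.fpscr);
    return 0;
}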
- VFPFMRX r3, FPINST - tst r5, #FPEXC_FP2V - VFPFMRX r4, FPINST2, ne @ vmrsne -1: - VFPFSTMIA r0, r5 @ Save VFP registers - stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2 - pop {r4, r5} - bx lr -ENDPROC(__vfp_save_state) - -/* void __vfp_restore_state(struct vfp_hard_struct *vfp); - * Assume FPEXC_EN is on and FPEXC_EX is off */ -ENTRY(__vfp_restore_state) - VFPFLDMIA r0, r1 @ Load VFP registers - ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2 - - VFPFMXR FPSCR, r1 - tst r0, #FPEXC_EX @ Check for VFP Subarchitecture - beq 1f - VFPFMXR FPINST, r2 - tst r0, #FPEXC_FP2V - VFPFMXR FPINST2, r3, ne -1: - VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN) - bx lr -ENDPROC(__vfp_restore_state) - - .popsection diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S deleted file mode 100644 index 33e34b6d24b24e23b872deaabe0cbe80d3271b3f..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/init.S +++ /dev/null @@ -1,157 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/******************************************************************** - * Hypervisor initialization - * - should be called with: - * r0 = top of Hyp stack (kernel VA) - * r1 = pointer to hyp vectors - * r2,r3 = Hypervisor pgd pointer - * - * The init scenario is: - * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack, - * runtime vectors - * - Invalidate TLBs - * - Set stack and vectors - * - Set up the page tables - * - Enable the MMU - * - Profit! (or eret, if you only care about the code). - * - * Another possibility is to get a HYP stub hypercall. - * We discriminate between the two by checking if r0 contains a value - * that is less than HVC_STUB_HCALL_NR. - */ - - .text - .pushsection .hyp.idmap.text,"ax" - .align 5 -__kvm_hyp_init: - .globl __kvm_hyp_init - - @ Hyp-mode exception vector - W(b) . - W(b) . - W(b) . - W(b) . - W(b) . - W(b) __do_hyp_init - W(b) . - W(b) . - -__do_hyp_init: - @ Check for a stub hypercall - cmp r0, #HVC_STUB_HCALL_NR - blo __kvm_handle_stub_hvc - - @ Set stack pointer - mov sp, r0 - - @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - - @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, rr_lo_hi(r2, r3), c2 - - @ Set the HTCR and VTCR to the same shareability and cacheability - @ settings as the non-secure TTBCR and with T0SZ == 0. - mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r2, =HTCR_MASK - bic r0, r0, r2 - mrc p15, 0, r1, c2, c0, 2 @ TTBCR - and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) - orr r0, r0, r1 - mcr p15, 4, r0, c2, c0, 2 @ HTCR - - @ Use the same memory attributes for hyp. accesses as the kernel - @ (copy MAIRx to HMAIRx).
- mrc p15, 0, r0, c10, c2, 0 - mcr p15, 4, r0, c10, c2, 0 - mrc p15, 0, r0, c10, c2, 1 - mcr p15, 4, r0, c10, c2, 1 - - @ Invalidate the stale TLBs from Bootloader - mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb ish - - @ Set the HSCTLR to: - @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel) - @ - Endianness: Kernel config - @ - Fast Interrupt Features: Kernel config - @ - Write permission implies XN: disabled - @ - Instruction cache: enabled - @ - Data/Unified cache: enabled - @ - MMU: enabled (this code must be run from an identity mapping) - mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r2, =HSCTLR_MASK - bic r0, r0, r2 - mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r2 - ARM( ldr r2, =(HSCTLR_M) ) - THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) ) - orr r1, r1, r2 - orr r0, r0, r1 - mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - - eret - -ENTRY(__kvm_handle_stub_hvc) - cmp r0, #HVC_SOFT_RESTART - bne 1f - - /* The target is expected in r1 */ - msr ELR_hyp, r1 - mrs r0, cpsr - bic r0, r0, #MODE_MASK - orr r0, r0, #HYP_MODE -THUMB( orr r0, r0, #PSR_T_BIT ) - msr spsr_cxsf, r0 - b reset - -1: cmp r0, #HVC_RESET_VECTORS - bne 1f - -reset: - /* We're now in idmap, disable MMU */ - mrc p15, 4, r1, c1, c0, 0 @ HSCTLR - ldr r0, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) - bic r1, r1, r0 - mcr p15, 4, r1, c1, c0, 0 @ HSCTLR - - /* - * Install stub vectors, using ardb's VA->PA trick. - */ -0: adr r0, 0b @ PA(0) - movw r1, #:lower16:__hyp_stub_vectors - 0b @ VA(stub) - VA(0) - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r1, r1, r0 @ PA(stub) - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - b exit - -1: ldr r0, =HVC_STUB_ERR - eret - -exit: - mov r0, #0 - eret -ENDPROC(__kvm_handle_stub_hvc) - - .ltorg - - .globl __kvm_hyp_init_end -__kvm_hyp_init_end: - - .popsection diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S deleted file mode 100644 index 064f4f118ca73d0ff49a0e00185d5e985a1817a1..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/interrupts.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include - - .text - -/******************************************************************** - * Call function in Hyp mode - * - * - * unsigned long kvm_call_hyp(void *hypfn, ...); - * - * This is not really a variadic function in the classic C-way and care must - * be taken when calling this to ensure parameters are passed in registers - * only, since the stack will change between the caller and the callee. - * - * Call the function with the first argument containing a pointer to the - * function you wish to call in Hyp mode, and subsequent arguments will be - * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the - * function pointer can be passed). The function being called must be mapped - * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 (strictly 32bit). 
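A caller-side sketch of this ABI (__my_hyp_fn is hypothetical; in-tree callers pass functions such as __kvm_vcpu_run that live in the .hyp.text section and are mapped into Hyp mode):

extern unsigned long kvm_call_hyp(void *hypfn, ...);

/* Hypothetical Hyp-mode function; it must be mapped in Hyp mode
 * (see init_hyp_mode in arch/arm/kvm/arm.c). */
unsigned long __my_hyp_fn(unsigned long a, unsigned long b, unsigned long c);

static unsigned long example_hyp_call(void)
{
    /* r0 = function pointer, r1-r3 = up to three register-sized args;
     * the HVC #0 trap lands in hyp_hvc, which shifts r1-r3 down to
     * r0-r2 before branching to the function. */
    return kvm_call_hyp((void *)__my_hyp_fn, 1UL, 2UL, 3UL);
}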
- * - * The calling convention follows the standard AAPCS: - * r0 - r3: caller save - * r12: caller save - * rest: callee save - */ -ENTRY(__kvm_call_hyp) - hvc #0 - bx lr -ENDPROC(__kvm_call_hyp) diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S deleted file mode 100644 index b05e95840651d0f5acc653a1efaeecd5c85b18fe..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ashldi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__ashldi3) -ENTRY(__aeabi_llsl) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - ARM( orrmi ah, ah, al, lsr ip ) - THUMB( lsrmi r3, al, ip ) - THUMB( orrmi ah, ah, r3 ) - mov al, al, lsl r2 - ret lr - -ENDPROC(__ashldi3) -ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S deleted file mode 100644 index 275d7d2341a4e52e31e19924ebde00aaa771a49c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ashrdi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. 
If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__ashrdi3) -ENTRY(__aeabi_lasr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - ARM( orrmi al, al, ah, lsl ip ) - THUMB( lslmi r3, ah, ip ) - THUMB( orrmi al, al, r3 ) - mov ah, ah, asr r2 - ret lr - -ENDPROC(__ashrdi3) -ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S deleted file mode 100644 index 2ff375144b55b0347240441205f476aae1a837cc..0000000000000000000000000000000000000000 --- a/arch/arm/lib/backtrace-clang.S +++ /dev/null @@ -1,217 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/backtrace-clang.S - * - * Copyright (C) 2019 Nathan Huckleberry - * - */ -#include -#include -#include - .text - -/* fp is 0 or stack frame */ - -#define frame r4 -#define sv_fp r5 -#define sv_pc r6 -#define mask r7 -#define sv_lr r8 - -ENTRY(c_backtrace) - -#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - ret lr -ENDPROC(c_backtrace) -#else - - -/* - * Clang does not store pc or sp in function prologues so we don't know exactly - * where the function starts. - * - * We can treat the current frame's lr as the saved pc and the preceding - * frame's lr as the current frame's lr, but we can't trace the most recent - * call. Inserting a false stack frame allows us to reference the function - * called last in the stacktrace. - * - * If the call instruction was a bl we can look at the caller's branch - * instruction to calculate the saved pc. We can recover the pc in most cases, - * but in cases such as calling function pointers we cannot. In this case, - * default to using the lr. This will be some address in the function, but will - * not be the function start. - * - * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these - * are less frequently saved. - * - * Stack frame layout: - * - * saved lr - * frame=> saved fp - * optionally saved caller registers (r4 - r10) - * optionally saved arguments (r0 - r3) - * - * - * - * Functions start with the following code sequence: - * corrected pc => stmfd sp!, {..., fp, lr} - * add fp, sp, #x - * stmfd sp!, {r0 - r3} (optional) - * - * - * - * - * - * - * The diagram below shows an example stack setup for dump_stack. - * - * The frame for c_backtrace has pointers to the code of dump_stack. This is - * why the frame of c_backtrace is used for the pc calculation of - * dump_stack. This is why we must move back a frame to print dump_stack. - * - * The stored locals for dump_stack are in dump_stack's frame. This means that - * to fully print dump_stack's frame we need both the frame for dump_stack (for - * locals) and the frame that was called by dump_stack (for pc). - * - * To print locals we must know where the function start is. If we read the - * function prologue opcodes we can determine which variables are stored in the - * stack frame. - * - * To find the function start of dump_stack we can look at the stored LR of - * show_stack. It points at the instruction directly after the bl dump_stack. - * We can then read the offset from the bl opcode to determine where the branch - * takes us. The address calculated must be the start of dump_stack. - * - * c_backtrace frame dump_stack: - * {[LR] } ============| ... - * {[FP] } =======| | bl c_backtrace - * | |=> ...
- {[R4-R10]} | - {[R0-R3] } | show_stack: - dump_stack frame | ... - {[LR] } =============| bl dump_stack - {[FP] } <=======| |=> ... - {[R4-R10]} - {[R0-R3] } - */ - - stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register - @ to ensure 8 byte alignment - movs frame, r0 @ if frame pointer is zero - beq no_frame @ we have no stack frames - tst r1, #0x10 @ 26 or 32-bit mode? - moveq mask, #0xfc000003 - movne mask, #0 @ mask for 32-bit - -/* - * Switches the current frame to be the frame for dump_stack. - */ - add frame, sp, #24 @ switch to false frame -for_each_frame: tst frame, mask @ Check for address exceptions - bne no_frame - -/* - * sv_fp is the stack frame with the locals for the currently considered - * function. - * - * sv_pc is the saved lr from the frame above. This is a pointer to a code - * address within the currently considered function, but it is not the function - * start. This value gets updated to be the function start later if it is - * possible. - */ -1001: ldr sv_pc, [frame, #4] @ get saved 'pc' -1002: ldr sv_fp, [frame, #0] @ get saved fp - - teq sv_fp, mask @ make sure next frame exists - beq no_frame - -/* - * sv_lr is the lr from the function that called the current function. This is - * a pointer to a code address in the current function's caller. sv_lr-4 is - * the instruction used to call the current function. - * - * This sv_lr can be used to calculate the function start if the function was - * called using a bl instruction. If the function start can be recovered sv_pc - * is overwritten with the function start. - * - * If the current function was called using a function pointer we cannot - * recover the function start and instead continue with sv_pc as an arbitrary - * value within the current function. If this is the case we cannot print - * registers for the current function, but the stacktrace is still printed - * properly. - */ -1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame - - ldr r0, [sv_lr, #-4] @ get call instruction - ldr r3, .Lopcode+4 - and r2, r3, r0 @ is this a bl call - teq r2, r3 - bne finished_setup @ give up if it's not - and r0, #0xffffff @ get call offset 24-bit int - lsl r0, r0, #8 @ sign extend offset - asr r0, r0, #8 - ldr sv_pc, [sv_fp, #4] @ get lr address - add sv_pc, sv_pc, #-4 @ get call instruction address - add sv_pc, sv_pc, #8 @ take care of prefetch - add sv_pc, sv_pc, r0, lsl #2@ find function start - -finished_setup: - - bic sv_pc, sv_pc, mask @ mask PC/LR for the mode - -/* - * Print the function (sv_pc) and where it was called from (sv_lr). - */ -1004: mov r0, sv_pc - - mov r1, sv_lr - mov r2, frame - bic r1, r1, mask @ mask PC/LR for the mode - bl dump_backtrace_entry - -/* - * Test if the function start is a stmfd instruction to determine which - * registers were stored in the function prologue. - * - * If we could not recover the sv_pc because we were called through a function - * pointer the comparison will fail and no registers will print. Unwinding will - * continue as if there had been no registers stored in this frame. - */ -1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr} - ldr r3, .Lopcode @ instruction exists, - teq r3, r1, lsr #11 - ldr r0, [frame] @ locals are stored in - @ the preceding frame - subeq r0, r0, #4 - bleq dump_backtrace_stm @ dump saved registers - -/* - * If we are out of frames or if the next frame is invalid.
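The 24-bit offset arithmetic at 1003 above, written out in C (a hedged model, not kernel code; sv_lr - 4 is the address of the caller's bl):

#include <stdint.h>
#include <stdio.h>

/* bl encodes a signed 24-bit word offset, taken relative to the bl's
 * address + 8 (ARM prefetch). */
static uint32_t bl_target(uint32_t bl_addr, uint32_t insn)
{
    int32_t imm24 = (int32_t)(insn << 8) >> 8; /* lsl #8 then asr #8 */
    return bl_addr + 8 + ((uint32_t)imm24 << 2);
}

int main(void)
{
    uint32_t insn = 0xebfffffe;                /* "bl ." (offset -2 words) */
    printf("%#x\n", bl_target(0x8010u, insn)); /* prints 0x8010 */
    return 0;
}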
- */ - teq sv_fp, #0 @ zero saved fp means - beq no_frame @ no further frames - - cmp sv_fp, frame @ next frame must be - mov frame, sv_fp @ above the current frame - bhi for_each_frame - -1006: adr r0, .Lbad - mov r1, frame - bl printk -no_frame: ldmfd sp!, {r4 - r9, fp, pc} -ENDPROC(c_backtrace) - .pushsection __ex_table,"a" - .align 3 - .long 1001b, 1006b - .long 1002b, 1006b - .long 1003b, 1006b - .long 1004b, 1006b - .long 1005b, 1006b - .popsection - -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" - .align -.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} - .word 0x0b000000 @ bl if these bits are set - -#endif diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S deleted file mode 100644 index 582925238d65ea261cc126078462ce2a66421549..0000000000000000000000000000000000000000 --- a/arch/arm/lib/backtrace.S +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/backtrace.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include - .text - -@ fp is 0 or stack frame - -#define frame r4 -#define sv_fp r5 -#define sv_pc r6 -#define mask r7 -#define offset r8 - -ENTRY(c_backtrace) - -#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - ret lr -ENDPROC(c_backtrace) -#else - stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... - movs frame, r0 @ if frame pointer is zero - beq no_frame @ we have no stack frames - - tst r1, #0x10 @ 26 or 32-bit mode? - ARM( moveq mask, #0xfc000003 ) - THUMB( moveq mask, #0xfc000000 ) - THUMB( orreq mask, #0x03 ) - movne mask, #0 @ mask for 32-bit - -1: stmfd sp!, {pc} @ calculate offset of PC stored - ldr r0, [sp], #4 @ by stmfd for this CPU - adr r1, 1b - sub offset, r0, r1 - -/* - * Stack frame layout: - * optionally saved caller registers (r4 - r10) - * saved fp - * saved sp - * saved lr - * frame => saved pc - * optionally saved arguments (r0 - r3) - * saved sp => - * - * Functions start with the following code sequence: - * mov ip, sp - * stmfd sp!, {r0 - r3} (optional) - * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} - */ -for_each_frame: tst frame, mask @ Check for address exceptions - bne no_frame - -1001: ldr sv_pc, [frame, #0] @ get saved pc -1002: ldr sv_fp, [frame, #-12] @ get saved fp - - sub sv_pc, sv_pc, offset @ Correct PC for prefetching - bic sv_pc, sv_pc, mask @ mask PC/LR for the mode - -1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, - ldr r3, .Ldsi+4 @ adjust saved 'pc' back one - teq r3, r2, lsr #11 @ instruction - subne r0, sv_pc, #4 @ allow for mov - subeq r0, sv_pc, #8 @ allow for mov + stmia - - ldr r1, [frame, #-4] @ get saved lr - mov r2, frame - bic r1, r1, mask @ mask PC/LR for the mode - bl dump_backtrace_entry - - ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, - ldr r3, .Ldsi+4 - teq r3, r1, lsr #11 - ldreq r0, [frame, #-8] @ get sp - subeq r0, r0, #4 @ point at the last arg - bleq dump_backtrace_stm @ dump saved registers - -1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} - ldr r3, .Ldsi @ instruction exists, - teq r3, r1, lsr #11 - subeq r0, frame, #16 - bleq dump_backtrace_stm @ dump saved registers - - teq sv_fp, #0 @ zero saved fp means - beq no_frame @ no further frames - - cmp sv_fp, frame @ next frame must be - mov frame, sv_fp @ above the current frame - bhi for_each_frame - -1006: adr r0, .Lbad - mov r1, frame - bl printk -no_frame: ldmfd sp!, {r4 - r8, pc} -ENDPROC(c_backtrace) - - 
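For comparison with the clang variant, the classic layout this file walks can be modelled in C roughly as follows (illustrative only; real unwinding must validate every load, which the asm does via the __ex_table entries that follow):

#include <stdio.h>

/* Hypothetical helper type: fp points at the saved pc, so the tail of
 * a frame starts 12 bytes below it, matching the loads at 1001/1002
 * and [frame, #-4]/[frame, #-8]/[frame, #-12]. */
struct apcs_frame_tail {
    unsigned long fp;   /* [frame, #-12] caller's frame pointer */
    unsigned long sp;   /* [frame, #-8]                         */
    unsigned long lr;   /* [frame, #-4]                         */
    unsigned long pc;   /* [frame, #0] saved by stmfd {..., pc} */
};

static void walk(unsigned long fp)
{
    while (fp) {
        struct apcs_frame_tail *f = (struct apcs_frame_tail *)(fp - 12);
        printf("pc=%#lx lr=%#lx\n", f->pc, f->lr);
        if (f->fp <= fp)            /* frames must strictly ascend */
            break;
        fp = f->fp;
    }
}

int main(void)
{
    unsigned long stack[4];
    struct apcs_frame_tail *f = (struct apcs_frame_tail *)stack;
    f->fp = 0; f->sp = 0; f->lr = 0x8004; f->pc = 0x8010;  /* one frame */
    walk((unsigned long)&f->pc);
    return 0;
}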
.pushsection __ex_table,"a" - .align 3 - .long 1001b, 1006b - .long 1002b, 1006b - .long 1003b, 1006b - .long 1004b, 1006b - .popsection - -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" - .align -.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} - .word 0xe92d0000 >> 11 @ stmfd sp!, {} - -#endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S deleted file mode 100644 index 591ba077e874de5f5fef18da11d0a5643d8c7ff1..0000000000000000000000000000000000000000 --- a/arch/arm/lib/bswapsdi2.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#if __LINUX_ARM_ARCH__ >= 6 -ENTRY(__bswapsi2) - rev r0, r0 - bx lr -ENDPROC(__bswapsi2) - -ENTRY(__bswapdi2) - rev r3, r0 - rev r0, r1 - mov r1, r3 - bx lr -ENDPROC(__bswapdi2) -#else -ENTRY(__bswapsi2) - eor r3, r0, r0, ror #16 - mov r3, r3, lsr #8 - bic r3, r3, #0xff00 - eor r0, r3, r0, ror #8 - ret lr -ENDPROC(__bswapsi2) - -ENTRY(__bswapdi2) - mov ip, r1 - eor r3, ip, ip, ror #16 - eor r1, r0, r0, ror #16 - mov r1, r1, lsr #8 - mov r3, r3, lsr #8 - bic r3, r3, #0xff00 - bic r1, r1, #0xff00 - eor r1, r1, r0, ror #8 - eor r0, r3, ip, ror #8 - ret lr -ENDPROC(__bswapdi2) -#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S deleted file mode 100644 index 28b0341ae786fbd73aa5d4fe9a43e49bee3e910c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/call_with_stack.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/lib/call_with_stack.S - * - * Copyright (C) 2011 ARM Ltd. - * Written by Will Deacon - */ - -#include -#include - -/* - * void call_with_stack(void (*fn)(void *), void *arg, void *sp) - * - * Change the stack to that pointed at by sp, then invoke fn(arg) with - * the new stack. - */ -ENTRY(call_with_stack) - str sp, [r2, #-4]! - str lr, [r2, #-4]! 
- - mov sp, r2 - mov r2, r0 - mov r0, r1 - - badr lr, 1f - ret r2 - -1: ldr lr, [sp] - ldr sp, [sp, #4] - ret lr -ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S deleted file mode 100644 index 02424765e9e1cd8659c8886d5c97c191ad0880b7..0000000000000000000000000000000000000000 --- a/arch/arm/lib/changebit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/changebit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _change_bit, eor diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S deleted file mode 100644 index 8f2c4dbfc5f21009d6336c16f514140a226fe3eb..0000000000000000000000000000000000000000 --- a/arch/arm/lib/clear_user.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/clear_user.S - * - * Copyright (C) 1995, 1996,1997,1998 Russell King - */ -#include -#include -#include - - .text - -/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) - * Purpose : clear some user memory - * Params : addr - user memory address to clear - * : sz - number of bytes to clear - * Returns : number of bytes NOT cleared - */ -ENTRY(__clear_user_std) -WEAK(arm_clear_user) -UNWIND(.fnstart) -UNWIND(.save {r1, lr}) - stmfd sp!, {r1, lr} - mov r2, #0 - cmp r1, #4 - blt 2f - ands ip, r0, #3 - beq 1f - cmp ip, #2 - strusr r2, r0, 1 - strusr r2, r0, 1, le - strusr r2, r0, 1, lt - rsb ip, ip, #4 - sub r1, r1, ip @ 7 6 5 4 3 2 1 -1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 - strusr r2, r0, 4, pl, rept=2 - bpl 1b - adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 - strusr r2, r0, 4, pl -2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x - strusr r2, r0, 1, ne, rept=2 - tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 - it ne @ explicit IT needed for the label -USER( strbtne r2, [r0]) - mov r0, #0 - ldmfd sp!, {r1, pc} -UNWIND(.fnend) -ENDPROC(arm_clear_user) -ENDPROC(__clear_user_std) - - .pushsection .text.fixup,"ax" - .align 0 -9001: ldmfd sp!, {r0, pc} - .popsection - diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S deleted file mode 100644 index 4646dee8a3394121794ad9988cf737fa5ac99d3c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/clearbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/clearbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _clear_bit, bic diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S deleted file mode 100644 index f8016e3db65d7f628327ed7600f24943c210ea7f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_from_user.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_from_user.S - * - * Author: Nicolas Pitre - * Created: Sep 29, 2005 - * Copyright: MontaVista Software, Inc. - */ - -#include -#include -#include - -/* - * Prototype: - * - * size_t arm_copy_from_user(void *to, const void *from, size_t n) - * - * Purpose: - * - * copy a block to kernel memory from user memory - * - * Params: - * - * to = kernel memory - * from = user memory - * n = number of bytes to copy - * - * Return value: - * - * Number of bytes NOT copied. 
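Callers never see the fault directly; they test the residue, as in this hedged sketch of the usual wrapper logic (read_sample and the zeroing policy are illustrative, mirroring copy_from_user(), not the exact in-tree wrappers):

#include <string.h>

extern unsigned long arm_copy_from_user(void *to, const void *from,
                                        unsigned long n);

/* A non-zero return is the byte count left uncopied; zero the tail
 * and report -EFAULT. */
static long read_sample(void *dst, const void *user_src, unsigned long n)
{
    unsigned long left = arm_copy_from_user(dst, user_src, n);
    if (left) {
        memset((char *)dst + (n - left), 0, left);
        return -14;                /* -EFAULT */
    }
    return 0;
}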
- */ - -#ifdef CONFIG_CPU_USE_DOMAINS - -#ifndef CONFIG_THUMB2_KERNEL -#define LDR1W_SHIFT 0 -#else -#define LDR1W_SHIFT 1 -#endif - - .macro ldr1w ptr reg abort - ldrusr \reg, \ptr, 4, abort=\abort - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldr1w \ptr, \reg1, \abort - ldr1w \ptr, \reg2, \abort - ldr1w \ptr, \reg3, \abort - ldr1w \ptr, \reg4, \abort - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort - ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort - .endm - -#else - -#define LDR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - USERL(\abort, W(ldr) \reg, [\ptr], #4) - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}) - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) - .endm - -#endif /* CONFIG_CPU_USE_DOMAINS */ - - .macro ldr1b ptr reg cond=al abort - ldrusr \reg, \ptr, 1, \cond, abort=\abort - .endm - -#define STR1W_SHIFT 0 - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - strb\cond \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - mov r3, #0 - stmdb sp!, {r0, r2, r3, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - add sp, sp, #8 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -ENTRY(arm_copy_from_user) -#ifdef CONFIG_CPU_SPECTRE - get_thread_info r3 - ldr r3, [r3, #TI_ADDR_LIMIT] - uaccess_mask_range_ptr r1, r2, r3, ip -#endif - -#include "copy_template.S" - -ENDPROC(arm_copy_from_user) - - .pushsection .text.fixup,"ax" - .align 0 - copy_abort_preamble - ldmfd sp!, {r1, r2, r3} - sub r0, r0, r1 - rsb r0, r0, r2 - copy_abort_end - .popsection - diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S deleted file mode 100644 index 5db1a8ee3d9fb94416e2dae5e744de507f2659d9..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_page.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copypage.S - * - * Copyright (C) 1995-1999 Russell King - * - * ASM optimised string functions - */ -#include -#include -#include -#include - -#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) - - .text - .align 5 -/* - * StrongARM optimised copy_page routine - * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) - * Note that we probably achieve closer to the 100MB/s target with - * the core clock switching. 
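The structure of the unrolled loop that follows, modelled in C (the constants are assumptions for a 4 KiB page and a 32-byte StrongARM cache line; the real values come from the page and cache headers, and the PLD variant runs one iteration short to overlap prefetch):

#include <string.h>

#define PAGE_SZ        4096  /* assumption: 4 KiB pages */
#define L1_CACHE_BYTES 32    /* assumption: StrongARM 32-byte lines */

/* The page moves in 16-byte {r3, r4, ip, lr} bursts, two cache lines
 * per outer iteration (COPY_COUNT, ignoring the PLD adjustment). */
static void copy_page_model(void *to, const void *from)
{
    const char *s = (const char *)from;
    char *d = (char *)to;
    int count = PAGE_SZ / (2 * L1_CACHE_BYTES);

    while (count--) {
        int i;
        for (i = 0; i < 2 * L1_CACHE_BYTES / 16; i++) {
            memcpy(d, s, 16);      /* one ldmia/stmia burst */
            d += 16;
            s += 16;
        }
    }
}

int main(void)
{
    static char src[PAGE_SZ] = { 1 }, dst[PAGE_SZ];
    copy_page_model(dst, src);
    return dst[0] == 1 ? 0 : 1;
}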
- */ -ENTRY(copy_page) - stmfd sp!, {r4, lr} @ 2 - PLD( pld [r1, #0] ) - PLD( pld [r1, #L1_CACHE_BYTES] ) - mov r2, #COPY_COUNT @ 1 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) - PLD( pld [r1, #3 * L1_CACHE_BYTES]) -2: - .rept (2 * L1_CACHE_BYTES / 16 - 1) - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - .endr - subs r2, r2, #1 @ 1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmiagt r1!, {r3, r4, ip, lr} @ 4 - bgt 1b @ 1 - PLD( ldmiaeq r1!, {r3, r4, ip, lr} ) - PLD( beq 2b ) - ldmfd sp!, {r4, pc} @ 3 -ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S deleted file mode 100644 index 810a805d36dce8f78e879ae38d392be16d52bebc..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_template.S +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_template.s - * - * Code template for optimized memory copy functions - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. - */ - -/* - * Theory of operation - * ------------------- - * - * This file provides the core code for a forward memory copy used in - * the implementation of memcpy(), copy_to_user() and copy_from_user(). - * - * The including file must define the following accessor macros - * according to the needs of the given function: - * - * ldr1w ptr reg abort - * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. - * - * ldr4w ptr reg1 reg2 reg3 reg4 abort - * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The 'abort' argument is used for fixup tables. - * - * ldr1b ptr reg cond abort - * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. - * - * str1w ptr reg abort - * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * str1b ptr reg cond abort - * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. - * - * enter reg1 reg2 - * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. - * - * usave reg1 reg2 - * - * Unwind annotation macro corresponding to the 'enter' macro. - * It tells the unwinder that a prior 'enter' macro preserved some - * provided registers on the stack, plus additional data. - * - * exit reg1 reg2 - * - * Restore registers with the values previously saved with the - * 'enter' macro. Called upon code termination.
- * - * LDR1W_SHIFT - * STR1W_SHIFT - * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * more than one 32-bit instruction in Thumb-2) - */ - - - UNWIND( .fnstart ) - enter r4, lr - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr @ in first stmdb block - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr - UNWIND( .save {r5 - r8} ) @ in second stmfd block - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcsne r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 -#if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT -#endif - addne pc, pc, ip @ C is always clear here - b 7f -6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f - -#if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT -#elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT -#endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - UNWIND( .fnend ) @ end of second stmfd block - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f - - exit r4, pc - -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f - UNWIND( .fnend ) - - - .macro forward_copy_shift pull push - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr - UNWIND( .save {r5 - r9} ) @ in new second stmfd block - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, lspull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, lspush #\push - mov r4, r4, lspull #\pull - orr r4, r4, r5, lspush #\push - mov r5, r5, lspull #\pull - orr
r5, r5, r6, lspush #\push - mov r6, r6, lspull #\pull - orr r6, r6, r7, lspush #\push - mov r7, r7, lspull #\pull - orr r7, r7, r8, lspush #\push - mov r8, r8, lspull #\pull - orr r8, r8, r9, lspush #\push - mov r9, r9, lspull #\pull - orr r9, r9, ip, lspush #\push - mov ip, ip, lspull #\pull - orr ip, ip, lr, lspush #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - UNWIND( .fnend ) @ end of the second stmfd block - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, lspull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, lspush #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - UNWIND( .fnend ) - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 - - -/* - * Abort preamble and completion macros. - * If a fixup handler is required then those macros must surround it. - * It is assumed that the fixup code will handle the private part of - * the exit macro. - */ - - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} -21: - .endm - - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm - diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S deleted file mode 100644 index ebfe4cb3d9125056a5abcd867d6342d44ba0d3a7..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_to_user.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_to_user.S - * - * Author: Nicolas Pitre - * Created: Sep 29, 2005 - * Copyright: MontaVista Software, Inc. - */ - -#include -#include -#include - -/* - * Prototype: - * - * size_t arm_copy_to_user(void *to, const void *from, size_t n) - * - * Purpose: - * - * copy a block to user memory from kernel memory - * - * Params: - * - * to = user memory - * from = kernel memory - * n = number of bytes to copy - * - * Return value: - * - * Number of bytes NOT copied. 
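Both user-copy flavours share the same fault fixup arithmetic; in C (a model of the .text.fixup block, with names invented for clarity):

#include <stdio.h>

/* The 'enter' macro saved the original dst (r0) and count (r2); on
 * abort the handler reloads them and converts the dst cursor into the
 * "bytes NOT copied" return value. */
static unsigned long bytes_not_copied(unsigned long dst_start,
                                      unsigned long dst_cursor,
                                      unsigned long count)
{
    unsigned long copied = dst_cursor - dst_start; /* sub r0, r0, r1 */
    return count - copied;                         /* rsb r0, r0, r2 */
}

int main(void)
{
    /* faulted after 96 of 256 bytes: 160 left */
    printf("%lu\n", bytes_not_copied(0x1000, 0x1060, 256));
    return 0;
}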
- */ - -#define LDR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldrb\cond \reg, [\ptr], #1 - .endm - -#ifdef CONFIG_CPU_USE_DOMAINS - -#ifndef CONFIG_THUMB2_KERNEL -#define STR1W_SHIFT 0 -#else -#define STR1W_SHIFT 1 -#endif - - .macro str1w ptr reg abort - strusr \reg, \ptr, 4, abort=\abort - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - str1w \ptr, \reg1, \abort - str1w \ptr, \reg2, \abort - str1w \ptr, \reg3, \abort - str1w \ptr, \reg4, \abort - str1w \ptr, \reg5, \abort - str1w \ptr, \reg6, \abort - str1w \ptr, \reg7, \abort - str1w \ptr, \reg8, \abort - .endm - -#else - -#define STR1W_SHIFT 0 - - .macro str1w ptr reg abort - USERL(\abort, W(str) \reg, [\ptr], #4) - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) - .endm - -#endif /* CONFIG_CPU_USE_DOMAINS */ - - .macro str1b ptr reg cond=al abort - strusr \reg, \ptr, 1, \cond, abort=\abort - .endm - - .macro enter reg1 reg2 - mov r3, #0 - stmdb sp!, {r0, r2, r3, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - add sp, sp, #8 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -ENTRY(__copy_to_user_std) -WEAK(arm_copy_to_user) -#ifdef CONFIG_CPU_SPECTRE - get_thread_info r3 - ldr r3, [r3, #TI_ADDR_LIMIT] - uaccess_mask_range_ptr r0, r2, r3, ip -#endif - -#include "copy_template.S" - -ENDPROC(arm_copy_to_user) -ENDPROC(__copy_to_user_std) - - .pushsection .text.fixup,"ax" - .align 0 - copy_abort_preamble - ldmfd sp!, {r1, r2, r3} - sub r0, r0, r1 - rsb r0, r0, r2 - copy_abort_end - .popsection diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S deleted file mode 100644 index 3559d515144c2243844527bf6e83b616cc25a7b4..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumipv6.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumipv6.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -ENTRY(__csum_ipv6_magic) - str lr, [sp, #-4]! 
- adds ip, r2, r3 - ldmia r1, {r1 - r3, lr} - adcs ip, ip, r1 - adcs ip, ip, r2 - adcs ip, ip, r3 - adcs ip, ip, lr - ldmia r0, {r0 - r3} - adcs r0, ip, r0 - adcs r0, r0, r1 - adcs r0, r0, r2 - ldr r2, [sp, #4] - adcs r0, r0, r3 - adcs r0, r0, r2 - adcs r0, r0, #0 - ldmfd sp!, {pc} -ENDPROC(__csum_ipv6_magic) - diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S deleted file mode 100644 index 87c9471be8b65aa527c5805369430b2e49cee961..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartial.S +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartial.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -/* - * Function: __u32 csum_partial(const char *src, int len, __u32 sum) - * Params : r0 = buffer, r1 = len, r2 = checksum - * Returns : r0 = new checksum - */ - -buf .req r0 -len .req r1 -sum .req r2 -td0 .req r3 -td1 .req r4 @ save before use -td2 .req r5 @ save before use -td3 .req lr - -.Lzero: mov r0, sum - add sp, sp, #4 - ldr pc, [sp], #4 - - /* - * Handle 0 to 7 bytes, with any alignment of source and - * destination pointers. Note that when we get here, C = 0 - */ -.Lless8: teq len, #0 @ check for zero count - beq .Lzero - - /* we must have at least one byte. */ - tst buf, #1 @ odd address? - movne sum, sum, ror #8 - ldrbne td0, [buf], #1 - subne len, len, #1 - adcsne sum, sum, td0, put_byte_1 - -.Lless4: tst len, #6 - beq .Lless8_byte - - /* we are now half-word aligned */ - -.Lless8_wordlp: -#if __LINUX_ARM_ARCH__ >= 4 - ldrh td0, [buf], #2 - sub len, len, #2 -#else - ldrb td0, [buf], #1 - ldrb td3, [buf], #1 - sub len, len, #2 -#ifndef __ARMEB__ - orr td0, td0, td3, lsl #8 -#else - orr td0, td3, td0, lsl #8 -#endif -#endif - adcs sum, sum, td0 - tst len, #6 - bne .Lless8_wordlp - -.Lless8_byte: tst len, #1 @ odd number of bytes - ldrbne td0, [buf], #1 @ include last byte - adcsne sum, sum, td0, put_byte_0 @ update checksum - -.Ldone: adc r0, sum, #0 @ collect up the last carry - ldr td0, [sp], #4 - tst td0, #1 @ check buffer alignment - movne r0, r0, ror #8 @ rotate checksum by 8 bits - ldr pc, [sp], #4 @ return - -.Lnot_aligned: tst buf, #1 @ odd address - ldrbne td0, [buf], #1 @ make even - subne len, len, #1 - adcsne sum, sum, td0, put_byte_1 @ update checksum - - tst buf, #2 @ 32-bit aligned? -#if __LINUX_ARM_ARCH__ >= 4 - ldrhne td0, [buf], #2 @ make 32-bit aligned - subne len, len, #2 -#else - ldrbne td0, [buf], #1 - ldrbne ip, [buf], #1 - subne len, len, #2 -#ifndef __ARMEB__ - orrne td0, td0, ip, lsl #8 -#else - orrne td0, ip, td0, lsl #8 -#endif -#endif - adcsne sum, sum, td0 @ update checksum - ret lr - -ENTRY(csum_partial) - stmfd sp!, {buf, lr} - cmp len, #8 @ Ensure that we have at least - blo .Lless8 @ 8 bytes to copy. 
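What the adcs chains accumulate is the standard Internet ones'-complement sum; a C model, equivalent up to the final 16-bit fold (the asm handles alignment rotation separately, as shown below):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* 32-bit ones'-complement partial sum with end-around carry. */
static uint32_t csum_partial_model(const uint8_t *buf, size_t len,
                                   uint32_t sum)
{
    uint64_t acc = sum;
    while (len > 1) {
        acc += (uint32_t)buf[0] | ((uint32_t)buf[1] << 8); /* LE halfword */
        buf += 2;
        len -= 2;
    }
    if (len)
        acc += buf[0];             /* trailing odd byte */
    while (acc >> 32)              /* fold carries back in */
        acc = (acc & 0xffffffffu) + (acc >> 32);
    return (uint32_t)acc;
}

int main(void)
{
    uint8_t pkt[4] = { 0x45, 0x00, 0x00, 0x54 };
    printf("%#x\n", csum_partial_model(pkt, sizeof(pkt), 0)); /* 0x5445 */
    return 0;
}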
- - tst buf, #1 - movne sum, sum, ror #8 - - adds sum, sum, #0 @ C = 0 - tst buf, #3 @ Test destination alignment - blne .Lnot_aligned @ align destination, return here - -1: bics ip, len, #31 - beq 3f - - stmfd sp!, {r4 - r5} -2: ldmia buf!, {td0, td1, td2, td3} - adcs sum, sum, td0 - adcs sum, sum, td1 - adcs sum, sum, td2 - adcs sum, sum, td3 - ldmia buf!, {td0, td1, td2, td3} - adcs sum, sum, td0 - adcs sum, sum, td1 - adcs sum, sum, td2 - adcs sum, sum, td3 - sub ip, ip, #32 - teq ip, #0 - bne 2b - ldmfd sp!, {r4 - r5} - -3: tst len, #0x1c @ should not change C - beq .Lless4 - -4: ldr td0, [buf], #4 - sub len, len, #4 - adcs sum, sum, td0 - tst len, #0x1c - bne 4b - b .Lless4 -ENDPROC(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S deleted file mode 100644 index 184d97254a7a2de14cdf83e312383c6fe3071b8f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopy.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopy.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) - * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum - * Returns : r0 = new checksum - */ - - .macro save_regs - stmfd sp!, {r1, r4 - r8, lr} - .endm - - .macro load_regs - ldmfd sp!, {r1, r4 - r8, pc} - .endm - - .macro load1b, reg1 - ldrb \reg1, [r0], #1 - .endm - - .macro load2b, reg1, reg2 - ldrb \reg1, [r0], #1 - ldrb \reg2, [r0], #1 - .endm - - .macro load1l, reg1 - ldr \reg1, [r0], #4 - .endm - - .macro load2l, reg1, reg2 - ldr \reg1, [r0], #4 - ldr \reg2, [r0], #4 - .endm - - .macro load4l, reg1, reg2, reg3, reg4 - ldmia r0!, {\reg1, \reg2, \reg3, \reg4} - .endm - -#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) -#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) - -#include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S deleted file mode 100644 index 0b706a39a6770296034c4421a7c7cf4f483949ba..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopygeneric.S - * - * Copyright (C) 1995-2001 Russell King - */ -#include - -/* - * unsigned int - * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) - * r0 = src, r1 = dst, r2 = len, r3 = sum - * Returns : r0 = checksum - * - * Note that 'tst' and 'teq' preserve the carry flag. - */ - -src .req r0 -dst .req r1 -len .req r2 -sum .req r3 - -.Lzero: mov r0, sum - load_regs - - /* - * Align an unaligned destination pointer. We know that - * we have >= 8 bytes here, so we don't need to check - * the length. Note that the source pointer hasn't been - * aligned yet. - */ -.Ldst_unaligned: - tst dst, #1 - beq .Ldst_16bit - - load1b ip - sub len, len, #1 - adcs sum, sum, ip, put_byte_1 @ update checksum - strb ip, [dst], #1 - tst dst, #2 - reteq lr @ dst is now 32bit aligned - -.Ldst_16bit: load2b r8, ip - sub len, len, #2 - adcs sum, sum, r8, put_byte_0 - strb r8, [dst], #1 - adcs sum, sum, ip, put_byte_1 - strb ip, [dst], #1 - ret lr @ dst is now 32bit aligned - - /* - * Handle 0 to 7 bytes, with any alignment of source and - * destination pointers. Note that when we get here, C = 0 - */ -.Lless8: teq len, #0 @ check for zero count - beq .Lzero - - /* we must have at least one byte. 
*/ - tst dst, #1 @ dst 16-bit aligned - beq .Lless8_aligned - - /* Align dst */ - load1b ip - sub len, len, #1 - adcs sum, sum, ip, put_byte_1 @ update checksum - strb ip, [dst], #1 - tst len, #6 - beq .Lless8_byteonly - -1: load2b r8, ip - sub len, len, #2 - adcs sum, sum, r8, put_byte_0 - strb r8, [dst], #1 - adcs sum, sum, ip, put_byte_1 - strb ip, [dst], #1 -.Lless8_aligned: - tst len, #6 - bne 1b -.Lless8_byteonly: - tst len, #1 - beq .Ldone - load1b r8 - adcs sum, sum, r8, put_byte_0 @ update checksum - strb r8, [dst], #1 - b .Ldone - -FN_ENTRY - save_regs - - cmp len, #8 @ Ensure that we have at least - blo .Lless8 @ 8 bytes to copy. - - adds sum, sum, #0 @ C = 0 - tst dst, #3 @ Test destination alignment - blne .Ldst_unaligned @ align destination, return here - - /* - * Ok, the dst pointer is now 32bit aligned, and we know - * that we must have more than 4 bytes to copy. Note - * that C contains the carry from the dst alignment above. - */ - - tst src, #3 @ Test source alignment - bne .Lsrc_not_aligned - - /* Routine for src & dst aligned */ - - bics ip, len, #15 - beq 2f - -1: load4l r4, r5, r6, r7 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - sub ip, ip, #16 - teq ip, #0 - bne 1b - -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r4, r5 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - tst ip, #4 - beq 4f - -3: load1l r4 - str r4, [dst], #4 - adcs sum, sum, r4 - -4: ands len, len, #3 - beq .Ldone - load1l r4 - tst len, #2 - mov r5, r4, get_byte_0 - beq .Lexit - adcs sum, sum, r4, lspush #16 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - mov r5, r4, get_byte_2 -.Lexit: tst len, #1 - strbne r5, [dst], #1 - andne r5, r5, #255 - adcsne sum, sum, r5, put_byte_0 - - /* - * If the dst pointer was not 16-bit aligned, we - * need to rotate the checksum here to get around - * the inefficient byte manipulations in the - * architecture independent code. 
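This works because the ones'-complement sum commutes with the byte swap an odd start address introduces; the fix-up is just a rotation (hedged sketch):

#include <stdint.h>

/* n must be 1..31 */
static uint32_t ror32(uint32_t x, unsigned int n)
{
    return (x >> n) | (x << (32 - n));
}

/* Model of the .Ldone fix-up: if dst started on an odd address the
 * bytes land swapped within each halfword, so rotating the 32-bit sum
 * by 8 bits repairs it. */
static uint32_t fixup_odd_dst(uint32_t sum, unsigned long dst_start)
{
    return (dst_start & 1) ? ror32(sum, 8) : sum;
}

int main(void)
{
    return fixup_odd_dst(0x00345678u, 1) == 0x78003456u ? 0 : 1;
}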
- */ -.Ldone: adc r0, sum, #0 - ldr sum, [sp, #0] @ dst - tst sum, #1 - movne r0, r0, ror #8 - load_regs - -.Lsrc_not_aligned: - adc sum, sum, #0 @ include C from dst alignment - and ip, src, #3 - bic src, src, #3 - load1l r5 - cmp ip, #2 - beq .Lsrc2_aligned - bhi .Lsrc3_aligned - mov r4, r5, lspull #8 @ C = 0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - mov r6, r6, lspull #8 - orr r6, r6, r7, lspush #24 - mov r7, r7, lspull #8 - orr r7, r7, r8, lspush #24 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #8 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #8 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #24 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #8 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - adcs sum, sum, r4, lspush #16 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - mov r5, r4, get_byte_2 - b .Lexit - -.Lsrc2_aligned: mov r4, r5, lspull #16 - adds sum, sum, #0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - mov r6, r6, lspull #16 - orr r6, r6, r7, lspush #16 - mov r7, r7, lspull #16 - orr r7, r7, r8, lspush #16 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #16 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #16 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #16 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #16 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - adcs sum, sum, r4 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - tst len, #1 - beq .Ldone - load1b r5 - b .Lexit - -.Lsrc3_aligned: mov r4, r5, lspull #24 - adds sum, sum, #0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - mov r6, r6, lspull #24 - orr r6, r6, r7, lspush #8 - mov r7, r7, lspull #24 - orr r7, r7, r8, lspush #8 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #24 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #24 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #8 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #24 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - strb r5, [dst], #1 - adcs sum, sum, r4 - load1l r4 - mov r5, r4, get_byte_0 - strb r5, [dst], #1 - adcs sum, sum, r4, lspush #24 - mov r5, r4, get_byte_1 - b .Lexit -FN_EXIT diff --git 
a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S deleted file mode 100644 index 6bd3a93eaa3c15202fc5b0f45d3ca4e5b52be054..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopyuser.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopyuser.S - * - * Copyright (C) 1995-1998 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include -#include - - .text - -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - .macro save_regs - mrc p15, 0, ip, c3, c0, 0 - stmfd sp!, {r1, r2, r4 - r8, ip, lr} - uaccess_enable ip - .endm - - .macro load_regs - ldmfd sp!, {r1, r2, r4 - r8, ip, lr} - mcr p15, 0, ip, c3, c0, 0 - ret lr - .endm -#else - .macro save_regs - stmfd sp!, {r1, r2, r4 - r8, lr} - .endm - - .macro load_regs - ldmfd sp!, {r1, r2, r4 - r8, pc} - .endm -#endif - - .macro load1b, reg1 - ldrusr \reg1, r0, 1 - .endm - - .macro load2b, reg1, reg2 - ldrusr \reg1, r0, 1 - ldrusr \reg2, r0, 1 - .endm - - .macro load1l, reg1 - ldrusr \reg1, r0, 4 - .endm - - .macro load2l, reg1, reg2 - ldrusr \reg1, r0, 4 - ldrusr \reg2, r0, 4 - .endm - - .macro load4l, reg1, reg2, reg3, reg4 - ldrusr \reg1, r0, 4 - ldrusr \reg2, r0, 4 - ldrusr \reg3, r0, 4 - ldrusr \reg4, r0, 4 - .endm - -/* - * unsigned int - * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) - * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr - * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT - */ - -#define FN_ENTRY ENTRY(csum_partial_copy_from_user) -#define FN_EXIT ENDPROC(csum_partial_copy_from_user) - -#include "csumpartialcopygeneric.S" - -/* - * FIXME: minor buglet here - * We don't return the checksum for the data present in the buffer. To do - * so properly, we would have to add in whatever registers were loaded before - * the fault, which, with the current asm above is not predictable. - */ - .pushsection .text.fixup,"ax" - .align 4 -9001: mov r4, #-EFAULT -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r5, [sp, #9*4] @ *err_ptr -#else - ldr r5, [sp, #8*4] @ *err_ptr -#endif - str r4, [r5] - ldmia sp, {r1, r2} @ retrieve dst, len - add r2, r2, r1 - mov r0, #0 @ zero the buffer -9002: teq r2, r1 - strbne r0, [r1], #1 - bne 9002b - load_regs - .popsection diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S deleted file mode 100644 index 3ccade0f813038c89404a26f92b4a35a3638dd86..0000000000000000000000000000000000000000 --- a/arch/arm/lib/delay-loop.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - */ -#include -#include -#include - - .text - -.LC0: .word loops_per_jiffy -.LC1: .word UDELAY_MULT - -/* - * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * r0 <= 2000 - * HZ <= 1000 - */ - -ENTRY(__loop_udelay) - ldr r2, .LC1 - mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT -ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0 - ldr r2, .LC0 - ldr r2, [r2] - umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy - adds r1, r1, #0xffffffff @ rounding up ... 
- adcs r0, r0, r0 @ and right shift by 31 - reteq lr - - .align 3 - -@ Delay routine -ENTRY(__loop_delay) - subs r0, r0, #1 -#if 0 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 -#endif - bhi __loop_delay - ret lr -ENDPROC(__loop_udelay) -ENDPROC(__loop_const_udelay) -ENDPROC(__loop_delay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S deleted file mode 100644 index a87c02925ffae039687167b26345d8e0b490179f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/div64.S +++ /dev/null @@ -1,209 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/div64.S - * - * Optimized computation of 64-bit dividend / 32-bit divisor - * - * Author: Nicolas Pitre - * Created: Oct 5, 2003 - * Copyright: Monta Vista Software, Inc. - */ - -#include -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -/* - * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. - * - * Note: Calling convention is totally non standard for optimal code. - * This is meant to be used by do_div() from include/asm/div64.h only. - * - * Input parameters: - * xh-xl = dividend (clobbered) - * r4 = divisor (preserved) - * - * Output values: - * yh-yl = result - * xh = remainder - * - * Clobbered regs: xl, ip - */ - -ENTRY(__do_div64) -UNWIND(.fnstart) - - @ Test for easy paths first. - subs ip, r4, #1 - bls 9f @ divisor is 0 or 1 - tst ip, r4 - beq 8f @ divisor is power of 2 - - @ See if we need to handle upper 32-bit result. - cmp xh, r4 - mov yh, #0 - blo 3f - - @ Align divisor with upper part of dividend. - @ The aligned divisor is stored in yl preserving the original. - @ The bit position is stored in ip. - -#if __LINUX_ARM_ARCH__ >= 5 - - clz yl, r4 - clz ip, xh - sub yl, yl, ip - mov ip, #1 - mov ip, ip, lsl yl - mov yl, r4, lsl yl - -#else - - mov yl, r4 - mov ip, #1 -1: cmp yl, #0x80000000 - cmpcc yl, xh - movcc yl, yl, lsl #1 - movcc ip, ip, lsl #1 - bcc 1b - -#endif - - @ The division loop for needed upper bit positions. - @ Break out early if dividend reaches 0. -2: cmp xh, yl - orrcs yh, yh, ip - subscs xh, xh, yl - movsne ip, ip, lsr #1 - mov yl, yl, lsr #1 - bne 2b - - @ See if we need to handle lower 32-bit result. -3: cmp xh, #0 - mov yl, #0 - cmpeq xl, r4 - movlo xh, xl - retlo lr - - @ The division loop for lower bit positions. - @ Here we shift remainder bits leftwards rather than moving the - @ divisor for comparisons, considering the carry-out bit as well. - mov ip, #0x80000000 -4: movs xl, xl, lsl #1 - adcs xh, xh, xh - beq 6f - cmpcc xh, r4 -5: orrcs yl, yl, ip - subcs xh, xh, r4 - movs ip, ip, lsr #1 - bne 4b - ret lr - - @ The top part of remainder became zero. If carry is set - @ (the 33rd bit) this is a false positive so resume the loop. - @ Otherwise, if lower part is also null then we are done. -6: bcs 5b - cmp xl, #0 - reteq lr - - @ We still have remainder bits in the low part. Bring them up. - -#if __LINUX_ARM_ARCH__ >= 5 - - clz xh, xl @ we know xh is zero here so... - add xh, xh, #1 - mov xl, xl, lsl xh - mov ip, ip, lsr xh - -#else - -7: movs xl, xl, lsl #1 - mov ip, ip, lsr #1 - bcc 7b - -#endif - - @ Current remainder is now 1. It is pointless to compare with the - @ divisor at this point since the divisor cannot be smaller than 3 here.
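The register-level contract above is only ever exercised through the do_div() macro; its C-level behaviour is this (a plain-C model, not the optimized implementation):

#include <stdint.h>
#include <stdio.h>

/* do_div(): the 64-bit dividend is updated in place (yh-yl on exit)
 * and the 32-bit remainder is returned (xh on exit). */
static uint32_t do_div_model(uint64_t *n, uint32_t base)
{
    uint32_t rem = (uint32_t)(*n % base);
    *n /= base;
    return rem;
}

int main(void)
{
    uint64_t ns = 1000000123ULL;
    uint32_t rem = do_div_model(&ns, 1000000000u);
    printf("sec=%llu nsec=%u\n", (unsigned long long)ns, rem); /* 1, 123 */
    return 0;
}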
- @ If possible, branch for another shift in the division loop. - @ If no bit position left then we are done. - movs ip, ip, lsr #1 - mov xh, #1 - bne 4b - ret lr - -8: @ Division by a power of 2: determine what that divisor order is - @ then simply shift values around - -#if __LINUX_ARM_ARCH__ >= 5 - - clz ip, r4 - rsb ip, ip, #31 - -#else - - mov yl, r4 - cmp r4, #(1 << 16) - mov ip, #0 - movhs yl, yl, lsr #16 - movhs ip, #16 - - cmp yl, #(1 << 8) - movhs yl, yl, lsr #8 - addhs ip, ip, #8 - - cmp yl, #(1 << 4) - movhs yl, yl, lsr #4 - addhs ip, ip, #4 - - cmp yl, #(1 << 2) - addhi ip, ip, #3 - addls ip, ip, yl, lsr #1 - -#endif - - mov yh, xh, lsr ip - mov yl, xl, lsr ip - rsb ip, ip, #32 - ARM( orr yl, yl, xh, lsl ip ) - THUMB( lsl xh, xh, ip ) - THUMB( orr yl, yl, xh ) - mov xh, xl, lsl ip - mov xh, xh, lsr ip - ret lr - - @ eq -> division by 1: obvious enough... -9: moveq yl, xl - moveq yh, xh - moveq xh, #0 - reteq lr -UNWIND(.fnend) - -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) -Ldiv0_64: - @ Division by 0: - str lr, [sp, #-8]! - bl __div0 - - @ as wrong as it could be... - mov yl, #0 - mov yh, #0 - mov xh, #0 - ldr pc, [sp], #8 - -UNWIND(.fnend) -ENDPROC(__do_div64) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S deleted file mode 100644 index b5e8b9ae4c7d496dcd292e6437d3bebd6417a870..0000000000000000000000000000000000000000 --- a/arch/arm/lib/findbit.S +++ /dev/null @@ -1,193 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/findbit.S - * - * Copyright (C) 1995-2000 Russell King - * - * 16th March 2001 - John Ripley - * Fixed so that "size" is an exclusive not an inclusive quantity. - * All users of these functions expect exclusive sizes, and may - * also call with zero size. - * Reworked by rmk. - */ -#include -#include - .text - -/* - * Purpose : Find a 'zero' bit - * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); - */ -ENTRY(_find_first_zero_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_zero_bit_le) - -/* - * Purpose : Find next 'zero' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_le) - -/* - * Purpose : Find a 'one' bit - * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); - */ -ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
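find_first_zero_bit scans a byte at a time, inverting each byte so a zero bit shows up as a set bit, and returns maxbit when nothing is found. A byte-granular C sketch of the little-endian variant (the kernel's real interface operates on unsigned long words):

    #include <stddef.h>

    static unsigned int find_first_zero_bit_ref(const unsigned char *addr,
                                                unsigned int maxbit)
    {
        for (unsigned int bit = 0; bit < maxbit; bit += 8) {
            unsigned char b = (unsigned char)~addr[bit / 8];  /* invert bits */
            if (b) {
                unsigned int pos = bit;
                while (!(b & 1)) {          /* lowest set bit of the byte */
                    b >>= 1;
                    pos++;
                }
                return pos < maxbit ? pos : maxbit;  /* clamp, like .L_found */
            }
        }
        return maxbit;                       /* no zero bit found */
    }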
- blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_le) - -/* - * Purpose : Find next 'one' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_le) - -#ifdef __ARMEB__ - -ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_zero_bit_be) - -ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_be) - -ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_be) - -ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_be) - -#endif - -/* - * One or more bits in the LSB of r3 are assumed to be set. - */ -.L_found: -#if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 -#else - tst r3, #0x0f - addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 - addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 - addeq r2, r2, #1 - mov r0, r2 -#endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 - ret lr - diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S deleted file mode 100644 index c5e420750c48d70374b216a70d3078add0fc0eb5..0000000000000000000000000000000000000000 --- a/arch/arm/lib/getuser.S +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/getuser.S - * - * Copyright (C) 2001 Russell King - * - * Idea from x86 version, (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface to make them more - * efficient, especially as they return an error value in addition to - * the "real" return value. 
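The .L_found epilogue above converts "some bit of r3 is set" into a bit index: the rsb/and pair isolates the lowest set bit, and CLZ then yields 31 minus the leading-zero count. The same trick in C, assuming the GCC/Clang builtin and a nonzero argument, exactly as the assembly assumes:

    #include <stdint.h>

    /* Index of the lowest set bit; x must be nonzero. */
    static unsigned int lowest_set_bit_index(uint32_t x)
    {
        uint32_t lsb = x & (0u - x);        /* isolate the lowest set bit */
        return 31u - (unsigned int)__builtin_clz(lsb);
    }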
- * - * __get_user_X - * - * Inputs: r0 contains the address - * r1 contains the address limit, which must be preserved - * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value - * lr corrupted - * - * No other registers must be altered. (see - * for specific ASM register usage). - * - * Note that ADDR_LIMIT is either 0 or 0xc0000000. - * Note also that it is intended that __get_user_bad is not global. - */ -#include -#include -#include -#include - -ENTRY(__get_user_1) - check_uaccess r0, 1, r1, r2, __get_user_bad -1: TUSER(ldrb) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_1) -_ASM_NOKPROBE(__get_user_1) - -ENTRY(__get_user_2) - check_uaccess r0, 2, r1, r2, __get_user_bad -#if __LINUX_ARM_ARCH__ >= 6 - -2: TUSER(ldrh) r2, [r0] - -#else - -#ifdef CONFIG_CPU_USE_DOMAINS -rb .req ip -2: ldrbt r2, [r0], #1 -3: ldrbt rb, [r0], #0 -#else -rb .req r0 -2: ldrb r2, [r0] -3: ldrb rb, [r0, #1] -#endif -#ifndef __ARMEB__ - orr r2, r2, rb, lsl #8 -#else - orr r2, rb, r2, lsl #8 -#endif - -#endif /* __LINUX_ARM_ARCH__ >= 6 */ - - mov r0, #0 - ret lr -ENDPROC(__get_user_2) -_ASM_NOKPROBE(__get_user_2) - -ENTRY(__get_user_4) - check_uaccess r0, 4, r1, r2, __get_user_bad -4: TUSER(ldr) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_4) -_ASM_NOKPROBE(__get_user_4) - -ENTRY(__get_user_8) - check_uaccess r0, 8, r1, r2, __get_user_bad8 -#ifdef CONFIG_THUMB2_KERNEL -5: TUSER(ldr) r2, [r0] -6: TUSER(ldr) r3, [r0, #4] -#else -5: TUSER(ldr) r2, [r0], #4 -6: TUSER(ldr) r3, [r0] -#endif - mov r0, #0 - ret lr -ENDPROC(__get_user_8) -_ASM_NOKPROBE(__get_user_8) - -#ifdef __ARMEB__ -ENTRY(__get_user_32t_8) - check_uaccess r0, 8, r1, r2, __get_user_bad -#ifdef CONFIG_CPU_USE_DOMAINS - add r0, r0, #4 -7: ldrt r2, [r0] -#else -7: ldr r2, [r0, #4] -#endif - mov r0, #0 - ret lr -ENDPROC(__get_user_32t_8) -_ASM_NOKPROBE(__get_user_32t_8) - -ENTRY(__get_user_64t_1) - check_uaccess r0, 1, r1, r2, __get_user_bad8 -8: TUSER(ldrb) r3, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_1) -_ASM_NOKPROBE(__get_user_64t_1) - -ENTRY(__get_user_64t_2) - check_uaccess r0, 2, r1, r2, __get_user_bad8 -#ifdef CONFIG_CPU_USE_DOMAINS -rb .req ip -9: ldrbt r3, [r0], #1 -10: ldrbt rb, [r0], #0 -#else -rb .req r0 -9: ldrb r3, [r0] -10: ldrb rb, [r0, #1] -#endif - orr r3, rb, r3, lsl #8 - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_2) -_ASM_NOKPROBE(__get_user_64t_2) - -ENTRY(__get_user_64t_4) - check_uaccess r0, 4, r1, r2, __get_user_bad8 -11: TUSER(ldr) r3, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_4) -_ASM_NOKPROBE(__get_user_64t_4) -#endif - -__get_user_bad8: - mov r3, #0 -__get_user_bad: - mov r2, #0 - mov r0, #-EFAULT - ret lr -ENDPROC(__get_user_bad) -ENDPROC(__get_user_bad8) -_ASM_NOKPROBE(__get_user_bad) -_ASM_NOKPROBE(__get_user_bad8) - -.pushsection __ex_table, "a" - .long 1b, __get_user_bad - .long 2b, __get_user_bad -#if __LINUX_ARM_ARCH__ < 6 - .long 3b, __get_user_bad -#endif - .long 4b, __get_user_bad - .long 5b, __get_user_bad8 - .long 6b, __get_user_bad8 -#ifdef __ARMEB__ - .long 7b, __get_user_bad - .long 8b, __get_user_bad8 - .long 9b, __get_user_bad8 - .long 10b, __get_user_bad8 - .long 11b, __get_user_bad8 -#endif -.popsection diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S deleted file mode 100644 index 0def9388fb1566ebd0b75ed1322c06d4b7542760..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsb.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsb.S - * - * Copyright (C) 1995-2000 
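The header comment pins down the contract: r0 comes back 0 or -EFAULT, and r2/r3 carry the value, zeroed on failure, with faulting loads redirected through the __ex_table fixups. A hypothetical user-space stand-in that models the same contract with a bounds check in place of a page fault:

    #include <errno.h>
    #include <string.h>

    /* Returns 0 and the value, or -EFAULT and a zeroed value. */
    static int get_user_u32_ref(const unsigned char *base, size_t size,
                                size_t off, unsigned int *out)
    {
        if (off > size || size - off < sizeof(*out)) {
            *out = 0;               /* mirrors __get_user_bad zeroing r2/r3 */
            return -EFAULT;
        }
        memcpy(out, base + off, sizeof(*out));
        return 0;
    }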
Russell King - */ -#include -#include - -.Linsb_align: rsb ip, ip, #4 - cmp ip, r2 - movgt ip, r2 - cmp ip, #2 - ldrb r3, [r0] - strb r3, [r1], #1 - ldrbge r3, [r0] - strbge r3, [r1], #1 - ldrbgt r3, [r0] - strbgt r3, [r1], #1 - subs r2, r2, ip - bne .Linsb_aligned - -ENTRY(__raw_readsb) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - ands ip, r1, #3 - bne .Linsb_align - -.Linsb_aligned: stmfd sp!, {r4 - r6, lr} - - subs r2, r2, #16 - bmi .Linsb_no_16 - -.Linsb_16_lp: ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - mov r3, r3, put_byte_0 - ldrb r6, [r0] - orr r3, r3, r4, put_byte_1 - ldrb r4, [r0] - orr r3, r3, r5, put_byte_2 - ldrb r5, [r0] - orr r3, r3, r6, put_byte_3 - ldrb r6, [r0] - mov r4, r4, put_byte_0 - ldrb ip, [r0] - orr r4, r4, r5, put_byte_1 - ldrb r5, [r0] - orr r4, r4, r6, put_byte_2 - ldrb r6, [r0] - orr r4, r4, ip, put_byte_3 - ldrb ip, [r0] - mov r5, r5, put_byte_0 - ldrb lr, [r0] - orr r5, r5, r6, put_byte_1 - ldrb r6, [r0] - orr r5, r5, ip, put_byte_2 - ldrb ip, [r0] - orr r5, r5, lr, put_byte_3 - ldrb lr, [r0] - mov r6, r6, put_byte_0 - orr r6, r6, ip, put_byte_1 - ldrb ip, [r0] - orr r6, r6, lr, put_byte_2 - orr r6, r6, ip, put_byte_3 - stmia r1!, {r3 - r6} - - subs r2, r2, #16 - bpl .Linsb_16_lp - - tst r2, #15 - ldmfdeq sp!, {r4 - r6, pc} - -.Linsb_no_16: tst r2, #8 - beq .Linsb_no_8 - - ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - mov r3, r3, put_byte_0 - ldrb r6, [r0] - orr r3, r3, r4, put_byte_1 - ldrb r4, [r0] - orr r3, r3, r5, put_byte_2 - ldrb r5, [r0] - orr r3, r3, r6, put_byte_3 - ldrb r6, [r0] - mov r4, r4, put_byte_0 - ldrb ip, [r0] - orr r4, r4, r5, put_byte_1 - orr r4, r4, r6, put_byte_2 - orr r4, r4, ip, put_byte_3 - stmia r1!, {r3, r4} - -.Linsb_no_8: tst r2, #4 - beq .Linsb_no_4 - - ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - ldrb r6, [r0] - mov r3, r3, put_byte_0 - orr r3, r3, r4, put_byte_1 - orr r3, r3, r5, put_byte_2 - orr r3, r3, r6, put_byte_3 - str r3, [r1], #4 - -.Linsb_no_4: ands r2, r2, #3 - ldmfdeq sp!, {r4 - r6, pc} - - cmp r2, #2 - ldrb r3, [r0] - strb r3, [r1], #1 - ldrbge r3, [r0] - strbge r3, [r1], #1 - ldrbgt r3, [r0] - strbgt r3, [r1] - - ldmfd sp!, {r4 - r6, pc} -ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S deleted file mode 100644 index d9f6b372b0586e4b123ac149e08d5c322481cedd..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsl.S +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsl.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -ENTRY(__raw_readsl) - teq r2, #0 @ do we have to check for the zero len? 
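__raw_readsb streams bytes from one fixed device address into a memory buffer; the alignment prologue and the 16-byte word-packing loop are throughput optimizations around that single behavior. Stripped to its observable effect, with a volatile pointer standing in for a real MMIO accessor:

    #include <stddef.h>
    #include <stdint.h>

    static void raw_readsb_ref(const volatile uint8_t *port, uint8_t *buf,
                               size_t count)
    {
        while (count--)
            *buf++ = *port;          /* the port address never advances */
    }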
- reteq lr - ands ip, r1, #3 - bne 3f - - subs r2, r2, #4 - bmi 2f - stmfd sp!, {r4, lr} -1: ldr r3, [r0, #0] - ldr r4, [r0, #0] - ldr ip, [r0, #0] - ldr lr, [r0, #0] - subs r2, r2, #4 - stmia r1!, {r3, r4, ip, lr} - bpl 1b - ldmfd sp!, {r4, lr} -2: movs r2, r2, lsl #31 - ldrcs r3, [r0, #0] - ldrcs ip, [r0, #0] - stmiacs r1!, {r3, ip} - ldrne r3, [r0, #0] - strne r3, [r1, #0] - ret lr - -3: ldr r3, [r0] - cmp ip, #2 - mov ip, r3, get_byte_0 - strb ip, [r1], #1 - bgt 6f - mov ip, r3, get_byte_1 - strb ip, [r1], #1 - beq 5f - mov ip, r3, get_byte_2 - strb ip, [r1], #1 - -4: subs r2, r2, #1 - mov ip, r3, lspull #24 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #8 - strne ip, [r1], #4 - bne 4b - b 8f - -5: subs r2, r2, #1 - mov ip, r3, lspull #16 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #16 - strne ip, [r1], #4 - bne 5b - b 7f - -6: subs r2, r2, #1 - mov ip, r3, lspull #8 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #24 - strne ip, [r1], #4 - bne 6b - - mov r3, ip, get_byte_2 - strb r3, [r1, #2] -7: mov r3, ip, get_byte_1 - strb r3, [r1, #1] -8: mov r3, ip, get_byte_0 - strb r3, [r1, #0] - ret lr -ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S deleted file mode 100644 index 266043610c0c19bbafb931292714fbf13f9f709b..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsw-armv3.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsw-armv3.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -.Linsw_bad_alignment: - adr r0, .Linsw_bad_align_msg - mov r2, lr - b panic -.Linsw_bad_align_msg: - .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" - .align - -.Linsw_align: tst r1, #1 - bne .Linsw_bad_alignment - - ldr r3, [r0] - strb r3, [r1], #1 - mov r3, r3, lsr #8 - strb r3, [r1], #1 - - subs r2, r2, #1 - reteq lr - -ENTRY(__raw_readsw) - teq r2, #0 @ do we have to check for the zero len? 
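Paths 4, 5 and 6 of __raw_readsl handle a misaligned destination by carrying a partial word between iterations and merging each fresh read into it with the lspull/lspush shift pair. The merge step in C, little-endian flavor, where shift is the byte misalignment (1 to 3):

    #include <stdint.h>

    /* prev supplies the low bytes, next the high bytes of the output word. */
    static uint32_t combine_words(uint32_t prev, uint32_t next, unsigned int shift)
    {
        return (prev >> (8 * shift)) | (next << (32 - 8 * shift));
    }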
- reteq lr - tst r1, #3 - bne .Linsw_align - -.Linsw_aligned: mov ip, #0xff - orr ip, ip, ip, lsl #8 - stmfd sp!, {r4, r5, r6, lr} - - subs r2, r2, #8 - bmi .Lno_insw_8 - -.Linsw_8_lp: ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - ldr r4, [r0] - and r4, r4, ip - ldr r5, [r0] - orr r4, r4, r5, lsl #16 - - ldr r5, [r0] - and r5, r5, ip - ldr r6, [r0] - orr r5, r5, r6, lsl #16 - - ldr r6, [r0] - and r6, r6, ip - ldr lr, [r0] - orr r6, r6, lr, lsl #16 - - stmia r1!, {r3 - r6} - - subs r2, r2, #8 - bpl .Linsw_8_lp - - tst r2, #7 - ldmfdeq sp!, {r4, r5, r6, pc} - -.Lno_insw_8: tst r2, #4 - beq .Lno_insw_4 - - ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - ldr r4, [r0] - and r4, r4, ip - ldr r5, [r0] - orr r4, r4, r5, lsl #16 - - stmia r1!, {r3, r4} - -.Lno_insw_4: tst r2, #2 - beq .Lno_insw_2 - - ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - str r3, [r1], #4 - -.Lno_insw_2: tst r2, #1 - ldrne r3, [r0] - strbne r3, [r1], #1 - movne r3, r3, lsr #8 - strbne r3, [r1] - - ldmfd sp!, {r4, r5, r6, pc} - - diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S deleted file mode 100644 index 228c176a94d1e4b5d5d27e79da6817ba732f15a9..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsw-armv4.S +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsw-armv4.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - - .macro pack, rd, hw1, hw2 -#ifndef __ARMEB__ - orr \rd, \hw1, \hw2, lsl #16 -#else - orr \rd, \hw2, \hw1, lsl #16 -#endif - .endm - -.Linsw_align: movs ip, r1, lsl #31 - bne .Linsw_noalign - ldrh ip, [r0] - sub r2, r2, #1 - strh ip, [r1], #2 - -ENTRY(__raw_readsw) - teq r2, #0 - reteq lr - tst r1, #3 - bne .Linsw_align - - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #8 - bmi .Lno_insw_8 - -.Linsw_8_lp: ldrh r3, [r0] - ldrh r4, [r0] - pack r3, r3, r4 - - ldrh r4, [r0] - ldrh r5, [r0] - pack r4, r4, r5 - - ldrh r5, [r0] - ldrh ip, [r0] - pack r5, r5, ip - - ldrh ip, [r0] - ldrh lr, [r0] - pack ip, ip, lr - - subs r2, r2, #8 - stmia r1!, {r3 - r5, ip} - bpl .Linsw_8_lp - -.Lno_insw_8: tst r2, #4 - beq .Lno_insw_4 - - ldrh r3, [r0] - ldrh r4, [r0] - pack r3, r3, r4 - - ldrh r4, [r0] - ldrh ip, [r0] - pack r4, r4, ip - - stmia r1!, {r3, r4} - -.Lno_insw_4: movs r2, r2, lsl #31 - bcc .Lno_insw_2 - - ldrh r3, [r0] - ldrh ip, [r0] - pack r3, r3, ip - str r3, [r1], #4 - -.Lno_insw_2: ldrhne r3, [r0] - strhne r3, [r1] - - ldmfd sp!, {r4, r5, pc} - -#ifdef __ARMEB__ -#define _BE_ONLY_(code...) code -#define _LE_ONLY_(code...) -#define push_hbyte0 lsr #8 -#define pull_hbyte1 lsl #24 -#else -#define _BE_ONLY_(code...) -#define _LE_ONLY_(code...) code -#define push_hbyte0 lsl #24 -#define pull_hbyte1 lsr #8 -#endif - -.Linsw_noalign: stmfd sp!, {r4, lr} - ldrbcc ip, [r1, #-1]! 
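The pack macro above fuses two halfword reads into one word store, swapping operand order on big-endian builds; that is the entire difference the __ARMEB__ conditionals express. In C:

    #include <stdint.h>

    static uint32_t pack_halfwords(uint16_t hw1, uint16_t hw2, int big_endian)
    {
        return big_endian ? ((uint32_t)hw1 << 16) | hw2     /* hw1 on top */
                          : ((uint32_t)hw2 << 16) | hw1;    /* hw2 on top */
    }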
- bcc 1f - - ldrh ip, [r0] - sub r2, r2, #1 - _BE_ONLY_( mov ip, ip, ror #8 ) - strb ip, [r1], #1 - _LE_ONLY_( mov ip, ip, lsr #8 ) - _BE_ONLY_( mov ip, ip, lsr #24 ) - -1: subs r2, r2, #2 - bmi 3f - _BE_ONLY_( mov ip, ip, lsl #24 ) - -2: ldrh r3, [r0] - ldrh r4, [r0] - subs r2, r2, #2 - orr ip, ip, r3, lsl #8 - orr ip, ip, r4, push_hbyte0 - str ip, [r1], #4 - mov ip, r4, pull_hbyte1 - bpl 2b - - _BE_ONLY_( mov ip, ip, lsr #24 ) - -3: tst r2, #1 - strb ip, [r1], #1 - ldrhne ip, [r0] - _BE_ONLY_( movne ip, ip, ror #8 ) - strbne ip, [r1], #1 - _LE_ONLY_( movne ip, ip, lsr #8 ) - _BE_ONLY_( movne ip, ip, lsr #24 ) - strbne ip, [r1] - ldmfd sp!, {r4, pc} -ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S deleted file mode 100644 index e2ae312f0b69101469a11a918db55e56554a9438..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesb.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesb.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - - .macro outword, rd -#ifndef __ARMEB__ - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] -#else - mov lr, \rd, lsr #24 - strb lr, [r0] - mov lr, \rd, lsr #16 - strb lr, [r0] - mov lr, \rd, lsr #8 - strb lr, [r0] - strb \rd, [r0] -#endif - .endm - -.Loutsb_align: rsb ip, ip, #4 - cmp ip, r2 - movgt ip, r2 - cmp ip, #2 - ldrb r3, [r1], #1 - strb r3, [r0] - ldrbge r3, [r1], #1 - strbge r3, [r0] - ldrbgt r3, [r1], #1 - strbgt r3, [r0] - subs r2, r2, ip - bne .Loutsb_aligned - -ENTRY(__raw_writesb) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - ands ip, r1, #3 - bne .Loutsb_align - -.Loutsb_aligned: - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #16 - bmi .Loutsb_no_16 - -.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} - outword r3 - outword r4 - outword r5 - outword ip - subs r2, r2, #16 - bpl .Loutsb_16_lp - - tst r2, #15 - ldmfdeq sp!, {r4, r5, pc} - -.Loutsb_no_16: tst r2, #8 - beq .Loutsb_no_8 - - ldmia r1!, {r3, r4} - outword r3 - outword r4 - -.Loutsb_no_8: tst r2, #4 - beq .Loutsb_no_4 - - ldr r3, [r1], #4 - outword r3 - -.Loutsb_no_4: ands r2, r2, #3 - ldmfdeq sp!, {r4, r5, pc} - - cmp r2, #2 - ldrb r3, [r1], #1 - strb r3, [r0] - ldrbge r3, [r1], #1 - strbge r3, [r0] - ldrbgt r3, [r1] - strbgt r3, [r0] - - ldmfd sp!, {r4, r5, pc} -ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S deleted file mode 100644 index 89ef7be61421918f8c6cc74769ba7c22ca2e3382..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesl.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesl.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -ENTRY(__raw_writesl) - teq r2, #0 @ do we have to check for the zero len? 
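__raw_writesb's outword macro is the write-side mirror of the read loops: one 32-bit value leaves as four byte-wide stores to the same port, low byte first on little-endian. A sketch:

    #include <stdint.h>

    static void outword_le(volatile uint8_t *port, uint32_t w)
    {
        for (int i = 0; i < 4; i++) {
            *port = (uint8_t)w;      /* same address every time */
            w >>= 8;
        }
    }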
- reteq lr - ands ip, r1, #3 - bne 3f - - subs r2, r2, #4 - bmi 2f - stmfd sp!, {r4, lr} -1: ldmia r1!, {r3, r4, ip, lr} - subs r2, r2, #4 - str r3, [r0, #0] - str r4, [r0, #0] - str ip, [r0, #0] - str lr, [r0, #0] - bpl 1b - ldmfd sp!, {r4, lr} -2: movs r2, r2, lsl #31 - ldmiacs r1!, {r3, ip} - strcs r3, [r0, #0] - ldrne r3, [r1, #0] - strcs ip, [r0, #0] - strne r3, [r0, #0] - ret lr - -3: bic r1, r1, #3 - ldr r3, [r1], #4 - cmp ip, #2 - blt 5f - bgt 6f - -4: mov ip, r3, lspull #16 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #16 - str ip, [r0] - bne 4b - ret lr - -5: mov ip, r3, lspull #8 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #24 - str ip, [r0] - bne 5b - ret lr - -6: mov ip, r3, lspull #24 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #8 - str ip, [r0] - bne 6b - ret lr -ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S deleted file mode 100644 index 4cabbee7f3b8278ddf165f6ec2ab065f0e26ca2f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesw-armv3.S +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesw-armv3.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -.Loutsw_bad_alignment: - adr r0, .Loutsw_bad_align_msg - mov r2, lr - b panic -.Loutsw_bad_align_msg: - .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" - .align - -.Loutsw_align: tst r1, #1 - bne .Loutsw_bad_alignment - - add r1, r1, #2 - - ldr r3, [r1, #-4] - mov r3, r3, lsr #16 - orr r3, r3, r3, lsl #16 - str r3, [r0] - subs r2, r2, #1 - reteq lr - -ENTRY(__raw_writesw) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - tst r1, #3 - bne .Loutsw_align - - stmfd sp!, {r4, r5, r6, lr} - - subs r2, r2, #8 - bmi .Lno_outsw_8 - -.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r4, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r4, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r5, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r5, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r6, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r6, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - subs r2, r2, #8 - bpl .Loutsw_8_lp - - tst r2, #7 - ldmfdeq sp!, {r4, r5, r6, pc} - -.Lno_outsw_8: tst r2, #4 - beq .Lno_outsw_4 - - ldmia r1!, {r3, r4} - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r4, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r4, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - -.Lno_outsw_4: tst r2, #2 - beq .Lno_outsw_2 - - ldr r3, [r1], #4 - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - -.Lno_outsw_2: tst r2, #1 - - ldrne r3, [r1] - - movne ip, r3, lsl #16 - orrne ip, ip, ip, lsr #16 - strne ip, [r0] - - ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S deleted file mode 100644 index 12eec53266c74dbd569fb4a3d673794faa5854c2..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesw-armv4.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesw-armv4.S - * - * Copyright (C) 
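io-writesw-armv3 drives a 16-bit port through 32-bit stores, so each halfword is first replicated into both halves of the word; every lsl #16 / orr ..., lsr #16 pair above is that replication. In C:

    #include <stdint.h>

    static uint32_t replicate_halfword(uint16_t hw)
    {
        return ((uint32_t)hw << 16) | hw;    /* hw in both halves */
    }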
1995-2000 Russell King - */ -#include -#include - - .macro outword, rd -#ifndef __ARMEB__ - strh \rd, [r0] - mov \rd, \rd, lsr #16 - strh \rd, [r0] -#else - mov lr, \rd, lsr #16 - strh lr, [r0] - strh \rd, [r0] -#endif - .endm - -.Loutsw_align: movs ip, r1, lsl #31 - bne .Loutsw_noalign - - ldrh r3, [r1], #2 - sub r2, r2, #1 - strh r3, [r0] - -ENTRY(__raw_writesw) - teq r2, #0 - reteq lr - ands r3, r1, #3 - bne .Loutsw_align - - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #8 - bmi .Lno_outsw_8 - -.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} - subs r2, r2, #8 - outword r3 - outword r4 - outword r5 - outword ip - bpl .Loutsw_8_lp - -.Lno_outsw_8: tst r2, #4 - beq .Lno_outsw_4 - - ldmia r1!, {r3, ip} - outword r3 - outword ip - -.Lno_outsw_4: movs r2, r2, lsl #31 - bcc .Lno_outsw_2 - - ldr r3, [r1], #4 - outword r3 - -.Lno_outsw_2: ldrhne r3, [r1] - strhne r3, [r0] - - ldmfd sp!, {r4, r5, pc} - -#ifdef __ARMEB__ -#define pull_hbyte0 lsl #8 -#define push_hbyte1 lsr #24 -#else -#define pull_hbyte0 lsr #24 -#define push_hbyte1 lsl #8 -#endif - -.Loutsw_noalign: - ARM( ldr r3, [r1, -r3]! ) - THUMB( rsb r3, r3, #0 ) - THUMB( ldr r3, [r1, r3] ) - THUMB( sub r1, r3 ) - subcs r2, r2, #1 - bcs 2f - subs r2, r2, #2 - bmi 3f - -1: mov ip, r3, lsr #8 - strh ip, [r0] -2: mov ip, r3, pull_hbyte0 - ldr r3, [r1, #4]! - subs r2, r2, #2 - orr ip, ip, r3, push_hbyte1 - strh ip, [r0] - bpl 1b - - tst r2, #1 -3: movne ip, r3, lsr #8 - strhne ip, [r0] - ret lr -ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S deleted file mode 100644 index c23f9d9e29704be4c834185a22d8ca9eefef7013..0000000000000000000000000000000000000000 --- a/arch/arm/lib/lib1funcs.S +++ /dev/null @@ -1,371 +0,0 @@ -/* - * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines - * - * Author: Nicolas Pitre - * - contributed to gcc-3.4 on Sep 30, 2003 - * - adapted for the Linux kernel on Oct 2, 2003 - */ - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. 
*/ - - -#include -#include -#include - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed subtractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
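ARM_DIV_BODY aligns the divisor under the dividend (via CLZ on v5+, shift heuristics earlier) and then retires quotient bits by trial subtraction at four adjacent bit positions per unrolled pass. A one-bit-per-step C sketch of the same restoring division, omitting the unrolling:

    #include <stdint.h>

    static uint32_t udiv_ref(uint32_t dividend, uint32_t divisor, uint32_t *rem)
    {
        uint32_t result = 0, curbit = 1;

        if (divisor == 0) {                  /* the original traps via Ldiv0 */
            *rem = dividend;
            return 0;
        }
        /* Align: shift the divisor up until it tops the dividend. */
        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            curbit <<= 1;
        }
        while (curbit) {                     /* unrolled 4x in the original */
            if (dividend >= divisor) {
                dividend -= divisor;
                result |= curbit;
            }
            divisor >>= 1;
            curbit >>= 1;
        }
        *rem = dividend;
        return result;
    }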
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subsge \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/subtractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - -#ifdef CONFIG_ARM_PATCH_IDIV - .align 3 -#endif - -ENTRY(__udivsi3) -ENTRY(__aeabi_uidiv) -UNWIND(.fnstart) - - subs r2, r1, #1 - reteq lr - bcc Ldiv0 - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - ret lr - -11: moveq r0, #1 - movne r0, #0 - ret lr - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - ret lr - -UNWIND(.fnend) -ENDPROC(__udivsi3) -ENDPROC(__aeabi_uidiv) - -ENTRY(__umodsi3) -UNWIND(.fnstart) - - subs r2, r1, #1 @ compare divisor with 1 - bcc Ldiv0 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - retls lr - - ARM_MOD_BODY r0, r1, r2, r3 - - ret lr - -UNWIND(.fnend) -ENDPROC(__umodsi3) - -#ifdef CONFIG_ARM_PATCH_IDIV - .align 3 -#endif - -ENTRY(__divsi3) -ENTRY(__aeabi_idiv) -UNWIND(.fnstart) - - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - ret lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - ret lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - ret lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - ret lr - -UNWIND(.fnend) -ENDPROC(__divsi3) -ENDPROC(__aeabi_idiv) - -ENTRY(__modsi3) -UNWIND(.fnstart) - - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - ret lr - -UNWIND(.fnend) -ENDPROC(__modsi3) - -#ifdef CONFIG_AEABI - -ENTRY(__aeabi_uidivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - ret lr - -UNWIND(.fnend) -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - ret lr - -UNWIND(.fnend) -ENDPROC(__aeabi_idivmod) - -#endif - -Ldiv0: -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) - str lr, [sp, #-8]! 
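__divsi3 reduces signed division to the unsigned core: eor ip, r0, r1 records the result sign up front, the magnitudes are divided unsigned, and the quotient is negated if the signs differed, with divisors of 1, -1 and powers of two peeled off first. The sign plumbing in C (ub != 0 assumed; the plain / stands in for the unsigned divide, purely for brevity):

    #include <stdint.h>

    static int32_t divsi3_ref(int32_t a, int32_t b)
    {
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t q = ua / ub;                /* magnitude quotient */

        /* Negate via unsigned arithmetic to sidestep signed overflow. */
        return ((a ^ b) < 0) ? (int32_t)(0u - q) : (int32_t)q;
    }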
- bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #8 -UNWIND(.fnend) -ENDPROC(Ldiv0) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S deleted file mode 100644 index 922dcd88b02b7804fca63f0d891e9a7ed6cbf83e..0000000000000000000000000000000000000000 --- a/arch/arm/lib/lshrdi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__lshrdi3) -ENTRY(__aeabi_llsr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - ARM( orrmi al, al, ah, lsl ip ) - THUMB( lslmi r3, ah, ip ) - THUMB( orrmi al, al, r3 ) - mov ah, ah, lsr r2 - ret lr - -ENDPROC(__lshrdi3) -ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S deleted file mode 100644 index 95bedafd0330561f405a0d6af80bd156e6fa2c09..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memchr.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(memchr) -1: subs r2, r2, #1 - bmi 2f - ldrb r3, [r0], #1 - teq r3, r1 - bne 1b - sub r0, r0, #1 -2: movne r0, #0 - ret lr -ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S deleted file mode 100644 index 09a333153dc66409e4cbf81cb657253fa24fb428..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memcpy.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memcpy.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. 
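__aeabi_llsr composes a 64-bit logical right shift from two 32-bit halves: bits fall out of the high word into the low word, and the movmi/movpl pair selects between the small-shift and large-shift cases. In C, for shift counts below 64:

    #include <stdint.h>

    static uint64_t llsr_ref(uint32_t lo, uint32_t hi, unsigned int n)
    {
        uint32_t new_lo, new_hi;

        if (n == 0) {
            new_lo = lo;
            new_hi = hi;
        } else if (n < 32) {
            new_lo = (lo >> n) | (hi << (32 - n));   /* high spills into low */
            new_hi = hi >> n;
        } else {                   /* 32..63: low word comes from high word */
            new_lo = hi >> (n - 32);
            new_hi = 0;
        }
        return ((uint64_t)new_hi << 32) | new_lo;
    }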
- */ - -#include -#include -#include - -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldrb\cond \reg, [\ptr], #1 - .endm - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - strb\cond \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - -ENTRY(mmiocpy) -ENTRY(memcpy) - -#include "copy_template.S" - -ENDPROC(memcpy) -ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S deleted file mode 100644 index b50e5770fb44de25d37a6818f468c9ec3795ba91..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memmove.S +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memmove.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. - */ - -#include -#include -#include - - .text - -/* - * Prototype: void *memmove(void *dest, const void *src, size_t n); - * - * Note: - * - * If the memory regions don't overlap, we simply branch to memcpy which is - * normally a bit faster. Otherwise the copy is done going downwards. This - * is a transposition of the code from copy_template.S but with the copy - * occurring in the opposite direction. - */ - -ENTRY(memmove) - UNWIND( .fnstart ) - - subs ip, r0, r1 - cmphi r2, ip - bls memcpy - - stmfd sp!, {r0, r4, lr} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ in first stmfd block - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) - UNWIND( .save {r5 - r8} ) @ in second stmfd block - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! - - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! 
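memmove's first three instructions decide everything: a forward copy (a straight branch to memcpy) is safe unless the destination starts inside the source, i.e. dst > src and dst - src < n; only then does the code copy downwards. The dispatch in C:

    #include <stddef.h>
    #include <string.h>

    static void *memmove_ref(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (d <= s || (size_t)(d - s) >= n)
            return memcpy(dst, src, n);      /* no destructive overlap */
        while (n--)                          /* otherwise copy downwards */
            d[n] = s[n];
        return dst;
    }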
- - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - UNWIND( .fnend ) @ end of second stmfd block - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - -8: movs r2, r2, lsl #31 - ldrbne r3, [r1, #-1]! - ldrbcs r4, [r1, #-1]! - ldrbcs ip, [r1, #-1] - strbne r3, [r0, #-1]! - strbcs r4, [r0, #-1]! - strbcs ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} - -9: cmp ip, #2 - ldrbgt r3, [r1, #-1]! - ldrbge r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strbgt r3, [r0, #-1]! - strbge r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f - UNWIND( .fnend ) - - - .macro backward_copy_shift push pull - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) - UNWIND( .save {r5 - r9} ) @ in new second stmfd block - - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, lspush #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, lspull #\pull - mov ip, ip, lspush #\push - orr ip, ip, r9, lspull #\pull - mov r9, r9, lspush #\push - orr r9, r9, r8, lspull #\pull - mov r8, r8, lspush #\push - orr r8, r8, r7, lspull #\pull - mov r7, r7, lspush #\push - orr r7, r7, r6, lspull #\pull - mov r6, r6, lspush #\push - orr r6, r6, r5, lspull #\pull - mov r5, r5, lspush #\push - orr r5, r5, r4, lspull #\pull - mov r4, r4, lspush #\push - orr r4, r4, r3, lspull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - UNWIND( .fnend ) @ end of the second stmfd block - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - -14: ands ip, r2, #28 - beq 16f - -15: mov lr, r3, lspush #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, lspull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: add r1, r1, #(\pull / 8) - b 8b - UNWIND( .fnend ) - - .endm - - - backward_copy_shift push=8 pull=24 - -17: backward_copy_shift push=16 pull=16 - -18: backward_copy_shift push=24 pull=8 - -ENDPROC(memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S deleted file mode 100644 index 6ca4535c47fb63dd48fc15c9c91a60ee34552def..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memset.S +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memset.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include -#include - - .text - .align 5 - -ENTRY(mmioset) -ENTRY(memset) -UNWIND( .fnstart ) - ands r3, r0, #3 @ 1 unaligned? - mov ip, r0 @ preserve r0 as return value - bne 6f @ 1 -/* - * we know that the pointer in ip is aligned to a word boundary. - */ -1: orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 -7: cmp r2, #16 - blt 4f - -#if ! 
CALGN(1)+0 - -/* - * We need 2 extra registers for this loop - use r8 and the LR - */ - stmfd sp!, {r8, lr} -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {r8, lr} ) - mov r8, r1 - mov lr, r3 - -2: subs r2, r2, #64 - stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time. - stmiage ip!, {r1, r3, r8, lr} - stmiage ip!, {r1, r3, r8, lr} - stmiage ip!, {r1, r3, r8, lr} - bgt 2b - ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go. -/* - * No need to correct the count; we're only testing bits from now on - */ - tst r2, #32 - stmiane ip!, {r1, r3, r8, lr} - stmiane ip!, {r1, r3, r8, lr} - tst r2, #16 - stmiane ip!, {r1, r3, r8, lr} - ldmfd sp!, {r8, lr} -UNWIND( .fnend ) - -#else - -/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. - */ - - stmfd sp!, {r4-r8, lr} -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {r4-r8, lr} ) - mov r4, r1 - mov r5, r3 - mov r6, r1 - mov r7, r3 - mov r8, r1 - mov lr, r3 - - cmp r2, #96 - tstgt ip, #31 - ble 3f - - and r8, ip, #31 - rsb r8, r8, #32 - sub r2, r2, r8 - movs r8, r8, lsl #(32 - 4) - stmiacs ip!, {r4, r5, r6, r7} - stmiami ip!, {r4, r5} - tst r8, #(1 << 30) - mov r8, r1 - strne r1, [ip], #4 - -3: subs r2, r2, #64 - stmiage ip!, {r1, r3-r8, lr} - stmiage ip!, {r1, r3-r8, lr} - bgt 3b - ldmfdeq sp!, {r4-r8, pc} - - tst r2, #32 - stmiane ip!, {r1, r3-r8, lr} - tst r2, #16 - stmiane ip!, {r4-r7} - ldmfd sp!, {r4-r8, lr} -UNWIND( .fnend ) - -#endif - -UNWIND( .fnstart ) -4: tst r2, #8 - stmiane ip!, {r1, r3} - tst r2, #4 - strne r1, [ip], #4 -/* - * When we get here, we've got less than 4 bytes to set. We - * may have an unaligned pointer as well. - */ -5: tst r2, #2 - strbne r1, [ip], #1 - strbne r1, [ip], #1 - tst r2, #1 - strbne r1, [ip], #1 - ret lr - -6: subs r2, r2, #4 @ 1 do we have enough - blt 5b @ 1 bytes to align with? - cmp r3, #2 @ 1 - strblt r1, [ip], #1 @ 1 - strble r1, [ip], #1 @ 1 - strb r1, [ip], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) - b 1b -UNWIND( .fnend ) -ENDPROC(memset) -ENDPROC(mmioset) - -ENTRY(__memset32) -UNWIND( .fnstart ) - mov r3, r1 @ copy r1 to r3 and fall into memset64 -UNWIND( .fnend ) -ENDPROC(__memset32) -ENTRY(__memset64) -UNWIND( .fnstart ) - mov ip, r0 @ preserve r0 as return value - b 7b @ jump into the middle of memset -UNWIND( .fnend ) -ENDPROC(__memset64) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S deleted file mode 100644 index 8362fe6c0de9d71a344d913c3fada986e9e5234f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/muldi3.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/muldi3.S - * - * Author: Nicolas Pitre - * Created: Oct 19, 2005 - * Copyright: Monta Vista Software, Inc. 
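memset's two opening orr instructions splat the fill byte across a full word so the bulk loops can issue 16- or 64-byte stores instead of byte stores. The splat in C:

    #include <stdint.h>

    static uint32_t splat_byte(uint8_t c)
    {
        uint32_t w = c;

        w |= w << 8;      /* 0x000000cc -> 0x0000cccc */
        w |= w << 16;     /* 0x0000cccc -> 0xcccccccc */
        return w;
    }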
- */ - -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -ENTRY(__muldi3) -ENTRY(__aeabi_lmul) - - mul xh, yl, xh - mla xh, xl, yh, xh - mov ip, xl, lsr #16 - mov yh, yl, lsr #16 - bic xl, xl, ip, lsl #16 - bic yl, yl, yh, lsl #16 - mla xh, yh, ip, xh - mul yh, xl, yh - mul xl, yl, xl - mul ip, yl, ip - adds xl, xl, yh, lsl #16 - adc xh, xh, yh, lsr #16 - adds xl, xl, ip, lsl #16 - adc xh, xh, ip, lsr #16 - ret lr - -ENDPROC(__muldi3) -ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S deleted file mode 100644 index bdd8836dc5c25ecfcb51efbacf4bde858daac5f8..0000000000000000000000000000000000000000 --- a/arch/arm/lib/putuser.S +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/putuser.S - * - * Copyright (C) 2001 Russell King - * - * Idea from x86 version, (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface to make - * them more efficient, especially as they return an error - * value in addition to the "real" return value. - * - * __put_user_X - * - * Inputs: r0 contains the address - * r1 contains the address limit, which must be preserved - * r2, r3 contains the value - * Outputs: r0 is the error code - * lr corrupted - * - * No other registers must be altered. (see - * for specific ASM register usage). - * - * Note that ADDR_LIMIT is either 0 or 0xc0000000 - * Note also that it is intended that __put_user_bad is not global. - */ -#include -#include -#include -#include - -ENTRY(__put_user_1) - check_uaccess r0, 1, r1, ip, __put_user_bad -1: TUSER(strb) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - check_uaccess r0, 2, r1, ip, __put_user_bad -#if __LINUX_ARM_ARCH__ >= 6 - -2: TUSER(strh) r2, [r0] - -#else - - mov ip, r2, lsr #8 -#ifndef __ARMEB__ -2: TUSER(strb) r2, [r0], #1 -3: TUSER(strb) ip, [r0] -#else -2: TUSER(strb) ip, [r0], #1 -3: TUSER(strb) r2, [r0] -#endif - -#endif /* __LINUX_ARM_ARCH__ >= 6 */ - mov r0, #0 - ret lr -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - check_uaccess r0, 4, r1, ip, __put_user_bad -4: TUSER(str) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - check_uaccess r0, 8, r1, ip, __put_user_bad -#ifdef CONFIG_THUMB2_KERNEL -5: TUSER(str) r2, [r0] -6: TUSER(str) r3, [r0, #4] -#else -5: TUSER(str) r2, [r0], #4 -6: TUSER(str) r3, [r0] -#endif - mov r0, #0 - ret lr -ENDPROC(__put_user_8) - -__put_user_bad: - mov r0, #-EFAULT - ret lr -ENDPROC(__put_user_bad) - -.pushsection __ex_table, "a" - .long 1b, __put_user_bad - .long 2b, __put_user_bad -#if __LINUX_ARM_ARCH__ < 6 - .long 3b, __put_user_bad -#endif - .long 4b, __put_user_bad - .long 5b, __put_user_bad - .long 6b, __put_user_bad -.popsection diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S deleted file mode 100644 index 19a96f43f4bbe71ddb697fb6af760f7ba45df499..0000000000000000000000000000000000000000 --- a/arch/arm/lib/setbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/setbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _set_bit, orr diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S deleted file mode 100644 index 09e2cc8a89501a0f9ace6f81cb2a3256d68cd105..0000000000000000000000000000000000000000 --- a/arch/arm/lib/strchr.S +++ /dev/null @@ -1,24 +0,0 @@ -/* 
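__aeabi_lmul builds a 64x64->64 multiply out of 32-bit mul/mla: the two cross products land in the high word, and the low 32x32 product is reconstructed from 16-bit halves, which is all the ip/yh juggling above. The decomposition in C:

    #include <stdint.h>

    static uint64_t lmul_ref(uint32_t xl, uint32_t xh, uint32_t yl, uint32_t yh)
    {
        uint32_t high = xh * yl + xl * yh;         /* cross products, mod 2^32 */
        uint32_t xl_hi = xl >> 16, xl_lo = xl & 0xffff;
        uint32_t yl_hi = yl >> 16, yl_lo = yl & 0xffff;

        uint64_t low = (uint64_t)xl_lo * yl_lo
                     + ((uint64_t)xl_lo * yl_hi << 16)
                     + ((uint64_t)xl_hi * yl_lo << 16);
        high += xl_hi * yl_hi;                     /* bit 32 and up of xl*yl */

        return ((uint64_t)high << 32) + low;       /* carries fold in here */
    }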
SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/strchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(strchr) - and r1, r1, #0xff -1: ldrb r2, [r0], #1 - teq r2, r1 - teqne r2, #0 - bne 1b - teq r2, r1 - movne r0, #0 - subeq r0, r0, #1 - ret lr -ENDPROC(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S deleted file mode 100644 index 5e87247d1e8bf46eabe27015d5921df2a8d0f94b..0000000000000000000000000000000000000000 --- a/arch/arm/lib/strrchr.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/strrchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(strrchr) - mov r3, #0 -1: ldrb r2, [r0], #1 - teq r2, r1 - subeq r3, r0, #1 - teq r2, #0 - bne 1b - mov r0, r3 - ret lr -ENDPROC(strrchr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S deleted file mode 100644 index 4ebecc67e6e044ab6fc38fb254314a2cd0332c83..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testchangebit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testchangebit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_change_bit, eor, str diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S deleted file mode 100644 index 009afa0f5b4a73fcafac5b2736aa90258fe1f77d..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testclearbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testclearbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_clear_bit, bicne, strne diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S deleted file mode 100644 index f3192e55acc87fede3a34a50d7ca6d446b049b89..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testsetbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testsetbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_set_bit, orreq, streq diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S deleted file mode 100644 index 679e16a210ae78cff87c0718ee86b5ae5fbd558d..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ucmpdi2.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ucmpdi2.S - * - * Author: Nicolas Pitre - * Created: Oct 19, 2005 - * Copyright: Monta Vista Software, Inc. 
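strrchr above never scans backwards: a single forward pass keeps overwriting r3 with the position of the most recent match, so the last occurrence wins, and matching the terminating NUL itself is allowed, as C requires. The same strategy in C:

    #include <stddef.h>

    static const char *strrchr_ref(const char *s, int c)
    {
        const char *last = NULL;

        do {
            if (*s == (char)c)
                last = s;          /* remember the most recent match */
        } while (*s++);
        return last;
    }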
- */ - -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -ENTRY(__ucmpdi2) - - cmp xh, yh - cmpeq xl, yl - movlo r0, #0 - moveq r0, #1 - movhi r0, #2 - ret lr - -ENDPROC(__ucmpdi2) - -#ifdef CONFIG_AEABI - -ENTRY(__aeabi_ulcmp) - - cmp xh, yh - cmpeq xl, yl - movlo r0, #-1 - moveq r0, #0 - movhi r0, #1 - ret lr - -ENDPROC(__aeabi_ulcmp) - -#endif - diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S deleted file mode 100644 index 2591cba61937b9cc4cbd24c6289dd3d1937b3137..0000000000000000000000000000000000000000 --- a/arch/arm/mach-at91/pm_suspend.S +++ /dev/null @@ -1,523 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mach-at91/pm_slow_clock.S - * - * Copyright (C) 2006 Savin Zlobec - * - * AT91SAM9 support: - * Copyright (C) 2007 Anti Sullin - */ -#include -#include -#include "pm.h" -#include "pm_data-offsets.h" - -#define SRAMC_SELF_FRESH_ACTIVE 0x01 -#define SRAMC_SELF_FRESH_EXIT 0x00 - -pmc .req r0 -tmp1 .req r4 -tmp2 .req r5 - -/* - * Wait until master clock is ready (after switching master clock source) - */ - .macro wait_mckrdy -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MCKRDY - beq 1b - .endm - -/* - * Wait until master oscillator has stabilized. - */ - .macro wait_moscrdy -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCS - beq 1b - .endm - -/* - * Wait for main oscillator selection is done - */ - .macro wait_moscsels -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCSELS - beq 1b - .endm - -/* - * Put the processor to enter the idle state - */ - .macro at91_cpu_idle - -#if defined(CONFIG_CPU_V7) - mov tmp1, #AT91_PMC_PCK - str tmp1, [pmc, #AT91_PMC_SCDR] - - dsb - - wfi @ Wait For Interrupt -#else - mcr p15, 0, tmp1, c7, c0, 4 -#endif - - .endm - - .text - - .arm - -/* - * void at91_suspend_sram_fn(struct at91_pm_data*) - * @input param: - * @r0: base address of struct at91_pm_data - */ -/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */ - .align 3 -ENTRY(at91_pm_suspend_in_sram) - /* Save registers on stack */ - stmfd sp!, {r4 - r12, lr} - - /* Drain write buffer */ - mov tmp1, #0 - mcr p15, 0, tmp1, c7, c10, 4 - - ldr tmp1, [r0, #PM_DATA_PMC] - str tmp1, .pmc_base - ldr tmp1, [r0, #PM_DATA_RAMC0] - str tmp1, .sramc_base - ldr tmp1, [r0, #PM_DATA_RAMC1] - str tmp1, .sramc1_base - ldr tmp1, [r0, #PM_DATA_MEMCTRL] - str tmp1, .memtype - ldr tmp1, [r0, #PM_DATA_MODE] - str tmp1, .pm_mode - /* Both ldrne below are here to preload their address in the TLB */ - ldr tmp1, [r0, #PM_DATA_SHDWC] - str tmp1, .shdwc - cmp tmp1, #0 - ldrne tmp2, [tmp1, #0] - ldr tmp1, [r0, #PM_DATA_SFRBU] - str tmp1, .sfr - cmp tmp1, #0 - ldrne tmp2, [tmp1, #0x10] - - /* Active the self-refresh mode */ - mov r0, #SRAMC_SELF_FRESH_ACTIVE - bl at91_sramc_self_refresh - - ldr r0, .pm_mode - cmp r0, #AT91_PM_STANDBY - beq standby - cmp r0, #AT91_PM_BACKUP - beq backup_mode - - bl at91_ulp_mode - b exit_suspend - -standby: - /* Wait for interrupt */ - ldr pmc, .pmc_base - at91_cpu_idle - b exit_suspend - -backup_mode: - bl at91_backup_mode - b exit_suspend - -exit_suspend: - /* Exit the self-refresh mode */ - mov r0, #SRAMC_SELF_FRESH_EXIT - bl at91_sramc_self_refresh - - /* Restore registers, and return */ - ldmfd sp!, {r4 - r12, pc} -ENDPROC(at91_pm_suspend_in_sram) - -ENTRY(at91_backup_mode) - /* Switch the master clock source to slow clock. 
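__ucmpdi2's cmp/cmpeq pair compares the high words and consults the low words only on a tie, returning 0, 1 or 2 for less, equal, greater (the AEABI variant maps the same logic onto -1, 0, 1). In C:

    #include <stdint.h>

    static int ucmpdi2_ref(uint64_t a, uint64_t b)
    {
        uint32_t ah = (uint32_t)(a >> 32), bh = (uint32_t)(b >> 32);
        uint32_t al = (uint32_t)a, bl = (uint32_t)b;

        if (ah != bh)
            return ah < bh ? 0 : 2;    /* decided by the high words */
        if (al != bl)
            return al < bl ? 0 : 2;    /* tie-break on the low words */
        return 1;
    }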
*/ - ldr pmc, .pmc_base - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /*BUMEN*/ - ldr r0, .sfr - mov tmp1, #0x1 - str tmp1, [r0, #0x10] - - /* Shutdown */ - ldr r0, .shdwc - mov tmp1, #0xA5000000 - add tmp1, tmp1, #0x1 - str tmp1, [r0, #0] -ENDPROC(at91_backup_mode) - -.macro at91_pm_ulp0_mode - ldr pmc, .pmc_base - - /* Turn off the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCEN - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Save RC oscillator state */ - ldr tmp1, [pmc, #AT91_PMC_SR] - str tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 1f - - /* Turn off RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC disabled done */ -2: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - bne 2b - - /* Wait for interrupt */ -1: at91_cpu_idle - - /* Restore RC oscillator state */ - ldr tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - beq 4f - - /* Turn on RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC stabilization */ -3: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - beq 3b - - /* Turn on the crystal oscillator */ -4: ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCEN - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscrdy -.endm - -/** - * Note: This procedure only applies on the platform which uses - * the external crystal oscillator as a main clock source. - */ -.macro at91_pm_ulp1_mode - ldr pmc, .pmc_base - - /* Save RC oscillator state and check if it is enabled. 
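wait_mckrdy, wait_moscrdy and wait_moscsels are all one pattern: spin on the PMC status register until the relevant ready bit is set. A sketch of that poll, with the offset and bit values as placeholders rather than the real AT91 definitions:

    #include <stdint.h>

    #define PMC_SR      0x68u          /* placeholder status offset */
    #define PMC_MCKRDY  (1u << 3)      /* placeholder ready bit */

    static void wait_ready(const volatile uint32_t *pmc_base, uint32_t bit)
    {
        while (!(pmc_base[PMC_SR / 4] & bit))
            ;                           /* busy-wait, as the .macro does */
    }

    /* e.g. wait_ready(pmc, PMC_MCKRDY); */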
*/ - ldr tmp1, [pmc, #AT91_PMC_SR] - str tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 2f - - /* Enable RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC stabilization */ -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - beq 1b - - /* Switch the main clock source to 12-MHz RC oscillator */ -2: ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCSEL - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscsels - - /* Disable the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Switch the master clock source to main clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - orr tmp1, tmp1, #AT91_PMC_CSS_MAIN - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Enter the ULP1 mode by set WAITMODE bit in CKGR_MOR */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_WAITMODE - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Quirk for SAM9X60's PMC */ - nop - nop - - wait_mckrdy - - /* Enable the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscrdy - - /* Switch the master clock source to slow clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Switch main clock source to crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCSEL - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscsels - - /* Switch the master clock source to main clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - orr tmp1, tmp1, #AT91_PMC_CSS_MAIN - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Restore RC oscillator state */ - ldr tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 3f - - /* Disable RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait RC oscillator disable done */ -4: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - bne 4b - -3: -.endm - -ENTRY(at91_ulp_mode) - ldr pmc, .pmc_base - - /* Save Master clock setting */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - str tmp1, .saved_mckr - - /* - * Set the Master clock source to slow clock - */ - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - ldr r0, .pm_mode - cmp r0, #AT91_PM_ULP1 - beq ulp1_mode - - at91_pm_ulp0_mode - b ulp_exit - -ulp1_mode: - at91_pm_ulp1_mode - b ulp_exit - -ulp_exit: - ldr pmc, .pmc_base - - /* - * Restore master clock setting - */ - ldr tmp1, .saved_mckr - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - mov pc, lr -ENDPROC(at91_ulp_mode) - -/* - * void at91_sramc_self_refresh(unsigned int is_active) - * - * @input param: - * @r0: 1 - active self-refresh mode - * 0 - exit self-refresh mode - * register usage: - * @r1: memory type - * @r2: base address of the sram controller - */ - 
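The routine below dispatches on the saved memory-controller type and toggles self-refresh accordingly. As a rough C sketch of its DDR branch (a simplification: the assembly additionally rewrites MDR to force LPDDR1 parts into DDR2 mode while in self-refresh, and repeats the whole sequence for an optional second controller; register names are the ones from the at91 headers used above):

	static u32 saved_lpr;

	static void ddrc_self_refresh(void __iomem *ddrc, bool enter)
	{
		if (enter) {
			u32 lpr = readl(ddrc + AT91_DDRSDRC_LPR);

			saved_lpr = lpr;		/* restored on exit */
			lpr &= ~AT91_DDRSDRC_LPCB;
			lpr |= AT91_DDRSDRC_LPCB_SELF_REFRESH;
			writel(lpr, ddrc + AT91_DDRSDRC_LPR);
		} else {
			writel(saved_lpr, ddrc + AT91_DDRSDRC_LPR);
		}
	}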
-ENTRY(at91_sramc_self_refresh) - ldr r1, .memtype - ldr r2, .sramc_base - - cmp r1, #AT91_MEMCTRL_MC - bne ddrc_sf - - /* - * at91rm9200 Memory controller - */ - - /* - * For exiting the self-refresh mode, do nothing, - * automatically exit the self-refresh mode. - */ - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq exit_sramc_sf - - /* Active SDRAM self-refresh mode */ - mov r3, #1 - str r3, [r2, #AT91_MC_SDRAMC_SRR] - b exit_sramc_sf - -ddrc_sf: - cmp r1, #AT91_MEMCTRL_DDRSDR - bne sdramc_sf - - /* - * DDR Memory controller - */ - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq ddrc_exit_sf - - /* LPDDR1 --> force DDR2 mode during self-refresh */ - ldr r3, [r2, #AT91_DDRSDRC_MDR] - str r3, .saved_sam9_mdr - bic r3, r3, #~AT91_DDRSDRC_MD - cmp r3, #AT91_DDRSDRC_MD_LOW_POWER_DDR - ldreq r3, [r2, #AT91_DDRSDRC_MDR] - biceq r3, r3, #AT91_DDRSDRC_MD - orreq r3, r3, #AT91_DDRSDRC_MD_DDR2 - streq r3, [r2, #AT91_DDRSDRC_MDR] - - /* Active DDRC self-refresh mode */ - ldr r3, [r2, #AT91_DDRSDRC_LPR] - str r3, .saved_sam9_lpr - bic r3, r3, #AT91_DDRSDRC_LPCB - orr r3, r3, #AT91_DDRSDRC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_DDRSDRC_LPR] - - /* If using the 2nd ddr controller */ - ldr r2, .sramc1_base - cmp r2, #0 - beq no_2nd_ddrc - - ldr r3, [r2, #AT91_DDRSDRC_MDR] - str r3, .saved_sam9_mdr1 - bic r3, r3, #~AT91_DDRSDRC_MD - cmp r3, #AT91_DDRSDRC_MD_LOW_POWER_DDR - ldreq r3, [r2, #AT91_DDRSDRC_MDR] - biceq r3, r3, #AT91_DDRSDRC_MD - orreq r3, r3, #AT91_DDRSDRC_MD_DDR2 - streq r3, [r2, #AT91_DDRSDRC_MDR] - - /* Active DDRC self-refresh mode */ - ldr r3, [r2, #AT91_DDRSDRC_LPR] - str r3, .saved_sam9_lpr1 - bic r3, r3, #AT91_DDRSDRC_LPCB - orr r3, r3, #AT91_DDRSDRC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_DDRSDRC_LPR] - -no_2nd_ddrc: - b exit_sramc_sf - -ddrc_exit_sf: - /* Restore MDR in case of LPDDR1 */ - ldr r3, .saved_sam9_mdr - str r3, [r2, #AT91_DDRSDRC_MDR] - /* Restore LPR on AT91 with DDRAM */ - ldr r3, .saved_sam9_lpr - str r3, [r2, #AT91_DDRSDRC_LPR] - - /* If using the 2nd ddr controller */ - ldr r2, .sramc1_base - cmp r2, #0 - ldrne r3, .saved_sam9_mdr1 - strne r3, [r2, #AT91_DDRSDRC_MDR] - ldrne r3, .saved_sam9_lpr1 - strne r3, [r2, #AT91_DDRSDRC_LPR] - - b exit_sramc_sf - - /* - * SDRAMC Memory controller - */ -sdramc_sf: - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq sdramc_exit_sf - - /* Active SDRAMC self-refresh mode */ - ldr r3, [r2, #AT91_SDRAMC_LPR] - str r3, .saved_sam9_lpr - bic r3, r3, #AT91_SDRAMC_LPCB - orr r3, r3, #AT91_SDRAMC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_SDRAMC_LPR] - -sdramc_exit_sf: - ldr r3, .saved_sam9_lpr - str r3, [r2, #AT91_SDRAMC_LPR] - -exit_sramc_sf: - mov pc, lr -ENDPROC(at91_sramc_self_refresh) - -.pmc_base: - .word 0 -.sramc_base: - .word 0 -.sramc1_base: - .word 0 -.shdwc: - .word 0 -.sfr: - .word 0 -.memtype: - .word 0 -.pm_mode: - .word 0 -.saved_mckr: - .word 0 -.saved_sam9_lpr: - .word 0 -.saved_sam9_lpr1: - .word 0 -.saved_sam9_mdr: - .word 0 -.saved_sam9_mdr1: - .word 0 -.saved_osc_status: - .word 0 - -ENTRY(at91_pm_suspend_in_sram_sz) - .word .-at91_pm_suspend_in_sram diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S deleted file mode 100644 index 3057885d97728f6896409f62ddb93cb64378962d..0000000000000000000000000000000000000000 --- a/arch/arm/mach-berlin/headsmp.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014 Marvell Technology Group Ltd. 
- * - * Antoine Ténart - */ - -#include -#include -#include - -/* - * If the following instruction is set in the reset exception vector, CPUs - * will fetch the value of the software reset address vector when being - * reset. - */ -.global boot_inst -boot_inst: - ldr pc, [pc, #140] - - .align diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S deleted file mode 100644 index 71262dcdbca32aea22714945f43bc6671c71c8a9..0000000000000000000000000000000000000000 --- a/arch/arm/mach-davinci/sleep.S +++ /dev/null @@ -1,216 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * (C) Copyright 2009, Texas Instruments, Inc. http://www.ti.com/ - */ - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - -#include -#include -#include "psc.h" -#include "ddr2.h" - -#include "clock.h" - -/* Arbitrary, hardware currently does not update PHYRDY correctly */ -#define PHYRDY_CYCLES 0x1000 - -/* Assume 25 MHz speed for the cycle conversions since PLLs are bypassed */ -#define PLL_BYPASS_CYCLES (PLL_BYPASS_TIME * 25) -#define PLL_RESET_CYCLES (PLL_RESET_TIME * 25) -#define PLL_LOCK_CYCLES (PLL_LOCK_TIME * 25) - -#define DEEPSLEEP_SLEEPENABLE_BIT BIT(31) - - .text - .arch armv5te -/* - * Move DaVinci into deep sleep state - * - * Note: This code is copied to internal SRAM by PM code. When the DaVinci - * wakes up it continues execution at the point it went to sleep. - * Register Usage: - * r0: contains virtual base for DDR2 controller - * r1: contains virtual base for DDR2 Power and Sleep controller (PSC) - * r2: contains PSC number for DDR2 - * r3: contains virtual base DDR2 PLL controller - * r4: contains virtual address of the DEEPSLEEP register - */ -ENTRY(davinci_cpu_suspend) - stmfd sp!, {r0-r12, lr} @ save registers on stack - - ldr ip, CACHE_FLUSH - blx ip - - ldmia r0, {r0-r4} - - /* - * Switch DDR to self-refresh mode. 
- */ - - /* calculate SDRCR address */ - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_SRPD_BIT - orr ip, ip, #DDR2_LPMODEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - orr ip, ip, #DDR2_MCLKSTOPEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - mov ip, #PHYRDY_CYCLES -1: subs ip, ip, #0x1 - bne 1b - - /* Disable DDR2 LPSC */ - mov r7, r0 - mov r0, #0x2 - bl davinci_ddr_psc_config - mov r0, r7 - - /* Disable clock to DDR PHY */ - ldr ip, [r3, #PLLDIV1] - bic ip, ip, #PLLDIV_EN - str ip, [r3, #PLLDIV1] - - /* Put the DDR PLL in bypass and power down */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLENSRC - bic ip, ip, #PLLCTL_PLLEN - str ip, [r3, #PLLCTL] - - /* Wait for PLL to switch to bypass */ - mov ip, #PLL_BYPASS_CYCLES -2: subs ip, ip, #0x1 - bne 2b - - /* Power down the PLL */ - ldr ip, [r3, #PLLCTL] - orr ip, ip, #PLLCTL_PLLPWRDN - str ip, [r3, #PLLCTL] - - /* Go to deep sleep */ - ldr ip, [r4] - orr ip, ip, #DEEPSLEEP_SLEEPENABLE_BIT - /* System goes to sleep beyond after this instruction */ - str ip, [r4] - - /* Wake up from sleep */ - - /* Clear sleep enable */ - ldr ip, [r4] - bic ip, ip, #DEEPSLEEP_SLEEPENABLE_BIT - str ip, [r4] - - /* initialize the DDR PLL controller */ - - /* Put PLL in reset */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLRST - str ip, [r3, #PLLCTL] - - /* Clear PLL power down */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLPWRDN - str ip, [r3, #PLLCTL] - - mov ip, #PLL_RESET_CYCLES -3: subs ip, ip, #0x1 - bne 3b - - /* Bring PLL out of reset */ - ldr ip, [r3, #PLLCTL] - orr ip, ip, #PLLCTL_PLLRST - str ip, [r3, #PLLCTL] - - /* Wait for PLL to lock (assume prediv = 1, 25MHz OSCIN) */ - mov ip, #PLL_LOCK_CYCLES -4: subs ip, ip, #0x1 - bne 4b - - /* Remove PLL from bypass mode */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLENSRC - orr ip, ip, #PLLCTL_PLLEN - str ip, [r3, #PLLCTL] - - /* Start 2x clock to DDR2 */ - - ldr ip, [r3, #PLLDIV1] - orr ip, ip, #PLLDIV_EN - str ip, [r3, #PLLDIV1] - - /* Enable VCLK */ - - /* Enable DDR2 LPSC */ - mov r7, r0 - mov r0, #0x3 - bl davinci_ddr_psc_config - mov r0, r7 - - /* clear MCLKSTOPEN */ - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_MCLKSTOPEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_LPMODEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - /* Restore registers and return */ - ldmfd sp!, {r0-r12, pc} - -ENDPROC(davinci_cpu_suspend) - -/* - * Disables or Enables DDR2 LPSC - * Register Usage: - * r0: Enable or Disable LPSC r0 = 0x3 => Enable, r0 = 0x2 => Disable LPSC - * r1: contains virtual base for DDR2 Power and Sleep controller (PSC) - * r2: contains PSC number for DDR2 - */ -ENTRY(davinci_ddr_psc_config) - /* Set next state in mdctl for DDR2 */ - mov r6, #MDCTL - add r6, r6, r2, lsl #2 - ldr ip, [r1, r6] - bic ip, ip, #MDSTAT_STATE_MASK - orr ip, ip, r0 - str ip, [r1, r6] - - /* Enable the Power Domain Transition Command */ - ldr ip, [r1, #PTCMD] - orr ip, ip, #0x1 - str ip, [r1, #PTCMD] - - /* Check for Transition Complete (PTSTAT) */ -ptstat_done: - ldr ip, [r1, #PTSTAT] - and ip, ip, #0x1 - cmp ip, #0x0 - bne ptstat_done - - /* Check for DDR2 clock disable completion; */ - mov r6, #MDSTAT - add r6, r6, r2, lsl #2 -ddr2clk_stop_done: - ldr ip, [r1, r6] - and ip, ip, #MDSTAT_STATE_MASK - cmp ip, r0 - bne ddr2clk_stop_done - - ret lr -ENDPROC(davinci_ddr_psc_config) - -CACHE_FLUSH: -#ifdef CONFIG_CPU_V6 - .word v6_flush_kern_cache_all -#else - .word arm926_flush_kern_cache_all -#endif - 
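davinci_cpu_suspend runs from internal SRAM, so the PM code needs the routine's byte length in order to copy it there; the word emitted below exports exactly that. A minimal sketch of the caller side, assuming sram_vbase is an executable SRAM mapping (fncpy() is the standard ARM helper for copying position-independent code, and it requires the 8-byte-aligned destination also noted in the at91 code above):

	#include <asm/fncpy.h>

	extern void davinci_cpu_suspend(unsigned long *args);
	extern unsigned long davinci_cpu_suspend_sz;

	typedef void (*sram_suspend_fn)(unsigned long *args);

	static sram_suspend_fn install_suspend_code(void *sram_vbase)
	{
		/* Copy the routine into SRAM, get back a callable pointer. */
		return fncpy(sram_vbase, &davinci_cpu_suspend,
			     davinci_cpu_suspend_sz);
	}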
-ENTRY(davinci_cpu_suspend_sz) - .word . - davinci_cpu_suspend -ENDPROC(davinci_cpu_suspend_sz) diff --git a/arch/arm/mach-ebsa110/include/mach/entry-macro.S b/arch/arm/mach-ebsa110/include/mach/entry-macro.S deleted file mode 100644 index 14b110de78a9caf133dce9b9f5c99f29c06385c5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-ebsa110/include/mach/entry-macro.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/arm/mach-ebsa110/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for ebsa110 platform. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - - - -#define IRQ_STAT 0xff000000 /* read */ - - .macro get_irqnr_preamble, base, tmp - mov \base, #IRQ_STAT - .endm - - .macro get_irqnr_and_base, irqnr, stat, base, tmp - ldrb \stat, [\base] @ get interrupts - mov \irqnr, #0 - tst \stat, #15 - addeq \irqnr, \irqnr, #4 - moveq \stat, \stat, lsr #4 - tst \stat, #3 - addeq \irqnr, \irqnr, #2 - moveq \stat, \stat, lsr #2 - tst \stat, #1 - addeq \irqnr, \irqnr, #1 - moveq \stat, \stat, lsr #1 - tst \stat, #1 @ bit 0 should be set - .endm - diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S deleted file mode 100644 index fb2dbf76f09ee58df6ba0550f03f74d75817f490..0000000000000000000000000000000000000000 --- a/arch/arm/mach-ep93xx/crunch-bits.S +++ /dev/null @@ -1,310 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/kernel/crunch-bits.S - * Cirrus MaverickCrunch context switching and handling - * - * Copyright (C) 2006 Lennert Buytenhek - * - * Shamelessly stolen from the iWMMXt code by Nicolas Pitre, which is - * Copyright (c) 2003-2004, MontaVista Software, Inc. - */ - -#include -#include -#include -#include -#include -#include - -/* - * We can't use hex constants here due to a bug in gas. - */ -#define CRUNCH_MVDX0 0 -#define CRUNCH_MVDX1 8 -#define CRUNCH_MVDX2 16 -#define CRUNCH_MVDX3 24 -#define CRUNCH_MVDX4 32 -#define CRUNCH_MVDX5 40 -#define CRUNCH_MVDX6 48 -#define CRUNCH_MVDX7 56 -#define CRUNCH_MVDX8 64 -#define CRUNCH_MVDX9 72 -#define CRUNCH_MVDX10 80 -#define CRUNCH_MVDX11 88 -#define CRUNCH_MVDX12 96 -#define CRUNCH_MVDX13 104 -#define CRUNCH_MVDX14 112 -#define CRUNCH_MVDX15 120 -#define CRUNCH_MVAX0L 128 -#define CRUNCH_MVAX0M 132 -#define CRUNCH_MVAX0H 136 -#define CRUNCH_MVAX1L 140 -#define CRUNCH_MVAX1M 144 -#define CRUNCH_MVAX1H 148 -#define CRUNCH_MVAX2L 152 -#define CRUNCH_MVAX2M 156 -#define CRUNCH_MVAX2H 160 -#define CRUNCH_MVAX3L 164 -#define CRUNCH_MVAX3M 168 -#define CRUNCH_MVAX3H 172 -#define CRUNCH_DSPSC 176 - -#define CRUNCH_SIZE 184 - - .text - -/* - * Lazy switching of crunch coprocessor context - * - * r10 = struct thread_info pointer - * r9 = ret_from_exception - * lr = undefined instr exit - * - * called from prefetch exception handler with interrupts enabled - */ -ENTRY(crunch_task_enable) - inc_preempt_count r10, r3 - - ldr r8, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r1, [r8, #0x80] - tst r1, #0x00800000 @ access to crunch enabled? 
- bne 2f @ if so no business here - mov r3, #0xaa @ unlock syscon swlock - str r3, [r8, #0xc0] - orr r1, r1, #0x00800000 @ enable access to crunch - str r1, [r8, #0x80] - - ldr r3, =crunch_owner - add r0, r10, #TI_CRUNCH_STATE @ get task crunch save area - ldr r2, [sp, #60] @ current task pc value - ldr r1, [r3] @ get current crunch owner - str r0, [r3] @ this task now owns crunch - sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] - - ldr r2, [r8, #0x80] - mov r2, r2 @ flush out enable (@@@) - - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq crunch_load @ no owner, skip save - -crunch_save: - cfstr64 mvdx0, [r1, #CRUNCH_MVDX0] @ save 64b registers - cfstr64 mvdx1, [r1, #CRUNCH_MVDX1] - cfstr64 mvdx2, [r1, #CRUNCH_MVDX2] - cfstr64 mvdx3, [r1, #CRUNCH_MVDX3] - cfstr64 mvdx4, [r1, #CRUNCH_MVDX4] - cfstr64 mvdx5, [r1, #CRUNCH_MVDX5] - cfstr64 mvdx6, [r1, #CRUNCH_MVDX6] - cfstr64 mvdx7, [r1, #CRUNCH_MVDX7] - cfstr64 mvdx8, [r1, #CRUNCH_MVDX8] - cfstr64 mvdx9, [r1, #CRUNCH_MVDX9] - cfstr64 mvdx10, [r1, #CRUNCH_MVDX10] - cfstr64 mvdx11, [r1, #CRUNCH_MVDX11] - cfstr64 mvdx12, [r1, #CRUNCH_MVDX12] - cfstr64 mvdx13, [r1, #CRUNCH_MVDX13] - cfstr64 mvdx14, [r1, #CRUNCH_MVDX14] - cfstr64 mvdx15, [r1, #CRUNCH_MVDX15] - -#ifdef __ARMEB__ -#error fix me for ARMEB -#endif - - cfmv32al mvfx0, mvax0 @ save 72b accumulators - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0L] - cfmv32am mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0M] - cfmv32ah mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0H] - cfmv32al mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1L] - cfmv32am mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1M] - cfmv32ah mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1H] - cfmv32al mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2L] - cfmv32am mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2M] - cfmv32ah mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2H] - cfmv32al mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3L] - cfmv32am mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3M] - cfmv32ah mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3H] - - cfmv32sc mvdx0, dspsc @ save status word - cfstr64 mvdx0, [r1, #CRUNCH_DSPSC] - - teq r0, #0 @ anything to load? 
- cfldr64eq mvdx0, [r1, #CRUNCH_MVDX0] @ mvdx0 was clobbered - beq 1f - -crunch_load: - cfldr64 mvdx0, [r0, #CRUNCH_DSPSC] @ load status word - cfmvsc32 dspsc, mvdx0 - - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0L] @ load 72b accumulators - cfmval32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0M] - cfmvam32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0H] - cfmvah32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1L] - cfmval32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1M] - cfmvam32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1H] - cfmvah32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2L] - cfmval32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2M] - cfmvam32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2H] - cfmvah32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3L] - cfmval32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3M] - cfmvam32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3H] - cfmvah32 mvax3, mvfx0 - - cfldr64 mvdx0, [r0, #CRUNCH_MVDX0] @ load 64b registers - cfldr64 mvdx1, [r0, #CRUNCH_MVDX1] - cfldr64 mvdx2, [r0, #CRUNCH_MVDX2] - cfldr64 mvdx3, [r0, #CRUNCH_MVDX3] - cfldr64 mvdx4, [r0, #CRUNCH_MVDX4] - cfldr64 mvdx5, [r0, #CRUNCH_MVDX5] - cfldr64 mvdx6, [r0, #CRUNCH_MVDX6] - cfldr64 mvdx7, [r0, #CRUNCH_MVDX7] - cfldr64 mvdx8, [r0, #CRUNCH_MVDX8] - cfldr64 mvdx9, [r0, #CRUNCH_MVDX9] - cfldr64 mvdx10, [r0, #CRUNCH_MVDX10] - cfldr64 mvdx11, [r0, #CRUNCH_MVDX11] - cfldr64 mvdx12, [r0, #CRUNCH_MVDX12] - cfldr64 mvdx13, [r0, #CRUNCH_MVDX13] - cfldr64 mvdx14, [r0, #CRUNCH_MVDX14] - cfldr64 mvdx15, [r0, #CRUNCH_MVDX15] - -1: -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -2: dec_preempt_count r10, r3 - ret lr - -/* - * Back up crunch regs to save area and disable access to them - * (mainly for gdb or sleep mode usage) - * - * r0 = struct thread_info pointer of target task or NULL for any - */ -ENTRY(crunch_task_disable) - stmfd sp!, {r4, r5, lr} - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r4, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r1, [r3] @ get current crunch owner - teq r1, #0 @ any current owner? - beq 1f @ no: quit - teq r0, #0 @ any owner? - teqne r1, r2 @ or specified one? - bne 1f @ no: quit - - ldr r5, [r4, #0x80] @ enable access to crunch - mov r2, #0xaa - str r2, [r4, #0xc0] - orr r5, r5, #0x00800000 - str r5, [r4, #0x80] - - mov r0, #0 @ nothing to load - str r0, [r3] @ no more current owner - ldr r2, [r4, #0x80] @ flush out enable (@@@) - mov r2, r2 - bl crunch_save - - mov r2, #0xaa @ disable access to crunch - str r2, [r4, #0xc0] - bic r5, r5, #0x00800000 - str r5, [r4, #0x80] - ldr r5, [r4, #0x80] @ flush out enable (@@@) - mov r5, r5 - -1: msr cpsr_c, ip @ restore interrupt mode - ldmfd sp!, {r4, r5, pc} - -/* - * Copy crunch state to given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to store crunch state - * - * this is called mainly in the creation of signal stack frames - */ -ENTRY(crunch_task_copy) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... 
- beq 1f - - @ current crunch values are in the task save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r1 - mov r1, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- grab a copy from there - mov r0, #0 @ nothing to load - mov r3, lr @ preserve return address - bl crunch_save - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -/* - * Restore crunch state from given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to get crunch state from - * - * this is used to restore crunch state when unwinding a signal stack frame - */ -ENTRY(crunch_task_restore) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... - beq 1f - - @ this task doesn't own crunch regs -- use its save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- load them directly - mov r0, r1 - mov r1, #0 @ nothing to save - mov r3, lr @ preserve return address - bl crunch_load - msr cpsr_c, ip @ restore interrupt mode - ret r3 diff --git a/arch/arm/mach-exynos/exynos-smc.S b/arch/arm/mach-exynos/exynos-smc.S deleted file mode 100644 index 6da31e6a7acbc74397020a2bf9498f8eb223a3f5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/exynos-smc.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 Samsung Electronics. - * - * Copied from omap-smc.S Copyright (C) 2010 Texas Instruments, Inc. - */ - -#include - -/* - * Function signature: void exynos_smc(u32 cmd, u32 arg1, u32 arg2, u32 arg3) - */ - .arch armv7-a - .arch_extension sec -ENTRY(exynos_smc) - stmfd sp!, {r4-r11, lr} - dsb - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(exynos_smc) diff --git a/arch/arm/mach-exynos/headsmp.S b/arch/arm/mach-exynos/headsmp.S deleted file mode 100644 index 0ac2cb9a735568613c3dc7cdd52b599945e0134c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/headsmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Cloned from linux/arch/arm/mach-realview/headsmp.S - * - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include - -#include - -/* - * exynos4 specific entry point for secondary CPUs. This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(exynos4_secondary_startup) -ARM_BE8(setend be) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup -ENDPROC(exynos4_secondary_startup) - - .align 2 -1: .long . - .long exynos_pen_release diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S deleted file mode 100644 index ed93f91853b8cddc3c3e869adeaee1bcb1fe3650..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/sleep.S +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2013 Samsung Electronics Co., Ltd. 
- * http://www.samsung.com - * - * Exynos low-level resume code - */ - -#include -#include -#include -#include "smc.h" - -#define CPU_MASK 0xff0ffff0 -#define CPU_CORTEX_A9 0x410fc090 - - .text - .align - - /* - * sleep magic, to allow the bootloader to check for an valid - * image to resume to. Must be the first word before the - * exynos_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* - * exynos_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(exynos_cpu_resume) -#ifdef CONFIG_CACHE_L2X0 - mrc p15, 0, r0, c0, c0, 0 - ldr r1, =CPU_MASK - and r0, r0, r1 - ldr r1, =CPU_CORTEX_A9 - cmp r0, r1 - bleq l2c310_early_resume -#endif - b cpu_resume -ENDPROC(exynos_cpu_resume) - - .align - .arch armv7-a - .arch_extension sec -ENTRY(exynos_cpu_resume_ns) - mrc p15, 0, r0, c0, c0, 0 - ldr r1, =CPU_MASK - and r0, r0, r1 - ldr r1, =CPU_CORTEX_A9 - cmp r0, r1 - bne skip_cp15 - - adr r0, _cp15_save_power - ldr r1, [r0] - ldr r1, [r0, r1] - adr r0, _cp15_save_diag - ldr r2, [r0] - ldr r2, [r0, r2] - mov r0, #SMC_CMD_C15RESUME - dsb - smc #0 -#ifdef CONFIG_CACHE_L2X0 - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - /* Check that the address has been initialised. */ - ldr r1, [r0, #L2X0_R_PHY_BASE] - teq r1, #0 - beq skip_l2x0 - - /* Check if controller has been enabled. */ - ldr r2, [r1, #L2X0_CTRL] - tst r2, #0x1 - bne skip_l2x0 - - ldr r1, [r0, #L2X0_R_TAG_LATENCY] - ldr r2, [r0, #L2X0_R_DATA_LATENCY] - ldr r3, [r0, #L2X0_R_PREFETCH_CTRL] - mov r0, #SMC_CMD_L2X0SETUP1 - smc #0 - - /* Reload saved regs pointer because smc corrupts registers. */ - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - ldr r1, [r0, #L2X0_R_PWR_CTRL] - ldr r2, [r0, #L2X0_R_AUX_CTRL] - mov r0, #SMC_CMD_L2X0SETUP2 - smc #0 - - mov r0, #SMC_CMD_L2X0INVALL - smc #0 - - mov r1, #1 - mov r0, #SMC_CMD_L2X0CTRL - smc #0 -skip_l2x0: -#endif /* CONFIG_CACHE_L2X0 */ -skip_cp15: - b cpu_resume -ENDPROC(exynos_cpu_resume_ns) - - .align -_cp15_save_power: - .long cp15_save_power - . -_cp15_save_diag: - .long cp15_save_diag - . -#ifdef CONFIG_CACHE_L2X0 -1: .long l2x0_saved_regs - . -#endif /* CONFIG_CACHE_L2X0 */ - - .data - .align 2 - .globl cp15_save_diag -cp15_save_diag: - .long 0 @ cp15 diagnostic - .globl cp15_save_power -cp15_save_power: - .long 0 @ cp15 power control diff --git a/arch/arm/mach-footbridge/include/mach/entry-macro.S b/arch/arm/mach-footbridge/include/mach/entry-macro.S deleted file mode 100644 index dabbd5c54a788f5529d81b2942a974bc66d86018..0000000000000000000000000000000000000000 --- a/arch/arm/mach-footbridge/include/mach/entry-macro.S +++ /dev/null @@ -1,107 +0,0 @@ -/* - * arch/arm/mach-footbridge/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for footbridge-based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ -#include -#include -#include - - .equ dc21285_high, ARMCSR_BASE & 0xff000000 - .equ dc21285_low, ARMCSR_BASE & 0x00ffffff - - .macro get_irqnr_preamble, base, tmp - mov \base, #dc21285_high - .if dc21285_low - orr \base, \base, #dc21285_low - .endif - .endm - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #0x180] @ get interrupts - - mov \irqnr, #IRQ_SDRAMPARITY - tst \irqstat, #IRQ_MASK_SDRAMPARITY - bne 1001f - - tst \irqstat, #IRQ_MASK_UART_RX - movne \irqnr, #IRQ_CONRX - bne 1001f - - tst \irqstat, #IRQ_MASK_DMA1 - movne \irqnr, #IRQ_DMA1 - bne 1001f - - tst \irqstat, #IRQ_MASK_DMA2 - movne \irqnr, #IRQ_DMA2 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN0 - movne \irqnr, #IRQ_IN0 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN1 - movne \irqnr, #IRQ_IN1 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN2 - movne \irqnr, #IRQ_IN2 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN3 - movne \irqnr, #IRQ_IN3 - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI - movne \irqnr, #IRQ_PCI - bne 1001f - - tst \irqstat, #IRQ_MASK_DOORBELLHOST - movne \irqnr, #IRQ_DOORBELLHOST - bne 1001f - - tst \irqstat, #IRQ_MASK_I2OINPOST - movne \irqnr, #IRQ_I2OINPOST - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER1 - movne \irqnr, #IRQ_TIMER1 - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER2 - movne \irqnr, #IRQ_TIMER2 - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER3 - movne \irqnr, #IRQ_TIMER3 - bne 1001f - - tst \irqstat, #IRQ_MASK_UART_TX - movne \irqnr, #IRQ_CONTX - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_ABORT - movne \irqnr, #IRQ_PCI_ABORT - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_SERR - movne \irqnr, #IRQ_PCI_SERR - bne 1001f - - tst \irqstat, #IRQ_MASK_DISCARD_TIMER - movne \irqnr, #IRQ_DISCARD_TIMER - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_DPERR - movne \irqnr, #IRQ_PCI_DPERR - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_PERR - movne \irqnr, #IRQ_PCI_PERR -1001: - .endm - diff --git a/arch/arm/mach-highbank/smc.S b/arch/arm/mach-highbank/smc.S deleted file mode 100644 index 78b3f19e7f37fe83edb25a723037417c1d2d7bd8..0000000000000000000000000000000000000000 --- a/arch/arm/mach-highbank/smc.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copied from omap44xx-smc.S Copyright (C) 2010 Texas Instruments, Inc. - * Copyright 2012 Calxeda, Inc. - */ - -#include - -/* - * This is common routine to manage secure monitor API - * used to modify the PL310 secure registers. - * 'r0' contains the value to be modified and 'r12' contains - * the monitor API number. - * Function signature : void highbank_smc1(u32 fn, u32 arg) - */ - .arch armv7-a - .arch_extension sec -ENTRY(highbank_smc1) - stmfd sp!, {r4-r11, lr} - mov r12, r0 - mov r0, r1 - dsb - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(highbank_smc1) diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S deleted file mode 100644 index 766dbdb2ae27b9eb51cca555c8170103d17dde17..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/headsmp.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2011 Freescale Semiconductor, Inc. - * Copyright 2011 Linaro Ltd. - */ - -#include -#include -#include - -diag_reg_offset: - .word g_diag_reg - . 
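The ".word g_diag_reg - ." above stores a self-relative offset rather than an absolute address: a secondary core executes this code from its physical load address before the MMU is on, so a link-time virtual address would be useless. The set_diag_reg macro below recovers the physical address by adding the word's own run-time address back in. The same idiom expressed in C, as a sketch:

	#include <stdint.h>

	/*
	 * anchor points at a word assembled as ".word symbol - .";
	 * adding the stored offset to the word's own address yields
	 * the symbol's address in the current address space.
	 */
	static inline uintptr_t resolve_self_relative(const int32_t *anchor)
	{
		return (uintptr_t)anchor + *anchor;
	}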
- - .macro set_diag_reg - adr r0, diag_reg_offset - ldr r1, [r0] - add r1, r1, r0 @ r1 = physical &g_diag_reg - ldr r0, [r1] - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - .endm - -ENTRY(v7_secondary_startup) -ARM_BE8(setend be) @ go BE8 if entered LE - set_diag_reg - b secondary_startup -ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S deleted file mode 100644 index 5bd1ba7ef15b61cb98d1bd2d3af4e85acc5ec632..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/resume-imx6.S +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 Freescale Semiconductor, Inc. - */ - -#include -#include -#include -#include -#include "hardware.h" - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/ssi-fiq.S b/arch/arm/mach-imx/ssi-fiq.S deleted file mode 100644 index 68d7fdea92ad6c3c37284b78778001de5522f290..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/ssi-fiq.S +++ /dev/null @@ -1,144 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2009 Sascha Hauer - */ - -#include -#include - -/* - * r8 = bit 0-15: tx offset, bit 16-31: tx buffer size - * r9 = bit 0-15: rx offset, bit 16-31: rx buffer size - */ - -#define SSI_STX0 0x00 -#define SSI_SRX0 0x08 -#define SSI_SISR 0x14 -#define SSI_SIER 0x18 -#define SSI_SACNT 0x38 - -#define SSI_SACNT_AC97EN (1 << 0) - -#define SSI_SIER_TFE0_EN (1 << 0) -#define SSI_SISR_TFE0 (1 << 0) -#define SSI_SISR_RFF0 (1 << 2) -#define SSI_SIER_RFF0_EN (1 << 2) - - .text - .global imx_ssi_fiq_start - .global imx_ssi_fiq_end - .global imx_ssi_fiq_base - .global imx_ssi_fiq_rx_buffer - .global imx_ssi_fiq_tx_buffer - -/* - * imx_ssi_fiq_start is _intentionally_ not marked as a function symbol - * using ENDPROC(). imx_ssi_fiq_start and imx_ssi_fiq_end are used to - * mark the function body so that it can be copied to the FIQ vector in - * the vectors page. imx_ssi_fiq_start should only be called as the result - * of an FIQ: calling it directly will not work. - */ -imx_ssi_fiq_start: - ldr r12, .L_imx_ssi_fiq_base - - /* TX */ - ldr r13, .L_imx_ssi_fiq_tx_buffer - - /* shall we send? */ - ldr r11, [r12, #SSI_SIER] - tst r11, #SSI_SIER_TFE0_EN - beq 1f - - /* TX FIFO empty? */ - ldr r11, [r12, #SSI_SISR] - tst r11, #SSI_SISR_TFE0 - beq 1f - - mov r10, #0x10000 - sub r10, #1 - and r10, r10, r8 /* r10: current buffer offset */ - - add r13, r13, r10 - - ldrh r11, [r13] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #2] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #4] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #6] - strh r11, [r12, #SSI_STX0] - - add r10, #8 - lsr r11, r8, #16 /* r11: buffer size */ - cmp r10, r11 - lslgt r8, r11, #16 - addle r8, #8 -1: - /* RX */ - - /* shall we receive? */ - ldr r11, [r12, #SSI_SIER] - tst r11, #SSI_SIER_RFF0_EN - beq 1f - - /* RX FIFO full? 
*/ - ldr r11, [r12, #SSI_SISR] - tst r11, #SSI_SISR_RFF0 - beq 1f - - ldr r13, .L_imx_ssi_fiq_rx_buffer - - mov r10, #0x10000 - sub r10, #1 - and r10, r10, r9 /* r10: current buffer offset */ - - add r13, r13, r10 - - ldr r11, [r12, #SSI_SACNT] - tst r11, #SSI_SACNT_AC97EN - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #2] - - /* dummy read to skip slot 12 */ - ldrne r11, [r12, #SSI_SRX0] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #4] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #6] - - /* dummy read to skip slot 12 */ - ldrne r11, [r12, #SSI_SRX0] - - add r10, #8 - lsr r11, r9, #16 /* r11: buffer size */ - cmp r10, r11 - lslgt r9, r11, #16 - addle r9, #8 - -1: - @ return from FIQ - subs pc, lr, #4 - - .align -.L_imx_ssi_fiq_base: -imx_ssi_fiq_base: - .word 0x0 -.L_imx_ssi_fiq_rx_buffer: -imx_ssi_fiq_rx_buffer: - .word 0x0 -.L_imx_ssi_fiq_tx_buffer: -imx_ssi_fiq_tx_buffer: - .word 0x0 -.L_imx_ssi_fiq_end: -imx_ssi_fiq_end: - diff --git a/arch/arm/mach-imx/suspend-imx53.S b/arch/arm/mach-imx/suspend-imx53.S deleted file mode 100644 index 41b8aad653634074ea592953dc661b3a8ace10d4..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/suspend-imx53.S +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. - */ -/* - */ - -#include - -#define M4IF_MCR0_OFFSET (0x008C) -#define M4IF_MCR0_FDVFS (0x1 << 11) -#define M4IF_MCR0_FDVACK (0x1 << 27) - - .align 3 - -/* - * ==================== low level suspend ==================== - * - * On entry - * r0: pm_info structure address; - * - * suspend ocram space layout: - * ======================== high address ====================== - * . - * . - * . - * ^ - * ^ - * ^ - * imx53_suspend code - * PM_INFO structure(imx53_suspend_info) - * ======================== low address ======================= - */ - -/* Offsets of members of struct imx53_suspend_info */ -#define SUSPEND_INFO_MX53_M4IF_V_OFFSET 0x0 -#define SUSPEND_INFO_MX53_IOMUXC_V_OFFSET 0x4 -#define SUSPEND_INFO_MX53_IO_COUNT_OFFSET 0x8 -#define SUSPEND_INFO_MX53_IO_STATE_OFFSET 0xc - -ENTRY(imx53_suspend) - stmfd sp!, {r4,r5,r6,r7} - - /* Save pad config */ - ldr r1, [r0, #SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_1 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -1: - ldr r5, [r2], #12 /* IOMUXC register offset */ - ldr r6, [r3, r5] /* current value */ - str r6, [r2], #4 /* save area */ - subs r1, r1, #1 - bne 1b - -skip_pad_conf_1: - /* Set FDVFS bit of M4IF_MCR0 to request DDR to enter self-refresh */ - ldr r1, [r0, #SUSPEND_INFO_MX53_M4IF_V_OFFSET] - ldr r2,[r1, #M4IF_MCR0_OFFSET] - orr r2, r2, #M4IF_MCR0_FDVFS - str r2,[r1, #M4IF_MCR0_OFFSET] - - /* Poll FDVACK bit of M4IF_MCR to wait for DDR to enter self-refresh */ -wait_sr_ack: - ldr r2,[r1, #M4IF_MCR0_OFFSET] - ands r2, r2, #M4IF_MCR0_FDVACK - beq wait_sr_ack - - /* Set pad config */ - ldr r1, [r0, #SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_2 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -2: - ldr r5, [r2], #4 /* IOMUXC register offset */ - ldr r6, [r2], #4 /* clear */ - ldr r7, [r3, r5] - bic r7, r7, r6 - ldr r6, [r2], #8 /* set */ - orr r7, r7, r6 - str r7, [r3, r5] - subs r1, r1, #1 - bne 2b - -skip_pad_conf_2: - /* Zzz, enter stop mode */ - wfi - nop - nop - nop - nop - - /* Restore pad config */ - ldr r1, [r0, 
#SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_3 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -3: - ldr r5, [r2], #12 /* IOMUXC register offset */ - ldr r6, [r2], #4 /* saved value */ - str r6, [r3, r5] - subs r1, r1, #1 - bne 3b - -skip_pad_conf_3: - /* Clear FDVFS bit of M4IF_MCR0 to request DDR to exit self-refresh */ - ldr r1, [r0, #SUSPEND_INFO_MX53_M4IF_V_OFFSET] - ldr r2,[r1, #M4IF_MCR0_OFFSET] - bic r2, r2, #M4IF_MCR0_FDVFS - str r2,[r1, #M4IF_MCR0_OFFSET] - - /* Poll FDVACK bit of M4IF_MCR to wait for DDR to exit self-refresh */ -wait_ar_ack: - ldr r2,[r1, #M4IF_MCR0_OFFSET] - ands r2, r2, #M4IF_MCR0_FDVACK - bne wait_ar_ack - - /* Restore registers */ - ldmfd sp!, {r4,r5,r6,r7} - mov pc, lr - -ENDPROC(imx53_suspend) - -ENTRY(imx53_suspend_sz) - .word . - imx53_suspend diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S deleted file mode 100644 index e06f946b75b96a9455d34facbf019b16121edb82..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/suspend-imx6.S +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 Freescale Semiconductor, Inc. - */ - -#include -#include -#include -#include -#include "hardware.h" - -/* - * ==================== low level suspend ==================== - * - * Better to follow below rules to use ARM registers: - * r0: pm_info structure address; - * r1 ~ r4: for saving pm_info members; - * r5 ~ r10: free registers; - * r11: io base address. - * - * suspend ocram space layout: - * ======================== high address ====================== - * . - * . - * . - * ^ - * ^ - * ^ - * imx6_suspend code - * PM_INFO structure(imx6_cpu_pm_info) - * ======================== low address ======================= - */ - -/* - * Below offsets are based on struct imx6_cpu_pm_info - * which defined in arch/arm/mach-imx/pm-imx6q.c, this - * structure contains necessary pm info for low level - * suspend related code. - */ -#define PM_INFO_PBASE_OFFSET 0x0 -#define PM_INFO_RESUME_ADDR_OFFSET 0x4 -#define PM_INFO_DDR_TYPE_OFFSET 0x8 -#define PM_INFO_PM_INFO_SIZE_OFFSET 0xC -#define PM_INFO_MX6Q_MMDC_P_OFFSET 0x10 -#define PM_INFO_MX6Q_MMDC_V_OFFSET 0x14 -#define PM_INFO_MX6Q_SRC_P_OFFSET 0x18 -#define PM_INFO_MX6Q_SRC_V_OFFSET 0x1C -#define PM_INFO_MX6Q_IOMUXC_P_OFFSET 0x20 -#define PM_INFO_MX6Q_IOMUXC_V_OFFSET 0x24 -#define PM_INFO_MX6Q_CCM_P_OFFSET 0x28 -#define PM_INFO_MX6Q_CCM_V_OFFSET 0x2C -#define PM_INFO_MX6Q_GPC_P_OFFSET 0x30 -#define PM_INFO_MX6Q_GPC_V_OFFSET 0x34 -#define PM_INFO_MX6Q_L2_P_OFFSET 0x38 -#define PM_INFO_MX6Q_L2_V_OFFSET 0x3C -#define PM_INFO_MMDC_IO_NUM_OFFSET 0x40 -#define PM_INFO_MMDC_IO_VAL_OFFSET 0x44 - -#define MX6Q_SRC_GPR1 0x20 -#define MX6Q_SRC_GPR2 0x24 -#define MX6Q_MMDC_MAPSR 0x404 -#define MX6Q_MMDC_MPDGCTRL0 0x83c -#define MX6Q_GPC_IMR1 0x08 -#define MX6Q_GPC_IMR2 0x0c -#define MX6Q_GPC_IMR3 0x10 -#define MX6Q_GPC_IMR4 0x14 -#define MX6Q_CCM_CCR 0x0 - - .align 3 - .arm - - .macro sync_l2_cache - - /* sync L2 cache to drain L2's buffers to DRAM. 
*/ -#ifdef CONFIG_CACHE_L2X0 - ldr r11, [r0, #PM_INFO_MX6Q_L2_V_OFFSET] - teq r11, #0 - beq 6f - mov r6, #0x0 - str r6, [r11, #L2X0_CACHE_SYNC] -1: - ldr r6, [r11, #L2X0_CACHE_SYNC] - ands r6, r6, #0x1 - bne 1b -6: -#endif - - .endm - - .macro resume_mmdc - - /* restore MMDC IO */ - cmp r5, #0x0 - ldreq r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldrne r11, [r0, #PM_INFO_MX6Q_IOMUXC_P_OFFSET] - - ldr r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET] - ldr r7, =PM_INFO_MMDC_IO_VAL_OFFSET - add r7, r7, r0 -1: - ldr r8, [r7], #0x4 - ldr r9, [r7], #0x4 - str r9, [r11, r8] - subs r6, r6, #0x1 - bne 1b - - cmp r5, #0x0 - ldreq r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET] - ldrne r11, [r0, #PM_INFO_MX6Q_MMDC_P_OFFSET] - - cmp r3, #IMX_DDR_TYPE_LPDDR2 - bne 4f - - /* reset read FIFO, RST_RD_FIFO */ - ldr r7, =MX6Q_MMDC_MPDGCTRL0 - ldr r6, [r11, r7] - orr r6, r6, #(1 << 31) - str r6, [r11, r7] -2: - ldr r6, [r11, r7] - ands r6, r6, #(1 << 31) - bne 2b - - /* reset FIFO a second time */ - ldr r6, [r11, r7] - orr r6, r6, #(1 << 31) - str r6, [r11, r7] -3: - ldr r6, [r11, r7] - ands r6, r6, #(1 << 31) - bne 3b -4: - /* let DDR out of self-refresh */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - bic r7, r7, #(1 << 21) - str r7, [r11, #MX6Q_MMDC_MAPSR] -5: - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - ands r7, r7, #(1 << 25) - bne 5b - - /* enable DDR auto power saving */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - bic r7, r7, #0x1 - str r7, [r11, #MX6Q_MMDC_MAPSR] - - .endm - -ENTRY(imx6_suspend) - ldr r1, [r0, #PM_INFO_PBASE_OFFSET] - ldr r2, [r0, #PM_INFO_RESUME_ADDR_OFFSET] - ldr r3, [r0, #PM_INFO_DDR_TYPE_OFFSET] - ldr r4, [r0, #PM_INFO_PM_INFO_SIZE_OFFSET] - - /* - * counting the resume address in iram - * to set it in SRC register. - */ - ldr r6, =imx6_suspend - ldr r7, =resume - sub r7, r7, r6 - add r8, r1, r4 - add r9, r8, r7 - - /* - * make sure TLB contain the addr we want, - * as we will access them after MMDC IO floated. - */ - - ldr r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET] - ldr r6, [r11, #0x0] - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - ldr r6, [r11, #0x0] - ldr r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldr r6, [r11, #0x0] - - /* use r11 to store the IO address */ - ldr r11, [r0, #PM_INFO_MX6Q_SRC_V_OFFSET] - /* store physical resume addr and pm_info address. */ - str r9, [r11, #MX6Q_SRC_GPR1] - str r1, [r11, #MX6Q_SRC_GPR2] - - /* need to sync L2 cache before DSM. */ - sync_l2_cache - - ldr r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET] - /* - * put DDR explicitly into self-refresh and - * disable automatic power savings. - */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - orr r7, r7, #0x1 - str r7, [r11, #MX6Q_MMDC_MAPSR] - - /* make the DDR explicitly enter self-refresh. 
*/ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - orr r7, r7, #(1 << 21) - str r7, [r11, #MX6Q_MMDC_MAPSR] - -poll_dvfs_set: - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - ands r7, r7, #(1 << 25) - beq poll_dvfs_set - - ldr r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldr r6, =0x0 - ldr r7, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET] - ldr r8, =PM_INFO_MMDC_IO_VAL_OFFSET - add r8, r8, r0 - /* LPDDR2's last 3 IOs need special setting */ - cmp r3, #IMX_DDR_TYPE_LPDDR2 - subeq r7, r7, #0x3 -set_mmdc_io_lpm: - ldr r9, [r8], #0x8 - str r6, [r11, r9] - subs r7, r7, #0x1 - bne set_mmdc_io_lpm - - cmp r3, #IMX_DDR_TYPE_LPDDR2 - bne set_mmdc_io_lpm_done - ldr r6, =0x1000 - ldr r9, [r8], #0x8 - str r6, [r11, r9] - ldr r9, [r8], #0x8 - str r6, [r11, r9] - ldr r6, =0x80000 - ldr r9, [r8] - str r6, [r11, r9] -set_mmdc_io_lpm_done: - - /* - * mask all GPC interrupts before - * enabling the RBC counters to - * avoid the counter starting too - * early if an interupt is already - * pending. - */ - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - ldr r6, [r11, #MX6Q_GPC_IMR1] - ldr r7, [r11, #MX6Q_GPC_IMR2] - ldr r8, [r11, #MX6Q_GPC_IMR3] - ldr r9, [r11, #MX6Q_GPC_IMR4] - - ldr r10, =0xffffffff - str r10, [r11, #MX6Q_GPC_IMR1] - str r10, [r11, #MX6Q_GPC_IMR2] - str r10, [r11, #MX6Q_GPC_IMR3] - str r10, [r11, #MX6Q_GPC_IMR4] - - /* - * enable the RBC bypass counter here - * to hold off the interrupts. RBC counter - * = 32 (1ms), Minimum RBC delay should be - * 400us for the analog LDOs to power down. - */ - ldr r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET] - ldr r10, [r11, #MX6Q_CCM_CCR] - bic r10, r10, #(0x3f << 21) - orr r10, r10, #(0x20 << 21) - str r10, [r11, #MX6Q_CCM_CCR] - - /* enable the counter. */ - ldr r10, [r11, #MX6Q_CCM_CCR] - orr r10, r10, #(0x1 << 27) - str r10, [r11, #MX6Q_CCM_CCR] - - /* unmask all the GPC interrupts. */ - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - str r6, [r11, #MX6Q_GPC_IMR1] - str r7, [r11, #MX6Q_GPC_IMR2] - str r8, [r11, #MX6Q_GPC_IMR3] - str r9, [r11, #MX6Q_GPC_IMR4] - - /* - * now delay for a short while (3usec) - * ARM is at 1GHz at this point - * so a short loop should be enough. - * this delay is required to ensure that - * the RBC counter can start counting in - * case an interrupt is already pending - * or in case an interrupt arrives just - * as ARM is about to assert DSM_request. - */ - ldr r6, =2000 -rbc_loop: - subs r6, r6, #0x1 - bne rbc_loop - - /* Zzz, enter stop mode */ - wfi - nop - nop - nop - nop - - /* - * run to here means there is pending - * wakeup source, system should auto - * resume, we need to restore MMDC IO first - */ - mov r5, #0x0 - resume_mmdc - - /* return to suspend finish */ - ret lr - -resume: - /* invalidate L1 I-cache first */ - mov r6, #0x0 - mcr p15, 0, r6, c7, c5, 0 - mcr p15, 0, r6, c7, c5, 6 - /* enable the Icache and branch prediction */ - mov r6, #0x1800 - mcr p15, 0, r6, c1, c0, 0 - isb - - /* get physical resume address from pm_info. 
*/ - ldr lr, [r0, #PM_INFO_RESUME_ADDR_OFFSET] - /* clear core0's entry and parameter */ - ldr r11, [r0, #PM_INFO_MX6Q_SRC_P_OFFSET] - mov r7, #0x0 - str r7, [r11, #MX6Q_SRC_GPR1] - str r7, [r11, #MX6Q_SRC_GPR2] - - ldr r3, [r0, #PM_INFO_DDR_TYPE_OFFSET] - mov r5, #0x1 - resume_mmdc - - ret lr -ENDPROC(imx6_suspend) diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S deleted file mode 100644 index 8e6766d4621eb7c6bf53afbd575f2eb5ec6f056f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ /dev/null @@ -1,31 +0,0 @@ -/* - * arch/arm/mach-iop32x/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for IOP32x-based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - .macro get_irqnr_preamble, base, tmp - mrc p15, 0, \tmp, c15, c1, 0 - orr \tmp, \tmp, #(1 << 6) - mcr p15, 0, \tmp, c15, c1, 0 @ Enable cp6 access - mrc p15, 0, \tmp, c15, c1, 0 - mov \tmp, \tmp - sub pc, pc, #4 @ cp_wait - .endm - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC - cmp \irqstat, #0 - clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 - .endm - - .macro arch_ret_to_user, tmp1, tmp2 - mrc p15, 0, \tmp1, c15, c1, 0 - ands \tmp2, \tmp1, #(1 << 6) - bicne \tmp1, \tmp1, #(1 << 6) - mcrne p15, 0, \tmp1, c15, c1, 0 @ Disable cp6 access - .endm diff --git a/arch/arm/mach-keystone/smc.S b/arch/arm/mach-keystone/smc.S deleted file mode 100644 index 21ef75cf537091fa9262d928e22bfdcda4228666..0000000000000000000000000000000000000000 --- a/arch/arm/mach-keystone/smc.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Keystone Secure APIs - * - * Copyright (C) 2013 Texas Instruments, Inc. - * Santosh Shilimkar - */ - -#include - -/** - * u32 keystone_cpu_smc(u32 command, u32 cpu, u32 addr) - * - * Low level CPU monitor API - * @command: Monitor command. - * @cpu: CPU Number - * @addr: Kernel jump address for boot CPU - * - * Return: Non zero value on failure - */ - .arch_extension sec -ENTRY(keystone_cpu_smc) - stmfd sp!, {r4-r11, lr} - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(keystone_cpu_smc) diff --git a/arch/arm/mach-lpc32xx/suspend.S b/arch/arm/mach-lpc32xx/suspend.S deleted file mode 100644 index 3f0a8282ef6fd2edfb9dd308689ef94350fccd39..0000000000000000000000000000000000000000 --- a/arch/arm/mach-lpc32xx/suspend.S +++ /dev/null @@ -1,150 +0,0 @@ -/* - * arch/arm/mach-lpc32xx/suspend.S - * - * Original authors: Dmitry Chigirev, Vitaly Wool - * Modified by Kevin Wells - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ -#include -#include -#include "lpc32xx.h" - -/* Using named register defines makes the code easier to follow */ -#define WORK1_REG r0 -#define WORK2_REG r1 -#define SAVED_HCLK_DIV_REG r2 -#define SAVED_HCLK_PLL_REG r3 -#define SAVED_DRAM_CLKCTRL_REG r4 -#define SAVED_PWR_CTRL_REG r5 -#define CLKPWRBASE_REG r6 -#define EMCBASE_REG r7 - -#define LPC32XX_EMC_STATUS_OFFS 0x04 -#define LPC32XX_EMC_STATUS_BUSY 0x1 -#define LPC32XX_EMC_STATUS_SELF_RFSH 0x4 - -#define LPC32XX_CLKPWR_PWR_CTRL_OFFS 0x44 -#define LPC32XX_CLKPWR_HCLK_DIV_OFFS 0x40 -#define LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS 0x58 - -#define CLKPWR_PCLK_DIV_MASK 0xFFFFFE7F - - .text - -ENTRY(lpc32xx_sys_suspend) - @ Save a copy of the used registers in IRAM, r0 is corrupted - adr r0, tmp_stack_end - stmfd r0!, {r3 - r7, sp, lr} - - @ Load a few common register addresses - adr WORK1_REG, reg_bases - ldr CLKPWRBASE_REG, [WORK1_REG, #0] - ldr EMCBASE_REG, [WORK1_REG, #4] - - ldr SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - orr WORK1_REG, SAVED_PWR_CTRL_REG, #LPC32XX_CLKPWR_SDRAM_SELF_RFSH - - @ Wait for SDRAM busy status to go busy and then idle - @ This guarantees a small windows where DRAM isn't busy -1: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - cmp WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - bne 1b @ Branch while idle -2: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - cmp WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - beq 2b @ Branch until idle - - @ Setup self-refresh with support for manual exit of - @ self-refresh mode - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - orr WORK2_REG, WORK1_REG, #LPC32XX_CLKPWR_UPD_SDRAM_SELF_RFSH - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Wait for self-refresh acknowledge, clocks to the DRAM device - @ will automatically stop on start of self-refresh -3: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - cmp WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - bne 3b @ Branch until self-refresh mode starts - - @ Enter direct-run mode from run mode - bic WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_SELECT_RUN_MODE - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Safe disable of DRAM clock in EMC block, prevents DDR sync - @ issues on restart - ldr SAVED_HCLK_DIV_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - and WORK2_REG, SAVED_HCLK_DIV_REG, #CLKPWR_PCLK_DIV_MASK - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - - @ Save HCLK PLL state and disable HCLK PLL - ldr SAVED_HCLK_PLL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - bic WORK2_REG, SAVED_HCLK_PLL_REG, #LPC32XX_CLKPWR_HCLKPLL_POWER_UP - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - - @ Enter stop mode until an enabled event occurs - orr WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_STOP_MODE_CTRL - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - .rept 9 - nop - .endr - - @ Clear stop status - bic WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_STOP_MODE_CTRL - - @ Restore original HCLK PLL value and wait for PLL lock - str SAVED_HCLK_PLL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] -4: - ldr WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_CLKPWR_HCLKPLL_PLL_STS - bne 4b - - @ Re-enter run 
mode with self-refresh flag cleared, but no DRAM - @ update yet. DRAM is still in self-refresh - str SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Restore original DRAM clock mode to restore DRAM clocks - str SAVED_HCLK_DIV_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - - @ Clear self-refresh mode - orr WORK1_REG, SAVED_PWR_CTRL_REG,\ - #LPC32XX_CLKPWR_UPD_SDRAM_SELF_RFSH - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - str SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Wait for EMC to clear self-refresh mode -5: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - bne 5b @ Branch until self-refresh has exited - - @ restore regs and return - adr r0, tmp_stack - ldmfd r0!, {r3 - r7, sp, pc} - -reg_bases: - .long IO_ADDRESS(LPC32XX_CLK_PM_BASE) - .long IO_ADDRESS(LPC32XX_EMC_BASE) - -tmp_stack: - .long 0, 0, 0, 0, 0, 0, 0 -tmp_stack_end: - -ENTRY(lpc32xx_sys_suspend_sz) - .word . - lpc32xx_sys_suspend diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S deleted file mode 100644 index 2d962fe488210d309f050e7387bc7f1812210d3f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Coherency fabric: low level functions - * - * Copyright (C) 2012 Marvell - * - * Gregory CLEMENT - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file implements the assembly function to add a CPU to the - * coherency fabric. This function is called by each of the secondary - * CPUs during their early boot in an SMP kernel, this why this - * function have to callable from assembly. It can also be called by a - * primary CPU from C code during its boot. - */ - -#include -#define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 -#define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 - -#include -#include - - .text -/* - * Returns the coherency base address in r1 (r0 is untouched), or 0 if - * the coherency fabric is not enabled. - */ -ENTRY(ll_get_coherency_base) - mrc p15, 0, r1, c1, c0, 0 - tst r1, #CR_M @ Check MMU bit enabled - bne 1f - - /* - * MMU is disabled, use the physical address of the coherency - * base address. However, if the coherency fabric isn't mapped - * (i.e its virtual address is zero), it means coherency is - * not enabled, so we return 0. - */ - ldr r1, =coherency_base - cmp r1, #0 - beq 2f - adr r1, 3f - ldr r3, [r1] - ldr r1, [r1, r3] - b 2f -1: - /* - * MMU is enabled, use the virtual address of the coherency - * base address. - */ - ldr r1, =coherency_base - ldr r1, [r1] -2: - ret lr -ENDPROC(ll_get_coherency_base) - -/* - * Returns the coherency CPU mask in r3 (r0 is untouched). This - * coherency CPU mask can be used with the coherency fabric - * configuration and control registers. Note that the mask is already - * endian-swapped as appropriate so that the calling functions do not - * have to care about endianness issues while accessing the coherency - * fabric registers - */ -ENTRY(ll_get_coherency_cpumask) - mrc p15, 0, r3, cr0, cr0, 5 - and r3, r3, #15 - mov r2, #(1 << 24) - lsl r3, r2, r3 -ARM_BE8(rev r3, r3) - ret lr -ENDPROC(ll_get_coherency_cpumask) - -/* - * ll_add_cpu_to_smp_group(), ll_enable_coherency() and - * ll_disable_coherency() use the strex/ldrex instructions while the - * MMU can be disabled. 
The Armada XP SoC has an exclusive monitor - * that tracks transactions to Device and/or SO memory and thanks to - * that, exclusive transactions are functional even when the MMU is - * disabled. - */ - -ENTRY(ll_add_cpu_to_smp_group) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET -1: - ldrex r2, [r0] - orr r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - ret lr -ENDPROC(ll_add_cpu_to_smp_group) - -ENTRY(ll_enable_coherency) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET -1: - ldrex r2, [r0] - orr r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - dsb - mov r0, #0 - ret lr -ENDPROC(ll_enable_coherency) - -ENTRY(ll_disable_coherency) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET -1: - ldrex r2, [r0] - bic r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - dsb - ret lr -ENDPROC(ll_disable_coherency) - - .align 2 -3: - .long coherency_phys_base - . diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S deleted file mode 100644 index b093a196e80176d44cc083ee89d51b6fee1318c4..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ /dev/null @@ -1,23 +0,0 @@ -/* - * SMP support: Entry point for secondary CPUs of Marvell EBU - * Cortex-A9 based SOCs (Armada 375 and Armada 38x). - * - * Copyright (C) 2014 Marvell - * - * Gregory CLEMENT - * Thomas Petazzoni - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#include - -#include - -ENTRY(mvebu_cortex_a9_secondary_startup) -ARM_BE8(setend be) - bl armada_38x_scu_power_up - b secondary_startup -ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S deleted file mode 100644 index 2c4032e368badaa94d5ff60b7a6361b42d7f04dd..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/headsmp.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SMP support: Entry point for secondary CPUs - * - * Copyright (C) 2012 Marvell - * - * Yehuda Yitschak - * Gregory CLEMENT - * Thomas Petazzoni - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file implements the assembly entry point for secondary CPUs in - * an SMP kernel. The only thing we need to do is to add the CPU to - * the coherency fabric by writing to 2 registers. Currently the base - * register addresses are hard coded due to the early initialisation - * problems. - */ - -#include -#include - -#include - -/* - * Armada XP specific entry point for secondary CPUs. - * We add the CPU to the coherency fabric and then jump to secondary - * startup - */ -ENTRY(armada_xp_secondary_startup) - ARM_BE8(setend be ) @ go BE8 if entered LE - - bl ll_add_cpu_to_smp_group - - bl ll_enable_coherency - - b secondary_startup - -ENDPROC(armada_xp_secondary_startup) diff --git a/arch/arm/mach-mvebu/pmsu_ll.S b/arch/arm/mach-mvebu/pmsu_ll.S deleted file mode 100644 index 7aae9a25cfeb7cab3285d9c02a5e584d33eabc95..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/pmsu_ll.S +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2014 Marvell - * - * Thomas Petazzoni - * Gregory Clement - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include - - -ENTRY(armada_38x_scu_power_up) - mrc p15, 4, r1, c15, c0 @ get SCU base address - orr r1, r1, #0x8 @ SCU CPU Power Status Register - mrc p15, 0, r0, cr0, cr0, 5 @ get the CPU ID - and r0, r0, #15 - add r1, r1, r0 - mov r0, #0x0 - strb r0, [r1] @ switch SCU power state to Normal mode - ret lr -ENDPROC(armada_38x_scu_power_up) - -/* - * This is the entry point through which CPUs exiting cpuidle deep - * idle state are going. - */ -ENTRY(armada_370_xp_cpu_resume) -ARM_BE8(setend be ) @ go BE8 if entered LE - /* - * Disable the MMU that might have been enabled in BootROM if - * this code is used in the resume path of a suspend/resume - * cycle. 
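armada_38x_scu_power_up above derives everything from coprocessor reads: the SCU base from CP15 (the Cortex-A9 configuration base register) and the CPU number from MPIDR. A sketch of the single register write it performs, with those two values passed in to keep the C portable:

#include <stdint.h>

/* The Cortex-A9 SCU keeps one power-status byte per CPU starting at
 * base + 0x08; writing 0 selects Normal mode. */
static void scu_power_up(volatile uint8_t *scu_base, unsigned int cpu)
{
	scu_base[0x8 + cpu] = 0x0;	/* strb r0, [r1] */
}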
- */ - mrc p15, 0, r1, c1, c0, 0 - bic r1, #1 - mcr p15, 0, r1, c1, c0, 0 - bl ll_add_cpu_to_smp_group - bl ll_enable_coherency - b cpu_resume -ENDPROC(armada_370_xp_cpu_resume) - -ENTRY(armada_38x_cpu_resume) - /* do we need it for Armada 38x*/ -ARM_BE8(setend be ) @ go BE8 if entered LE - bl v7_invalidate_l1 - bl armada_38x_scu_power_up - b cpu_resume -ENDPROC(armada_38x_cpu_resume) - -.global mvebu_boot_wa_start -.global mvebu_boot_wa_end - -/* The following code will be executed from SRAM */ -ENTRY(mvebu_boot_wa_start) -ARM_BE8(setend be) - adr r0, 1f - ldr r0, [r0] @ load the address of the - @ resume register - ldr r0, [r0] @ load the value in the - @ resume register -ARM_BE8(rev r0, r0) @ the value is stored LE - mov pc, r0 @ jump to this value -/* - * the last word of this piece of code will be filled by the physical - * address of the boot address register just after being copied in SRAM - */ -1: - .long . -mvebu_boot_wa_end: -ENDPROC(mvebu_boot_wa_end) diff --git a/arch/arm/mach-npcm/headsmp.S b/arch/arm/mach-npcm/headsmp.S deleted file mode 100644 index c083fe09a07b123cd347a42db6138faa24fd3853..0000000000000000000000000000000000000000 --- a/arch/arm/mach-npcm/headsmp.S +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Nuvoton Technology corporation. -// Copyright 2018 Google, Inc. - -#include -#include -#include - -/* - * The boot ROM does not start secondary CPUs in SVC mode, so we need to do that - * here. - */ -ENTRY(npcm7xx_secondary_startup) - safe_svcmode_maskall r0 - - b secondary_startup -ENDPROC(npcm7xx_secondary_startup) diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S deleted file mode 100644 index f745a65d3bd7a3239eaabc25a6771c5d91323cf5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ /dev/null @@ -1,274 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mach-omap1/ams-delta-fiq-handler.S - * - * Based on linux/arch/arm/lib/floppydma.S - * Renamed and modified to work with 2.6 kernel by Matt Callow - * Copyright (C) 1995, 1996 Russell King - * Copyright (C) 2004 Pete Trapps - * Copyright (C) 2006 Matt Callow - * Copyright (C) 2010 Janusz Krzysztofik - */ - -#include -#include -#include - -#include -#include - -#include "ams-delta-fiq.h" -#include "board-ams-delta.h" -#include "iomap.h" -#include "soc.h" - -/* - * OMAP1510 GPIO related symbol copied from arch/arm/mach-omap1/gpio15xx.c. - * Unfortunately, it was not placed in a separate header file. 
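mvebu_boot_wa above is a position-independent stub: its trailing word is patched with the physical address of the boot-address register before the stub is copied to SRAM, and that register's contents are little-endian, hence the 'rev' when running BE8. A rough C rendering (names are ours, addresses are 32-bit):

#include <stdint.h>

typedef void (*resume_fn)(void);

static inline uint32_t rev32(uint32_t v)	/* the 'rev' instruction */
{
	return (v >> 24) | ((v >> 8) & 0x0000ff00) |
	       ((v << 8) & 0x00ff0000) | (v << 24);
}

/* 'reg_pa' models the patched word at label 1b. */
static void boot_wa(uint32_t reg_pa, int be8)
{
	volatile uint32_t *reg = (volatile uint32_t *)(uintptr_t)reg_pa;
	uint32_t resume_addr = *reg;			/* load resume register */

	if (be8)
		resume_addr = rev32(resume_addr);	/* value is stored LE */

	((resume_fn)(uintptr_t)resume_addr)();		/* mov pc, r0 */
}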
- */ -#define OMAP1510_GPIO_BASE 0xFFFCE000 - -/* GPIO register bitmasks */ -#define KEYBRD_DATA_MASK (0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_DATA) -#define KEYBRD_CLK_MASK (0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_CLK) -#define MODEM_IRQ_MASK (0x1 << AMS_DELTA_GPIO_PIN_MODEM_IRQ) -#define HOOK_SWITCH_MASK (0x1 << AMS_DELTA_GPIO_PIN_HOOK_SWITCH) -#define OTHERS_MASK (MODEM_IRQ_MASK | HOOK_SWITCH_MASK) - -/* IRQ handler register bitmasks */ -#define DEFERRED_FIQ_MASK OMAP_IRQ_BIT(INT_DEFERRED_FIQ) -#define GPIO_BANK1_MASK OMAP_IRQ_BIT(INT_GPIO_BANK1) - -/* Driver buffer byte offsets */ -#define BUF_MASK (FIQ_MASK * 4) -#define BUF_STATE (FIQ_STATE * 4) -#define BUF_KEYS_CNT (FIQ_KEYS_CNT * 4) -#define BUF_TAIL_OFFSET (FIQ_TAIL_OFFSET * 4) -#define BUF_HEAD_OFFSET (FIQ_HEAD_OFFSET * 4) -#define BUF_BUF_LEN (FIQ_BUF_LEN * 4) -#define BUF_KEY (FIQ_KEY * 4) -#define BUF_MISSED_KEYS (FIQ_MISSED_KEYS * 4) -#define BUF_BUFFER_START (FIQ_BUFFER_START * 4) -#define BUF_GPIO_INT_MASK (FIQ_GPIO_INT_MASK * 4) -#define BUF_KEYS_HICNT (FIQ_KEYS_HICNT * 4) -#define BUF_IRQ_PEND (FIQ_IRQ_PEND * 4) -#define BUF_SIR_CODE_L1 (FIQ_SIR_CODE_L1 * 4) -#define BUF_SIR_CODE_L2 (IRQ_SIR_CODE_L2 * 4) -#define BUF_CNT_INT_00 (FIQ_CNT_INT_00 * 4) -#define BUF_CNT_INT_KEY (FIQ_CNT_INT_KEY * 4) -#define BUF_CNT_INT_MDM (FIQ_CNT_INT_MDM * 4) -#define BUF_CNT_INT_03 (FIQ_CNT_INT_03 * 4) -#define BUF_CNT_INT_HSW (FIQ_CNT_INT_HSW * 4) -#define BUF_CNT_INT_05 (FIQ_CNT_INT_05 * 4) -#define BUF_CNT_INT_06 (FIQ_CNT_INT_06 * 4) -#define BUF_CNT_INT_07 (FIQ_CNT_INT_07 * 4) -#define BUF_CNT_INT_08 (FIQ_CNT_INT_08 * 4) -#define BUF_CNT_INT_09 (FIQ_CNT_INT_09 * 4) -#define BUF_CNT_INT_10 (FIQ_CNT_INT_10 * 4) -#define BUF_CNT_INT_11 (FIQ_CNT_INT_11 * 4) -#define BUF_CNT_INT_12 (FIQ_CNT_INT_12 * 4) -#define BUF_CNT_INT_13 (FIQ_CNT_INT_13 * 4) -#define BUF_CNT_INT_14 (FIQ_CNT_INT_14 * 4) -#define BUF_CNT_INT_15 (FIQ_CNT_INT_15 * 4) -#define BUF_CIRC_BUFF (FIQ_CIRC_BUFF * 4) - - -/* - * Register usage - * r8 - temporary - * r9 - the driver buffer - * r10 - temporary - * r11 - interrupts mask - * r12 - base pointers - * r13 - interrupts status - */ - - .text - - .global qwerty_fiqin_end - -ENTRY(qwerty_fiqin_start) - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ FIQ intrrupt handler - ldr r12, omap_ih1_base @ set pointer to level1 handler - - ldr r11, [r12, #IRQ_MIR_REG_OFFSET] @ fetch interrupts mask - - ldr r13, [r12, #IRQ_ITR_REG_OFFSET] @ fetch interrupts status - bics r13, r13, r11 @ clear masked - any left? - beq exit @ none - spurious FIQ? exit - - ldr r10, [r12, #IRQ_SIR_FIQ_REG_OFFSET] @ get requested interrupt number - - mov r8, #2 @ reset FIQ agreement - str r8, [r12, #IRQ_CONTROL_REG_OFFSET] - - cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY) @ is it GPIO interrupt? - beq gpio @ yes - process it - - mov r8, #1 - orr r8, r11, r8, lsl r10 @ mask spurious interrupt - str r8, [r12, #IRQ_MIR_REG_OFFSET] -exit: - subs pc, lr, #4 @ return from FIQ - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - - - @@@@@@@@@@@@@@@@@@@@@@@@@@@ -gpio: @ GPIO bank interrupt handler - ldr r12, omap1510_gpio_base @ set base pointer to GPIO bank - - ldr r11, [r12, #OMAP1510_GPIO_INT_MASK] @ fetch GPIO interrupts mask -restart: - ldr r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ fetch status bits - bics r13, r13, r11 @ clear masked - any left? - beq exit @ no - spurious interrupt? 
exit - - orr r11, r11, r13 @ mask all requested interrupts - str r11, [r12, #OMAP1510_GPIO_INT_MASK] - - str r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack all requested interrupts - - ands r10, r13, #KEYBRD_CLK_MASK @ extract keyboard status - set? - beq hksw @ no - try next source - - - @@@@@@@@@@@@@@@@@@@@@@ - @ Keyboard clock FIQ mode interrupt handler - @ r10 now contains KEYBRD_CLK_MASK, use it - bic r11, r11, r10 @ unmask it - str r11, [r12, #OMAP1510_GPIO_INT_MASK] - - @ Process keyboard data - ldr r8, [r12, #OMAP1510_GPIO_DATA_INPUT] @ fetch GPIO input - - ldr r10, [r9, #BUF_STATE] @ fetch kbd interface state - cmp r10, #0 @ are we expecting start bit? - bne data @ no - go to data processing - - ands r8, r8, #KEYBRD_DATA_MASK @ check start bit - detected? - beq hksw @ no - try next source - - @ r8 contains KEYBRD_DATA_MASK, use it - str r8, [r9, #BUF_STATE] @ enter data processing state - @ r10 already contains 0, reuse it - str r10, [r9, #BUF_KEY] @ clear keycode - mov r10, #2 @ reset input bit mask - str r10, [r9, #BUF_MASK] - - @ Mask other GPIO line interrupts till key done - str r11, [r9, #BUF_GPIO_INT_MASK] @ save mask for later restore - mvn r11, #KEYBRD_CLK_MASK @ prepare all except kbd mask - str r11, [r12, #OMAP1510_GPIO_INT_MASK] @ store into the mask register - - b restart @ restart - -data: ldr r10, [r9, #BUF_MASK] @ fetch current input bit mask - - @ r8 still contains GPIO input bits - ands r8, r8, #KEYBRD_DATA_MASK @ is keyboard data line low? - ldreq r8, [r9, #BUF_KEY] @ yes - fetch collected so far, - orreq r8, r8, r10 @ set 1 at current mask position - streq r8, [r9, #BUF_KEY] @ and save back - - mov r10, r10, lsl #1 @ shift mask left - bics r10, r10, #0x800 @ have we got all the bits? - strne r10, [r9, #BUF_MASK] @ not yet - store the mask - bne restart @ and restart - - @ r10 already contains 0, reuse it - str r10, [r9, #BUF_STATE] @ reset state to start - - @ Key done - restore interrupt mask - ldr r10, [r9, #BUF_GPIO_INT_MASK] @ fetch saved mask - and r11, r11, r10 @ unmask all saved as unmasked - str r11, [r12, #OMAP1510_GPIO_INT_MASK] @ restore into the mask register - - @ Try appending the keycode to the circular buffer - ldr r10, [r9, #BUF_KEYS_CNT] @ get saved keystrokes count - ldr r8, [r9, #BUF_BUF_LEN] @ get buffer size - cmp r10, r8 @ is buffer full? - beq hksw @ yes - key lost, next source - - add r10, r10, #1 @ incremet keystrokes counter - str r10, [r9, #BUF_KEYS_CNT] - - ldr r10, [r9, #BUF_TAIL_OFFSET] @ get buffer tail offset - @ r8 already contains buffer size - cmp r10, r8 @ end of buffer? - moveq r10, #0 @ yes - rewind to buffer start - - ldr r12, [r9, #BUF_BUFFER_START] @ get buffer start address - add r12, r12, r10, LSL #2 @ calculate buffer tail address - ldr r8, [r9, #BUF_KEY] @ get last keycode - str r8, [r12] @ append it to the buffer tail - - add r10, r10, #1 @ increment buffer tail offset - str r10, [r9, #BUF_TAIL_OFFSET] - - ldr r10, [r9, #BUF_CNT_INT_KEY] @ increment interrupts counter - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_KEY] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -hksw: @Is hook switch interrupt requested? - tst r13, #HOOK_SWITCH_MASK @ is hook switch status bit set? - beq mdm @ no - try next source - - - @@@@@@@@@@@@@@@@@@@@@@@@ - @ Hook switch interrupt FIQ mode simple handler - - @ Don't toggle active edge, the switch always bounces - - @ Increment hook switch interrupt counter - ldr r10, [r9, #BUF_CNT_INT_HSW] - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_HSW] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -mdm: @Is it a modem interrupt? 
- tst r13, #MODEM_IRQ_MASK @ is modem status bit set? - beq irq @ no - check for next interrupt - - - @@@@@@@@@@@@@@@@@@@@@@@@ - @ Modem FIQ mode interrupt handler stub - - @ Increment modem interrupt counter - ldr r10, [r9, #BUF_CNT_INT_MDM] - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_MDM] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -irq: @ Place deferred_fiq interrupt request - ldr r12, deferred_fiq_ih_base @ set pointer to IRQ handler - mov r10, #DEFERRED_FIQ_MASK @ set deferred_fiq bit - str r10, [r12, #IRQ_ISR_REG_OFFSET] @ place it in the ISR register - - ldr r12, omap1510_gpio_base @ set pointer back to GPIO bank - b restart @ check for next GPIO interrupt - @@@@@@@@@@@@@@@@@@@@@@@@@@@ - - -/* - * Virtual addresses for IO - */ -omap_ih1_base: - .word OMAP1_IO_ADDRESS(OMAP_IH1_BASE) -deferred_fiq_ih_base: - .word OMAP1_IO_ADDRESS(DEFERRED_FIQ_IH_BASE) -omap1510_gpio_base: - .word OMAP1_IO_ADDRESS(OMAP1510_GPIO_BASE) -qwerty_fiqin_end: - -/* - * Check the size of the FIQ, - * it cannot go beyond 0xffff0200, and is copied to 0xffff001c - */ -.if (qwerty_fiqin_end - qwerty_fiqin_start) > (0x200 - 0x1c) - .err -.endif diff --git a/arch/arm/mach-omap1/sleep.S b/arch/arm/mach-omap1/sleep.S deleted file mode 100644 index a908c51839a43bc7b56c2db0abdc27fbc16f89fa..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/sleep.S +++ /dev/null @@ -1,370 +0,0 @@ -/* - * linux/arch/arm/mach-omap1/sleep.S - * - * Low-level OMAP7XX/1510/1610 sleep/wakeUp support - * - * Initial SA1110 code: - * Copyright (c) 2001 Cliff Brake - * - * Adapted for PXA by Nicolas Pitre: - * Copyright (c) 2002 Monta Vista Software, Inc. - * - * Support for OMAP1510/1610 by Dirk Behme - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -#include - -#include - -#include "iomap.h" -#include "pm.h" - - .text - - -/* - * Forces OMAP into deep sleep state - * - * omapXXXX_cpu_suspend() - * - * The values of the registers ARM_IDLECT1 and ARM_IDLECT2 are passed - * as arg0 and arg1 from caller. arg0 is stored in register r0 and arg1 - * in register r1. - * - * Note: This code get's copied to internal SRAM at boot. When the OMAP - * wakes up it continues execution at the point it went to sleep. - * - * Note: Because of errata work arounds we have processor specific functions - * here. They are mostly the same, but slightly different. 
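The keyboard FIQ path in ams-delta-fiq-handler.S above is a bit-banged serial decoder: the handler runs once per keyboard clock FIQ, a high start bit arms it, each subsequent clock samples the data line into the keycode at the current mask position until the mask reaches bit 11, and the finished keycode is appended to the circular buffer; the 'irq:' stub above then posts DEFERRED_FIQ_MASK so the ordinary IRQ path can drain it. A state-machine sketch of the decode, assuming the same data-low-means-one polarity as the asm:

#include <stdint.h>

struct kbd_state {
	uint32_t busy;	/* 0: waiting for a start bit */
	uint32_t key;	/* keycode being assembled    */
	uint32_t mask;	/* current input bit position */
};

/* Called on each keyboard clock interrupt; returns 1 when a full
 * keycode is ready in s->key. */
static int kbd_clock_edge(struct kbd_state *s, int data_line_high)
{
	if (!s->busy) {
		if (!data_line_high)
			return 0;	/* no start bit yet        */
		s->busy = 1;		/* enter data state        */
		s->key = 0;
		s->mask = 2;		/* reset input bit mask    */
		return 0;
	}

	if (!data_line_high)		/* low data line is a 1    */
		s->key |= s->mask;

	s->mask <<= 1;
	if (!(s->mask & 0x800))
		return 0;		/* more bits to collect    */

	s->busy = 0;			/* word done, back to idle */
	return 1;
}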
- * - */ - -#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850) - .align 3 -ENTRY(omap7xx_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ Drain write cache - mov r4, #0 - mcr p15, 0, r0, c7, c10, 4 - nop - - @ load base address of Traffic Controller - mov r6, #TCMIF_ASM_BASE & 0xff000000 - orr r6, r6, #TCMIF_ASM_BASE & 0x00ff0000 - orr r6, r6, #TCMIF_ASM_BASE & 0x0000ff00 - - @ prepare to put SDRAM into self-refresh manually - ldr r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r9, r7, #SELF_REFRESH_MODE & 0xff000000 - orr r9, r9, #SELF_REFRESH_MODE & 0x000000ff - str r9, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put EMIFS to Sleep - ldr r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r9, r8, #IDLE_EMIFS_REQUEST & 0xff - str r9, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ turn off clock domains - @ do not disable PERCK (0x04) - mov r5, #OMAP7XX_IDLECT2_SLEEP_VAL & 0xff - orr r5, r5, #OMAP7XX_IDLECT2_SLEEP_VAL & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ request ARM idle - mov r3, #OMAP7XX_IDLECT1_SLEEP_VAL & 0xff - orr r3, r3, #OMAP7XX_IDLECT1_SLEEP_VAL & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ disable instruction cache - mrc p15, 0, r9, c1, c0, 0 - bic r2, r9, #0x1000 - mcr p15, 0, r2, c1, c0, 0 - nop - -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt -/* - * omap7xx_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack. - */ - @ re-enable Icache - mcr p15, 0, r9, c1, c0, 0 - - @ reset the ARM_IDLECT1 and ARM_IDLECT2. - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ Restore EMIFF controls - str r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - str r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap7xx_cpu_suspend_sz) - .word . 
- omap7xx_cpu_suspend -#endif /* CONFIG_ARCH_OMAP730 || CONFIG_ARCH_OMAP850 */ - -#ifdef CONFIG_ARCH_OMAP15XX - .align 3 -ENTRY(omap1510_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ load base address of Traffic Controller - mov r4, #TCMIF_ASM_BASE & 0xff000000 - orr r4, r4, #TCMIF_ASM_BASE & 0x00ff0000 - orr r4, r4, #TCMIF_ASM_BASE & 0x0000ff00 - - @ work around errata of OMAP1510 PDE bit for TC shut down - @ clear PDE bit - ldr r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - bic r5, r5, #PDE_BIT & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ set PWD_EN bit - and r5, r5, #PWD_EN_BIT & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put SDRAM into self-refresh manually - ldr r5, [r4, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r5, r5, #SELF_REFRESH_MODE & 0xff000000 - orr r5, r5, #SELF_REFRESH_MODE & 0x000000ff - str r5, [r4, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put EMIFS to Sleep - ldr r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r5, r5, #IDLE_EMIFS_REQUEST & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ turn off clock domains - mov r5, #OMAP1510_IDLE_CLOCK_DOMAINS & 0xff - orr r5, r5, #OMAP1510_IDLE_CLOCK_DOMAINS & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ request ARM idle - mov r3, #OMAP1510_DEEP_SLEEP_REQUEST & 0xff - orr r3, r3, #OMAP1510_DEEP_SLEEP_REQUEST & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - mov r5, #IDLE_WAIT_CYCLES & 0xff - orr r5, r5, #IDLE_WAIT_CYCLES & 0xff00 -l_1510_2: - subs r5, r5, #1 - bne l_1510_2 -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt -/* - * omap1510_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack, reset the ARM_IDLECT1 and ARM_IDLECT2. - */ - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap1510_cpu_suspend_sz) - .word . 
- omap1510_cpu_suspend -#endif /* CONFIG_ARCH_OMAP15XX */ - -#if defined(CONFIG_ARCH_OMAP16XX) - .align 3 -ENTRY(omap1610_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ Drain write cache - mov r4, #0 - mcr p15, 0, r0, c7, c10, 4 - nop - - @ Load base address of Traffic Controller - mov r6, #TCMIF_ASM_BASE & 0xff000000 - orr r6, r6, #TCMIF_ASM_BASE & 0x00ff0000 - orr r6, r6, #TCMIF_ASM_BASE & 0x0000ff00 - - @ Prepare to put SDRAM into self-refresh manually - ldr r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r9, r7, #SELF_REFRESH_MODE & 0xff000000 - orr r9, r9, #SELF_REFRESH_MODE & 0x000000ff - str r9, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ Prepare to put EMIFS to Sleep - ldr r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r9, r8, #IDLE_EMIFS_REQUEST & 0xff - str r9, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ Load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ Turn off clock domains - @ Do not disable PERCK (0x04) - mov r5, #OMAP1610_IDLECT2_SLEEP_VAL & 0xff - orr r5, r5, #OMAP1610_IDLECT2_SLEEP_VAL & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ Request ARM idle - mov r3, #OMAP1610_IDLECT1_SLEEP_VAL & 0xff - orr r3, r3, #OMAP1610_IDLECT1_SLEEP_VAL & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt - - @ Errata (HEL3SU467, section 1.4.4) specifies nop-instructions - @ according to this formula: - @ 2 + (4*DPLL_MULT)/DPLL_DIV/ARMDIV - @ Max DPLL_MULT = 18 - @ DPLL_DIV = 1 - @ ARMDIV = 1 - @ => 74 nop-instructions - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @10 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @20 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @30 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @40 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @50 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @60 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @70 - nop - nop - nop - nop @74 -/* - * omap1610_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack. - */ - @ Restore the ARM_IDLECT1 and ARM_IDLECT2. - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ Restore EMIFF controls - str r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - str r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ Restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap1610_cpu_suspend_sz) - .word . - omap1610_cpu_suspend -#endif /* CONFIG_ARCH_OMAP16XX */ diff --git a/arch/arm/mach-omap1/sram.S b/arch/arm/mach-omap1/sram.S deleted file mode 100644 index 37f34fcd65fb9dee5060f9949f08a7e3ff6a653f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/sram.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/plat-omap/sram-fn.S - * - * Functions that need to be run in internal SRAM - */ - -#include - -#include - -#include - -#include "iomap.h" - - .text - -/* - * Reprograms ULPD and CKCTL. 
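The three omapXXXX_cpu_suspend variants above share one shape; only errata handling differs (the 1510 PDE/TC workaround, the 1610 post-WFI nop slide per errata HEL3SU467). The mov/orr chains are how 32-bit register addresses get built, since ARM data-processing immediates are limited to rotated 8-bit values. A hedged C outline of the common sequence (I-cache handling and the errata steps omitted, accessor types hypothetical):

#include <stdint.h>

struct omap1_pm_regs {
	volatile uint32_t *emiff_sdram_config;	/* SDRAM interface    */
	volatile uint32_t *emifs_config;	/* flash interface    */
	volatile uint16_t *idlect1;		/* ARM idle requests  */
	volatile uint16_t *idlect2;		/* clock domain gates */
};

static void omap1_deep_sleep(struct omap1_pm_regs *r,
			     uint32_t self_refresh, uint32_t emifs_idle,
			     uint16_t idlect1_sleep, uint16_t idlect2_sleep,
			     uint16_t idlect1_saved, uint16_t idlect2_saved)
{
	uint32_t emiff = *r->emiff_sdram_config;	/* keep pre-sleep    */
	uint32_t emifs = *r->emifs_config;		/* values (r7/r8)    */

	*r->emiff_sdram_config = emiff | self_refresh;	/* SDRAM into SR     */
	*r->emifs_config = emifs | emifs_idle;		/* EMIFS to sleep    */
	*r->idlect2 = idlect2_sleep;			/* gate clocks       */
	*r->idlect1 = idlect1_sleep;			/* request deep idle */

	/* wfi; after wakeup, execution continues here */

	*r->idlect2 = idlect2_saved;			/* undo, in reverse  */
	*r->idlect1 = idlect1_saved;
	*r->emiff_sdram_config = emiff;			/* exit self-refresh */
	*r->emifs_config = emifs;
}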
- */ - .align 3 -ENTRY(omap1_sram_reprogram_clock) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0xff000000 - orr r2, r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0x00ff0000 - orr r2, r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0x0000ff00 - - mov r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0xff000000 - orr r3, r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0x00ff0000 - orr r3, r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0x0000ff00 - - tst r0, #1 << 4 @ want lock mode? - beq newck @ nope - bic r0, r0, #1 << 4 @ else clear lock bit - strh r0, [r2] @ set dpll into bypass mode - orr r0, r0, #1 << 4 @ set lock bit again - -newck: - strh r1, [r3] @ write new ckctl value - strh r0, [r2] @ write new dpll value - - mov r4, #0x0700 @ let the clocks settle - orr r4, r4, #0x00ff -delay: sub r4, r4, #1 - cmp r4, #0 - bne delay - -lock: ldrh r4, [r2], #0 @ read back dpll value - tst r0, #1 << 4 @ want lock mode? - beq out @ nope - tst r4, #1 << 0 @ dpll rate locked? - beq lock @ try again - -out: - ldmfd sp!, {r0 - r12, pc} @ restore regs and return -ENTRY(omap1_sram_reprogram_clock_sz) - .word . - omap1_sram_reprogram_clock diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S deleted file mode 100644 index 1762f919941f4f98d3159e83d8d39f882f23477c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/omap-headsmp.S +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Secondary CPU startup routine source file. - * - * Copyright (C) 2009-2014 Texas Instruments, Inc. - * - * Author: - * Santosh Shilimkar - * - * Interface functions needed for the SMP. This file is based on arm - * realview smp platform. - * Copyright (c) 2003 ARM Limited. - */ - -#include -#include -#include - -#include "omap44xx.h" - -/* Physical address needed since MMU not enabled yet on secondary core */ -#define AUX_CORE_BOOT0_PA 0x48281800 -#define API_HYP_ENTRY 0x102 - -ENTRY(omap_secondary_startup) -#ifdef CONFIG_SMP - b secondary_startup -#else -/* Should never get here */ -again: wfi - b again -#endif -#ENDPROC(omap_secondary_startup) - -/* - * OMAP5 specific entry point for secondary CPU to jump from ROM - * code. This routine also provides a holding flag into which - * secondary core is held until we're ready for it to initialise. - * The primary core will update this flag using a hardware - * register AuxCoreBoot0. - */ -ENTRY(omap5_secondary_startup) -wait: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 - ldr r0, [r2] - mov r0, r0, lsr #5 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne wait - b omap_secondary_startup -ENDPROC(omap5_secondary_startup) -/* - * Same as omap5_secondary_startup except we call into the ROM to - * enable HYP mode first. This is called instead of - * omap5_secondary_startup if the primary CPU was put into HYP mode by - * the boot loader. - */ - .arch armv7-a - .arch_extension sec -ENTRY(omap5_secondary_hyp_startup) -wait_2: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 - ldr r0, [r2] - mov r0, r0, lsr #5 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne wait_2 - ldr r12, =API_HYP_ENTRY - badr r0, hyp_boot - smc #0 -hyp_boot: - b omap_secondary_startup -ENDPROC(omap5_secondary_hyp_startup) -/* - * OMAP4 specific entry point for secondary CPU to jump from ROM - * code. This routine also provides a holding flag into which - * secondary core is held until we're ready for it to initialise. - * The primary core will update this flag using a hardware - * register AuxCoreBoot0. 
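omap1_sram_reprogram_clock above runs from SRAM because it retunes the clocks feeding the memory the kernel normally executes from. Its control flow as a C sketch (the register addresses are built with the mov/orr idiom in the real code and are simply passed in here):

#include <stdint.h>

static void reprogram_clock(volatile uint16_t *dpll_ctl,
			    volatile uint16_t *arm_ckctl,
			    uint16_t dpll, uint16_t ckctl)
{
	int want_lock = dpll & (1 << 4);

	if (want_lock)
		*dpll_ctl = dpll & ~(1 << 4);	/* drop DPLL into bypass  */

	*arm_ckctl = ckctl;			/* new CKCTL value        */
	*dpll_ctl = dpll;			/* new DPLL value         */

	for (volatile int i = 0x7ff; i; i--)	/* let the clocks settle  */
		;

	if (want_lock)
		while (!(*dpll_ctl & 1))	/* spin until rate locked */
			;
}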
- */ -ENTRY(omap4_secondary_startup) -hold: ldr r12,=0x103 - dsb - smc #0 @ read from AuxCoreBoot0 - mov r0, r0, lsr #9 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne hold - - /* - * we've been released from the wait loop,secondary_stack - * should now contain the SVC stack for this core - */ - b omap_secondary_startup -ENDPROC(omap4_secondary_startup) - -ENTRY(omap4460_secondary_startup) -hold_2: ldr r12,=0x103 - dsb - smc #0 @ read from AuxCoreBoot0 - mov r0, r0, lsr #9 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne hold_2 - - /* - * GIC distributor control register has changed between - * CortexA9 r1pX and r2pX. The Control Register secure - * banked version is now composed of 2 bits: - * bit 0 == Secure Enable - * bit 1 == Non-Secure Enable - * The Non-Secure banked register has not changed - * Because the ROM Code is based on the r1pX GIC, the CPU1 - * GIC restoration will cause a problem to CPU0 Non-Secure SW. - * The workaround must be: - * 1) Before doing the CPU1 wakeup, CPU0 must disable - * the GIC distributor - * 2) CPU1 must re-enable the GIC distributor on - * it's wakeup path. - */ - ldr r1, =OMAP44XX_GIC_DIST_BASE - ldr r0, [r1] - orr r0, #1 - str r0, [r1] - - /* - * we've been released from the wait loop,secondary_stack - * should now contain the SVC stack for this core - */ - b omap_secondary_startup -ENDPROC(omap4460_secondary_startup) diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S deleted file mode 100644 index fd2bcd91f4a14c874bd8b833bd91a7e5ef0d8c8c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/omap-smc.S +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * OMAP34xx and OMAP44xx secure APIs file. - * - * Copyright (C) 2010 Texas Instruments, Inc. - * Written by Santosh Shilimkar - * - * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár - */ - -#include - -/* - * This is common routine to manage secure monitor API - * used to modify the PL310 secure registers. - * 'r0' contains the value to be modified and 'r12' contains - * the monitor API number. It uses few CPU registers - * internally and hence they need be backed up including - * link register "lr". - * Function signature : void omap_smc1(u32 fn, u32 arg) - */ - .arch armv7-a - .arch_extension sec -ENTRY(omap_smc1) - stmfd sp!, {r2-r12, lr} - mov r12, r0 - mov r0, r1 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_smc1) - -/** - * u32 omap_smc2(u32 id, u32 falg, u32 pargs) - * Low level common routine for secure HAL and PPA APIs. 
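The 'hold:'/'hold_2:' loops above (and the 'wait:' loops in the OMAP5 variants earlier) implement the same release protocol: the boot CPU publishes the chosen CPU number through AuxCoreBoot0, and each secondary spins until the published id matches its own MPIDR[3:0]. OMAP4 reads the register through the 0x103 secure-monitor call and shifts by 9; OMAP5 reads it directly and shifts by 5. A sketch with the read abstracted out:

#include <stdint.h>

static void secondary_hold(uint32_t (*read_auxcoreboot0)(void),
			   unsigned int shift, uint32_t my_cpu_id)
{
	while ((read_auxcoreboot0() >> shift) != my_cpu_id)
		;	/* keep spinning until released */

	/* fall through to the common secondary startup path */
}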
- * @id: Application ID of HAL APIs - * @flag: Flag to indicate the criticality of operation - * @pargs: Physical address of parameter list starting - * with number of parametrs - */ -ENTRY(omap_smc2) - stmfd sp!, {r4-r12, lr} - mov r3, r2 - mov r2, r1 - mov r1, #0x0 @ Process ID - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - mov r7, #0 - mcr p15, 0, r7, c7, c5, 6 - dsb - dmb - smc #0 - ldmfd sp!, {r4-r12, pc} -ENDPROC(omap_smc2) - -/** - * u32 omap_smc3(u32 service_id, u32 process_id, u32 flag, u32 pargs) - * Low level common routine for secure HAL and PPA APIs via smc #1 - * r0 - @service_id: Secure Service ID - * r1 - @process_id: Process ID - * r2 - @flag: Flag to indicate the criticality of operation - * r3 - @pargs: Physical address of parameter list - */ -ENTRY(omap_smc3) - stmfd sp!, {r4-r11, lr} - mov r12, r0 @ Copy the secure service ID - mov r6, #0xff @ Indicate new Task call - dsb @ Memory Barrier (not sure if needed, copied from omap_smc2) - smc #1 @ Call PPA service - ldmfd sp!, {r4-r11, pc} -ENDPROC(omap_smc3) - -ENTRY(omap_modify_auxcoreboot0) - stmfd sp!, {r1-r12, lr} - ldr r12, =0x104 - dsb - smc #0 - ldmfd sp!, {r1-r12, pc} -ENDPROC(omap_modify_auxcoreboot0) - -ENTRY(omap_auxcoreboot_addr) - stmfd sp!, {r2-r12, lr} - ldr r12, =0x105 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_auxcoreboot_addr) - -ENTRY(omap_read_auxcoreboot0) - stmfd sp!, {r2-r12, lr} - ldr r12, =0x103 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_read_auxcoreboot0) diff --git a/arch/arm/mach-omap2/sleep24xx.S b/arch/arm/mach-omap2/sleep24xx.S deleted file mode 100644 index 84d8c43ad382c2a8323efc260dd0f61e0adb20e5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep24xx.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sleep.S - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * (C) Copyright 2006 Nokia Corporation - * Fixed idle loop sleep - * Igor Stoppa - */ - -#include -#include - -#include "omap24xx.h" -#include "sdrc.h" - -/* First address of reserved address space? apparently valid for OMAP2 & 3 */ -#define A_SDRC0_V (0xC0000000) - - .text - -/* - * omap24xx_cpu_suspend() - Forces OMAP into deep sleep state by completing - * SDRC shutdown then ARM shutdown. Upon wake MPU is back on so just restore - * SDRC. - * - * Input: - * R0 : DLL ctrl value pre-Sleep - * R1 : SDRC_DLLA_CTRL - * R2 : SDRC_POWER - * - * The if the DPLL is going to AutoIdle. It seems like the DPLL may be back on - * when we get called, but the DLL probably isn't. We will wait a bit more in - * case the DPLL isn't quite there yet. The code will wait on DLL for DDR even - * if in unlocked mode. - * - * For less than 242x-ES2.2 upon wake from a sleep mode where the external - * oscillator was stopped, a timing bug exists where a non-stabilized 12MHz - * clock can pass into the PRCM can cause problems at DSP and IVA. - * To work around this the code will switch to the 32kHz source prior to sleep. - * Post sleep we will shift back to using the DPLL. Apparently, - * CM_IDLEST_CLKGEN does not reflect the full clock change so you need to wait - * 3x12MHz + 3x32kHz clocks for a full switch. - * - * The DLL load value is not kept in RETENTION or OFF. 
It needs to be restored - * at wake - */ - .align 3 -ENTRY(omap24xx_cpu_suspend) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - mov r3, #0x0 @ clear for mcr call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, hope SDR/DDR finished - nop - nop - ldr r4, [r2] @ read SDRC_POWER - orr r4, r4, #0x40 @ enable self refresh on idle req - mov r5, #0x2000 @ set delay (DPLL relock + DLL relock) - str r4, [r2] @ make it so - nop - mcr p15, 0, r3, c7, c0, 4 @ wait for interrupt - nop -loop: - subs r5, r5, #0x1 @ awake, wait just a bit - bne loop - - /* The DPLL has to be on before we take the DDR out of self refresh */ - bic r4, r4, #0x40 @ now clear self refresh bit. - str r4, [r2] @ write to SDRC_POWER - ldr r4, A_SDRC0 @ make a clock happen - ldr r4, [r4] @ read A_SDRC0 - nop @ start auto refresh only after clk ok - movs r0, r0 @ see if DDR or SDR - strne r0, [r1] @ rewrite DLLA to force DLL reload - addne r1, r1, #0x8 @ move to DLLB - strne r0, [r1] @ rewrite DLLB to force DLL reload - - mov r5, #0x1000 -loop2: - subs r5, r5, #0x1 - bne loop2 - /* resume*/ - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - -A_SDRC0: - .word A_SDRC0_V - -ENTRY(omap24xx_cpu_suspend_sz) - .word . - omap24xx_cpu_suspend diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S deleted file mode 100644 index dc221249bc22c88ae95b94ce4461781cc9c1c999..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep33xx.S +++ /dev/null @@ -1,262 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Low level suspend code for AM33XX SoCs - * - * Copyright (C) 2012-2018 Texas Instruments Incorporated - http://www.ti.com/ - * Dave Gerlach, Vaibhav Bedia - */ - -#include -#include -#include -#include -#include - -#include "iomap.h" -#include "cm33xx.h" -#include "pm-asm-offsets.h" - -#define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED 0x00030000 -#define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE 0x0003 -#define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE 0x0002 - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - - .arm - .arch armv7-a - .align 3 - -ENTRY(am33xx_do_wfi) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* Save wfi_flags arg to data space */ - mov r4, r0 - adr r3, am33xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - str r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - - /* Only flush cache is we know we are losing MPU context */ - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_flush - - /* - * Flush all data from the L1 and L2 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - blx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - /* - * Invalidate L1 and L2 data cache. 
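The SCTLR.C dance above (flush, clear the bit so no new cache lines are allocated while context is written out, then flush again) recurs in the 34xx and 43xx suspend paths below. The CP15 access itself, as a privileged ARMv7 inline-asm sketch:

#include <stdint.h>

static inline void set_sctlr_c_bit(int enable)
{
	uint32_t sctlr;

	asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r" (sctlr));
	if (enable)
		sctlr |= 1u << 2;	/* allow D-cache allocation       */
	else
		sctlr &= ~(1u << 2);	/* data accesses strongly ordered */
	asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r" (sctlr) : "memory");
	asm volatile("isb");
}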
- */ - ldr r1, kernel_flush - blx r1 - - adr r3, am33xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - ldr r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -cache_skip_flush: - /* Check if we want self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_enter_sr - - adr r9, am33xx_emif_sram_table - - ldr r3, [r9, #EMIF_PM_ENTER_SR_OFFSET] - blx r3 - -emif_skip_enter_sr: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SAVE_EMIF - beq emif_skip_save - - ldr r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET] - blx r3 - -emif_skip_save: - /* Only can disable EMIF if we have entered self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_disable - - /* Disable EMIF */ - ldr r1, virt_emif_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - - ldr r1, virt_emif_clkctrl -wait_emif_disable: - ldr r2, [r1] - mov r3, #AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED - cmp r2, r3 - bne wait_emif_disable - -emif_skip_disable: - tst r4, #WFI_FLAG_WAKE_M3 - beq wkup_m3_skip - - /* - * For the MPU WFI to be registered as an interrupt - * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set - * to DISABLED - */ - ldr r1, virt_mpu_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - -wkup_m3_skip: - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * have completed. - */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Thirteen - * NOPs as per Cortex-A8 pipeline. - */ - wfi - - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - /* We come here in case of an abort due to a late interrupt */ - - /* Set MPU_CLKCTRL.MODULEMODE back to ENABLE */ - ldr r1, virt_mpu_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] - - /* Re-enable EMIF */ - ldr r1, virt_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable - - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_exit_sr_abt - - adr r9, am33xx_emif_sram_table - ldr r1, [r9, #EMIF_PM_ABORT_SR_OFFSET] - blx r1 - -emif_skip_exit_sr_abt: - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_restore - - /* - * Set SCTLR.C bit to allow data cache allocation - */ - mrc p15, 0, r0, c1, c0, 0 - orr r0, r0, #(1 << 2) @ Enable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - -cache_skip_restore: - /* Let the suspend code know about the abort */ - mov r0, #1 - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(am33xx_do_wfi) - - .align -ENTRY(am33xx_resume_offset) - .word . - am33xx_do_wfi - -ENTRY(am33xx_resume_from_deep_sleep) - /* Re-enable EMIF */ - ldr r0, phys_emif_clkctrl - mov r1, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r1, [r0] -wait_emif_enable1: - ldr r2, [r0] - cmp r1, r2 - bne wait_emif_enable1 - - adr r9, am33xx_emif_sram_table - - ldr r1, [r9, #EMIF_PM_RESTORE_CONTEXT_OFFSET] - blx r1 - - ldr r1, [r9, #EMIF_PM_EXIT_SR_OFFSET] - blx r1 - -resume_to_ddr: - /* We are back. 
Branch to the common CPU resume routine */ - mov r0, #0 - ldr pc, resume_addr -ENDPROC(am33xx_resume_from_deep_sleep) - -/* - * Local variables - */ - .align -kernel_flush: - .word v7_flush_dcache_all -virt_mpu_clkctrl: - .word AM33XX_CM_MPU_MPU_CLKCTRL -virt_emif_clkctrl: - .word AM33XX_CM_PER_EMIF_CLKCTRL -phys_emif_clkctrl: - .word (AM33XX_CM_BASE + AM33XX_CM_PER_MOD + \ - AM33XX_CM_PER_EMIF_CLKCTRL_OFFSET) - -.align 3 -/* DDR related defines */ -am33xx_emif_sram_table: - .space EMIF_PM_FUNCTIONS_SIZE - -ENTRY(am33xx_pm_sram) - .word am33xx_do_wfi - .word am33xx_do_wfi_sz - .word am33xx_resume_offset - .word am33xx_emif_sram_table - .word am33xx_pm_ro_sram_data - -resume_addr: -.word cpu_resume - PAGE_OFFSET + 0x80000000 - -.align 3 -ENTRY(am33xx_pm_ro_sram_data) - .space AMX3_PM_RO_SRAM_DATA_SIZE - -ENTRY(am33xx_do_wfi_sz) - .word . - am33xx_do_wfi diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S deleted file mode 100644 index ac1324c6453b5b121a4466b833787a1a9086d2cd..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep34xx.S +++ /dev/null @@ -1,569 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * (C) Copyright 2007 - * Texas Instruments - * Karthik Dasu - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - */ -#include - -#include - -#include "omap34xx.h" -#include "iomap.h" -#include "cm3xxx.h" -#include "prm3xxx.h" -#include "sdrc.h" -#include "sram.h" -#include "control.h" - -/* - * Registers access definitions - */ -#define SDRC_SCRATCHPAD_SEM_OFFS 0xc -#define SDRC_SCRATCHPAD_SEM_V OMAP343X_SCRATCHPAD_REGADDR\ - (SDRC_SCRATCHPAD_SEM_OFFS) -#define PM_PREPWSTST_CORE_P OMAP3430_PRM_BASE + CORE_MOD +\ - OMAP3430_PM_PREPWSTST -#define PM_PWSTCTRL_MPU_P OMAP3430_PRM_BASE + MPU_MOD + OMAP2_PM_PWSTCTRL -#define CM_IDLEST1_CORE_V OMAP34XX_CM_REGADDR(CORE_MOD, CM_IDLEST1) -#define CM_IDLEST_CKGEN_V OMAP34XX_CM_REGADDR(PLL_MOD, CM_IDLEST) -#define SRAM_BASE_P OMAP3_SRAM_PA -#define CONTROL_STAT OMAP343X_CTRL_BASE + OMAP343X_CONTROL_STATUS -#define CONTROL_MEM_RTA_CTRL (OMAP343X_CTRL_BASE +\ - OMAP36XX_CONTROL_MEM_RTA_CTRL) - -/* Move this as correct place is available */ -#define SCRATCHPAD_MEM_OFFS 0x310 -#define SCRATCHPAD_BASE_P (OMAP343X_CTRL_BASE +\ - OMAP343X_CONTROL_MEM_WKUP +\ - SCRATCHPAD_MEM_OFFS) -#define SDRC_POWER_V OMAP34XX_SDRC_REGADDR(SDRC_POWER) -#define SDRC_SYSCONFIG_P (OMAP343X_SDRC_BASE + SDRC_SYSCONFIG) -#define SDRC_MR_0_P (OMAP343X_SDRC_BASE + SDRC_MR_0) -#define SDRC_EMR2_0_P (OMAP343X_SDRC_BASE + SDRC_EMR2_0) -#define SDRC_MANUAL_0_P (OMAP343X_SDRC_BASE + SDRC_MANUAL_0) -#define SDRC_MR_1_P (OMAP343X_SDRC_BASE + SDRC_MR_1) -#define SDRC_EMR2_1_P (OMAP343X_SDRC_BASE + SDRC_EMR2_1) -#define SDRC_MANUAL_1_P (OMAP343X_SDRC_BASE + SDRC_MANUAL_1) -#define SDRC_DLLA_STATUS_V OMAP34XX_SDRC_REGADDR(SDRC_DLLA_STATUS) -#define SDRC_DLLA_CTRL_V OMAP34XX_SDRC_REGADDR(SDRC_DLLA_CTRL) - -/* - * This file needs be built unconditionally as ARM to interoperate correctly - * with non-Thumb-2-capable firmware. - */ - .arm - -/* - * API functions - */ - - .text -/* - * L2 cache needs to be toggled for stable OFF mode functionality on 3630. - * This function sets up a flag that will allow for this toggling to take - * place on 3630. Hopefully some version in the future may not need this. 
- */ -ENTRY(enable_omap3630_toggle_l2_on_restore) - stmfd sp!, {lr} @ save registers on stack - /* Setup so that we will disable and enable l2 */ - mov r1, #0x1 - adrl r3, l2dis_3630_offset @ may be too distant for plain adr - ldr r2, [r3] @ value for offset - str r1, [r2, r3] @ write to l2dis_3630 - ldmfd sp!, {pc} @ restore regs and return -ENDPROC(enable_omap3630_toggle_l2_on_restore) - -/* - * Function to call rom code to save secure ram context. - * - * r0 = physical address of the parameters - */ - .arch armv7-a - .arch_extension sec -ENTRY(save_secure_ram_context) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - mov r3, r0 @ physical address of parameters - mov r0, #25 @ set service ID for PPA - mov r12, r0 @ copy secure service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - nop - nop - nop - nop - ldmfd sp!, {r4 - r11, pc} -ENDPROC(save_secure_ram_context) - -/* - * ====================== - * == Idle entry point == - * ====================== - */ - -/* - * Forces OMAP into idle state - * - * omap34xx_cpu_suspend() - This bit of code saves the CPU context if needed - * and executes the WFI instruction. Calling WFI effectively changes the - * power domains states to the desired target power states. - * - * - * Notes: - * - only the minimum set of functions gets copied to internal SRAM at boot - * and after wake-up from OFF mode, cf. omap_push_sram_idle. The function - * pointers in SDRAM or SRAM are called depending on the desired low power - * target state. - * - when the OMAP wakes up it continues at different execution points - * depending on the low power mode (non-OFF vs OFF modes), - * cf. 'Resume path for xxx mode' comments. - */ - .align 3 -ENTRY(omap34xx_cpu_suspend) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* - * r0 contains information about saving context: - * 0 - No context lost - * 1 - Only L1 and logic lost - * 2 - Only L2 lost (Even L1 is retained we clean it along with L2) - * 3 - Both L1 and L2 lost and logic lost - */ - - /* - * For OFF mode: save context and jump to WFI in SDRAM (omap3_do_wfi) - * For non-OFF modes: jump to the WFI code in SRAM (omap3_do_wfi_sram) - */ - ldr r4, omap3_do_wfi_sram_addr - ldr r5, [r4] - cmp r0, #0x0 @ If no context save required, - bxeq r5 @ jump to the WFI code in SRAM - - - /* Otherwise fall through to the save context code */ -save_context_wfi: - /* - * jump out to kernel flush routine - * - reuse that code is better - * - it executes in a cached space so is faster than refetch per-block - * - should be faster and will change with kernel - * - 'might' have to copy address, load and jump to it - * Flush all data from the L1 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - mov lr, pc - bx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - /* - * Invalidate L1 data cache. Even though only invalidate is - * necessary exported flush API is used here. Doing clean - * on already clean cache would be almost NOP. 
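The entry decision in omap34xx_cpu_suspend above compresses to a two-way branch on the caller's context-loss code: zero means nothing to save, so jump straight to the WFI copy in SRAM; anything else falls through to the save-context path run from SDRAM. As a sketch (function names hypothetical):

static void omap3_suspend_dispatch(int save_state,
				   void (*do_wfi_sram)(void),
				   void (*save_context_then_wfi)(void))
{
	if (save_state == 0)
		do_wfi_sram();			/* non-OFF modes       */
	else
		save_context_then_wfi();	/* OFF: save, then WFI */
}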
- */ - ldr r1, kernel_flush - blx r1 - b omap3_do_wfi -ENDPROC(omap34xx_cpu_suspend) -omap3_do_wfi_sram_addr: - .word omap3_do_wfi_sram -kernel_flush: - .word v7_flush_dcache_all - -/* =================================== - * == WFI instruction => Enter idle == - * =================================== - */ - -/* - * Do WFI instruction - * Includes the resume path for non-OFF modes - * - * This code gets copied to internal SRAM and is accessible - * from both SDRAM and SRAM: - * - executed from SRAM for non-off modes (omap3_do_wfi_sram), - * - executed from SDRAM for OFF mode (omap3_do_wfi). - */ - .align 3 -ENTRY(omap3_do_wfi) - ldr r4, sdrc_power @ read the SDRC_POWER register - ldr r5, [r4] @ read the contents of SDRC_POWER - orr r5, r5, #0x40 @ enable self refresh on idle req - str r5, [r4] @ write back to SDRC_POWER register - - /* Data memory barrier and Data sync barrier */ - dsb - dmb - -/* - * =================================== - * == WFI instruction => Enter idle == - * =================================== - */ - wfi @ wait for interrupt - -/* - * =================================== - * == Resume path for non-OFF modes == - * =================================== - */ - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - -/* - * This function implements the erratum ID i581 WA: - * SDRC state restore before accessing the SDRAM - * - * Only used at return from non-OFF mode. For OFF - * mode the ROM code configures the SDRC and - * the DPLL before calling the restore code directly - * from DDR. - */ - -/* Make sure SDRC accesses are ok */ -wait_sdrc_ok: - -/* DPLL3 must be locked before accessing the SDRC. Maybe the HW ensures this */ - ldr r4, cm_idlest_ckgen -wait_dpll3_lock: - ldr r5, [r4] - tst r5, #1 - beq wait_dpll3_lock - - ldr r4, cm_idlest1_core -wait_sdrc_ready: - ldr r5, [r4] - tst r5, #0x2 - bne wait_sdrc_ready - /* allow DLL powerdown upon hw idle req */ - ldr r4, sdrc_power - ldr r5, [r4] - bic r5, r5, #0x40 - str r5, [r4] - -is_dll_in_lock_mode: - /* Is dll in lock mode? */ - ldr r4, sdrc_dlla_ctrl - ldr r5, [r4] - tst r5, #0x4 - bne exit_nonoff_modes @ Return if locked - /* wait till dll locks */ -wait_dll_lock_timed: - ldr r4, sdrc_dlla_status - /* Wait 20uS for lock */ - mov r6, #8 -wait_dll_lock: - subs r6, r6, #0x1 - beq kick_dll - ldr r5, [r4] - and r5, r5, #0x4 - cmp r5, #0x4 - bne wait_dll_lock - b exit_nonoff_modes @ Return when locked - - /* disable/reenable DLL if not locked */ -kick_dll: - ldr r4, sdrc_dlla_ctrl - ldr r5, [r4] - mov r6, r5 - bic r6, #(1<<3) @ disable dll - str r6, [r4] - dsb - orr r6, r6, #(1<<3) @ enable dll - str r6, [r4] - dsb - b wait_dll_lock_timed - -exit_nonoff_modes: - /* Re-enable C-bit if needed */ - mrc p15, 0, r0, c1, c0, 0 - tst r0, #(1 << 2) @ Check C bit enabled? - orreq r0, r0, #(1 << 2) @ Enable the C bit if cleared - mcreq p15, 0, r0, c1, c0, 0 - isb - -/* - * =================================== - * == Exit point from non-OFF modes == - * =================================== - */ - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(omap3_do_wfi) -sdrc_power: - .word SDRC_POWER_V -cm_idlest1_core: - .word CM_IDLEST1_CORE_V -cm_idlest_ckgen: - .word CM_IDLEST_CKGEN_V -sdrc_dlla_status: - .word SDRC_DLLA_STATUS_V -sdrc_dlla_ctrl: - .word SDRC_DLLA_CTRL_V -ENTRY(omap3_do_wfi_sz) - .word . - omap3_do_wfi - - -/* - * ============================== - * == Resume path for OFF mode == - * ============================== - */ - -/* - * The restore_* functions are called by the ROM code - * when back from WFI in OFF mode. 
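wait_dll_lock_timed/kick_dll above give the DLL a bounded number of polls to report lock and, failing that, pulse its enable bit and start over. A C sketch of that loop (the dsb barriers between the control writes are omitted; lock status is bit 2 of SDRC_DLLA_STATUS and the enable is bit 3 of SDRC_DLLA_CTRL, as in the asm):

#include <stdint.h>

static void wait_dll_lock(volatile uint32_t *dlla_ctrl,
			  volatile uint32_t *dlla_status)
{
	for (;;) {
		int tries = 8;			/* ~20us of polling */

		while (tries--)
			if (*dlla_status & 0x4)
				return;		/* DLL locked       */

		*dlla_ctrl &= ~(1u << 3);	/* disable DLL      */
		*dlla_ctrl |= (1u << 3);	/* re-enable, retry */
	}
}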
- * Cf. the get_*restore_pointer functions. - * - * restore_es3: applies to 34xx >= ES3.0 - * restore_3630: applies to 36xx - * restore: common code for 3xxx - * - * Note: when back from CORE and MPU OFF mode we are running - * from SDRAM, without MMU, without the caches and prediction. - * Also the SRAM content has been cleared. - */ -ENTRY(omap3_restore_es3) - ldr r5, pm_prepwstst_core_p - ldr r4, [r5] - and r4, r4, #0x3 - cmp r4, #0x0 @ Check if previous power state of CORE is OFF - bne omap3_restore @ Fall through to OMAP3 common code - adr r0, es3_sdrc_fix - ldr r1, sram_base - ldr r2, es3_sdrc_fix_sz - mov r2, r2, ror #2 -copy_to_sram: - ldmia r0!, {r3} @ val = *src - stmia r1!, {r3} @ *dst = val - subs r2, r2, #0x1 @ num_words-- - bne copy_to_sram - ldr r1, sram_base - blx r1 - b omap3_restore @ Fall through to OMAP3 common code -ENDPROC(omap3_restore_es3) - -ENTRY(omap3_restore_3630) - ldr r1, pm_prepwstst_core_p - ldr r2, [r1] - and r2, r2, #0x3 - cmp r2, #0x0 @ Check if previous power state of CORE is OFF - bne omap3_restore @ Fall through to OMAP3 common code - /* Disable RTA before giving control */ - ldr r1, control_mem_rta - mov r2, #OMAP36XX_RTA_DISABLE - str r2, [r1] -ENDPROC(omap3_restore_3630) - - /* Fall through to common code for the remaining logic */ - -ENTRY(omap3_restore) - /* - * Read the pwstctrl register to check the reason for mpu reset. - * This tells us what was lost. - */ - ldr r1, pm_pwstctrl_mpu - ldr r2, [r1] - and r2, r2, #0x3 - cmp r2, #0x0 @ Check if target power state was OFF or RET - bne logic_l1_restore - - adr r1, l2dis_3630_offset @ address for offset - ldr r0, [r1] @ value for offset - ldr r0, [r1, r0] @ value at l2dis_3630 - cmp r0, #0x1 @ should we disable L2 on 3630? - bne skipl2dis - mrc p15, 0, r0, c1, c0, 1 - bic r0, r0, #2 @ disable L2 cache - mcr p15, 0, r0, c1, c0, 1 -skipl2dis: - ldr r0, control_stat - ldr r1, [r0] - and r1, #0x700 - cmp r1, #0x300 - beq l2_inv_gp - adr r0, l2_inv_api_params_offset - ldr r3, [r0] - add r3, r3, r0 @ r3 points to dummy parameters - mov r0, #40 @ set service ID for PPA - mov r12, r0 @ copy secure Service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - /* Write to Aux control register to set some bits */ - mov r0, #42 @ set service ID for PPA - mov r12, r0 @ copy secure Service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - ldr r4, scratchpad_base - ldr r3, [r4, #0xBC] @ r3 points to parameters - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - -#ifdef CONFIG_OMAP3_L2_AUX_SECURE_SAVE_RESTORE - /* Restore L2 aux control register */ - @ set service ID for PPA - mov r0, #CONFIG_OMAP3_L2_AUX_SECURE_SERVICE_SET_ID - mov r12, r0 @ copy service ID in r12 - mov r1, #0 @ set task ID for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - ldr r4, scratchpad_base - ldr r3, [r4, #0xBC] - adds r3, r3, #8 @ r3 points to parameters - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) -#endif - b logic_l1_restore - - .align -l2_inv_api_params_offset: - .long l2_inv_api_params - . 
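The copy_to_sram loop in omap3_restore_es3 above relocates es3_sdrc_fix into SRAM word by word before running it, since the SDRC cannot be reprogrammed from code fetched through the interface being fixed; the 'mov r2, r2, ror #2' turns the byte size into a word count (equivalent to a right shift for a 4-byte-aligned size). The loop itself is a plain word copy:

#include <stdint.h>

static void copy_words(const uint32_t *src, uint32_t *dst, unsigned int n)
{
	while (n--)
		*dst++ = *src++;	/* ldmia r0!, {r3}; stmia r1!, {r3} */
}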
-l2_inv_gp: - /* Execute smi to invalidate L2 cache */ - mov r12, #0x1 @ set up to invalidate L2 - smc #0 @ Call SMI monitor (smieq) - /* Write to Aux control register to set some bits */ - ldr r4, scratchpad_base - ldr r3, [r4,#0xBC] - ldr r0, [r3,#4] - mov r12, #0x3 - smc #0 @ Call SMI monitor (smieq) - ldr r4, scratchpad_base - ldr r3, [r4,#0xBC] - ldr r0, [r3,#12] - mov r12, #0x2 - smc #0 @ Call SMI monitor (smieq) -logic_l1_restore: - adr r0, l2dis_3630_offset @ adress for offset - ldr r1, [r0] @ value for offset - ldr r1, [r0, r1] @ value at l2dis_3630 - cmp r1, #0x1 @ Test if L2 re-enable needed on 3630 - bne skipl2reen - mrc p15, 0, r1, c1, c0, 1 - orr r1, r1, #2 @ re-enable L2 cache - mcr p15, 0, r1, c1, c0, 1 -skipl2reen: - - /* Now branch to the common CPU resume function */ - b cpu_resume -ENDPROC(omap3_restore) - - .ltorg - -/* - * Local variables - */ -pm_prepwstst_core_p: - .word PM_PREPWSTST_CORE_P -pm_pwstctrl_mpu: - .word PM_PWSTCTRL_MPU_P -scratchpad_base: - .word SCRATCHPAD_BASE_P -sram_base: - .word SRAM_BASE_P + 0x8000 -control_stat: - .word CONTROL_STAT -control_mem_rta: - .word CONTROL_MEM_RTA_CTRL -l2dis_3630_offset: - .long l2dis_3630 - . - - .data - .align 2 -l2dis_3630: - .word 0 - - .data - .align 2 -l2_inv_api_params: - .word 0x1, 0x00 - -/* - * Internal functions - */ - -/* - * This function implements the erratum ID i443 WA, applies to 34xx >= ES3.0 - * Copied to and run from SRAM in order to reconfigure the SDRC parameters. - */ - .text - .align 3 -ENTRY(es3_sdrc_fix) - ldr r4, sdrc_syscfg @ get config addr - ldr r5, [r4] @ get value - tst r5, #0x100 @ is part access blocked - it eq - biceq r5, r5, #0x100 @ clear bit if set - str r5, [r4] @ write back change - ldr r4, sdrc_mr_0 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_emr2_0 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_manual_0 @ get config addr - mov r5, #0x2 @ autorefresh command - str r5, [r4] @ kick off refreshes - ldr r4, sdrc_mr_1 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_emr2_1 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_manual_1 @ get config addr - mov r5, #0x2 @ autorefresh command - str r5, [r4] @ kick off refreshes - bx lr - -/* - * Local variables - */ - .align -sdrc_syscfg: - .word SDRC_SYSCONFIG_P -sdrc_mr_0: - .word SDRC_MR_0_P -sdrc_emr2_0: - .word SDRC_EMR2_0_P -sdrc_manual_0: - .word SDRC_MANUAL_0_P -sdrc_mr_1: - .word SDRC_MR_1_P -sdrc_emr2_1: - .word SDRC_EMR2_1_P -sdrc_manual_1: - .word SDRC_MANUAL_1_P -ENDPROC(es3_sdrc_fix) -ENTRY(es3_sdrc_fix_sz) - .word . 
- es3_sdrc_fix diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S deleted file mode 100644 index 90d2907a2eb27eba605421c1252dd98caa228025..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep43xx.S +++ /dev/null @@ -1,493 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Low level suspend code for AM43XX SoCs - * - * Copyright (C) 2013-2018 Texas Instruments Incorporated - http://www.ti.com/ - * Dave Gerlach, Vaibhav Bedia - */ - -#include -#include -#include -#include -#include -#include - -#include "cm33xx.h" -#include "common.h" -#include "iomap.h" -#include "omap-secure.h" -#include "omap44xx.h" -#include "pm-asm-offsets.h" -#include "prm33xx.h" -#include "prcm43xx.h" - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - -#define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED 0x00030000 -#define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE 0x0003 -#define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE 0x0002 - -#define AM43XX_EMIF_POWEROFF_ENABLE 0x1 -#define AM43XX_EMIF_POWEROFF_DISABLE 0x0 - -#define AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP 0x1 -#define AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO 0x3 - -#define AM43XX_CM_BASE 0x44DF0000 - -#define AM43XX_CM_REGADDR(inst, reg) \ - AM33XX_L4_WK_IO_ADDRESS(AM43XX_CM_BASE + (inst) + (reg)) - -#define AM43XX_CM_MPU_CLKSTCTRL AM43XX_CM_REGADDR(AM43XX_CM_MPU_INST, \ - AM43XX_CM_MPU_MPU_CDOFFS) -#define AM43XX_CM_MPU_MPU_CLKCTRL AM43XX_CM_REGADDR(AM43XX_CM_MPU_INST, \ - AM43XX_CM_MPU_MPU_CLKCTRL_OFFSET) -#define AM43XX_CM_PER_EMIF_CLKCTRL AM43XX_CM_REGADDR(AM43XX_CM_PER_INST, \ - AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET) -#define AM43XX_PRM_EMIF_CTRL_OFFSET 0x0030 - -#define RTC_SECONDS_REG 0x0 -#define RTC_PMIC_REG 0x98 -#define RTC_PMIC_POWER_EN BIT(16) -#define RTC_PMIC_EXT_WAKEUP_STS BIT(12) -#define RTC_PMIC_EXT_WAKEUP_POL BIT(4) -#define RTC_PMIC_EXT_WAKEUP_EN BIT(0) - - .arm - .arch armv7-a - .arch_extension sec - .align 3 - -ENTRY(am43xx_do_wfi) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* Save wfi_flags arg to data space */ - mov r4, r0 - adr r3, am43xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - str r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -#ifdef CONFIG_CACHE_L2X0 - /* Retrieve l2 cache virt address BEFORE we shut off EMIF */ - ldr r1, get_l2cache_base - blx r1 - mov r8, r0 -#endif - - /* Only flush cache is we know we are losing MPU context */ - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_flush - - /* - * Flush all data from the L1 and L2 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - blx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - dsb - - /* - * Invalidate L1 and L2 data cache. - */ - ldr r1, kernel_flush - blx r1 - -#ifdef CONFIG_CACHE_L2X0 - /* - * Clean and invalidate the L2 cache. 
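The clean-and-invalidate sequence that follows drives the PL310 by way: write the way mask to the CLEAN_INV_WAY register, poll until the controller has cleared every bit, then drain with CACHE_SYNC and poll that too. A sketch using the standard PL310 offsets; the way mask is presumably 0xffff for a fully populated 16-way cache (the asm loads it from l2_val):

#include <stdint.h>

#define L2X0_CLEAN_INV_WAY	0x7fc
#define L2X0_CACHE_SYNC		0x730

static void l2x0_clean_inv_all(volatile uint8_t *l2_base, uint32_t way_mask)
{
	volatile uint32_t *op =
		(volatile uint32_t *)(l2_base + L2X0_CLEAN_INV_WAY);
	volatile uint32_t *sync =
		(volatile uint32_t *)(l2_base + L2X0_CACHE_SYNC);

	*op = way_mask;
	while (*op & way_mask)
		;		/* ways still being processed */

	*sync = 0;
	while (*sync & 1)
		;		/* wait for the sync to drain */
}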
- */ -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x03 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif - mov r0, r8 - adr r4, am43xx_pm_ro_sram_data - ldr r3, [r4, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - - mov r2, r0 - ldr r0, [r2, #L2X0_AUX_CTRL] - str r0, [r3, #AMX3_PM_L2_AUX_CTRL_VAL_OFFSET] - ldr r0, [r2, #L310_PREFETCH_CTRL] - str r0, [r3, #AMX3_PM_L2_PREFETCH_CTRL_VAL_OFFSET] - - ldr r0, l2_val - str r0, [r2, #L2X0_CLEAN_INV_WAY] -wait: - ldr r0, [r2, #L2X0_CLEAN_INV_WAY] - ldr r1, l2_val - ands r0, r0, r1 - bne wait -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif -l2x_sync: - mov r0, r8 - mov r2, r0 - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync -#endif - - /* Restore wfi_flags */ - adr r3, am43xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - ldr r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -cache_skip_flush: - /* - * If we are trying to enter RTC+DDR mode we must perform - * a read from the rtc address space to ensure translation - * presence in the TLB to avoid page table walk after DDR - * is unavailable. - */ - tst r4, #WFI_FLAG_RTC_ONLY - beq skip_rtc_va_refresh - - adr r3, am43xx_pm_ro_sram_data - ldr r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET] - ldr r0, [r1] - -skip_rtc_va_refresh: - /* Check if we want self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_enter_sr - - adr r9, am43xx_emif_sram_table - - ldr r3, [r9, #EMIF_PM_ENTER_SR_OFFSET] - blx r3 - -emif_skip_enter_sr: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SAVE_EMIF - beq emif_skip_save - - ldr r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET] - blx r3 - -emif_skip_save: - /* Only can disable EMIF if we have entered self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_disable - - /* Disable EMIF */ - ldr r1, am43xx_virt_emif_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - -wait_emif_disable: - ldr r2, [r1] - mov r3, #AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED - cmp r2, r3 - bne wait_emif_disable - -emif_skip_disable: - tst r4, #WFI_FLAG_RTC_ONLY - beq skip_rtc_only - - adr r3, am43xx_pm_ro_sram_data - ldr r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET] - - ldr r0, [r1, #RTC_PMIC_REG] - orr r0, r0, #RTC_PMIC_POWER_EN - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_STS - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_EN - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_POL - str r0, [r1, #RTC_PMIC_REG] - ldr r0, [r1, #RTC_PMIC_REG] - /* Wait for 2 seconds to lose power */ - mov r3, #2 - ldr r2, [r1, #RTC_SECONDS_REG] -rtc_loop: - ldr r0, [r1, #RTC_SECONDS_REG] - cmp r0, r2 - beq rtc_loop - mov r2, r0 - subs r3, r3, #1 - bne rtc_loop - - b re_enable_emif - -skip_rtc_only: - - tst r4, #WFI_FLAG_WAKE_M3 - beq wkup_m3_skip - - /* - * For the MPU WFI to be registered as an interrupt - * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set - * to DISABLED - */ - ldr r1, am43xx_virt_mpu_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - - /* - * Put MPU CLKDM to SW_SLEEP - */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP - str r2, [r1] - -wkup_m3_skip: - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * have completed. 
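- * The DSB below guarantees completion of those maintenance
- * operations; the DMB that follows additionally orders any
- * outstanding memory accesses before the WFI is issued.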
- */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Sixteen - * NOPs as per Cortex-A9 pipeline. - */ - wfi - - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - /* We come here in case of an abort due to a late interrupt */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO - str r2, [r1] - - /* Set MPU_CLKCTRL.MODULEMODE back to ENABLE */ - ldr r1, am43xx_virt_mpu_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] - -re_enable_emif: - /* Re-enable EMIF */ - ldr r1, am43xx_virt_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable - - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_restore - - /* - * Set SCTLR.C bit to allow data cache allocation - */ - mrc p15, 0, r0, c1, c0, 0 - orr r0, r0, #(1 << 2) @ Enable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - -cache_skip_restore: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_exit_sr_abt - - adr r9, am43xx_emif_sram_table - ldr r1, [r9, #EMIF_PM_ABORT_SR_OFFSET] - blx r1 - -emif_skip_exit_sr_abt: - /* Let the suspend code know about the abort */ - mov r0, #1 - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(am43xx_do_wfi) - - .align -ENTRY(am43xx_resume_offset) - .word . - am43xx_do_wfi - -ENTRY(am43xx_resume_from_deep_sleep) - /* Set MPU CLKSTCTRL to HW AUTO so that CPUidle works properly */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO - str r2, [r1] - - /* For AM43xx, use EMIF power down until context is restored */ - ldr r2, am43xx_phys_emif_poweroff - mov r1, #AM43XX_EMIF_POWEROFF_ENABLE - str r1, [r2, #0x0] - - /* Re-enable EMIF */ - ldr r1, am43xx_phys_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable1: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable1 - - adr r9, am43xx_emif_sram_table - - ldr r1, [r9, #EMIF_PM_RESTORE_CONTEXT_OFFSET] - blx r1 - - ldr r1, [r9, #EMIF_PM_EXIT_SR_OFFSET] - blx r1 - - ldr r2, am43xx_phys_emif_poweroff - mov r1, #AM43XX_EMIF_POWEROFF_DISABLE - str r1, [r2, #0x0] - - ldr r1, [r9, #EMIF_PM_RUN_HW_LEVELING] - blx r1 - -#ifdef CONFIG_CACHE_L2X0 - ldr r2, l2_cache_base - ldr r0, [r2, #L2X0_CTRL] - and r0, #0x0f - cmp r0, #1 - beq skip_l2en @ Skip if already enabled - - adr r4, am43xx_pm_ro_sram_data - ldr r3, [r4, #AMX3_PM_RO_SRAM_DATA_PHYS_OFFSET] - ldr r0, [r3, #AMX3_PM_L2_PREFETCH_CTRL_VAL_OFFSET] - - ldr r12, l2_smc1 - dsb - smc #0 - dsb -set_aux_ctrl: - ldr r0, [r3, #AMX3_PM_L2_AUX_CTRL_VAL_OFFSET] - ldr r12, l2_smc2 - dsb - smc #0 - dsb - - /* L2 invalidate on resume */ - ldr r0, l2_val - ldr r2, l2_cache_base - str r0, [r2, #L2X0_INV_WAY] -wait2: - ldr r0, [r2, #L2X0_INV_WAY] - ldr r1, l2_val - ands r0, r0, r1 - bne wait2 -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif -l2x_sync2: - ldr r2, l2_cache_base - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync2: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync2 - - mov r0, #0x1 - ldr r12, l2_smc3 - dsb - smc #0 - dsb -#endif -skip_l2en: - /* We are back. 
Branch to the common CPU resume routine */ - mov r0, #0 - ldr pc, resume_addr -ENDPROC(am43xx_resume_from_deep_sleep) - -/* - * Local variables - */ - .align -kernel_flush: - .word v7_flush_dcache_all -ddr_start: - .word PAGE_OFFSET - -am43xx_phys_emif_poweroff: - .word (AM43XX_CM_BASE + AM43XX_PRM_DEVICE_INST + \ - AM43XX_PRM_EMIF_CTRL_OFFSET) -am43xx_virt_mpu_clkstctrl: - .word (AM43XX_CM_MPU_CLKSTCTRL) -am43xx_virt_mpu_clkctrl: - .word (AM43XX_CM_MPU_MPU_CLKCTRL) -am43xx_virt_emif_clkctrl: - .word (AM43XX_CM_PER_EMIF_CLKCTRL) -am43xx_phys_emif_clkctrl: - .word (AM43XX_CM_BASE + AM43XX_CM_PER_INST + \ - AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET) - -#ifdef CONFIG_CACHE_L2X0 -/* L2 cache related defines for AM437x */ -get_l2cache_base: - .word omap4_get_l2cache_base -l2_cache_base: - .word OMAP44XX_L2CACHE_BASE -l2_smc1: - .word OMAP4_MON_L2X0_PREFETCH_INDEX -l2_smc2: - .word OMAP4_MON_L2X0_AUXCTRL_INDEX -l2_smc3: - .word OMAP4_MON_L2X0_CTRL_INDEX -l2_val: - .word 0xffff -#endif - -.align 3 -/* DDR related defines */ -ENTRY(am43xx_emif_sram_table) - .space EMIF_PM_FUNCTIONS_SIZE - -ENTRY(am43xx_pm_sram) - .word am43xx_do_wfi - .word am43xx_do_wfi_sz - .word am43xx_resume_offset - .word am43xx_emif_sram_table - .word am43xx_pm_ro_sram_data - -resume_addr: - .word cpu_resume - PAGE_OFFSET + 0x80000000 -.align 3 - -ENTRY(am43xx_pm_ro_sram_data) - .space AMX3_PM_RO_SRAM_DATA_SIZE - -ENTRY(am43xx_do_wfi_sz) - .word . - am43xx_do_wfi diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S deleted file mode 100644 index f60f6a9aed7351532fbe0116a0fa7025e20dc367..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep44xx.S +++ /dev/null @@ -1,388 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * OMAP44xx sleep code. - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Santosh Shilimkar - */ - -#include -#include -#include -#include -#include - -#include "omap-secure.h" - -#include "common.h" -#include "omap44xx.h" -#include "omap4-sar-layout.h" - - .arch armv7-a - -#if defined(CONFIG_SMP) && defined(CONFIG_PM) - - .arch_extension sec -.macro DO_SMC - dsb - smc #0 - dsb -.endm - -#ifdef CONFIG_ARCH_OMAP4 - -/* - * ============================= - * == CPU suspend finisher == - * ============================= - * - * void omap4_finish_suspend(unsigned long cpu_state) - * - * This function code saves the CPU context and performs the CPU - * power down sequence. Calling WFI effectively changes the CPU - * power domains states to the desired target power state. - * - * @cpu_state : contains context save state (r0) - * 0 - No context lost - * 1 - CPUx L1 and logic lost: MPUSS CSWR - * 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR - * 3 - CPUx L1 and logic lost + GIC + L2 lost: MPUSS OFF - * @return: This function never returns for CPU OFF and DORMANT power states. - * Post WFI, CPU transitions to DORMANT or OFF power state and on wake-up - * from this follows a full CPU reset path via ROM code to CPU restore code. - * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET. - * It returns to the caller for CPU INACTIVE and ON power states or in case - * CPU failed to transition to targeted OFF/DORMANT state. - * - * omap4_finish_suspend() calls v7_flush_dcache_all() which doesn't save - * stack frame and it expects the caller to take care of it. Hence the entire - * stack frame is saved to avoid possible stack corruption. 
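- * Illustrative call site (not part of this file): the MPUSS
- * low-power code hands this routine to the generic ARM suspend
- * helper, roughly
- *
- *	cpu_suspend(save_state, omap4_finish_suspend);
- *
- * with save_state being one of the 0..3 context-loss levels listed
- * above.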
- */ -ENTRY(omap4_finish_suspend) - stmfd sp!, {r4-r12, lr} - cmp r0, #0x0 - beq do_WFI @ No lowpower state, jump to WFI - - /* - * Flush all data from the L1 data cache before disabling - * SCTLR.C bit. - */ - bl omap4_get_sar_ram_base - ldr r9, [r0, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne skip_secure_l1_clean - mov r0, #SCU_PM_NORMAL - mov r1, #0xFF @ clean seucre L1 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} -skip_secure_l1_clean: - bl v7_flush_dcache_all - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - bl v7_invalidate_l1 - - /* - * Switch the CPU from Symmetric Multiprocessing (SMP) mode - * to AsymmetricMultiprocessing (AMP) mode by programming - * the SCU power status to DORMANT or OFF mode. - * This enables the CPU to be taken out of coherency by - * preventing the CPU from receiving cache, TLB, or BTB - * maintenance operations broadcast by other CPUs in the cluster. - */ - bl omap4_get_sar_ram_base - mov r8, r0 - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne scu_gp_set - mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR - ands r0, r0, #0x0f - ldreq r0, [r8, #SCU_OFFSET0] - ldrne r0, [r8, #SCU_OFFSET1] - mov r1, #0x00 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} - b skip_scu_gp_set -scu_gp_set: - mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR - ands r0, r0, #0x0f - ldreq r1, [r8, #SCU_OFFSET0] - ldrne r1, [r8, #SCU_OFFSET1] - bl omap4_get_scu_base - bl scu_power_mode -skip_scu_gp_set: - mrc p15, 0, r0, c1, c1, 2 @ Read NSACR data - tst r0, #(1 << 18) - mrcne p15, 0, r0, c1, c0, 1 - bicne r0, r0, #(1 << 6) @ Disable SMP bit - mcrne p15, 0, r0, c1, c0, 1 - isb - dsb -#ifdef CONFIG_CACHE_L2X0 - /* - * Clean and invalidate the L2 cache. - * Common cache-l2x0.c functions can't be used here since it - * uses spinlocks. We are out of coherency here with data cache - * disabled. The spinlock implementation uses exclusive load/store - * instruction which can fail without data cache being enabled. - * OMAP4 hardware doesn't support exclusive monitor which can - * overcome exclusive access issue. Because of this, CPU can - * lead to deadlock. - */ - bl omap4_get_sar_ram_base - mov r8, r0 - mrc p15, 0, r5, c0, c0, 5 @ Read MPIDR - ands r5, r5, #0x0f - ldreq r0, [r8, #L2X0_SAVE_OFFSET0] @ Retrieve L2 state from SAR - ldrne r0, [r8, #L2X0_SAVE_OFFSET1] @ memory. - cmp r0, #3 - bne do_WFI -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x03 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - DO_SMC -#endif - bl omap4_get_l2cache_base - mov r2, r0 - ldr r0, =0xffff - str r0, [r2, #L2X0_CLEAN_INV_WAY] -wait: - ldr r0, [r2, #L2X0_CLEAN_INV_WAY] - ldr r1, =0xffff - ands r0, r0, r1 - bne wait -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - DO_SMC -#endif -l2x_sync: - bl omap4_get_l2cache_base - mov r2, r0 - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync -#endif - -do_WFI: - bl omap_do_wfi - - /* - * CPU is here when it failed to enter OFF/DORMANT or - * no low power state was attempted. - */ - mrc p15, 0, r0, c1, c0, 0 - tst r0, #(1 << 2) @ Check C bit enabled? 
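- @ If the C bit is already set, the conditional orreq/mcreq below
- @ are skipped; otherwise they re-enable the data cache before the
- @ CPU rejoins coherency.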
- orreq r0, r0, #(1 << 2) @ Enable the C bit - mcreq p15, 0, r0, c1, c0, 0 - isb - - /* - * Ensure the CPU power state is set to NORMAL in - * SCU power state so that CPU is back in coherency. - * In non-coherent mode CPU can lock-up and lead to - * system deadlock. - */ - mrc p15, 0, r0, c1, c0, 1 - tst r0, #(1 << 6) @ Check SMP bit enabled? - orreq r0, r0, #(1 << 6) - mcreq p15, 0, r0, c1, c0, 1 - isb - bl omap4_get_sar_ram_base - mov r8, r0 - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne scu_gp_clear - mov r0, #SCU_PM_NORMAL - mov r1, #0x00 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} - b skip_scu_gp_clear -scu_gp_clear: - bl omap4_get_scu_base - mov r1, #SCU_PM_NORMAL - bl scu_power_mode -skip_scu_gp_clear: - isb - dsb - ldmfd sp!, {r4-r12, pc} -ENDPROC(omap4_finish_suspend) - -/* - * ============================ - * == CPU resume entry point == - * ============================ - * - * void omap4_cpu_resume(void) - * - * ROM code jumps to this function while waking up from CPU - * OFF or DORMANT state. Physical address of the function is - * stored in the SAR RAM while entering to OFF or DORMANT mode. - * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET. - */ -ENTRY(omap4_cpu_resume) - /* - * Configure ACTRL and enable NS SMP bit access on CPU1 on HS device. - * OMAP44XX EMU/HS devices - CPU0 SMP bit access is enabled in PPA - * init and for CPU1, a secure PPA API provided. CPU0 must be ON - * while executing NS_SMP API on CPU1 and PPA version must be 1.4.0+. - * OMAP443X GP devices- SMP bit isn't accessible. - * OMAP446X GP devices - SMP bit access is enabled on both CPUs. - */ - ldr r8, =OMAP44XX_SAR_RAM_BASE - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Skip if GP device - bne skip_ns_smp_enable - mrc p15, 0, r0, c0, c0, 5 - ands r0, r0, #0x0f - beq skip_ns_smp_enable -ppa_actrl_retry: - mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX - adr r1, ppa_zero_params_offset - ldr r3, [r1] - add r3, r3, r1 @ Pointer to ppa_zero_params - mov r1, #0x0 @ Process ID - mov r2, #0x4 @ Flag - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - DO_SMC - cmp r0, #0x0 @ API returns 0 on success. - beq enable_smp_bit - b ppa_actrl_retry -enable_smp_bit: - mrc p15, 0, r0, c1, c0, 1 - tst r0, #(1 << 6) @ Check SMP bit enabled? - orreq r0, r0, #(1 << 6) - mcreq p15, 0, r0, c1, c0, 1 - isb -skip_ns_smp_enable: -#ifdef CONFIG_CACHE_L2X0 - /* - * Restore the L2 AUXCTRL and enable the L2 cache. - * OMAP4_MON_L2X0_AUXCTRL_INDEX = Program the L2X0 AUXCTRL - * OMAP4_MON_L2X0_CTRL_INDEX = Enable the L2 using L2X0 CTRL - * register r0 contains value to be programmed. - * L2 cache is already invalidate by ROM code as part - * of MPUSS OFF wakeup path. 
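- * All of the monitor calls below follow the same convention: the
- * value to program is loaded into r0, the secure service index into
- * r12, and the DO_SMC macro wraps the smc between two dsb barriers.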
- */ - ldr r2, =OMAP44XX_L2CACHE_BASE - ldr r0, [r2, #L2X0_CTRL] - and r0, #0x0f - cmp r0, #1 - beq skip_l2en @ Skip if already enabled - ldr r3, =OMAP44XX_SAR_RAM_BASE - ldr r1, [r3, #OMAP_TYPE_OFFSET] - cmp r1, #0x1 @ Check for HS device - bne set_gp_por - ldr r0, =OMAP4_PPA_L2_POR_INDEX - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - adr r1, ppa_por_params_offset - ldr r3, [r1] - add r3, r3, r1 @ Pointer to ppa_por_params - str r4, [r3, #0x04] - mov r1, #0x0 @ Process ID - mov r2, #0x4 @ Flag - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - DO_SMC - b set_aux_ctrl -set_gp_por: - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r0, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - ldr r12, =OMAP4_MON_L2X0_PREFETCH_INDEX @ Setup L2 PREFETCH - DO_SMC -set_aux_ctrl: - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r0, [r1, #L2X0_AUXCTRL_OFFSET] - ldr r12, =OMAP4_MON_L2X0_AUXCTRL_INDEX @ Setup L2 AUXCTRL - DO_SMC - mov r0, #0x1 - ldr r12, =OMAP4_MON_L2X0_CTRL_INDEX @ Enable L2 cache - DO_SMC -skip_l2en: -#endif - - b cpu_resume @ Jump to generic resume -ppa_por_params_offset: - .long ppa_por_params - . -ENDPROC(omap4_cpu_resume) -#endif /* CONFIG_ARCH_OMAP4 */ - -#endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ - -ENTRY(omap_do_wfi) - stmfd sp!, {lr} -#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER - /* Drain interconnect write buffers. */ - bl omap_interconnect_sync -#endif - - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * by any CPU in the cluster have completed. - */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Sixteen - * NOPs as per Cortex-A9 pipeline. - */ - wfi @ Wait For Interrupt - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - ldmfd sp!, {pc} -ppa_zero_params_offset: - .long ppa_zero_params - . -ENDPROC(omap_do_wfi) - - .data - .align 2 -ppa_zero_params: - .word 0 - -ppa_por_params: - .word 1, 0 diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S deleted file mode 100644 index 92ef21ac2ac151ddd3b3ed0c2e0cffc991a7ee1b..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sram242x.S +++ /dev/null @@ -1,317 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sram242x.S - * - * Omap2 specific functions that need to be run in internal SRAM - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * Richard Woodruff notes that any changes to this code must be carefully - * audited and tested to ensure that they don't cause a TLB miss while - * the SDRAM is inaccessible. Such a situation will crash the system - * since it will cause the ARM MMU to attempt to walk the page tables. - * These crashes may be intermittent. 
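- * For context (a sketch based on the OMAP2 SRAM plumbing, not part
- * of this file): each routine below is copied into on-chip SRAM at
- * init time and invoked through a function pointer, roughly
- *
- *	_omap2_sram_ddr_init = omap_sram_push(omap242x_sram_ddr_init,
- *					      omap242x_sram_ddr_init_sz);
- *
- * which is why every ENTRY here is paired with a size word computed
- * as '. - entry_point'.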
- */ -#include - -#include - -#include "soc.h" -#include "iomap.h" -#include "prm2xxx.h" -#include "cm2xxx.h" -#include "sdrc.h" - - .text - - .align 3 -ENTRY(omap242x_sram_ddr_init) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r12, r2 @ capture CS1 vs CS0 - mov r8, r3 @ capture force parameter - - /* frequency shift down */ - ldr r2, omap242x_sdi_cm_clksel2_pll @ get address of dpllout reg - mov r3, #0x1 @ value for 1x operation - str r3, [r2] @ go to L1-freq operation - - /* voltage shift down */ - mov r9, #0x1 @ set up for L1 voltage call - bl voltage_shift @ go drop voltage - - /* dll lock mode */ - ldr r11, omap242x_sdi_sdrc_dlla_ctrl @ addr of dlla ctrl - ldr r10, [r11] @ get current val - cmp r12, #0x1 @ cs1 base (2422 es2.05/1) - addeq r11, r11, #0x8 @ if cs1 base, move to DLLB - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode. - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - orr r10, r10, #0x2 @ 90 degree phase for all below 133MHz - str r10, [r11] @ commit to DLLA_CTRL - bl i_dll_wait @ wait for dll to lock - - /* get dll value */ - add r11, r11, #0x4 @ get addr of status reg - ldr r10, [r11] @ get locked value - - /* voltage shift up */ - mov r9, #0x0 @ shift back to L0-voltage - bl voltage_shift @ go raise voltage - - /* frequency shift up */ - mov r3, #0x2 @ value for 2x operation - str r3, [r2] @ go to L0-freq operation - - /* reset entry mode for dllctrl */ - sub r11, r11, #0x4 @ move from status to ctrl - cmp r12, #0x1 @ normalize if cs1 based - subeq r11, r11, #0x8 @ possibly back to DLLA - cmp r8, #0x1 @ if forced unlock exit - orreq r1, r1, #0x4 @ make sure exit with unlocked value - str r1, [r11] @ restore DLLA_CTRL high value - add r11, r11, #0x8 @ move to DLLB_CTRL addr - str r1, [r11] @ set value DLLB_CTRL - bl i_dll_wait @ wait for possible lock - - /* set up for return, DDR should be good */ - str r10, [r0] @ write dll_status and return counter - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - - /* ensure the DLL has relocked */ -i_dll_wait: - mov r4, #0x800 @ delay DLL relock, min 0x400 L3 clocks -i_dll_delay: - subs r4, r4, #0x1 - bne i_dll_delay - ret lr - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift: - ldr r4, omap242x_sdi_prcm_voltctrl @ get addr of volt ctrl. - ldr r5, [r4] @ get value. - ldr r6, prcm_mask_val @ get value of mask - and r5, r5, r6 @ apply mask to clear bits - orr r5, r5, r9 @ bulld value for L0/L1-volt operation. - str r5, [r4] @ set up for change. - mov r3, #0x4000 @ get val for force - orr r5, r5, r3 @ build value for force - str r5, [r4] @ Force transition to L1 - - ldr r3, omap242x_sdi_timer_32ksynct_cr @ get addr of counter - ldr r5, [r3] @ get value - add r5, r5, #0x3 @ give it at most 93uS -volt_delay: - ldr r7, [r3] @ get timer value - cmp r5, r7 @ time up? - bhi volt_delay @ not yet->branch - ret lr @ back to caller. - -omap242x_sdi_cm_clksel2_pll: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap242x_sdi_sdrc_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap242x_sdi_prcm_voltctrl: - .word OMAP2420_PRCM_VOLTCTRL -prcm_mask_val: - .word 0xFFFF3FFC -omap242x_sdi_timer_32ksynct_cr: - .word OMAP2_L4_IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010) -ENTRY(omap242x_sram_ddr_init_sz) - .word . - omap242x_sram_ddr_init - -/* - * Reprograms memory timings. 
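- * Seen from C, the equivalent prototype is roughly (an assumed
- * declaration for illustration):
- *
- *	void omap242x_sram_reprogram_sdrc(u32 perf_level, u32 dll_val,
- *					  u32 mem_type);
- *
- * with the arguments mapping onto r0..r2 as follows: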
- * r0 = [PRCM_FULL | PRCM_HALF] r1 = SDRC_DLLA_CTRL value r2 = [DDR | SDR] - * PRCM_FULL = 2, PRCM_HALF = 1, DDR = 1, SDR = 0 - */ - .align 3 -ENTRY(omap242x_sram_reprogram_sdrc) - stmfd sp!, {r0 - r10, lr} @ save registers on stack - mov r3, #0x0 @ clear for mrc call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, finish ARM SDR/DDR - nop - nop - ldr r6, omap242x_srs_sdrc_rfr_ctrl @ get addr of refresh reg - ldr r5, [r6] @ get value - mov r5, r5, lsr #8 @ isolate rfr field and drop burst - - cmp r0, #0x1 @ going to half speed? - movne r9, #0x0 @ if up set flag up for pre up, hi volt - - blne voltage_shift_c @ adjust voltage - - cmp r0, #0x1 @ going to half speed (post branch link) - moveq r5, r5, lsr #1 @ divide by 2 if to half - movne r5, r5, lsl #1 @ mult by 2 if to full - mov r5, r5, lsl #8 @ put rfr field back into place - add r5, r5, #0x1 @ turn on burst of 1 - ldr r4, omap242x_srs_cm_clksel2_pll @ get address of out reg - ldr r3, [r4] @ get curr value - orr r3, r3, #0x3 - bic r3, r3, #0x3 @ clear lower bits - orr r3, r3, r0 @ new state value - str r3, [r4] @ set new state (pll/x, x=1 or 2) - nop - nop - - moveq r9, #0x1 @ if speed down, post down, drop volt - bleq voltage_shift_c - - mcr p15, 0, r3, c7, c10, 4 @ memory barrier - str r5, [r6] @ set new RFR_1 value - add r6, r6, #0x30 @ get RFR_2 addr - str r5, [r6] @ set RFR_2 - nop - cmp r2, #0x1 @ (SDR or DDR) do we need to adjust DLL - bne freq_out @ leave if SDR, no DLL function - - /* With DDR, we need to take care of the DLL for the frequency change */ - ldr r2, omap242x_srs_sdrc_dlla_ctrl @ addr of dlla ctrl - str r1, [r2] @ write out new SDRC_DLLA_CTRL - add r2, r2, #0x8 @ addr to SDRC_DLLB_CTRL - str r1, [r2] @ commit to SDRC_DLLB_CTRL - mov r1, #0x2000 @ wait DLL relock, min 0x400 L3 clocks -dll_wait: - subs r1, r1, #0x1 - bne dll_wait -freq_out: - ldmfd sp!, {r0 - r10, pc} @ restore regs and return - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift_c: - ldr r10, omap242x_srs_prcm_voltctrl @ get addr of volt ctrl - ldr r8, [r10] @ get value - ldr r7, ddr_prcm_mask_val @ get value of mask - and r8, r8, r7 @ apply mask to clear bits - orr r8, r8, r9 @ bulld value for L0/L1-volt operation. - str r8, [r10] @ set up for change. - mov r7, #0x4000 @ get val for force - orr r8, r8, r7 @ build value for force - str r8, [r10] @ Force transition to L1 - - ldr r10, omap242x_srs_timer_32ksynct @ get addr of counter - ldr r8, [r10] @ get value - add r8, r8, #0x2 @ give it at most 62uS (min 31+) -volt_delay_c: - ldr r7, [r10] @ get timer value - cmp r8, r7 @ time up? - bhi volt_delay_c @ not yet->branch - ret lr @ back to caller - -omap242x_srs_cm_clksel2_pll: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap242x_srs_sdrc_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap242x_srs_sdrc_rfr_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap242x_srs_prcm_voltctrl: - .word OMAP2420_PRCM_VOLTCTRL -ddr_prcm_mask_val: - .word 0xFFFF3FFC -omap242x_srs_timer_32ksynct: - .word OMAP2_L4_IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010) - -ENTRY(omap242x_sram_reprogram_sdrc_sz) - .word . - omap242x_sram_reprogram_sdrc - -/* - * Set dividers and pll. Also recalculate DLL value for DDR and unlock mode. 
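- * The pbegin/pend labels below bracket the code that must run from
- * the I-cache while the DPLL is dropped into bypass and relocked;
- * the mcrr at entry preloads that whole range so that no instruction
- * fetch has to touch SDRAM mid-sequence.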
- */ - .align 3 -ENTRY(omap242x_sram_set_prcm) - stmfd sp!, {r0-r12, lr} @ regs to stack - adr r4, pbegin @ addr of preload start - adr r8, pend @ addr of preload end - mcrr p15, 1, r8, r4, c12 @ preload into icache -pbegin: - /* move into fast relock bypass */ - ldr r8, omap242x_ssp_pll_ctl @ get addr - ldr r5, [r8] @ get val - mvn r6, #0x3 @ clear mask - and r5, r5, r6 @ clear field - orr r7, r5, #0x2 @ fast relock val - str r7, [r8] @ go to fast relock - ldr r4, omap242x_ssp_pll_stat @ addr of stat -block: - /* wait for bypass */ - ldr r8, [r4] @ stat value - and r8, r8, #0x3 @ mask for stat - cmp r8, #0x1 @ there yet - bne block @ loop if not - - /* set new dpll dividers _after_ in bypass */ - ldr r4, omap242x_ssp_pll_div @ get addr - str r0, [r4] @ set dpll ctrl val - - ldr r4, omap242x_ssp_set_config @ get addr - mov r8, #1 @ valid cfg msk - str r8, [r4] @ make dividers take - - mov r4, #100 @ dead spin a bit -wait_a_bit: - subs r4, r4, #1 @ dec loop - bne wait_a_bit @ delay done? - - /* check if staying in bypass */ - cmp r2, #0x1 @ stay in bypass? - beq pend @ jump over dpll relock - - /* relock DPLL with new vals */ - ldr r5, omap242x_ssp_pll_stat @ get addr - ldr r4, omap242x_ssp_pll_ctl @ get addr - orr r8, r7, #0x3 @ val for lock dpll - str r8, [r4] @ set val - mov r0, #1000 @ dead spin a bit -wait_more: - subs r0, r0, #1 @ dec loop - bne wait_more @ delay done? -wait_lock: - ldr r8, [r5] @ get lock val - and r8, r8, #3 @ isolate field - cmp r8, #2 @ locked? - bne wait_lock @ wait if not -pend: - /* update memory timings & briefly lock dll */ - ldr r4, omap242x_ssp_sdrc_rfr @ get addr - str r1, [r4] @ update refresh timing - ldr r11, omap242x_ssp_dlla_ctrl @ get addr of DLLA ctrl - ldr r10, [r11] @ get current val - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - str r10, [r11] @ commit to DLLA_CTRL - add r11, r11, #0x8 @ move to dllb - str r10, [r11] @ hit DLLB also - - mov r4, #0x800 @ relock time (min 0x400 L3 clocks) -wait_dll_lock: - subs r4, r4, #0x1 - bne wait_dll_lock - nop - ldmfd sp!, {r0-r12, pc} @ restore regs and return - -omap242x_ssp_set_config: - .word OMAP2420_PRCM_CLKCFG_CTRL -omap242x_ssp_pll_ctl: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKEN) -omap242x_ssp_pll_stat: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_IDLEST) -omap242x_ssp_pll_div: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL1) -omap242x_ssp_sdrc_rfr: - .word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap242x_ssp_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) - -ENTRY(omap242x_sram_set_prcm_sz) - .word . - omap242x_sram_set_prcm diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S deleted file mode 100644 index faf03b7f08f5c6f3efaf2ad91a6d97034b5e28db..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sram243x.S +++ /dev/null @@ -1,317 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sram243x.S - * - * Omap2 specific functions that need to be run in internal SRAM - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * Richard Woodruff notes that any changes to this code must be carefully - * audited and tested to ensure that they don't cause a TLB miss while - * the SDRAM is inaccessible. Such a situation will crash the system - * since it will cause the ARM MMU to attempt to walk the page tables. - * These crashes may be intermittent. 
- */ -#include - -#include - -#include "soc.h" -#include "iomap.h" -#include "prm2xxx.h" -#include "cm2xxx.h" -#include "sdrc.h" - - .text - - .align 3 -ENTRY(omap243x_sram_ddr_init) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r12, r2 @ capture CS1 vs CS0 - mov r8, r3 @ capture force parameter - - /* frequency shift down */ - ldr r2, omap243x_sdi_cm_clksel2_pll @ get address of dpllout reg - mov r3, #0x1 @ value for 1x operation - str r3, [r2] @ go to L1-freq operation - - /* voltage shift down */ - mov r9, #0x1 @ set up for L1 voltage call - bl voltage_shift @ go drop voltage - - /* dll lock mode */ - ldr r11, omap243x_sdi_sdrc_dlla_ctrl @ addr of dlla ctrl - ldr r10, [r11] @ get current val - cmp r12, #0x1 @ cs1 base (2422 es2.05/1) - addeq r11, r11, #0x8 @ if cs1 base, move to DLLB - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode. - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - orr r10, r10, #0x2 @ 90 degree phase for all below 133MHz - str r10, [r11] @ commit to DLLA_CTRL - bl i_dll_wait @ wait for dll to lock - - /* get dll value */ - add r11, r11, #0x4 @ get addr of status reg - ldr r10, [r11] @ get locked value - - /* voltage shift up */ - mov r9, #0x0 @ shift back to L0-voltage - bl voltage_shift @ go raise voltage - - /* frequency shift up */ - mov r3, #0x2 @ value for 2x operation - str r3, [r2] @ go to L0-freq operation - - /* reset entry mode for dllctrl */ - sub r11, r11, #0x4 @ move from status to ctrl - cmp r12, #0x1 @ normalize if cs1 based - subeq r11, r11, #0x8 @ possibly back to DLLA - cmp r8, #0x1 @ if forced unlock exit - orreq r1, r1, #0x4 @ make sure exit with unlocked value - str r1, [r11] @ restore DLLA_CTRL high value - add r11, r11, #0x8 @ move to DLLB_CTRL addr - str r1, [r11] @ set value DLLB_CTRL - bl i_dll_wait @ wait for possible lock - - /* set up for return, DDR should be good */ - str r10, [r0] @ write dll_status and return counter - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - - /* ensure the DLL has relocked */ -i_dll_wait: - mov r4, #0x800 @ delay DLL relock, min 0x400 L3 clocks -i_dll_delay: - subs r4, r4, #0x1 - bne i_dll_delay - ret lr - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift: - ldr r4, omap243x_sdi_prcm_voltctrl @ get addr of volt ctrl. - ldr r5, [r4] @ get value. - ldr r6, prcm_mask_val @ get value of mask - and r5, r5, r6 @ apply mask to clear bits - orr r5, r5, r9 @ bulld value for L0/L1-volt operation. - str r5, [r4] @ set up for change. - mov r3, #0x4000 @ get val for force - orr r5, r5, r3 @ build value for force - str r5, [r4] @ Force transition to L1 - - ldr r3, omap243x_sdi_timer_32ksynct_cr @ get addr of counter - ldr r5, [r3] @ get value - add r5, r5, #0x3 @ give it at most 93uS -volt_delay: - ldr r7, [r3] @ get timer value - cmp r5, r7 @ time up? - bhi volt_delay @ not yet->branch - ret lr @ back to caller. - -omap243x_sdi_cm_clksel2_pll: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap243x_sdi_sdrc_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap243x_sdi_prcm_voltctrl: - .word OMAP2430_PRCM_VOLTCTRL -prcm_mask_val: - .word 0xFFFF3FFC -omap243x_sdi_timer_32ksynct_cr: - .word OMAP2_L4_IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010) -ENTRY(omap243x_sram_ddr_init_sz) - .word . - omap243x_sram_ddr_init - -/* - * Reprograms memory timings. 
- * r0 = [PRCM_FULL | PRCM_HALF] r1 = SDRC_DLLA_CTRL value r2 = [DDR | SDR] - * PRCM_FULL = 2, PRCM_HALF = 1, DDR = 1, SDR = 0 - */ - .align 3 -ENTRY(omap243x_sram_reprogram_sdrc) - stmfd sp!, {r0 - r10, lr} @ save registers on stack - mov r3, #0x0 @ clear for mrc call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, finish ARM SDR/DDR - nop - nop - ldr r6, omap243x_srs_sdrc_rfr_ctrl @ get addr of refresh reg - ldr r5, [r6] @ get value - mov r5, r5, lsr #8 @ isolate rfr field and drop burst - - cmp r0, #0x1 @ going to half speed? - movne r9, #0x0 @ if up set flag up for pre up, hi volt - - blne voltage_shift_c @ adjust voltage - - cmp r0, #0x1 @ going to half speed (post branch link) - moveq r5, r5, lsr #1 @ divide by 2 if to half - movne r5, r5, lsl #1 @ mult by 2 if to full - mov r5, r5, lsl #8 @ put rfr field back into place - add r5, r5, #0x1 @ turn on burst of 1 - ldr r4, omap243x_srs_cm_clksel2_pll @ get address of out reg - ldr r3, [r4] @ get curr value - orr r3, r3, #0x3 - bic r3, r3, #0x3 @ clear lower bits - orr r3, r3, r0 @ new state value - str r3, [r4] @ set new state (pll/x, x=1 or 2) - nop - nop - - moveq r9, #0x1 @ if speed down, post down, drop volt - bleq voltage_shift_c - - mcr p15, 0, r3, c7, c10, 4 @ memory barrier - str r5, [r6] @ set new RFR_1 value - add r6, r6, #0x30 @ get RFR_2 addr - str r5, [r6] @ set RFR_2 - nop - cmp r2, #0x1 @ (SDR or DDR) do we need to adjust DLL - bne freq_out @ leave if SDR, no DLL function - - /* With DDR, we need to take care of the DLL for the frequency change */ - ldr r2, omap243x_srs_sdrc_dlla_ctrl @ addr of dlla ctrl - str r1, [r2] @ write out new SDRC_DLLA_CTRL - add r2, r2, #0x8 @ addr to SDRC_DLLB_CTRL - str r1, [r2] @ commit to SDRC_DLLB_CTRL - mov r1, #0x2000 @ wait DLL relock, min 0x400 L3 clocks -dll_wait: - subs r1, r1, #0x1 - bne dll_wait -freq_out: - ldmfd sp!, {r0 - r10, pc} @ restore regs and return - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift_c: - ldr r10, omap243x_srs_prcm_voltctrl @ get addr of volt ctrl - ldr r8, [r10] @ get value - ldr r7, ddr_prcm_mask_val @ get value of mask - and r8, r8, r7 @ apply mask to clear bits - orr r8, r8, r9 @ bulld value for L0/L1-volt operation. - str r8, [r10] @ set up for change. - mov r7, #0x4000 @ get val for force - orr r8, r8, r7 @ build value for force - str r8, [r10] @ Force transition to L1 - - ldr r10, omap243x_srs_timer_32ksynct @ get addr of counter - ldr r8, [r10] @ get value - add r8, r8, #0x2 @ give it at most 62uS (min 31+) -volt_delay_c: - ldr r7, [r10] @ get timer value - cmp r8, r7 @ time up? - bhi volt_delay_c @ not yet->branch - ret lr @ back to caller - -omap243x_srs_cm_clksel2_pll: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap243x_srs_sdrc_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap243x_srs_sdrc_rfr_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap243x_srs_prcm_voltctrl: - .word OMAP2430_PRCM_VOLTCTRL -ddr_prcm_mask_val: - .word 0xFFFF3FFC -omap243x_srs_timer_32ksynct: - .word OMAP2_L4_IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010) - -ENTRY(omap243x_sram_reprogram_sdrc_sz) - .word . - omap243x_sram_reprogram_sdrc - -/* - * Set dividers and pll. Also recalculate DLL value for DDR and unlock mode. 
- */ - .align 3 -ENTRY(omap243x_sram_set_prcm) - stmfd sp!, {r0-r12, lr} @ regs to stack - adr r4, pbegin @ addr of preload start - adr r8, pend @ addr of preload end - mcrr p15, 1, r8, r4, c12 @ preload into icache -pbegin: - /* move into fast relock bypass */ - ldr r8, omap243x_ssp_pll_ctl @ get addr - ldr r5, [r8] @ get val - mvn r6, #0x3 @ clear mask - and r5, r5, r6 @ clear field - orr r7, r5, #0x2 @ fast relock val - str r7, [r8] @ go to fast relock - ldr r4, omap243x_ssp_pll_stat @ addr of stat -block: - /* wait for bypass */ - ldr r8, [r4] @ stat value - and r8, r8, #0x3 @ mask for stat - cmp r8, #0x1 @ there yet - bne block @ loop if not - - /* set new dpll dividers _after_ in bypass */ - ldr r4, omap243x_ssp_pll_div @ get addr - str r0, [r4] @ set dpll ctrl val - - ldr r4, omap243x_ssp_set_config @ get addr - mov r8, #1 @ valid cfg msk - str r8, [r4] @ make dividers take - - mov r4, #100 @ dead spin a bit -wait_a_bit: - subs r4, r4, #1 @ dec loop - bne wait_a_bit @ delay done? - - /* check if staying in bypass */ - cmp r2, #0x1 @ stay in bypass? - beq pend @ jump over dpll relock - - /* relock DPLL with new vals */ - ldr r5, omap243x_ssp_pll_stat @ get addr - ldr r4, omap243x_ssp_pll_ctl @ get addr - orr r8, r7, #0x3 @ val for lock dpll - str r8, [r4] @ set val - mov r0, #1000 @ dead spin a bit -wait_more: - subs r0, r0, #1 @ dec loop - bne wait_more @ delay done? -wait_lock: - ldr r8, [r5] @ get lock val - and r8, r8, #3 @ isolate field - cmp r8, #2 @ locked? - bne wait_lock @ wait if not -pend: - /* update memory timings & briefly lock dll */ - ldr r4, omap243x_ssp_sdrc_rfr @ get addr - str r1, [r4] @ update refresh timing - ldr r11, omap243x_ssp_dlla_ctrl @ get addr of DLLA ctrl - ldr r10, [r11] @ get current val - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - str r10, [r11] @ commit to DLLA_CTRL - add r11, r11, #0x8 @ move to dllb - str r10, [r11] @ hit DLLB also - - mov r4, #0x800 @ relock time (min 0x400 L3 clocks) -wait_dll_lock: - subs r4, r4, #0x1 - bne wait_dll_lock - nop - ldmfd sp!, {r0-r12, pc} @ restore regs and return - -omap243x_ssp_set_config: - .word OMAP2430_PRCM_CLKCFG_CTRL -omap243x_ssp_pll_ctl: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKEN) -omap243x_ssp_pll_stat: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_IDLEST) -omap243x_ssp_pll_div: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL1) -omap243x_ssp_sdrc_rfr: - .word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap243x_ssp_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) - -ENTRY(omap243x_sram_set_prcm_sz) - .word . - omap243x_sram_set_prcm diff --git a/arch/arm/mach-oxnas/headsmp.S b/arch/arm/mach-oxnas/headsmp.S deleted file mode 100644 index 9c0f1479f33a3db6c4f8a00732fecb77319aee79..0000000000000000000000000000000000000000 --- a/arch/arm/mach-oxnas/headsmp.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 Ma Haijun - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include - - __INIT - -/* - * OX820 specific entry point for secondary CPUs. 
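- * The stub below invalidates the I/D caches and the branch target
- * cache via CP15 c7, since nothing can be assumed about their state
- * when the core leaves reset, and then falls through to the generic
- * secondary_startup.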
- */ -ENTRY(ox820_secondary_startup) - mov r4, #0 - /* invalidate both caches and branch target cache */ - mcr p15, 0, r4, c7, c7, 0 - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S deleted file mode 100644 index 88ea1243942ab244400e69bd3ef31a6ddb05778d..0000000000000000000000000000000000000000 --- a/arch/arm/mach-prima2/headsmp.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Entry of the second core for CSR Marco dual-core SMP SoCs - * - * Copyright (c) 2012 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#include -#include - -/* - * SIRFSOC specific entry point for secondary CPUs. This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(sirfsoc_secondary_startup) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup -ENDPROC(sirfsoc_secondary_startup) - - .align -1: .long . - .long prima2_pen_release diff --git a/arch/arm/mach-prima2/sleep.S b/arch/arm/mach-prima2/sleep.S deleted file mode 100644 index d9bbc5ca39ef84baf6cd979a51e15f7366172e0a..0000000000000000000000000000000000000000 --- a/arch/arm/mach-prima2/sleep.S +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * sleep mode for CSR SiRFprimaII - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#include -#include -#include - -#include "pm.h" - -#define DENALI_CTL_22_OFF 0x58 -#define DENALI_CTL_112_OFF 0x1c0 - - .text - -ENTRY(sirfsoc_finish_suspend) - @ r5: mem controller - ldr r0, =sirfsoc_memc_base - ldr r5, [r0] - @ r6: pwrc base offset - ldr r0, =sirfsoc_pwrc_base - ldr r6, [r0] - @ r7: rtc iobrg controller - ldr r0, =sirfsoc_rtciobrg_base - ldr r7, [r0] - - @ Read the power control register and set the - @ sleep force bit. - add r0, r6, #SIRFSOC_PWRC_PDN_CTRL - bl __sirfsoc_rtc_iobrg_readl - orr r0,r0,#SIRFSOC_PWR_SLEEPFORCE - add r1, r6, #SIRFSOC_PWRC_PDN_CTRL - bl sirfsoc_rtc_iobrg_pre_writel - mov r1, #0x1 - - @ read the MEM ctl register and set the self - @ refresh bit - - ldr r2, [r5, #DENALI_CTL_22_OFF] - orr r2, r2, #0x1 - - @ Following code has to run from cache since - @ the RAM is going to self refresh mode - .align 5 - str r2, [r5, #DENALI_CTL_22_OFF] - -1: - ldr r4, [r5, #DENALI_CTL_112_OFF] - tst r4, #0x1 - bne 1b - - @ write SLEEPFORCE through rtc iobridge - - str r1, [r7] - @ wait rtc io bridge sync -1: - ldr r3, [r7] - tst r3, #0x01 - bne 1b - b . diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S deleted file mode 100644 index 4ad2fa27fc417ed7854cbc5c1516519cde843bb7..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/mioa701_bootresume.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Bootloader to resume MIO A701 - * - * 2007-1-12 Robert Jarzmik -*/ - -#include -#include - -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow jumpaddr to be accessed with a relative load - * while we can't rely on any MMU translation. 
We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data - .align 2 -ENTRY(mioa701_bootstrap) -0: - b 1f -ENTRY(mioa701_jumpaddr) - .word 0x40f00008 @ PSPR in no-MMU mode -1: - mov r0, #0xa0000000 @ Don't suppose memory access works - orr r0, r0, #0x00200000 @ even if it's supposed to - orr r0, r0, #0x0000b000 - mov r1, #0 - str r1, [r0] @ Early disable resume for next boot - ldr r0, mioa701_jumpaddr @ (Murphy's Law) - ldr r0, [r0] - ret r0 -2: - -ENTRY(mioa701_bootstrap_lg) - .data - .align 2 - .word 2b-0b diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S deleted file mode 100644 index 6c5b3ffd2cd3f53900a4106696566dee2a2b9c5f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/sleep.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Low-level PXA250/210 sleep/wakeUp support - * - * Initial SA1110 code: - * Copyright (c) 2001 Cliff Brake - * - * Adapted for PXA by Nicolas Pitre: - * Copyright (c) 2002 Monta Vista Software, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License. - */ - -#include -#include -#include -#include -#include - -#define MDREFR_KDIV 0x200a4000 // all banks -#define CCCR_SLEEP 0x00000107 // L=7 2N=2 A=0 PPDIS=0 CPDIS=0 - - .text - -#ifdef CONFIG_PXA3xx -/* - * pxa3xx_finish_suspend() - forces CPU into sleep state (S2D3C4) - */ -ENTRY(pxa3xx_finish_suspend) - mov r0, #0x06 @ S2D3C4 mode - mcr p14, 0, r0, c7, c0, 0 @ enter sleep - -20: b 20b @ waiting for sleep -#endif /* CONFIG_PXA3xx */ - -#ifdef CONFIG_PXA27x -/* - * pxa27x_finish_suspend() - * - * Forces CPU into sleep state. - * - * r0 = value for PWRMODE M field for desired sleep state - */ -ENTRY(pxa27x_finish_suspend) - @ Put the processor to sleep - @ (also workaround for sighting 28071) - - @ prepare value for sleep mode - mov r1, r0 @ sleep mode - - @ prepare pointer to physical address 0 (virtual mapping in generic.c) - mov r2, #UNCACHED_PHYS_0 - - @ prepare SDRAM refresh settings - ldr r4, =MDREFR - ldr r5, [r4] - - @ enable SDRAM self-refresh mode - orr r5, r5, #MDREFR_SLFRSH - - @ set SDCLKx divide-by-2 bits (this is part of a workaround for Errata 50) - ldr r6, =MDREFR_KDIV - orr r5, r5, r6 - - @ Intel PXA270 Specification Update notes problems sleeping - @ with core operating above 91 MHz - @ (see Errata 50, ...processor does not exit from sleep...) - - ldr r6, =CCCR - ldr r8, [r6] @ keep original value for resume - - ldr r7, =CCCR_SLEEP @ prepare CCCR sleep value - mov r0, #0x2 @ prepare value for CLKCFG - - @ align execution to a cache line - b pxa_cpu_do_suspend -#endif - -#ifdef CONFIG_PXA25x -/* - * pxa25x_finish_suspend() - * - * Forces CPU into sleep state. - * - * r0 = value for PWRMODE M field for desired sleep state - */ - -ENTRY(pxa25x_finish_suspend) - @ prepare value for sleep mode - mov r1, r0 @ sleep mode - - @ prepare pointer to physical address 0 (virtual mapping in generic.c) - mov r2, #UNCACHED_PHYS_0 - - @ prepare SDRAM refresh settings - ldr r4, =MDREFR - ldr r5, [r4] - - @ enable SDRAM self-refresh mode - orr r5, r5, #MDREFR_SLFRSH - - @ Intel PXA255 Specification Update notes problems - @ about suspending with PXBus operating above 133MHz - @ (see Errata 31, GPIO output signals, ... 
unpredictable in sleep - @ - @ We keep the change-down close to the actual suspend on SDRAM - @ as possible to eliminate messing about with the refresh clock - @ as the system will restore with the original speed settings - @ - @ Ben Dooks, 13-Sep-2004 - - ldr r6, =CCCR - ldr r8, [r6] @ keep original value for resume - - @ ensure x1 for run and turbo mode with memory clock - bic r7, r8, #CCCR_M_MASK | CCCR_N_MASK - orr r7, r7, #(1<<5) | (2<<7) - - @ check that the memory frequency is within limits - and r14, r7, #CCCR_L_MASK - teq r14, #1 - bicne r7, r7, #CCCR_L_MASK - orrne r7, r7, #1 @@ 99.53MHz - - @ get ready for the change - - @ note, turbo is not preserved over sleep so there is no - @ point in preserving it here. we save it on the stack with the - @ other CP registers instead. - mov r0, #0 - mcr p14, 0, r0, c6, c0, 0 - orr r0, r0, #2 @ initiate change bit - b pxa_cpu_do_suspend -#endif - - .ltorg - .align 5 -pxa_cpu_do_suspend: - - @ All needed values are now in registers. - @ These last instructions should be in cache - - @ initiate the frequency change... - str r7, [r6] - mcr p14, 0, r0, c6, c0, 0 - - @ restore the original cpu speed value for resume - str r8, [r6] - - @ need 6 13-MHz cycles before changing PWRMODE - @ just set frequency to 91-MHz... 6*91/13 = 42 - - mov r0, #42 -10: subs r0, r0, #1 - bne 10b - - @ Do not reorder... - @ Intel PXA270 Specification Update notes problems performing - @ external accesses after SDRAM is put in self-refresh mode - @ (see Errata 38 ...hangs when entering self-refresh mode) - - @ force address lines low by reading at physical address 0 - ldr r3, [r2] - - @ put SDRAM into self-refresh - str r5, [r4] - - @ enter sleep mode - mcr p14, 0, r1, c7, c0, 0 @ PWRMODE - -20: b 20b @ loop waiting for sleep diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S deleted file mode 100644 index eab1645bb4adb93a37108dc30f6c335dc75f31e1..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/standby.S +++ /dev/null @@ -1,114 +0,0 @@ -/* - * PXA27x standby mode - * - * Author: David Burrage - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include -#include -#include - -#include - - .text - -#ifdef CONFIG_PXA27x -ENTRY(pxa_cpu_standby) - ldr r0, =PSSR - mov r1, #(PSSR_PH | PSSR_STS) - mov r2, #PWRMODE_STANDBY - mov r3, #UNCACHED_PHYS_0 @ Read mem context in. - ldr ip, [r3] - b 1f - - .align 5 -1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby - str r1, [r0] @ make sure PSSR_PH/STS are clear - ret lr - -#endif - -#ifdef CONFIG_PXA3xx - -#define PXA3_MDCNFG 0x0000 -#define PXA3_MDCNFG_DMCEN (1 << 30) -#define PXA3_DDR_HCAL 0x0060 -#define PXA3_DDR_HCAL_HCRNG 0x1f -#define PXA3_DDR_HCAL_HCPROG (1 << 28) -#define PXA3_DDR_HCAL_HCEN (1 << 31) -#define PXA3_DMCIER 0x0070 -#define PXA3_DMCIER_EDLP (1 << 29) -#define PXA3_DMCISR 0x0078 -#define PXA3_RCOMP 0x0100 -#define PXA3_RCOMP_SWEVAL (1 << 31) - -ENTRY(pm_enter_standby_start) - mov r1, #0xf6000000 @ DMEMC_REG_BASE (PXA3_MDCNFG) - add r1, r1, #0x00100000 - - /* - * Preload the TLB entry for accessing the dynamic memory - * controller registers. Note that page table lookups will - * fail until the dynamic memory controller has been - * reinitialised - and that includes MMU page table walks. 
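- * The dummy read just below performs that preload: it pulls the
- * mapping into the TLB while translation still works.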
- * This also means that only the dynamic memory controller - * can be reliably accessed in the code following standby. - */ - ldr r2, [r1] @ Dummy read PXA3_MDCNFG - - mcr p14, 0, r0, c7, c0, 0 - .rept 8 - nop - .endr - - ldr r0, [r1, #PXA3_DDR_HCAL] @ Clear (and wait for) HCEN - bic r0, r0, #PXA3_DDR_HCAL_HCEN - str r0, [r1, #PXA3_DDR_HCAL] -1: ldr r0, [r1, #PXA3_DDR_HCAL] - tst r0, #PXA3_DDR_HCAL_HCEN - bne 1b - - ldr r0, [r1, #PXA3_RCOMP] @ Initiate RCOMP - orr r0, r0, #PXA3_RCOMP_SWEVAL - str r0, [r1, #PXA3_RCOMP] - - mov r0, #~0 @ Clear interrupts - str r0, [r1, #PXA3_DMCISR] - - ldr r0, [r1, #PXA3_DMCIER] @ set DMIER[EDLP] - orr r0, r0, #PXA3_DMCIER_EDLP - str r0, [r1, #PXA3_DMCIER] - - ldr r0, [r1, #PXA3_DDR_HCAL] @ clear HCRNG, set HCPROG, HCEN - bic r0, r0, #PXA3_DDR_HCAL_HCRNG - orr r0, r0, #PXA3_DDR_HCAL_HCEN | PXA3_DDR_HCAL_HCPROG - str r0, [r1, #PXA3_DDR_HCAL] - -1: ldr r0, [r1, #PXA3_DMCISR] - tst r0, #PXA3_DMCIER_EDLP - beq 1b - - ldr r0, [r1, #PXA3_MDCNFG] @ set PXA3_MDCNFG[DMCEN] - orr r0, r0, #PXA3_MDCNFG_DMCEN - str r0, [r1, #PXA3_MDCNFG] -1: ldr r0, [r1, #PXA3_MDCNFG] - tst r0, #PXA3_MDCNFG_DMCEN - beq 1b - - ldr r0, [r1, #PXA3_DDR_HCAL] @ set PXA3_DDR_HCAL[HCRNG] - orr r0, r0, #2 @ HCRNG - str r0, [r1, #PXA3_DDR_HCAL] - - ldr r0, [r1, #PXA3_DMCIER] @ Clear the interrupt - bic r0, r0, #0x20000000 - str r0, [r1, #PXA3_DMCIER] - - ret lr -ENTRY(pm_enter_standby_end) - -#endif diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S deleted file mode 100644 index 37a7ea524a16077c958f2a9fdff640565500af4c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rockchip/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2013 MundoReader S.L. - * Author: Heiko Stuebner - */ -#include -#include - -ENTRY(rockchip_secondary_trampoline) - ldr pc, 1f -ENDPROC(rockchip_secondary_trampoline) - .globl rockchip_boot_fn -rockchip_boot_fn: -1: .space 4 - -ENTRY(rockchip_secondary_trampoline_end) diff --git a/arch/arm/mach-rockchip/sleep.S b/arch/arm/mach-rockchip/sleep.S deleted file mode 100644 index 3eca3922c944576c9c6aae93cea3a94b24055702..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rockchip/sleep.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd - * Author: Tony Xie - */ - -#include -#include -#include - -.data -/* - * this code will be copied from - * ddr to sram for system resumeing. - * so it is ".data section". 
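- * (Illustrative note, not from the original file: the platform PM
- * code copies this stub, together with the rkpm_bootdata_* words
- * below, into SRAM before suspend, on the order of
- *
- *	memcpy(sram_base, rockchip_slp_cpu_resume, rk3288_bootram_sz);
- *
- * with sram_base standing in for wherever the SRAM window is mapped.
- * The stub is therefore kept position independent and reads each
- * parameter with a pc-relative ldr.)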
- */ - .align 2 - -ENTRY(rockchip_slp_cpu_resume) - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set svc, irqs off - mrc p15, 0, r1, c0, c0, 5 - and r1, r1, #0xf - cmp r1, #0 - /* olny cpu0 can continue to run, the others is halt here */ - beq cpu0run -secondary_loop: - wfe - b secondary_loop -cpu0run: - ldr r3, rkpm_bootdata_l2ctlr_f - cmp r3, #0 - beq sp_set - ldr r3, rkpm_bootdata_l2ctlr - mcr p15, 1, r3, c9, c0, 2 -sp_set: - ldr sp, rkpm_bootdata_cpusp - ldr r1, rkpm_bootdata_cpu_code - bx r1 -ENDPROC(rockchip_slp_cpu_resume) - -/* Parameters filled in by the kernel */ - -/* Flag for whether to restore L2CTLR on resume */ - .global rkpm_bootdata_l2ctlr_f -rkpm_bootdata_l2ctlr_f: - .long 0 - -/* Saved L2CTLR to restore on resume */ - .global rkpm_bootdata_l2ctlr -rkpm_bootdata_l2ctlr: - .long 0 - -/* CPU resume SP addr */ - .globl rkpm_bootdata_cpusp -rkpm_bootdata_cpusp: - .long 0 - -/* CPU resume function (physical address) */ - .globl rkpm_bootdata_cpu_code -rkpm_bootdata_cpu_code: - .long 0 - -ENTRY(rk3288_bootram_sz) - .word . - rockchip_slp_cpu_resume diff --git a/arch/arm/mach-rpc/ecard-loader.S b/arch/arm/mach-rpc/ecard-loader.S deleted file mode 100644 index eb8ac0412da6186011b05e074dd3c1cb37e71de3..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/ecard-loader.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ecard.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include - -#define CPSR2SPSR(rt) \ - mrs rt, cpsr; \ - msr spsr_cxsf, rt - -@ Purpose: call an expansion card loader to read bytes. -@ Proto : char read_loader(int offset, char *card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_read) - stmfd sp!, {r4 - r12, lr} - mov r11, r1 - mov r1, r0 - CPSR2SPSR(r0) - mov lr, pc - mov pc, r2 - ldmfd sp!, {r4 - r12, pc} - -@ Purpose: call an expansion card loader to reset the card -@ Proto : void read_loader(int card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_reset) - stmfd sp!, {r4 - r12, lr} - mov r11, r0 - CPSR2SPSR(r0) - mov lr, pc - add pc, r1, #8 - ldmfd sp!, {r4 - r12, pc} - diff --git a/arch/arm/mach-rpc/fiq.S b/arch/arm/mach-rpc/fiq.S deleted file mode 100644 index 0de83e9b0b39398231b5b543a6e6d0762961cfd5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/fiq.S +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include - - .text - - .global rpc_default_fiq_end -ENTRY(rpc_default_fiq_start) - mov r12, #ioc_base_high - .if ioc_base_low - orr r12, r12, #ioc_base_low - .endif - strb r12, [r12, #0x38] @ Disable FIQ register - subs pc, lr, #4 -rpc_default_fiq_end: diff --git a/arch/arm/mach-rpc/floppydma.S b/arch/arm/mach-rpc/floppydma.S deleted file mode 100644 index 6698b83050dc2c4f1a1b3b34ec67287f97e2c4c1..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/floppydma.S +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/floppydma.S - * - * Copyright (C) 1995, 1996 Russell King - */ -#include -#include - .text - - .global floppy_fiqin_end -ENTRY(floppy_fiqin_start) - subs r9, r9, #1 - ldrbgt r12, [r11, #-4] - ldrble r12, [r11], #0 - strb r12, [r10], #1 - subs pc, lr, #4 -floppy_fiqin_end: - - .global floppy_fiqout_end -ENTRY(floppy_fiqout_start) - subs r9, r9, #1 - ldrbge r12, [r10], #1 - movlt r12, #0 - strble r12, [r11], #0 - subsle pc, lr, #4 - strb r12, [r11, #-4] - 
subs pc, lr, #4 -floppy_fiqout_end: diff --git a/arch/arm/mach-rpc/include/mach/entry-macro.S b/arch/arm/mach-rpc/include/mach/entry-macro.S deleted file mode 100644 index a6d1a9f4bb791b946641ceb5dd1e72129eba3758..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/include/mach/entry-macro.S +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - - .equ ioc_base_high, IOC_BASE & 0xff000000 - .equ ioc_base_low, IOC_BASE & 0x00ff0000 - - .macro get_irqnr_preamble, base, tmp - mov \base, #ioc_base_high @ point at IOC - .if ioc_base_low - orr \base, \base, #ioc_base_low - .endif - .endm diff --git a/arch/arm/mach-rpc/io-acorn.S b/arch/arm/mach-rpc/io-acorn.S deleted file mode 100644 index b9082a2a2a01436784ff3d34161dbc83bfefa750..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/io-acorn.S +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-acorn.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include - - .text - .align - -.Liosl_warning: - .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0" - .align - -/* - * These make no sense on Acorn machines. - * Print a warning message. - */ -ENTRY(insl) -ENTRY(outsl) - adr r0, .Liosl_warning - mov r1, lr - b printk diff --git a/arch/arm/mach-s3c24xx/pm-h1940.S b/arch/arm/mach-s3c24xx/pm-h1940.S deleted file mode 100644 index a7bbe336ac6b6d28760434e05c6f9b57039dcd43..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/pm-h1940.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2006 Ben Dooks - * - * H1940 Suspend to RAM - */ - -#include -#include -#include -#include - -#include - - .text - .global h1940_pm_return - -h1940_pm_return: - mov r0, #S3C2410_PA_GPIO - ldr pc, [r0, #S3C2410_GSTATUS3 - S3C24XX_VA_GPIO] diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S deleted file mode 100644 index 659f9eff9de2d808ea6557d00a64eee53cec2415..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 Power Manager (Suspend-To-RAM) support - * - * Based on PXA/SA1100 sleep code by: - * Nicolas Pitre, (c) 2002 Monta Vista Software Inc - * Cliff Brake, (c) 2001 - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "regs-mem.h" - - /* s3c2410_cpu_suspend - * - * put the cpu into sleep mode - */ - -ENTRY(s3c2410_cpu_suspend) - @@ prepare cpu to sleep - - ldr r4, =S3C2410_REFRESH - ldr r5, =S3C24XX_MISCCR - ldr r6, =S3C2410_CLKCON - ldr r7, [r4] @ get REFRESH (and ensure in TLB) - ldr r8, [r5] @ get MISCCR (and ensure in TLB) - ldr r9, [r6] @ get CLKCON (and ensure in TLB) - - orr r7, r7, #S3C2410_REFRESH_SELF @ SDRAM sleep command - orr r8, r8, #S3C2410_MISCCR_SDSLEEP @ SDRAM power-down signals - orr r9, r9, #S3C2410_CLKCON_POWER @ power down command - - teq pc, #0 @ first as a trial-run to load cache - bl s3c2410_do_sleep - teq r0, r0 @ now do it for real - b s3c2410_do_sleep @ - - @@ align next bit of code to cache line - .align 5 -s3c2410_do_sleep: - streq r7, [r4] @ SDRAM sleep command - streq r8, [r5] @ SDRAM power-down config - streq r9, [r6] @ CPU sleep -1: beq 1b - ret lr diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S 
b/arch/arm/mach-s3c24xx/sleep-s3c2412.S deleted file mode 100644 index c373f1ca862bca608b72e4bbce8b7624603e8cd7..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2007 Simtec Electronics - * Ben Dooks - * - * S3C2412 Power Manager low-level sleep support - */ - -#include -#include -#include -#include - -#include - - .text - - .global s3c2412_sleep_enter - -s3c2412_sleep_enter: - mov r0, #0 /* argument for coprocessors */ - ldr r1, =S3C2410_INTPND - ldr r2, =S3C2410_SRCPND - ldr r3, =S3C2410_EINTPEND - - teq r0, r0 - bl s3c2412_sleep_enter1 - teq pc, r0 - bl s3c2412_sleep_enter1 - - .align 5 - - /* this is called twice, first with the Z flag to ensure that the - * instructions have been loaded into the cache, and the second - * time to try and suspend the system. - */ -s3c2412_sleep_enter1: - mcr p15, 0, r0, c7, c10, 4 - mcrne p15, 0, r0, c7, c0, 4 - - /* if we return from here, it is because an interrupt was - * active when we tried to shut down. Try and ack the IRQ and - * retry, as simply returning causes the system to lock. - */ - - ldrne r9, [r1] - strne r9, [r1] - ldrne r9, [r2] - strne r9, [r2] - ldrne r9, [r3] - strne r9, [r3] - bne s3c2412_sleep_enter1 - - ret lr diff --git a/arch/arm/mach-s3c24xx/sleep.S b/arch/arm/mach-s3c24xx/sleep.S deleted file mode 100644 index f0f11ad60c52c4bc262ebb7194a0ab46f597bf39..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 Power Manager (Suspend-To-RAM) support - * - * Based on PXA/SA1100 sleep code by: - * Nicolas Pitre, (c) 2002 Monta Vista Software Inc - * Cliff Brake, (c) 2001 - */ - -#include -#include -#include -#include -#include - -#include -#include - -/* - * S3C24XX_DEBUG_RESUME is dangerous if your bootloader does not - * reset the UART configuration; only enable if you really need this! - */ -//#define S3C24XX_DEBUG_RESUME - - .text - - /* sleep magic, to allow the bootloader to check for a valid - * image to resume to. Must be the first word before the - * s3c_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* s3c_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(s3c_cpu_resume) - mov r0, #PSR_I_BIT | PSR_F_BIT | SVC_MODE - msr cpsr_c, r0 - - @@ load UART to allow us to print the two characters for - @@ resume debug - - mov r2, #S3C24XX_PA_UART & 0xff000000 - orr r2, r2, #S3C24XX_PA_UART & 0xff000 - -#if 0 - /* SMDK2440 LED set */ - mov r14, #S3C24XX_PA_GPIO - ldr r12, [ r14, #0x54 ] - bic r12, r12, #3<<4 - orr r12, r12, #1<<7 - str r12, [ r14, #0x54 ] -#endif - -#ifdef S3C24XX_DEBUG_RESUME - mov r3, #'L' - strb r3, [ r2, #S3C2410_UTXH ] -1001: - ldrb r14, [ r3, #S3C2410_UTRSTAT ] - tst r14, #S3C2410_UTRSTAT_TXE - beq 1001b -#endif /* S3C24XX_DEBUG_RESUME */ - - b cpu_resume diff --git a/arch/arm/mach-s3c64xx/sleep.S b/arch/arm/mach-s3c64xx/sleep.S deleted file mode 100644 index 39e16a07a5e4b43daac12d8d386b650cb9f98d0b..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c64xx/sleep.S +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* linux/arch/arm/plat-s3c64xx/sleep.S - * - * Copyright 2008 Openmoko, Inc. 
- * Copyright 2008 Simtec Electronics - * Ben Dooks - * http://armlinux.simtec.co.uk/ - * - * S3C64XX CPU sleep code - */ - -#include -#include -#include - -#undef S3C64XX_VA_GPIO -#define S3C64XX_VA_GPIO (0x0) - -#include - -#define LL_UART (S3C_PA_UART + (0x400 * CONFIG_S3C_LOWLEVEL_UART_PORT)) - - .text - - /* Sleep magic, the word before the resume entry point so that the - * bootloader can check for a resumable image. */ - - .word 0x2bedf00d - - /* s3c_cpu_resume - * - * This is the entry point, stored by whatever method the bootloader - * requires to get the kernel running again. This code expects to be - * entered with no caches live and the MMU disabled. It will then - * restore the MMU and other basic CP registers saved and restart - * the kernel C code to finish the resume code. - */ - -ENTRY(s3c_cpu_resume) - msr cpsr_c, #PSR_I_BIT | PSR_F_BIT | SVC_MODE - ldr r2, =LL_UART /* for debug */ - -#ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK - -#define S3C64XX_GPNCON (S3C64XX_GPN_BASE + 0x00) -#define S3C64XX_GPNDAT (S3C64XX_GPN_BASE + 0x04) - -#define S3C64XX_GPN_CONMASK(__gpio) (0x3 << ((__gpio) * 2)) -#define S3C64XX_GPN_OUTPUT(__gpio) (0x1 << ((__gpio) * 2)) - - /* Initialise the GPIO state if we are debugging via the SMDK LEDs, - * as the uboot version supplied resets these to inputs during the - * resume checks. - */ - - ldr r3, =S3C64XX_PA_GPIO - ldr r0, [ r3, #S3C64XX_GPNCON ] - bic r0, r0, #(S3C64XX_GPN_CONMASK(12) | S3C64XX_GPN_CONMASK(13) | \ - S3C64XX_GPN_CONMASK(14) | S3C64XX_GPN_CONMASK(15)) - orr r0, r0, #(S3C64XX_GPN_OUTPUT(12) | S3C64XX_GPN_OUTPUT(13) | \ - S3C64XX_GPN_OUTPUT(14) | S3C64XX_GPN_OUTPUT(15)) - str r0, [ r3, #S3C64XX_GPNCON ] - - ldr r0, [ r3, #S3C64XX_GPNDAT ] - bic r0, r0, #0xf << 12 @ GPN12..15 - orr r0, r0, #1 << 15 @ GPN15 - str r0, [ r3, #S3C64XX_GPNDAT ] -#endif - b cpu_resume diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S deleted file mode 100644 index 81568767f30a83bc3a40bccac0dfbe89eed5534f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s5pv210/sleep.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * S5PV210 Sleep Code - * Based on S3C64XX sleep code by: - * Ben Dooks, (c) 2008 Simtec Electronics - */ - -#include - - .text - .align - - /* - * sleep magic, to allow the bootloader to check for a valid - * image to resume to. Must be the first word before the - * s3c_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* - * s3c_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(s5pv210_cpu_resume) - b cpu_resume -ENDPROC(s5pv210_cpu_resume) diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S deleted file mode 100644 index 85863741ef8bb6d5b6c0fa9199036557ac3aa734..0000000000000000000000000000000000000000 --- a/arch/arm/mach-sa1100/sleep.S +++ /dev/null @@ -1,143 +0,0 @@ -/* - * SA11x0 Assembler Sleep/WakeUp Management Routines - * - * Copyright (c) 2001 Cliff Brake - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License. - * - * History: - * - * 2001-02-06: Cliff Brake Initial code - * - * 2001-08-29: Nicolas Pitre Simplified. - * - * 2002-05-27: Nicolas Pitre Revisited, more cleanup and simplification. - * Storage is on the stack now. 
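The s3c24xx, s3c64xx and s5pv210 sleep images above all share one convention: the magic word 0x2bedf00d sits in the 32-bit slot immediately before the resume entry point, so a bootloader can decide whether RAM still holds a resumable image before jumping into it. A minimal C sketch of the bootloader-side test (the function name, and the idea of passing the entry's physical address, are illustrative assumptions, not code from any real bootloader):

#include <stdint.h>

#define SLEEP_MAGIC 0x2bedf00d  /* matches the .word placed before s3c_cpu_resume */

/* Hypothetical check: read the word just below the resume entry point. */
static int resume_image_valid(uintptr_t resume_entry_pa)
{
    const volatile uint32_t *magic =
        (const volatile uint32_t *)(resume_entry_pa - 4);
    return *magic == SLEEP_MAGIC;
}

A bootloader would call this with the physical address it stashed at suspend time and fall back to a cold boot when the check fails.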
- */ - -#include -#include -#include - - .text -/* - * sa1100_finish_suspend() - * - * Causes sa11x0 to enter sleep state - * - * Must be aligned to a cacheline. - */ - .balign 32 -ENTRY(sa1100_finish_suspend) - @ disable clock switching - mcr p15, 0, r1, c15, c2, 2 - - ldr r6, =MDREFR - ldr r4, [r6] - orr r4, r4, #MDREFR_K1DB2 - ldr r5, =PPCR - - @ Pre-load __loop_udelay into the I-cache - mov r0, #1 - bl __loop_udelay - mov r0, r0 - - @ The following must all exist in a single cache line to - @ avoid accessing memory until this sequence is complete, - @ otherwise we occasionally hang. - - @ Adjust memory timing before lowering CPU clock - str r4, [r6] - - @ delay 90us and set CPU PLL to lowest speed - @ fixes resume problem on high speed SA1110 - mov r0, #90 - bl __loop_udelay - mov r1, #0 - str r1, [r5] - mov r0, #90 - bl __loop_udelay - - /* - * SA1110 SDRAM controller workaround. register values: - * - * r0 = &MSC0 - * r1 = &MSC1 - * r2 = &MSC2 - * r3 = MSC0 value - * r4 = MSC1 value - * r5 = MSC2 value - * r6 = &MDREFR - * r7 = first MDREFR value - * r8 = second MDREFR value - * r9 = &MDCNFG - * r10 = MDCNFG value - * r11 = third MDREFR value - * r12 = &PMCR - * r13 = PMCR value (1) - */ - - ldr r0, =MSC0 - ldr r1, =MSC1 - ldr r2, =MSC2 - - ldr r3, [r0] - bic r3, r3, #FMsk(MSC_RT) - bic r3, r3, #FMsk(MSC_RT)<<16 - - ldr r4, [r1] - bic r4, r4, #FMsk(MSC_RT) - bic r4, r4, #FMsk(MSC_RT)<<16 - - ldr r5, [r2] - bic r5, r5, #FMsk(MSC_RT) - bic r5, r5, #FMsk(MSC_RT)<<16 - - ldr r7, [r6] - bic r7, r7, #0x0000FF00 - bic r7, r7, #0x000000F0 - orr r8, r7, #MDREFR_SLFRSH - - ldr r9, =MDCNFG - ldr r10, [r9] - bic r10, r10, #(MDCNFG_DE0+MDCNFG_DE1) - bic r10, r10, #(MDCNFG_DE2+MDCNFG_DE3) - - bic r11, r8, #MDREFR_SLFRSH - bic r11, r11, #MDREFR_E1PIN - - ldr r12, =PMCR - - mov r13, #PMCR_SF - - b sa1110_sdram_controller_fix - - .align 5 -sa1110_sdram_controller_fix: - - @ Step 1 clear RT field of all MSCx registers - str r3, [r0] - str r4, [r1] - str r5, [r2] - - @ Step 2 clear DRI field in MDREFR - str r7, [r6] - - @ Step 3 set SLFRSH bit in MDREFR - str r8, [r6] - - @ Step 4 clear DE bits in MDCNFG - str r10, [r9] - - @ Step 5 clear DRAM refresh control register - str r11, [r6] - - @ Wow, now the hardware suspend request pins can be used, that makes them functional for - @ about 7 ns out of the entire time that the CPU is running! - - @ Step 6 set force sleep bit in PMCR - - str r13, [r12] - -20: b 20b @ loop waiting for sleep diff --git a/arch/arm/mach-shmobile/headsmp-apmu.S b/arch/arm/mach-shmobile/headsmp-apmu.S deleted file mode 100644 index fabe9cadd12ef0b1d82ed9c3ead65f35943e8a70..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp-apmu.S +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SMP support for APMU based systems with Cortex A7/A15 - * - * Copyright (C) 2014 Renesas Electronics Corporation - */ - -#include -#include - -ENTRY(shmobile_boot_apmu) - bl secure_cntvoff_init - b secondary_startup -ENDPROC(shmobile_boot_apmu) diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S deleted file mode 100644 index d0234296ae622b5f1745d9be7002d4705c3e4913..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * - * Shared SCU setup for mach-shmobile - * - * Copyright (C) 2012 Bastian Hecht - */ - -#include -#include -#include - -/* - * Boot code for secondary CPUs. 
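The SA1110 workaround above deliberately computes every value into registers first and only then branches to a cache-aligned block of plain stores, because after the first store lands SDRAM can no longer be trusted to service instruction fetches. The same compute-then-store shape in C (the masks and bit values below are placeholders standing in for the real MSC_RT/MDREFR/MDCNFG/PMCR definitions):

#include <stdint.h>

/* Placeholder masks -- stand-ins for the real SA11x0 register bits. */
#define MSC_RT_MASK     ((3u << 0) | (3u << 16))
#define MDREFR_DRI_MASK 0x0000fff0u
#define MDREFR_SLFRSH   (1u << 31)
#define MDREFR_E1PIN    (1u << 20)
#define MDCNFG_DE_MASK  0xfu
#define PMCR_SF         1u

static void sa1110_sleep_sequence(volatile uint32_t *msc[3],
                                  volatile uint32_t *mdrefr,
                                  volatile uint32_t *mdcnfg,
                                  volatile uint32_t *pmcr)
{
    uint32_t msc_v[3], refr_dri, refr_slfrsh, refr_final, cnfg_v;
    int i;

    /* Phase 1: all loads and bit manipulation while SDRAM is still usable. */
    for (i = 0; i < 3; i++)
        msc_v[i] = *msc[i] & ~MSC_RT_MASK;                   /* step 1 values */
    refr_dri    = *mdrefr & ~MDREFR_DRI_MASK;                /* step 2 value  */
    refr_slfrsh = refr_dri | MDREFR_SLFRSH;                  /* step 3 value  */
    cnfg_v      = *mdcnfg & ~MDCNFG_DE_MASK;                 /* step 4 value  */
    refr_final  = refr_slfrsh & ~(MDREFR_SLFRSH | MDREFR_E1PIN); /* step 5    */

    /* Phase 2: stores only, mirroring steps 1-6 of the assembly. */
    for (i = 0; i < 3; i++)
        *msc[i] = msc_v[i];
    *mdrefr = refr_dri;
    *mdrefr = refr_slfrsh;
    *mdcnfg = cnfg_v;
    *mdrefr = refr_final;
    *pmcr   = PMCR_SF;          /* step 6: force sleep */
    for (;;)
        ;                       /* 20: b 20b -- wait for sleep */
}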
- * - * First we turn on L1 cache coherency for our CPU. Then we jump to - * secondary_startup that invalidates the cache and hands over control - * to the common ARM startup code. - */ -ENTRY(shmobile_boot_scu) - @ r0 = SCU base address - mrc p15, 0, r1, c0, c0, 5 @ read MPIDR - and r1, r1, #3 @ mask out cpu ID - lsl r1, r1, #3 @ we will shift by cpu_id * 8 bits - ldr r2, [r0, #8] @ SCU Power Status Register - mov r3, #3 - lsl r3, r3, r1 - bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) - str r2, [r0, #8] @ write back - - b secondary_startup -ENDPROC(shmobile_boot_scu) diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S deleted file mode 100644 index 9466ae61f56abd17726098143c4019789da1b201..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp.S +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * SMP support for R-Mobile / SH-Mobile - * - * Copyright (C) 2010 Magnus Damm - * Copyright (C) 2010 Takashi Yoshii - * - * Based on vexpress, Copyright (c) 2003 ARM Limited, All Rights Reserved - */ -#include -#include -#include -#include -#include - -#define SCTLR_MMU 0x01 -#define BOOTROM_ADDRESS 0xE6340000 -#define RWTCSRA_ADDRESS 0xE6020004 -#define RWTCSRA_WOVF 0x10 - -/* - * Reset vector for secondary CPUs. - * This will be mapped at address 0 by SBAR register. - * We need _long_ jump to the physical address. - */ - .arm - .align 12 -ENTRY(shmobile_boot_vector) - ldr r1, 1f - bx r1 - -ENDPROC(shmobile_boot_vector) - - .align 2 - .globl shmobile_boot_fn -shmobile_boot_fn: -1: .space 4 - .globl shmobile_boot_size -shmobile_boot_size: - .long . - shmobile_boot_vector - -#ifdef CONFIG_ARCH_RCAR_GEN2 -/* - * Reset vector for R-Car Gen2 and RZ/G1 secondary CPUs. - * This will be mapped at address 0 by SBAR register. - */ -ENTRY(shmobile_boot_vector_gen2) - mrc p15, 0, r0, c0, c0, 5 @ r0 = MPIDR - ldr r1, shmobile_boot_cpu_gen2 - cmp r0, r1 - bne shmobile_smp_continue_gen2 - - mrc p15, 0, r1, c1, c0, 0 @ r1 = SCTLR - and r0, r1, #SCTLR_MMU - cmp r0, #SCTLR_MMU - beq shmobile_smp_continue_gen2 - - ldr r0, rwtcsra - mov r1, #0 - ldrb r1, [r0] - and r0, r1, #RWTCSRA_WOVF - cmp r0, #RWTCSRA_WOVF - bne shmobile_smp_continue_gen2 - - ldr r0, bootrom - bx r0 - -shmobile_smp_continue_gen2: - ldr r1, shmobile_boot_fn_gen2 - bx r1 - -ENDPROC(shmobile_boot_vector_gen2) - - .align 4 -rwtcsra: - .word RWTCSRA_ADDRESS -bootrom: - .word BOOTROM_ADDRESS - .globl shmobile_boot_cpu_gen2 -shmobile_boot_cpu_gen2: - .word 0x00000000 - - .align 2 - .globl shmobile_boot_fn_gen2 -shmobile_boot_fn_gen2: - .space 4 - .globl shmobile_boot_size_gen2 -shmobile_boot_size_gen2: - .long . 
- shmobile_boot_vector_gen2 -#endif /* CONFIG_ARCH_RCAR_GEN2 */ - -/* - * Per-CPU SMP boot function/argument selection code based on MPIDR - */ - -ENTRY(shmobile_smp_boot) - mrc p15, 0, r1, c0, c0, 5 @ r1 = MPIDR - and r0, r1, #0xffffff @ MPIDR_HWID_BITMASK - @ r0 = cpu_logical_map() value - mov r1, #0 @ r1 = CPU index - adr r2, 1f - ldmia r2, {r5, r6, r7} - add r5, r5, r2 @ array of per-cpu mpidr values - add r6, r6, r2 @ array of per-cpu functions - add r7, r7, r2 @ array of per-cpu arguments - -shmobile_smp_boot_find_mpidr: - ldr r8, [r5, r1, lsl #2] - cmp r8, r0 - bne shmobile_smp_boot_next - - ldr r9, [r6, r1, lsl #2] - cmp r9, #0 - bne shmobile_smp_boot_found - -shmobile_smp_boot_next: - add r1, r1, #1 - cmp r1, #NR_CPUS - blo shmobile_smp_boot_find_mpidr - - b shmobile_smp_sleep - -shmobile_smp_boot_found: - ldr r0, [r7, r1, lsl #2] - ret r9 -ENDPROC(shmobile_smp_boot) - -ENTRY(shmobile_smp_sleep) - wfi - b shmobile_smp_boot -ENDPROC(shmobile_smp_sleep) - - .align 2 -1: .long shmobile_smp_mpidr - . - .long shmobile_smp_fn - 1b - .long shmobile_smp_arg - 1b - - .bss - .globl shmobile_smp_mpidr -shmobile_smp_mpidr: - .space NR_CPUS * 4 - .globl shmobile_smp_fn -shmobile_smp_fn: - .space NR_CPUS * 4 - .globl shmobile_smp_arg -shmobile_smp_arg: - .space NR_CPUS * 4 diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S deleted file mode 100644 index 54f1844eac031bd165484daec502040fee2a992a..0000000000000000000000000000000000000000 --- a/arch/arm/mach-socfpga/headsmp.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2003 ARM Limited - * Copyright (c) u-boot contributors - * Copyright (c) 2012 Pavel Machek - */ -#include -#include -#include -#include - - .arch armv7-a - .arm - -ENTRY(secondary_trampoline) - /* CPU1 will always fetch from 0x0 when it is brought out of reset. - * Thus, we can just subtract the PAGE_OFFSET to get the physical - * address of &cpu1start_addr. This would not work for platforms - * where the physical memory does not start at 0x0. - */ -ARM_BE8(setend be) - adr r0, 1f - ldmia r0, {r1, r2} - sub r2, r2, #PAGE_OFFSET - ldr r3, [r2] - ldr r4, [r3] -ARM_BE8(rev r4, r4) - bx r4 - - .align -1: .long . - .long socfpga_cpu1start_addr -ENTRY(secondary_trampoline_end) diff --git a/arch/arm/mach-socfpga/self-refresh.S b/arch/arm/mach-socfpga/self-refresh.S deleted file mode 100644 index 649f2779053d11a26c966e645a920b1995fc1e85..0000000000000000000000000000000000000000 --- a/arch/arm/mach-socfpga/self-refresh.S +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-2015 Altera Corporation. All rights reserved. 
- */ -#include -#include - -#define MAX_LOOP_COUNT 1000 - -/* Register offset */ -#define SDR_CTRLGRP_LOWPWREQ_ADDR 0x54 -#define SDR_CTRLGRP_LOWPWRACK_ADDR 0x58 - -/* Bitfield positions */ -#define SELFRSHREQ_POS 3 -#define SELFRSHREQ_MASK 0x8 - -#define SELFRFSHACK_POS 1 -#define SELFRFSHACK_MASK 0x2 - - /* - * This code assumes that when the bootloader configured - * the sdram controller for the DDR on the board it - * configured the following fields depending on the DDR - * vendor/configuration: - * - * sdr.ctrlcfg.lowpwreq.selfrfshmask - * sdr.ctrlcfg.lowpwrtiming.clkdisablecycles - * sdr.ctrlcfg.dramtiming4.selfrfshexit - */ - - .arch armv7-a - .text - .align 3 - - /* - * socfpga_sdram_self_refresh - * - * r0 : sdr_ctl_base_addr - * r1 : temp storage of return value - * r2 : temp storage of register values - * r3 : loop counter - * - * return value: lower 16 bits: loop count going into self refresh - * upper 16 bits: loop count exiting self refresh - */ -ENTRY(socfpga_sdram_self_refresh) - /* Enable dynamic clock gating in the Power Control Register. */ - mrc p15, 0, r2, c15, c0, 0 - orr r2, r2, #1 - mcr p15, 0, r2, c15, c0, 0 - - /* Enable self refresh: set sdr.ctrlgrp.lowpwreq.selfrshreq = 1 */ - ldr r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - orr r2, r2, #SELFRSHREQ_MASK - str r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - - /* Poll until sdr.ctrlgrp.lowpwrack.selfrfshack == 1 or hit max loops */ - mov r3, #0 -while_ack_0: - ldr r2, [r0, #SDR_CTRLGRP_LOWPWRACK_ADDR] - and r2, r2, #SELFRFSHACK_MASK - cmp r2, #SELFRFSHACK_MASK - beq ack_1 - - add r3, #1 - cmp r3, #MAX_LOOP_COUNT - bne while_ack_0 - -ack_1: - mov r1, r3 - - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * by any CPU in the cluster have completed. - */ - dsb - dmb - - wfi - - /* Disable self-refresh: set sdr.ctrlgrp.lowpwreq.selfrshreq = 0 */ - ldr r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - bic r2, r2, #SELFRSHREQ_MASK - str r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - - /* Poll until sdr.ctrlgrp.lowpwrack.selfrfshack == 0 or hit max loops */ - mov r3, #0 -while_ack_1: - ldr r2, [r0, #SDR_CTRLGRP_LOWPWRACK_ADDR] - and r2, r2, #SELFRFSHACK_MASK - cmp r2, #SELFRFSHACK_MASK - bne ack_0 - - add r3, #1 - cmp r3, #MAX_LOOP_COUNT - bne while_ack_1 - -ack_0: - /* - * Prepare return value: - * Shift loop count for exiting self refresh into upper 16 bits. - * Leave loop count for requesting self refresh in lower 16 bits. - */ - mov r3, r3, lsl #16 - add r1, r1, r3 - - /* Disable dynamic clock gating in the Power Control Register. */ - mrc p15, 0, r2, c15, c0, 0 - bic r2, r2, #1 - mcr p15, 0, r2, c15, c0, 0 - - mov r0, r1 @ return value - bx lr @ return - -ENDPROC(socfpga_sdram_self_refresh) -ENTRY(socfpga_sdram_self_refresh_sz) - .word . - socfpga_sdram_self_refresh diff --git a/arch/arm/mach-spear/headsmp.S b/arch/arm/mach-spear/headsmp.S deleted file mode 100644 index 96f89436ccf61db20a336b1dc433a46765070636..0000000000000000000000000000000000000000 --- a/arch/arm/mach-spear/headsmp.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mach-spear13XX/headsmp.S - * - * Picked from realview - * Copyright (c) 2012 ST Microelectronics Limited - * Shiraz Hashim - */ - -#include -#include - - __INIT - -/* - * spear13xx specific entry point for secondary CPUs. 
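socfpga_sdram_self_refresh above packs its two bounded polling loops into one return value: the loop count taken entering self-refresh in the low 16 bits, the count taken exiting in the high 16. Rendered as C, with the register offsets from the #defines above (the helper and function names are illustrative):

#include <stdint.h>

#define SDR_CTRLGRP_LOWPWREQ_ADDR  0x54
#define SDR_CTRLGRP_LOWPWRACK_ADDR 0x58
#define SELFRSHREQ_MASK  0x8
#define SELFRFSHACK_MASK 0x2
#define MAX_LOOP_COUNT   1000

static inline volatile uint32_t *sdr_reg(uintptr_t base, uint32_t off)
{
    return (volatile uint32_t *)(base + off);
}

/* Returns (exit_loops << 16) | enter_loops, like the assembly routine. */
static uint32_t sdram_self_refresh(uintptr_t sdr_base)
{
    volatile uint32_t *req = sdr_reg(sdr_base, SDR_CTRLGRP_LOWPWREQ_ADDR);
    volatile uint32_t *ack = sdr_reg(sdr_base, SDR_CTRLGRP_LOWPWRACK_ADDR);
    uint32_t enter_loops = 0, exit_loops = 0;

    *req |= SELFRSHREQ_MASK;                    /* request self-refresh */
    while (!(*ack & SELFRFSHACK_MASK) &&
           ++enter_loops < MAX_LOOP_COUNT)
        ;                                       /* wait for the ack */

    /* ... the real routine issues isb/dsb/dmb and wfi here ... */

    *req &= ~SELFRSHREQ_MASK;                   /* leave self-refresh */
    while ((*ack & SELFRFSHACK_MASK) &&
           ++exit_loops < MAX_LOOP_COUNT)
        ;                                       /* wait for the ack to drop */

    return (exit_loops << 16) | enter_loops;
}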
This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(spear13xx_secondary_startup) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* re-enable coherency */ - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #(1 << 6) | (1 << 0) - mcr p15, 0, r0, c1, c0, 1 - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup - - .align -1: .long . - .long spear_pen_release -ENDPROC(spear13xx_secondary_startup) diff --git a/arch/arm/mach-sunxi/headsmp.S b/arch/arm/mach-sunxi/headsmp.S deleted file mode 100644 index 32d76be98541acc857c6c038b98c789426e89869..0000000000000000000000000000000000000000 --- a/arch/arm/mach-sunxi/headsmp.S +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (c) 2018 Chen-Yu Tsai - * Copyright (c) 2018 Bootlin - * - * Chen-Yu Tsai - * Mylène Josserand - * - * SMP support for sunxi based systems with Cortex A7/A15 - * - */ - -#include -#include -#include - -ENTRY(sunxi_mc_smp_cluster_cache_enable) - .arch armv7-a - /* - * Enable cluster-level coherency, in preparation for turning on the MMU. - * - * Also enable regional clock gating and L2 data latency settings for - * Cortex-A15. These settings are from the vendor kernel. - */ - mrc p15, 0, r1, c0, c0, 0 - movw r2, #(ARM_CPU_PART_MASK & 0xffff) - movt r2, #(ARM_CPU_PART_MASK >> 16) - and r1, r1, r2 - movw r2, #(ARM_CPU_PART_CORTEX_A15 & 0xffff) - movt r2, #(ARM_CPU_PART_CORTEX_A15 >> 16) - cmp r1, r2 - bne not_a15 - - /* The following is Cortex-A15 specific */ - - /* ACTLR2: Enable CPU regional clock gates */ - mrc p15, 1, r1, c15, c0, 4 - orr r1, r1, #(0x1 << 31) - mcr p15, 1, r1, c15, c0, 4 - - /* L2ACTLR */ - mrc p15, 1, r1, c15, c0, 0 - /* Enable L2, GIC, and Timer regional clock gates */ - orr r1, r1, #(0x1 << 26) - /* Disable clean/evict from being pushed to external */ - orr r1, r1, #(0x1<<3) - mcr p15, 1, r1, c15, c0, 0 - - /* L2CTRL: L2 data RAM latency */ - mrc p15, 1, r1, c9, c0, 2 - bic r1, r1, #(0x7 << 0) - orr r1, r1, #(0x3 << 0) - mcr p15, 1, r1, c9, c0, 2 - - /* End of Cortex-A15 specific setup */ - not_a15: - - /* Get value of sunxi_mc_smp_first_comer */ - adr r1, first - ldr r0, [r1] - ldr r0, [r1, r0] - - /* Skip cci_enable_port_for_self if not first comer */ - cmp r0, #0 - bxeq lr - b cci_enable_port_for_self - - .align 2 - first: .word sunxi_mc_smp_first_comer - . 
-ENDPROC(sunxi_mc_smp_cluster_cache_enable) - -ENTRY(sunxi_mc_smp_secondary_startup) - bl sunxi_mc_smp_cluster_cache_enable - bl secure_cntvoff_init - b secondary_startup -ENDPROC(sunxi_mc_smp_secondary_startup) - -ENTRY(sunxi_mc_smp_resume) - bl sunxi_mc_smp_cluster_cache_enable - b cpu_resume -ENDPROC(sunxi_mc_smp_resume) diff --git a/arch/arm/mach-tango/smc.S b/arch/arm/mach-tango/smc.S deleted file mode 100644 index b1752aaa72bcbf0267a81e80728aa0999bfd0a87..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tango/smc.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - - .arch armv7-a - .arch_extension sec -ENTRY(tango_smc) - push {lr} - mov ip, r1 - dsb /* This barrier is probably unnecessary */ - smc #0 - pop {pc} -ENDPROC(tango_smc) diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S deleted file mode 100644 index e3f34815c9da7dc844de4d00d168e2b8cfebbae2..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/reset-handler.S +++ /dev/null @@ -1,307 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2012, NVIDIA Corporation. All rights reserved. - */ - -#include -#include - -#include -#include - -#include -#include -#include - -#include "iomap.h" -#include "reset.h" -#include "sleep.h" - -#define PMC_SCRATCH41 0x140 - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_resume - * - * CPU boot vector when restarting a CPU following - * an LP2 transition. Also branched to by LP0 and LP1 resume after - * re-enabling sdram. - * - * r6: SoC ID - * r8: CPU part number - */ -ENTRY(tegra_resume) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - - cpu_id r0 - cmp r0, #0 @ CPU0? - THUMB( it ne ) - bne cpu_resume @ no - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r6 - /* Are we on Tegra20? */ - cmp r6, #TEGRA20 - beq 1f @ Yes - /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r3, r0 - mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r3] - /* Clear event & intr flag */ - orr r1, r1, \ - #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps - @ & ext flags for CPU power mgnt - bic r1, r1, r0 - str r1, [r2, r3] -1: - - mov32 r9, 0xc09 - cmp r8, r9 - bne end_ca9_scu_l2_resume -#ifdef CONFIG_HAVE_ARM_SCU - /* enable SCU */ - mov32 r0, TEGRA_ARM_PERIF_BASE - ldr r1, [r0] - orr r1, r1, #1 - str r1, [r0] -#endif - bl tegra_resume_trusted_foundations - -#ifdef CONFIG_CACHE_L2X0 - /* L2 cache resume & re-enable */ - bl l2c310_early_resume -#endif -end_ca9_scu_l2_resume: - mov32 r9, 0xc0f - cmp r8, r9 - bleq tegra_init_l2_for_a15 - - b cpu_resume -ENDPROC(tegra_resume) - -/* - * tegra_resume_trusted_foundations - * - * Trusted Foundations firmware initialization. - * - * Doesn't return if firmware is present. - * Corrupted registers: r1, r2 - */ -ENTRY(tegra_resume_trusted_foundations) - /* Check whether Trusted Foundations firmware is present. */ - mov32 r2, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET - ldr r1, =__tegra_cpu_reset_handler_data_offset + \ - RESET_DATA(TF_PRESENT) - ldr r1, [r2, r1] - cmp r1, #0 - reteq lr - - .arch_extension sec - /* First call after suspend wakes firmware. No arguments required. */ - smc #0 - - b cpu_resume -ENDPROC(tegra_resume_trusted_foundations) -#endif - - .align L1_CACHE_SHIFT -ENTRY(__tegra_cpu_reset_handler_start) - -/* - * __tegra_cpu_reset_handler: - * - * Common handler for all CPU reset events. 
- * - * Register usage within the reset handler: - * - * Others: scratch - * R6 = SoC ID - * R7 = CPU present (to the OS) mask - * R8 = CPU in LP1 state mask - * R9 = CPU in LP2 state mask - * R10 = CPU number - * R11 = CPU mask - * R12 = pointer to reset handler data - * - * NOTE: This code is copied to IRAM. All code and data accesses - * must be position-independent. - */ - - .arm - .align L1_CACHE_SHIFT -ENTRY(__tegra_cpu_reset_handler) - - cpsid aif, 0x13 @ SVC mode, interrupts disabled - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r6 - - adr r12, __tegra_cpu_reset_handler_data - ldr r5, [r12, #RESET_DATA(TF_PRESENT)] - cmp r5, #0 - bne after_errata - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC -t20_check: - cmp r6, #TEGRA20 - bne after_t20_check -t20_errata: - # Tegra20 is a Cortex-A9 r1p1 - mrc p15, 0, r0, c1, c0, 0 @ read system control register - orr r0, r0, #1 << 14 @ erratum 716044 - mcr p15, 0, r0, c1, c0, 0 @ write system control register - mrc p15, 0, r0, c15, c0, 1 @ read diagnostic register - orr r0, r0, #1 << 4 @ erratum 742230 - orr r0, r0, #1 << 11 @ erratum 751472 - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - b after_errata -after_t20_check: -#endif -#ifdef CONFIG_ARCH_TEGRA_3x_SOC -t30_check: - cmp r6, #TEGRA30 - bne after_t30_check -t30_errata: - # Tegra30 is a Cortex-A9 r2p9 - mrc p15, 0, r0, c15, c0, 1 @ read diagnostic register - orr r0, r0, #1 << 6 @ erratum 743622 - orr r0, r0, #1 << 11 @ erratum 751472 - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - b after_errata -after_t30_check: -#endif -after_errata: - mrc p15, 0, r10, c0, c0, 5 @ MPIDR - and r10, r10, #0x3 @ R10 = CPU number - mov r11, #1 - mov r11, r11, lsl r10 @ R11 = CPU mask - -#ifdef CONFIG_SMP - /* Does the OS know about this CPU? */ - ldr r7, [r12, #RESET_DATA(MASK_PRESENT)] - tst r7, r11 @ if !present - bleq __die @ CPU not present (to OS) -#endif - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC - /* Are we on Tegra20? */ - cmp r6, #TEGRA20 - bne 1f - /* If not CPU0, don't let CPU0 reset CPU1 now that CPU1 is coming up. */ - mov r0, #CPU_NOT_RESETTABLE - cmp r10, #0 - strbne r0, [r12, #RESET_DATA(RESETTABLE_STATUS)] -1: -#endif - - /* Waking up from LP1? */ - ldr r8, [r12, #RESET_DATA(MASK_LP1)] - tst r8, r11 @ if in_lp1 - beq __is_not_lp1 - cmp r10, #0 - bne __die @ only CPU0 can be here - ldr lr, [r12, #RESET_DATA(STARTUP_LP1)] - cmp lr, #0 - bleq __die @ no LP1 startup handler - THUMB( add lr, lr, #1 ) @ switch to Thumb mode - bx lr -__is_not_lp1: - - /* Waking up from LP2? */ - ldr r9, [r12, #RESET_DATA(MASK_LP2)] - tst r9, r11 @ if in_lp2 - beq __is_not_lp2 - ldr lr, [r12, #RESET_DATA(STARTUP_LP2)] - cmp lr, #0 - bleq __die @ no LP2 startup handler - bx lr - -__is_not_lp2: - -#ifdef CONFIG_SMP - /* - * Can only be secondary boot (initial or hotplug) - * CPU0 can't be here for Tegra20/30 - */ - cmp r6, #TEGRA114 - beq __no_cpu0_chk - cmp r10, #0 - bleq __die @ CPU0 cannot be here -__no_cpu0_chk: - ldr lr, [r12, #RESET_DATA(STARTUP_SECONDARY)] - cmp lr, #0 - bleq __die @ no secondary startup handler - bx lr -#endif - -/* - * We don't know why the CPU reset. Just kill it. - * The LR register will contain the address we died at + 4. - */ - -__die: - sub lr, lr, #4 - mov32 r7, TEGRA_PMC_BASE - str lr, [r7, #PMC_SCRATCH41] - - mov32 r7, TEGRA_CLK_RESET_BASE - - /* Are we on Tegra20? 
*/ - cmp r6, #TEGRA20 - bne 1f - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC - mov32 r0, 0x1111 - mov r1, r0, lsl r10 - str r1, [r7, #0x340] @ CLK_RST_CPU_CMPLX_SET -#endif -1: -#ifdef CONFIG_ARCH_TEGRA_3x_SOC - mov32 r6, TEGRA_FLOW_CTRL_BASE - - cmp r10, #0 - moveq r1, #FLOW_CTRL_HALT_CPU0_EVENTS - moveq r2, #FLOW_CTRL_CPU0_CSR - movne r1, r10, lsl #3 - addne r2, r1, #(FLOW_CTRL_CPU1_CSR-8) - addne r1, r1, #(FLOW_CTRL_HALT_CPU1_EVENTS-8) - - /* Clear CPU "event" and "interrupt" flags and power gate - it when halting but not before it is in the "WFI" state. */ - ldr r0, [r6, +r2] - orr r0, r0, #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - orr r0, r0, #FLOW_CTRL_CSR_ENABLE - str r0, [r6, +r2] - - /* Unconditionally halt this CPU */ - mov r0, #FLOW_CTRL_WAITEVENT - str r0, [r6, +r1] - ldr r0, [r6, +r1] @ memory barrier - - dsb - isb - wfi @ CPU should be power gated here - - /* If the CPU didn't power gate above just kill its clock. */ - - mov r0, r11, lsl #8 - str r0, [r7, #348] @ CLK_CPU_CMPLX_SET -#endif - - /* If the CPU still isn't dead, just spin here. */ - b . -ENDPROC(__tegra_cpu_reset_handler) - - .align L1_CACHE_SHIFT - .type __tegra_cpu_reset_handler_data, %object - .globl __tegra_cpu_reset_handler_data - .globl __tegra_cpu_reset_handler_data_offset - .equ __tegra_cpu_reset_handler_data_offset, \ - . - __tegra_cpu_reset_handler_start -__tegra_cpu_reset_handler_data: - .rept TEGRA_RESET_DATA_SIZE - .long 0 - .endr - .align L1_CACHE_SHIFT - -ENTRY(__tegra_cpu_reset_handler_end) diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S deleted file mode 100644 index 9a89f30d53ca172d31e9ded2878fbba21b1c2575..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ /dev/null @@ -1,575 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved. - * Copyright (c) 2011, Google, Inc. 
- * - * Author: Colin Cross - * Gary King - */ - -#include - -#include - -#include -#include -#include -#include - -#include "irammap.h" -#include "reset.h" -#include "sleep.h" - -#define EMC_CFG 0xc -#define EMC_ADR_CFG 0x10 -#define EMC_NOP 0xdc -#define EMC_SELF_REF 0xe0 -#define EMC_REQ_CTRL 0x2b0 -#define EMC_EMC_STATUS 0x2b4 - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 -#define CLK_RESET_SCLK_BURST 0x28 -#define CLK_RESET_SCLK_DIVIDER 0x2c -#define CLK_RESET_PLLC_BASE 0x80 -#define CLK_RESET_PLLM_BASE 0x90 -#define CLK_RESET_PLLP_BASE 0xa0 - -#define APB_MISC_XM2CFGCPADCTRL 0x8c8 -#define APB_MISC_XM2CFGDPADCTRL 0x8cc -#define APB_MISC_XM2CLKCFGPADCTRL 0x8d0 -#define APB_MISC_XM2COMPPADCTRL 0x8d4 -#define APB_MISC_XM2VTTGENPADCTRL 0x8d8 -#define APB_MISC_XM2CFGCPADCTRL2 0x8e4 -#define APB_MISC_XM2CFGDPADCTRL2 0x8e8 - -#define __tegra20_cpu1_resettable_status_offset \ - (__tegra_cpu_reset_handler_data_offset + RESET_DATA(RESETTABLE_STATUS)) - -.macro pll_enable, rd, r_car_base, pll_base - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 30) - orreq \rd, \rd, #(1 << 30) - streq \rd, [\r_car_base, #\pll_base] -.endm - -.macro emc_device_mask, rd, base - ldr \rd, [\base, #EMC_ADR_CFG] - tst \rd, #(0x3 << 24) - moveq \rd, #(0x1 << 8) @ just 1 device - movne \rd, #(0x3 << 8) @ 2 devices -.endm - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra20_hotplug_shutdown(void) - * - * puts the current cpu in reset - * should never return - */ -ENTRY(tegra20_hotplug_shutdown) - /* Put this CPU down */ - cpu_id r0 - bl tegra20_cpu_shutdown - ret lr @ should never get here -ENDPROC(tegra20_hotplug_shutdown) - -/* - * tegra20_cpu_shutdown(int cpu) - * - * r0 is cpu to reset - * - * puts the specified CPU in wait-for-event mode on the flow controller - * and puts the CPU in reset - * can be called on the current cpu or another cpu - * if called on the current cpu, does not return - * MUST NOT BE CALLED FOR CPU 0. - * - * corrupts r0-r3, r12 - */ -ENTRY(tegra20_cpu_shutdown) - cmp r0, #0 - reteq lr @ must not be called for CPU 0 - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_RESETTABLE - strb r12, [r1, r2] - - cpu_to_halt_reg r1, r0 - ldr r3, =TEGRA_FLOW_CTRL_VIRT - mov r2, #FLOW_CTRL_WAITEVENT | FLOW_CTRL_JTAG_RESUME - str r2, [r3, r1] @ put flow controller in wait event mode - ldr r2, [r3, r1] - isb - dsb - movw r1, 0x1011 - mov r1, r1, lsl r0 - ldr r3, =TEGRA_CLK_RESET_VIRT - str r1, [r3, #0x340] @ put slave CPU in reset - isb - dsb - cpu_id r3 - cmp r3, r0 - beq . 
- ret lr -ENDPROC(tegra20_cpu_shutdown) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_pen_lock - * - * spinlock implementation with no atomic test-and-set and no coherence - * using Peterson's algorithm on strongly-ordered registers - * used to synchronize a cpu waking up from wfi with entering lp2 on idle - * - * The reference link of Peterson's algorithm: - * http://en.wikipedia.org/wiki/Peterson's_algorithm - * - * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm) - * on cpu 0: - * r2 = flag[0] (in SCRATCH38) - * r3 = flag[1] (in SCRATCH39) - * on cpu1: - * r2 = flag[1] (in SCRATCH39) - * r3 = flag[0] (in SCRATCH38) - * - * must be called with MMU on - * corrupts r0-r3, r12 - */ -ENTRY(tegra_pen_lock) - mov32 r3, TEGRA_PMC_VIRT - cpu_id r0 - add r1, r3, #PMC_SCRATCH37 - cmp r0, #0 - addeq r2, r3, #PMC_SCRATCH38 - addeq r3, r3, #PMC_SCRATCH39 - addne r2, r3, #PMC_SCRATCH39 - addne r3, r3, #PMC_SCRATCH38 - - mov r12, #1 - str r12, [r2] @ flag[cpu] = 1 - dsb - str r12, [r1] @ !turn = cpu -1: dsb - ldr r12, [r3] - cmp r12, #1 @ flag[!cpu] == 1? - ldreq r12, [r1] - cmpeq r12, r0 @ !turn == cpu? - beq 1b @ while !turn == cpu && flag[!cpu] == 1 - - ret lr @ locked -ENDPROC(tegra_pen_lock) - -ENTRY(tegra_pen_unlock) - dsb - mov32 r3, TEGRA_PMC_VIRT - cpu_id r0 - cmp r0, #0 - addeq r2, r3, #PMC_SCRATCH38 - addne r2, r3, #PMC_SCRATCH39 - mov r12, #0 - str r12, [r2] - ret lr -ENDPROC(tegra_pen_unlock) - -/* - * tegra20_cpu_clear_resettable(void) - * - * Called to clear the "resettable soon" flag in IRAM variable when - * it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_clear_resettable) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_NOT_RESETTABLE - strb r12, [r1, r2] - ret lr -ENDPROC(tegra20_cpu_clear_resettable) - -/* - * tegra20_cpu_set_resettable_soon(void) - * - * Called to set the "resettable soon" flag in IRAM variable when - * it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_set_resettable_soon) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_RESETTABLE_SOON - strb r12, [r1, r2] - ret lr -ENDPROC(tegra20_cpu_set_resettable_soon) - -/* - * tegra20_cpu_is_resettable_soon(void) - * - * Returns true if the "resettable soon" flag in IRAM variable has been - * set because it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_is_resettable_soon) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - ldrb r12, [r1, r2] - cmp r12, #CPU_RESETTABLE_SOON - moveq r0, #1 - movne r0, #0 - ret lr -ENDPROC(tegra20_cpu_is_resettable_soon) - -/* - * tegra20_sleep_core_finish(unsigned long v2p) - * - * Enters suspend in LP0 or LP1 by turning off the mmu and jumping to - * tegra20_tear_down_core in IRAM - */ -ENTRY(tegra20_sleep_core_finish) - mov r4, r0 - /* Flush, disable the L1 data cache and exit SMP */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - mov r0, r4 - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - - mov32 r0, tegra20_tear_down_core - mov32 r1, tegra20_iram_start - sub r0, r0, r1 - mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA - add r0, r0, r1 - - ret r3 -ENDPROC(tegra20_sleep_core_finish) - -/* - * tegra20_sleep_cpu_secondary_finish(unsigned long v2p) - * - * Enters WFI on secondary CPU by exiting coherency. 
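tegra_pen_lock above is the classic two-party Peterson's algorithm, made safe here only because the PMC scratch registers are strongly-ordered device memory, so the plain loads and stores plus dsb give the ordering the algorithm needs. In textbook C form, over two flag words and a turn word (the assembly stores the inverted turn, !turn = cpu, which is the same thing as turn = other; on ordinary cached SMP memory these would need real barriers or atomics):

#include <stdint.h>

/* PMC_SCRATCH38/39 play the role of flag[0]/flag[1]; PMC_SCRATCH37
 * holds the (inverted) turn word. */
static volatile uint32_t flag[2];
static volatile uint32_t turn;

static void pen_lock(int cpu)           /* cpu is 0 or 1 */
{
    int other = 1 - cpu;

    flag[cpu] = 1;                      /* I want to enter */
    turn = (uint32_t)other;             /* ...but yield first */
    while (flag[other] && turn == (uint32_t)other)
        ;                               /* spin */
}

static void pen_unlock(int cpu)
{
    flag[cpu] = 0;
}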
- */ -ENTRY(tegra20_sleep_cpu_secondary_finish) - stmfd sp!, {r4-r11, lr} - - mrc p15, 0, r11, c1, c0, 1 @ save actlr before exiting coherency - - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_LOUIS - bl tegra_disable_clean_inv_dcache - - mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT - ldr r4, =__tegra20_cpu1_resettable_status_offset - mov r3, #CPU_RESETTABLE - strb r3, [r0, r4] - - bl tegra_cpu_do_idle - - /* - * cpu may be reset while in wfi, which will return through - * tegra_resume to cpu_resume - * or interrupt may wake wfi, which will return here - * cpu state is unchanged - MMU is on, cache is on, coherency - * is off, and the data cache is off - * - * r11 contains the original actlr - */ - - bl tegra_pen_lock - - mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT - ldr r4, =__tegra20_cpu1_resettable_status_offset - mov r3, #CPU_NOT_RESETTABLE - strb r3, [r0, r4] - - bl tegra_pen_unlock - - /* Re-enable the data cache */ - mrc p15, 0, r10, c1, c0, 0 - orr r10, r10, #CR_C - mcr p15, 0, r10, c1, c0, 0 - isb - - mcr p15, 0, r11, c1, c0, 1 @ reenable coherency - - /* Invalidate the TLBs & BTAC */ - mov r1, #0 - mcr p15, 0, r1, c8, c3, 0 @ invalidate shared TLBs - mcr p15, 0, r1, c7, c1, 6 @ invalidate shared BTAC - dsb - isb - - /* the cpu was running with coherency disabled, so - * caches may be out of date */ - bl v7_flush_kern_cache_louis - - ldmfd sp!, {r4 - r11, pc} -ENDPROC(tegra20_sleep_cpu_secondary_finish) - -/* - * tegra20_tear_down_cpu - * - * Switches the CPU cluster to PLL-P and enters sleep. - */ -ENTRY(tegra20_tear_down_cpu) - bl tegra_switch_cpu_to_pllp - b tegra20_enter_sleep -ENDPROC(tegra20_tear_down_cpu) - -/* START OF ROUTINES COPIED TO IRAM */ - .align L1_CACHE_SHIFT - .globl tegra20_iram_start -tegra20_iram_start: - -/* - * tegra20_lp1_reset - * - * reset vector for LP1 restore; copied into IRAM during suspend. - * Brings the system back up to a safe starting point (SDRAM out of - * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLP, - * system clock running on the same PLL that it suspended at), and - * jumps to tegra_resume to restore virtual addressing and PLLX. - * The physical address of tegra_resume is expected to be stored in - * PMC_SCRATCH41. - * - * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_LPx_RESUME_AREA. - */ -ENTRY(tegra20_lp1_reset) - /* - * The CPU and system bus are running at 32KHz and executing from - * IRAM when this code is executed; immediately switch to CLKM and - * enable PLLM, PLLP, PLLC. 
- */ - mov32 r0, TEGRA_CLK_RESET_BASE - - mov r1, #(1 << 28) - str r1, [r0, #CLK_RESET_SCLK_BURST] - str r1, [r0, #CLK_RESET_CCLK_BURST] - mov r1, #0 - str r1, [r0, #CLK_RESET_CCLK_DIVIDER] - str r1, [r0, #CLK_RESET_SCLK_DIVIDER] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE - pll_enable r1, r0, CLK_RESET_PLLP_BASE - pll_enable r1, r0, CLK_RESET_PLLC_BASE - - adr r2, tegra20_sdram_pad_address - adr r4, tegra20_sdram_pad_save - mov r5, #0 - - ldr r6, tegra20_sdram_pad_size -padload: - ldr r7, [r2, r5] @ r7 is the addr in the pad_address - - ldr r1, [r4, r5] - str r1, [r7] @ restore the value in pad_save - - add r5, r5, #4 - cmp r6, r5 - bne padload - -padload_done: - /* 255uS delay for PLL stabilization */ - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #0xff - wait_until r1, r7, r9 - - adr r4, tegra20_sclk_save - ldr r4, [r4] - str r4, [r0, #CLK_RESET_SCLK_BURST] - mov32 r4, ((1 << 28) | (4)) @ burst policy is PLLP - str r4, [r0, #CLK_RESET_CCLK_BURST] - - mov32 r0, TEGRA_EMC_BASE - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 31) @ disable DRAM_CLK_STOP - str r1, [r0, #EMC_CFG] - - mov r1, #0 - str r1, [r0, #EMC_SELF_REF] @ take DRAM out of self refresh - mov r1, #1 - str r1, [r0, #EMC_NOP] - str r1, [r0, #EMC_NOP] - - emc_device_mask r1, r0 - -exit_selfrefresh_loop: - ldr r2, [r0, #EMC_EMC_STATUS] - ands r2, r2, r1 - bne exit_selfrefresh_loop - - mov r1, #0 @ unstall all transactions - str r1, [r0, #EMC_REQ_CTRL] - - mov32 r0, TEGRA_PMC_BASE - ldr r0, [r0, #PMC_SCRATCH41] - ret r0 @ jump to tegra_resume -ENDPROC(tegra20_lp1_reset) - -/* - * tegra20_tear_down_core - * - * copied into and executed from IRAM - * puts memory in self-refresh for LP0 and LP1 - */ -tegra20_tear_down_core: - bl tegra20_sdram_self_refresh - bl tegra20_switch_cpu_to_clk32k - b tegra20_enter_sleep - -/* - * tegra20_switch_cpu_to_clk32k - * - * In LP0 and LP1 all PLLs will be turned off. Switch the CPU and system clock - * to the 32KHz clock. - */ -tegra20_switch_cpu_to_clk32k: - /* - * start by switching to CLKM to safely disable PLLs, then switch to - * CLKS. 
- */ - mov r0, #(1 << 28) - str r0, [r5, #CLK_RESET_SCLK_BURST] - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - str r0, [r5, #CLK_RESET_SCLK_DIVIDER] - - /* 2us delay between changing SCLK and disabling PLLs */ - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - - /* disable PLLM, PLLP and PLLC */ - ldr r0, [r5, #CLK_RESET_PLLM_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLM_BASE] - ldr r0, [r5, #CLK_RESET_PLLP_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLP_BASE] - ldr r0, [r5, #CLK_RESET_PLLC_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLC_BASE] - - /* switch to CLKS */ - mov r0, #0 /* burst policy = 32KHz */ - str r0, [r5, #CLK_RESET_SCLK_BURST] - - ret lr - -/* - * tegra20_enter_sleep - * - * uses flow controller to enter sleep state - * executes from IRAM with SDRAM in selfrefresh when target state is LP0 or LP1 - * executes from SDRAM when target state is LP2 - */ -tegra20_enter_sleep: - mov32 r6, TEGRA_FLOW_CTRL_BASE - - mov r0, #FLOW_CTRL_WAIT_FOR_INTERRUPT - orr r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ - cpu_id r1 - cpu_to_halt_reg r1, r1 - str r0, [r6, r1] - dsb - ldr r0, [r6, r1] /* memory barrier */ - -halted: - dsb - wfe /* CPU should be power gated here */ - isb - b halted - -/* - * tegra20_sdram_self_refresh - * - * called with MMU off and caches disabled - * puts sdram in self refresh - * must be executed from IRAM - */ -tegra20_sdram_self_refresh: - mov32 r1, TEGRA_EMC_BASE @ r1 reserved for emc base addr - - mov r2, #3 - str r2, [r1, #EMC_REQ_CTRL] @ stall incoming DRAM requests - -emcidle: - ldr r2, [r1, #EMC_EMC_STATUS] - tst r2, #4 - beq emcidle - - mov r2, #1 - str r2, [r1, #EMC_SELF_REF] - - emc_device_mask r2, r1 - -emcself: - ldr r3, [r1, #EMC_EMC_STATUS] - and r3, r3, r2 - cmp r3, r2 - bne emcself @ loop until DDR in self-refresh - - adr r2, tegra20_sdram_pad_address - adr r3, tegra20_sdram_pad_safe - adr r4, tegra20_sdram_pad_save - mov r5, #0 - - ldr r6, tegra20_sdram_pad_size -padsave: - ldr r0, [r2, r5] @ r0 is the addr in the pad_address - - ldr r1, [r0] - str r1, [r4, r5] @ save the content of the addr - - ldr r1, [r3, r5] - str r1, [r0] @ set the save val to the addr - - add r5, r5, #4 - cmp r6, r5 - bne padsave -padsave_done: - - mov32 r5, TEGRA_CLK_RESET_BASE - ldr r0, [r5, #CLK_RESET_SCLK_BURST] - adr r2, tegra20_sclk_save - str r0, [r2] - dsb - ret lr - -tegra20_sdram_pad_address: - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CLKCFGPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2COMPPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2VTTGENPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL2 - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL2 - -tegra20_sdram_pad_size: - .word tegra20_sdram_pad_size - tegra20_sdram_pad_address - -tegra20_sdram_pad_safe: - .word 0x8 - .word 0x8 - .word 0x0 - .word 0x8 - .word 0x5500 - .word 0x08080040 - .word 0x0 - -tegra20_sclk_save: - .word 0x0 - -tegra20_sdram_pad_save: - .rept (tegra20_sdram_pad_size - tegra20_sdram_pad_address) / 4 - .long 0 - .endr - - .ltorg -/* dummy symbol for end of IRAM */ - .align L1_CACHE_SHIFT - .globl tegra20_iram_end -tegra20_iram_end: - b . 
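The padsave/padload loops above walk three parallel tables: the pad register addresses, the "safe" values to program while SDRAM sits in self-refresh, and a scratch area that captures the live values for tegra20_lp1_reset to restore. The same data structure in C (types and names are illustrative):

#include <stdint.h>
#include <stddef.h>

struct pad_save_set {
    volatile uint32_t *const *addr; /* registers to touch            */
    const uint32_t *safe;           /* values to program for sleep   */
    uint32_t *save;                 /* live values captured at entry */
    size_t n;
};

static void pads_enter_sleep(struct pad_save_set *s)      /* ~padsave */
{
    for (size_t i = 0; i < s->n; i++) {
        s->save[i] = *s->addr[i];   /* save current value  */
        *s->addr[i] = s->safe[i];   /* program sleep value */
    }
}

static void pads_exit_sleep(const struct pad_save_set *s) /* ~padload */
{
    for (size_t i = 0; i < s->n; i++)
        *s->addr[i] = s->save[i];   /* restore saved value */
}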
-#endif diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S deleted file mode 100644 index 6922dd8d3e2d95232d95c70fea25da69d6715632..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ /dev/null @@ -1,834 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2012, NVIDIA Corporation. All rights reserved. - */ - -#include - -#include -#include - -#include -#include -#include - -#include "irammap.h" -#include "sleep.h" - -#define EMC_CFG 0xc -#define EMC_ADR_CFG 0x10 -#define EMC_TIMING_CONTROL 0x28 -#define EMC_NOP 0xdc -#define EMC_SELF_REF 0xe0 -#define EMC_MRW 0xe8 -#define EMC_FBIO_CFG5 0x104 -#define EMC_AUTO_CAL_CONFIG 0x2a4 -#define EMC_AUTO_CAL_INTERVAL 0x2a8 -#define EMC_AUTO_CAL_STATUS 0x2ac -#define EMC_REQ_CTRL 0x2b0 -#define EMC_CFG_DIG_DLL 0x2bc -#define EMC_EMC_STATUS 0x2b4 -#define EMC_ZCAL_INTERVAL 0x2e0 -#define EMC_ZQ_CAL 0x2ec -#define EMC_XM2VTTGENPADCTRL 0x310 -#define EMC_XM2VTTGENPADCTRL2 0x314 - -#define PMC_CTRL 0x0 -#define PMC_CTRL_SIDE_EFFECT_LP0 (1 << 14) /* enter LP0 when CPU pwr gated */ - -#define PMC_PLLP_WB0_OVERRIDE 0xf8 -#define PMC_IO_DPD_REQ 0x1b8 -#define PMC_IO_DPD_STATUS 0x1bc - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 -#define CLK_RESET_SCLK_BURST 0x28 -#define CLK_RESET_SCLK_DIVIDER 0x2c - -#define CLK_RESET_PLLC_BASE 0x80 -#define CLK_RESET_PLLC_MISC 0x8c -#define CLK_RESET_PLLM_BASE 0x90 -#define CLK_RESET_PLLM_MISC 0x9c -#define CLK_RESET_PLLP_BASE 0xa0 -#define CLK_RESET_PLLP_MISC 0xac -#define CLK_RESET_PLLA_BASE 0xb0 -#define CLK_RESET_PLLA_MISC 0xbc -#define CLK_RESET_PLLX_BASE 0xe0 -#define CLK_RESET_PLLX_MISC 0xe4 -#define CLK_RESET_PLLX_MISC3 0x518 -#define CLK_RESET_PLLX_MISC3_IDDQ 3 -#define CLK_RESET_PLLM_MISC_IDDQ 5 -#define CLK_RESET_PLLC_MISC_IDDQ 26 - -#define CLK_RESET_CLK_SOURCE_MSELECT 0x3b4 - -#define MSELECT_CLKM (0x3 << 30) - -#define LOCK_DELAY 50 /* safety delay after lock is detected */ - -#define TEGRA30_POWER_HOTPLUG_SHUTDOWN (1 << 27) /* Hotplug shutdown */ - -.macro emc_device_mask, rd, base - ldr \rd, [\base, #EMC_ADR_CFG] - tst \rd, #0x1 - moveq \rd, #(0x1 << 8) @ just 1 device - movne \rd, #(0x3 << 8) @ 2 devices -.endm - -.macro emc_timing_update, rd, base - mov \rd, #1 - str \rd, [\base, #EMC_TIMING_CONTROL] -1001: - ldr \rd, [\base, #EMC_EMC_STATUS] - tst \rd, #(0x1<<23) @ wait EMC_STATUS_TIMING_UPDATE_STALLED is clear - bne 1001b -.endm - -.macro pll_enable, rd, r_car_base, pll_base, pll_misc - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 30) - orreq \rd, \rd, #(1 << 30) - streq \rd, [\r_car_base, #\pll_base] - /* Enable lock detector */ - .if \pll_misc - ldr \rd, [\r_car_base, #\pll_misc] - bic \rd, \rd, #(1 << 18) - str \rd, [\r_car_base, #\pll_misc] - ldr \rd, [\r_car_base, #\pll_misc] - ldr \rd, [\r_car_base, #\pll_misc] - orr \rd, \rd, #(1 << 18) - str \rd, [\r_car_base, #\pll_misc] - .endif -.endm - -.macro pll_locked, rd, r_car_base, pll_base -1: - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 27) - beq 1b -.endm - -.macro pll_iddq_exit, rd, car, iddq, iddq_bit - ldr \rd, [\car, #\iddq] - bic \rd, \rd, #(1<<\iddq_bit) - str \rd, [\car, #\iddq] -.endm - -.macro pll_iddq_entry, rd, car, iddq, iddq_bit - ldr \rd, [\car, #\iddq] - orr \rd, \rd, #(1<<\iddq_bit) - str \rd, [\car, #\iddq] -.endm - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra30_hotplug_shutdown(void) - * - * Powergates the current CPU. - * Should never return. 
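The pll_enable, pll_locked and emc_timing_update macros above are all variants of a read-modify-write followed by a status poll. Their C equivalents, using the offsets and bit positions #defined and commented in this file (the register helper is illustrative):

#include <stdint.h>

#define EMC_TIMING_CONTROL 0x28
#define EMC_EMC_STATUS     0x2b4
#define PLL_ENABLE_BIT     (1u << 30)
#define PLL_LOCK_BIT       (1u << 27)
#define EMC_TIMING_STALLED (1u << 23)   /* EMC_STATUS_TIMING_UPDATE_STALLED */

static inline volatile uint32_t *reg(uintptr_t base, uint32_t off)
{
    return (volatile uint32_t *)(base + off);
}

static void pll_enable(uintptr_t car, uint32_t pll_base)
{
    if (!(*reg(car, pll_base) & PLL_ENABLE_BIT))
        *reg(car, pll_base) |= PLL_ENABLE_BIT;
}

static void pll_locked(uintptr_t car, uint32_t pll_base)
{
    while (!(*reg(car, pll_base) & PLL_LOCK_BIT))
        ;                       /* spin until the PLL reports lock */
}

static void emc_timing_update(uintptr_t emc)
{
    *reg(emc, EMC_TIMING_CONTROL) = 1;
    while (*reg(emc, EMC_EMC_STATUS) & EMC_TIMING_STALLED)
        ;                       /* wait for the update to latch */
}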
- */ -ENTRY(tegra30_hotplug_shutdown) - /* Powergate this CPU */ - mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN - bl tegra30_cpu_shutdown - ret lr @ should never get here -ENDPROC(tegra30_hotplug_shutdown) - -/* - * tegra30_cpu_shutdown(unsigned long flags) - * - * Puts the current CPU in wait-for-event mode on the flow controller - * and powergates it -- flags (in R0) indicate the request type. - * - * r10 = SoC ID - * corrupts r0-r4, r10-r12 - */ -ENTRY(tegra30_cpu_shutdown) - cpu_id r3 - tegra_get_soc_id TEGRA_APB_MISC_VIRT, r10 - cmp r10, #TEGRA30 - bne _no_cpu0_chk @ It's not Tegra30 - - cmp r3, #0 - reteq lr @ Must never be called for CPU 0 -_no_cpu0_chk: - - ldr r12, =TEGRA_FLOW_CTRL_VIRT - cpu_to_csr_reg r1, r3 - add r1, r1, r12 @ virtual CSR address for this CPU - cpu_to_halt_reg r2, r3 - add r2, r2, r12 @ virtual HALT_EVENTS address for this CPU - - /* - * Clear this CPU's "event" and "interrupt" flags and power gate - * it when halting but not before it is in the "WFE" state. - */ - movw r12, \ - FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG | \ - FLOW_CTRL_CSR_ENABLE - cmp r10, #TEGRA30 - moveq r4, #(1 << 4) @ wfe bitmap - movne r4, #(1 << 8) @ wfi bitmap - ARM( orr r12, r12, r4, lsl r3 ) - THUMB( lsl r4, r4, r3 ) - THUMB( orr r12, r12, r4 ) - str r12, [r1] - - /* Halt this CPU. */ - mov r3, #0x400 -delay_1: - subs r3, r3, #1 @ delay as a part of wfe war. - bge delay_1; - cpsid a @ disable imprecise aborts. - ldr r3, [r1] @ read CSR - str r3, [r1] @ clear CSR - - tst r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN - beq flow_ctrl_setting_for_lp2 - - /* flow controller set up for hotplug */ - mov r3, #FLOW_CTRL_WAITEVENT @ For hotplug - b flow_ctrl_done -flow_ctrl_setting_for_lp2: - /* flow controller set up for LP2 */ - cmp r10, #TEGRA30 - moveq r3, #FLOW_CTRL_WAIT_FOR_INTERRUPT @ For LP2 - movne r3, #FLOW_CTRL_WAITEVENT - orrne r3, r3, #FLOW_CTRL_HALT_GIC_IRQ - orrne r3, r3, #FLOW_CTRL_HALT_GIC_FIQ -flow_ctrl_done: - cmp r10, #TEGRA30 - str r3, [r2] - ldr r0, [r2] - b wfe_war - -__cpu_reset_again: - dsb - .align 5 - wfeeq @ CPU should be power gated here - wfine -wfe_war: - b __cpu_reset_again - - /* - * 38 nops, which fills rest of wfe cache line and - * 4 more cachelines with nop - */ - .rept 38 - nop - .endr - b . @ should never get here - -ENDPROC(tegra30_cpu_shutdown) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra30_sleep_core_finish(unsigned long v2p) - * - * Enters suspend in LP0 or LP1 by turning off the MMU and jumping to - * tegra30_tear_down_core in IRAM - */ -ENTRY(tegra30_sleep_core_finish) - mov r4, r0 - /* Flush, disable the L1 data cache and exit SMP */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - mov r0, r4 - - /* - * Preload all the address literals that are needed for the - * CPU power-gating process, to avoid loads from SDRAM, which - * are not supported once SDRAM is put into self-refresh. - * LP0 / LP1 use physical addresses, since the MMU needs to be - * disabled before putting SDRAM into self-refresh to avoid - * memory access due to page table walks. 
- */ - mov32 r4, TEGRA_PMC_BASE - mov32 r5, TEGRA_CLK_RESET_BASE - mov32 r6, TEGRA_FLOW_CTRL_BASE - mov32 r7, TEGRA_TMRUS_BASE - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - - mov32 r0, tegra30_tear_down_core - mov32 r1, tegra30_iram_start - sub r0, r0, r1 - mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA - add r0, r0, r1 - - ret r3 -ENDPROC(tegra30_sleep_core_finish) - -/* - * tegra30_sleep_cpu_secondary_finish(unsigned long v2p) - * - * Enters LP2 on secondary CPU by exiting coherency and powergating the CPU. - */ -ENTRY(tegra30_sleep_cpu_secondary_finish) - mov r7, lr - - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_LOUIS - bl tegra_disable_clean_inv_dcache - - /* Powergate this CPU. */ - mov r0, #0 @ power mode flags (!hotplug) - bl tegra30_cpu_shutdown - mov r0, #1 @ never return here - ret r7 -ENDPROC(tegra30_sleep_cpu_secondary_finish) - -/* - * tegra30_tear_down_cpu - * - * Switches the CPU to enter sleep. - */ -ENTRY(tegra30_tear_down_cpu) - mov32 r6, TEGRA_FLOW_CTRL_BASE - - b tegra30_enter_sleep -ENDPROC(tegra30_tear_down_cpu) - -/* START OF ROUTINES COPIED TO IRAM */ - .align L1_CACHE_SHIFT - .globl tegra30_iram_start -tegra30_iram_start: - -/* - * tegra30_lp1_reset - * - * reset vector for LP1 restore; copied into IRAM during suspend. - * Brings the system back up to a safe starting point (SDRAM out of - * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLX, - * system clock running on the same PLL that it suspended at), and - * jumps to tegra_resume to restore virtual addressing. - * The physical address of tegra_resume is expected to be stored in - * PMC_SCRATCH41. - * - * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_LPx_RESUME_AREA. - */ -ENTRY(tegra30_lp1_reset) - /* - * The CPU and system bus are running at 32KHz and executing from - * IRAM when this code is executed; immediately switch to CLKM and - * enable PLLP, PLLM, PLLC, PLLA and PLLX. 
- */ - mov32 r0, TEGRA_CLK_RESET_BASE - - mov r1, #(1 << 28) - str r1, [r0, #CLK_RESET_SCLK_BURST] - str r1, [r0, #CLK_RESET_CCLK_BURST] - mov r1, #0 - str r1, [r0, #CLK_RESET_CCLK_DIVIDER] - str r1, [r0, #CLK_RESET_SCLK_DIVIDER] - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - beq _no_pll_iddq_exit - - pll_iddq_exit r1, r0, CLK_RESET_PLLM_MISC, CLK_RESET_PLLM_MISC_IDDQ - pll_iddq_exit r1, r0, CLK_RESET_PLLC_MISC, CLK_RESET_PLLC_MISC_IDDQ - pll_iddq_exit r1, r0, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ - - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r3 - - /* enable PLLM via PMC */ - mov32 r2, TEGRA_PMC_BASE - ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - orr r1, r1, #(1 << 12) - str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE, 0 - pll_enable r1, r0, CLK_RESET_PLLC_BASE, 0 - pll_enable r1, r0, CLK_RESET_PLLX_BASE, 0 - - b _pll_m_c_x_done - -_no_pll_iddq_exit: - /* enable PLLM via PMC */ - mov32 r2, TEGRA_PMC_BASE - ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - orr r1, r1, #(1 << 12) - str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE, CLK_RESET_PLLM_MISC - pll_enable r1, r0, CLK_RESET_PLLC_BASE, CLK_RESET_PLLC_MISC - pll_enable r1, r0, CLK_RESET_PLLX_BASE, CLK_RESET_PLLX_MISC - -_pll_m_c_x_done: - pll_enable r1, r0, CLK_RESET_PLLP_BASE, CLK_RESET_PLLP_MISC - pll_enable r1, r0, CLK_RESET_PLLA_BASE, CLK_RESET_PLLA_MISC - - pll_locked r1, r0, CLK_RESET_PLLM_BASE - pll_locked r1, r0, CLK_RESET_PLLP_BASE - pll_locked r1, r0, CLK_RESET_PLLA_BASE - pll_locked r1, r0, CLK_RESET_PLLC_BASE - pll_locked r1, r0, CLK_RESET_PLLX_BASE - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 - cmp r1, #TEGRA30 - beq 1f - ldr r1, [r0, #CLK_RESET_PLLP_BASE] - bic r1, r1, #(1<<31) @ disable PllP bypass - str r1, [r0, #CLK_RESET_PLLP_BASE] -1: - - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #LOCK_DELAY - wait_until r1, r7, r3 - - adr r5, tegra_sdram_pad_save - - ldr r4, [r5, #0x18] @ restore CLK_SOURCE_MSELECT - str r4, [r0, #CLK_RESET_CLK_SOURCE_MSELECT] - - ldr r4, [r5, #0x1C] @ restore SCLK_BURST - str r4, [r0, #CLK_RESET_SCLK_BURST] - - cmp r10, #TEGRA30 - movweq r4, #:lower16:((1 << 28) | (0x8)) @ burst policy is PLLX - movteq r4, #:upper16:((1 << 28) | (0x8)) - movwne r4, #:lower16:((1 << 28) | (0xe)) - movtne r4, #:upper16:((1 << 28) | (0xe)) - str r4, [r0, #CLK_RESET_CCLK_BURST] - - /* Restore pad power state to normal */ - ldr r1, [r5, #0x14] @ PMC_IO_DPD_STATUS - mvn r1, r1 - bic r1, r1, #(1 << 31) - orr r1, r1, #(1 << 30) - str r1, [r2, #PMC_IO_DPD_REQ] @ DPD_OFF - - cmp r10, #TEGRA30 - movweq r0, #:lower16:TEGRA_EMC_BASE @ r0 reserved for emc base - movteq r0, #:upper16:TEGRA_EMC_BASE - cmp r10, #TEGRA114 - movweq r0, #:lower16:TEGRA_EMC0_BASE - movteq r0, #:upper16:TEGRA_EMC0_BASE - cmp r10, #TEGRA124 - movweq r0, #:lower16:TEGRA124_EMC_BASE - movteq r0, #:upper16:TEGRA124_EMC_BASE - -exit_self_refresh: - ldr r1, [r5, #0xC] @ restore EMC_XM2VTTGENPADCTRL - str r1, [r0, #EMC_XM2VTTGENPADCTRL] - ldr r1, [r5, #0x10] @ restore EMC_XM2VTTGENPADCTRL2 - str r1, [r0, #EMC_XM2VTTGENPADCTRL2] - ldr r1, [r5, #0x8] @ restore EMC_AUTO_CAL_INTERVAL - str r1, [r0, #EMC_AUTO_CAL_INTERVAL] - - /* Relock DLL */ - ldr r1, [r0, #EMC_CFG_DIG_DLL] - orr r1, r1, #(1 << 30) @ set DLL_RESET - str r1, [r0, #EMC_CFG_DIG_DLL] - - emc_timing_update r1, r0 - - cmp r10, #TEGRA114 - movweq r1, #:lower16:TEGRA_EMC1_BASE - movteq r1, #:upper16:TEGRA_EMC1_BASE - cmpeq r0, r1 - - ldr r1, [r0, #EMC_AUTO_CAL_CONFIG] - orr r1, 
r1, #(1 << 31) @ set AUTO_CAL_ACTIVE - orreq r1, r1, #(1 << 27) @ set slave mode for channel 1 - str r1, [r0, #EMC_AUTO_CAL_CONFIG] - -emc_wait_auto_cal_onetime: - ldr r1, [r0, #EMC_AUTO_CAL_STATUS] - tst r1, #(1 << 31) @ wait until AUTO_CAL_ACTIVE is cleared - bne emc_wait_auto_cal_onetime - - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 31) @ disable DRAM_CLK_STOP_PD - str r1, [r0, #EMC_CFG] - - mov r1, #0 - str r1, [r0, #EMC_SELF_REF] @ take DRAM out of self refresh - mov r1, #1 - cmp r10, #TEGRA30 - streq r1, [r0, #EMC_NOP] - streq r1, [r0, #EMC_NOP] - - emc_device_mask r1, r0 - -exit_selfrefresh_loop: - ldr r2, [r0, #EMC_EMC_STATUS] - ands r2, r2, r1 - bne exit_selfrefresh_loop - - lsr r1, r1, #8 @ devSel, bit0:dev0, bit1:dev1 - - mov32 r7, TEGRA_TMRUS_BASE - ldr r2, [r0, #EMC_FBIO_CFG5] - - and r2, r2, #3 @ check DRAM_TYPE - cmp r2, #2 - beq emc_lpddr2 - - /* Issue a ZQ_CAL for dev0 - DDR3 */ - mov32 r2, 0x80000011 @ DEV_SELECTION=2, LENGTH=LONG, CMD=1 - str r2, [r0, #EMC_ZQ_CAL] - ldr r2, [r7] - add r2, r2, #10 - wait_until r2, r7, r3 - - tst r1, #2 - beq zcal_done - - /* Issue a ZQ_CAL for dev1 - DDR3 */ - mov32 r2, 0x40000011 @ DEV_SELECTION=1, LENGTH=LONG, CMD=1 - str r2, [r0, #EMC_ZQ_CAL] - ldr r2, [r7] - add r2, r2, #10 - wait_until r2, r7, r3 - b zcal_done - -emc_lpddr2: - /* Issue a ZQ_CAL for dev0 - LPDDR2 */ - mov32 r2, 0x800A00AB @ DEV_SELECTION=2, MA=10, OP=0xAB - str r2, [r0, #EMC_MRW] - ldr r2, [r7] - add r2, r2, #1 - wait_until r2, r7, r3 - - tst r1, #2 - beq zcal_done - - /* Issue a ZQ_CAL for dev1 - LPDDR2 */ - mov32 r2, 0x400A00AB @ DEV_SELECTION=1, MA=10, OP=0xAB - str r2, [r0, #EMC_MRW] - ldr r2, [r7] - add r2, r2, #1 - wait_until r2, r7, r3 - -zcal_done: - mov r1, #0 @ unstall all transactions - str r1, [r0, #EMC_REQ_CTRL] - ldr r1, [r5, #0x4] @ restore EMC_ZCAL_INTERVAL - str r1, [r0, #EMC_ZCAL_INTERVAL] - ldr r1, [r5, #0x0] @ restore EMC_CFG - str r1, [r0, #EMC_CFG] - - emc_timing_update r1, r0 - - /* Tegra114 has dual EMC channels; now configure the other one */ - cmp r10, #TEGRA114 - bne __no_dual_emc_chanl - mov32 r1, TEGRA_EMC1_BASE - cmp r0, r1 - movne r0, r1 - addne r5, r5, #0x20 - bne exit_self_refresh -__no_dual_emc_chanl: - - mov32 r0, TEGRA_PMC_BASE - ldr r0, [r0, #PMC_SCRATCH41] - ret r0 @ jump to tegra_resume -ENDPROC(tegra30_lp1_reset) - - .align L1_CACHE_SHIFT -tegra30_sdram_pad_address: - .word TEGRA_EMC_BASE + EMC_CFG @0x0 - .word TEGRA_EMC_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA_EMC_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c -tegra30_sdram_pad_address_end: - -tegra114_sdram_pad_address: - .word TEGRA_EMC0_BASE + EMC_CFG @0x0 - .word TEGRA_EMC0_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA_EMC0_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c - .word TEGRA_EMC1_BASE + EMC_CFG @0x20 - .word TEGRA_EMC1_BASE + EMC_ZCAL_INTERVAL @0x24 - .word TEGRA_EMC1_BASE + EMC_AUTO_CAL_INTERVAL @0x28 - .word TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL @0x2c - .word TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL2 @0x30 -tegra114_sdram_pad_adress_end: - 
-tegra124_sdram_pad_address: - .word TEGRA124_EMC_BASE + EMC_CFG @0x0 - .word TEGRA124_EMC_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA124_EMC_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA124_EMC_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA124_EMC_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c -tegra124_sdram_pad_address_end: - -tegra30_sdram_pad_size: - .word tegra30_sdram_pad_address_end - tegra30_sdram_pad_address - -tegra114_sdram_pad_size: - .word tegra114_sdram_pad_address_end - tegra114_sdram_pad_address - - .type tegra_sdram_pad_save, %object -tegra_sdram_pad_save: - .rept (tegra114_sdram_pad_address_end - tegra114_sdram_pad_address) / 4 - .long 0 - .endr - -/* - * tegra30_tear_down_core - * - * copied into and executed from IRAM - * puts memory in self-refresh for LP0 and LP1 - */ -tegra30_tear_down_core: - bl tegra30_sdram_self_refresh - bl tegra30_switch_cpu_to_clk32k - b tegra30_enter_sleep - -/* - * tegra30_switch_cpu_to_clk32k - * - * In LP0 and LP1 all PLLs will be turned off. Switch the CPU and system clocks - * to the 32KHz clock. - * r4 = TEGRA_PMC_BASE - * r5 = TEGRA_CLK_RESET_BASE - * r6 = TEGRA_FLOW_CTRL_BASE - * r7 = TEGRA_TMRUS_BASE - * r10= SoC ID - */ -tegra30_switch_cpu_to_clk32k: - /* - * start by jumping to CLKM to safely disable PLLs, then jump to - * CLKS. - */ - mov r0, #(1 << 28) - str r0, [r5, #CLK_RESET_SCLK_BURST] - /* 2us delay between changing SCLK and CCLK */ - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - str r0, [r5, #CLK_RESET_SCLK_DIVIDER] - - /* switch the clock source of mselect to be CLK_M */ - ldr r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT] - orr r0, r0, #MSELECT_CLKM - str r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT] - - /* 2us delay between changing SCLK and disabling PLLs */ - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - - /* disable PLLM via PMC in LP1 */ - ldr r0, [r4, #PMC_PLLP_WB0_OVERRIDE] - bic r0, r0, #(1 << 12) - str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] - - /* disable PLLP, PLLA, PLLC and PLLX */ - tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 - cmp r1, #TEGRA30 - ldr r0, [r5, #CLK_RESET_PLLP_BASE] - orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLP_BASE] - ldr r0, [r5, #CLK_RESET_PLLA_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLA_BASE] - ldr r0, [r5, #CLK_RESET_PLLC_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLC_BASE] - ldr r0, [r5, #CLK_RESET_PLLX_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLX_BASE] - - cmp r10, #TEGRA30 - beq _no_pll_in_iddq - pll_iddq_entry r1, r5, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ -_no_pll_in_iddq: - - /* switch to CLKS */ - mov r0, #0 /* burst policy = 32KHz */ - str r0, [r5, #CLK_RESET_SCLK_BURST] - - ret lr - -/* - * tegra30_enter_sleep - * - * uses flow controller to enter sleep state - * executes from IRAM with SDRAM in self-refresh when target state is LP0 or LP1 - * executes from SDRAM when target state is LP2 - * r6 = TEGRA_FLOW_CTRL_BASE - */ -tegra30_enter_sleep: - cpu_id r1 - - cpu_to_csr_reg r2, r1 - ldr r0, [r6, r2] - orr r0, r0, #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - orr r0, r0, #FLOW_CTRL_CSR_ENABLE - str r0, [r6, r2] - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - mov r0, 
#FLOW_CTRL_WAIT_FOR_INTERRUPT - orreq r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ - orrne r0, r0, #FLOW_CTRL_HALT_LIC_IRQ | FLOW_CTRL_HALT_LIC_FIQ - - cpu_to_halt_reg r2, r1 - str r0, [r6, r2] - dsb - ldr r0, [r6, r2] /* memory barrier */ - -halted: - isb - dsb - wfi /* CPU should be power gated here */ - - /* !!!FIXME!!! Implement halt failure handler */ - b halted - -/* - * tegra30_sdram_self_refresh - * - * called with MMU off and caches disabled - * must be executed from IRAM - * r4 = TEGRA_PMC_BASE - * r5 = TEGRA_CLK_RESET_BASE - * r6 = TEGRA_FLOW_CTRL_BASE - * r7 = TEGRA_TMRUS_BASE - * r10= SoC ID - */ -tegra30_sdram_self_refresh: - - adr r8, tegra_sdram_pad_save - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - adreq r2, tegra30_sdram_pad_address - ldreq r3, tegra30_sdram_pad_size - cmp r10, #TEGRA114 - adreq r2, tegra114_sdram_pad_address - ldreq r3, tegra114_sdram_pad_size - cmp r10, #TEGRA124 - adreq r2, tegra124_sdram_pad_address - ldreq r3, tegra30_sdram_pad_size - - mov r9, #0 - -padsave: - ldr r0, [r2, r9] @ r0 is the addr in the pad_address - - ldr r1, [r0] - str r1, [r8, r9] @ save the content of the addr - - add r9, r9, #4 - cmp r3, r9 - bne padsave -padsave_done: - - dsb - - cmp r10, #TEGRA30 - ldreq r0, =TEGRA_EMC_BASE @ r0 reserved for emc base addr - cmp r10, #TEGRA114 - ldreq r0, =TEGRA_EMC0_BASE - cmp r10, #TEGRA124 - ldreq r0, =TEGRA124_EMC_BASE - -enter_self_refresh: - cmp r10, #TEGRA30 - mov r1, #0 - str r1, [r0, #EMC_ZCAL_INTERVAL] - str r1, [r0, #EMC_AUTO_CAL_INTERVAL] - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 28) - bicne r1, r1, #(1 << 29) - str r1, [r0, #EMC_CFG] @ disable DYN_SELF_REF - - emc_timing_update r1, r0 - - ldr r1, [r7] - add r1, r1, #5 - wait_until r1, r7, r2 - -emc_wait_auto_cal: - ldr r1, [r0, #EMC_AUTO_CAL_STATUS] - tst r1, #(1 << 31) @ wait until AUTO_CAL_ACTIVE is cleared - bne emc_wait_auto_cal - - mov r1, #3 - str r1, [r0, #EMC_REQ_CTRL] @ stall incoming DRAM requests - -emcidle: - ldr r1, [r0, #EMC_EMC_STATUS] - tst r1, #4 - beq emcidle - - mov r1, #1 - str r1, [r0, #EMC_SELF_REF] - - emc_device_mask r1, r0 - -emcself: - ldr r2, [r0, #EMC_EMC_STATUS] - and r2, r2, r1 - cmp r2, r1 - bne emcself @ loop until DDR in self-refresh - - /* Put VTTGEN in the lowest power mode */ - ldr r1, [r0, #EMC_XM2VTTGENPADCTRL] - mov32 r2, 0xF8F8FFFF @ clear XM2VTTGEN_DRVUP and XM2VTTGEN_DRVDN - and r1, r1, r2 - str r1, [r0, #EMC_XM2VTTGENPADCTRL] - ldr r1, [r0, #EMC_XM2VTTGENPADCTRL2] - cmp r10, #TEGRA30 - orreq r1, r1, #7 @ set E_NO_VTTGEN - orrne r1, r1, #0x3f - str r1, [r0, #EMC_XM2VTTGENPADCTRL2] - - emc_timing_update r1, r0 - - /* Tegra114 has dual EMC channels, now configure the other one */ - cmp r10, #TEGRA114 - bne no_dual_emc_chanl - mov32 r1, TEGRA_EMC1_BASE - cmp r0, r1 - movne r0, r1 - bne enter_self_refresh -no_dual_emc_chanl: - - ldr r1, [r4, #PMC_CTRL] - tst r1, #PMC_CTRL_SIDE_EFFECT_LP0 - bne pmc_io_dpd_skip - /* - * Put DDR_DATA, DISC_ADDR_CMD, DDR_ADDR_CMD, POP_ADDR_CMD, POP_CLK - * and COMP in the lowest power mode when LP1. - */ - mov32 r1, 0x8EC00000 - str r1, [r4, #PMC_IO_DPD_REQ] -pmc_io_dpd_skip: - - dsb - - ret lr - - .ltorg -/* dummy symbol for end of IRAM */ - .align L1_CACHE_SHIFT - .global tegra30_iram_end -tegra30_iram_end: - b . 
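The wait_until invocations throughout this file busy-wait on the free-running microsecond counter at TEGRA_TMRUS_BASE: callers read the counter, add a delay in microseconds, and spin until the counter passes that deadline. The macro itself is defined in a header not shown in this diff; the expansion below is a plausible, wraparound-safe illustration of the pattern, not necessarily the kernel's exact definition:

	.macro wait_until, rdeadline, rtmrus, rtmp
1:	ldr	\rtmp, [\rtmrus]		@ current time in microseconds
	sub	\rtmp, \rtmp, \rdeadline	@ signed distance past the deadline
	cmp	\rtmp, #0
	blt	1b				@ still early, keep polling
	.endm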
-#endif diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S deleted file mode 100644 index 8f88944831c5353ef7e8206664f914a90248ca59..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep.S +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-tegra/sleep.S - * - * Copyright (c) 2010-2011, NVIDIA Corporation. - * Copyright (c) 2011, Google, Inc. - * - * Author: Colin Cross - * Gary King - */ - -#include - -#include -#include -#include -#include - -#include "iomap.h" -#include "sleep.h" - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra_disable_clean_inv_dcache - * - * disable, clean & invalidate the D-cache - * - * Corrupted registers: r1-r3, r6, r8, r9-r11 - */ -ENTRY(tegra_disable_clean_inv_dcache) - stmfd sp!, {r0, r4-r5, r7, r9-r11, lr} - dmb @ ensure ordering - - /* Disable the D-cache */ - mrc p15, 0, r2, c1, c0, 0 - tst r2, #CR_C @ see tegra_sleep_cpu() - bic r2, r2, #CR_C - mcrne p15, 0, r2, c1, c0, 0 - isb - - /* Flush the D-cache */ - cmp r0, #TEGRA_FLUSH_CACHE_ALL - blne v7_flush_dcache_louis - bleq v7_flush_dcache_all - - /* Turn off coherency */ - exit_smp r4, r5 - - ldmfd sp!, {r0, r4-r5, r7, r9-r11, pc} -ENDPROC(tegra_disable_clean_inv_dcache) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_init_l2_for_a15 - * - * set up the correct L2 cache data RAM latency - */ -ENTRY(tegra_init_l2_for_a15) - mrc p15, 0, r0, c0, c0, 5 - ubfx r0, r0, #8, #4 - tst r0, #1 @ only need for cluster 0 - bne _exit_init_l2_a15 - - mrc p15, 0x1, r0, c9, c0, 2 - and r0, r0, #7 - cmp r0, #2 - bicne r0, r0, #7 - orrne r0, r0, #2 - mcrne p15, 0x1, r0, c9, c0, 2 -_exit_init_l2_a15: - - ret lr -ENDPROC(tegra_init_l2_for_a15) - -/* - * tegra_sleep_cpu_finish(unsigned long v2p) - * - * enters suspend in LP2 by turning off the mmu and jumping to - * tegra?_tear_down_cpu - */ -ENTRY(tegra_sleep_cpu_finish) - mov r4, r0 - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - - mov r0, r4 - mov32 r6, tegra_tear_down_cpu - ldr r1, [r6] - add r1, r1, r0 - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - mov r0, r1 - - ret r3 -ENDPROC(tegra_sleep_cpu_finish) - -/* - * tegra_shut_off_mmu - * - * r0 = physical address to jump to with mmu off - * - * called with VA=PA mapping - * turns off MMU, icache, dcache and branch prediction - */ - .align L1_CACHE_SHIFT - .pushsection .idmap.text, "ax" -ENTRY(tegra_shut_off_mmu) - mrc p15, 0, r3, c1, c0, 0 - movw r2, #CR_I | CR_Z | CR_C | CR_M - bic r3, r3, r2 - dsb - mcr p15, 0, r3, c1, c0, 0 - isb -#ifdef CONFIG_CACHE_L2X0 - /* Disable L2 cache */ - check_cpu_part_num 0xc09, r9, r10 - retne r0 - - mov32 r2, TEGRA_ARM_PERIF_BASE + 0x3000 - ldr r3, [r2, #L2X0_CTRL] - tst r3, #L2X0_CTRL_EN @ see tegra_sleep_cpu() - mov r3, #0 - strne r3, [r2, #L2X0_CTRL] -#endif - ret r0 -ENDPROC(tegra_shut_off_mmu) - .popsection - -/* - * tegra_switch_cpu_to_pllp - * - * In LP2 the normal cpu clock pllx will be turned off. 
Switch the CPU to pllp - */ -ENTRY(tegra_switch_cpu_to_pllp) - /* in LP2 idle (SDRAM active), set the CPU burst policy to PLLP */ - mov32 r5, TEGRA_CLK_RESET_BASE - mov r0, #(2 << 28) @ burst policy = run mode - orr r0, r0, #(4 << 4) @ use PLLP in run mode burst - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - ret lr -ENDPROC(tegra_switch_cpu_to_pllp) -#endif diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S deleted file mode 100644 index 0614b2ebd354c0a5c73f2cc53adac29f78c6669f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-vexpress/dcscb_setup.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/asm/dcscb_setup.S - * - * Created by: Dave Martin, 2012-06-22 - * Copyright: (C) 2012-2013 Linaro Limited - */ - -#include - - -ENTRY(dcscb_power_up_setup) - - cmp r0, #0 @ check affinity level - beq 2f - -/* - * Enable cluster-level coherency, in preparation for turning on the MMU. - * The ACTLR SMP bit does not need to be set here, because cpu_resume() - * already restores that. - * - * A15/A7 may not require explicit L2 invalidation on reset, dependent - * on hardware integration decisions. - * For now, this code assumes that L2 is either already invalidated, - * or invalidation is not required. - */ - - b cci_enable_port_for_self - -2: @ Implementation-specific local CPU setup operations should go here, - @ if any. In this case, there is nothing to do. - - bx lr - -ENDPROC(dcscb_power_up_setup) diff --git a/arch/arm/mach-zx/headsmp.S b/arch/arm/mach-zx/headsmp.S deleted file mode 100644 index 0846859b05739bbe8ad49a717d23c62ab3abd8ef..0000000000000000000000000000000000000000 --- a/arch/arm/mach-zx/headsmp.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2014 Linaro Ltd. - * Copyright (C) 2014 ZTE Corporation. - */ - -#include - - .align 3 - .arm - -/* It runs from physical address */ -ENTRY(zx_resume_jump) - adr r1, zx_secondary_startup_pa - ldr r0, [r1] - bx r0 -ENDPROC(zx_resume_jump) - -ENTRY(zx_secondary_startup_pa) - .word zx_secondary_startup_pa - -ENTRY(zx_suspend_iram_sz) - .word . 
- zx_resume_jump -ENDPROC(zx_secondary_startup_pa) - - -ENTRY(zx_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zx_secondary_startup) diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S deleted file mode 100644 index 3449e0d1f9900db4890489dd39a77ffc05599115..0000000000000000000000000000000000000000 --- a/arch/arm/mach-zynq/headsmp.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013 Steffen Trumtrar - * Copyright (c) 2012-2013 Xilinx - */ -#include -#include -#include - - .arm - -ENTRY(zynq_secondary_trampoline) -ARM_BE8(setend be) @ ensure we are in BE8 mode - ldr r0, zynq_secondary_trampoline_jump -ARM_BE8(rev r0, r0) - bx r0 -.globl zynq_secondary_trampoline_jump -zynq_secondary_trampoline_jump: - /* Space for jumping address */ - .word 0 /* cpu 1 */ -.globl zynq_secondary_trampoline_end -zynq_secondary_trampoline_end: -ENDPROC(zynq_secondary_trampoline) diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S deleted file mode 100644 index a10bcb89594dd38ce31ca30bf97d68cf421afc42..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev4.S +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v4_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v4_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable userspace access - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #1 << 20 @ L = 1 -> write? - orreq r1, r1, #1 << 11 @ yes. - b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S deleted file mode 100644 index 14743a2f6997fcae0eeb55f53dec07b7b2ec227c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev4t.S +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v4t_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. 
- */ - .align 5 -ENTRY(v4t_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #1 << 20 @ check write - orreq r1, r1, #1 << 11 - b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S deleted file mode 100644 index 98c523118820798668bf04d065ad86ea05fb6d2c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev5t.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v5t_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v5t_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable user access - bic r1, r1, #1 << 11 @ clear bit 11 of FSR - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes - tst r3, #1 << 20 @ check write - orreq r1, r1, #1 << 11 - b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S deleted file mode 100644 index fec72f4fbaf508597d826e58d0dc084ee6e58dd0..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev5tj.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v5tj_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v5tj_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r5, #PSR_J_BIT @ Java? - bne do_DataAbort - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable userspace access - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes - tst r3, #1 << 20 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. - b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S deleted file mode 100644 index c58bf8b43fea64f240ea66d079dc840c9c9d141f..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev6.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v6_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. 
Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v6_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR -/* - * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. - */ -#ifdef CONFIG_ARM_ERRATA_326103 - ldr ip, =0x4107b36 - mrc p15, 0, r3, c0, c0, 0 @ get processor id - teq ip, r3, lsr #4 @ r0 ARM1136? - bne 1f - tst r5, #PSR_J_BIT @ Java? - tsteq r5, #PSR_T_BIT @ Thumb? - bne 1f - bic r1, r1, #1 << 11 @ clear bit 11 of FSR - ldr r3, [r4] @ read aborted ARM instruction - ARM_BE8(rev r3, r3) - - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq 1f @ yes - tst r3, #1 << 20 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. -#endif -1: uaccess_disable ip @ disable userspace access - b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S deleted file mode 100644 index f7cc5d68444b56217a613bead95bf0d492fc372e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev7.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v7_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - */ - .align 5 -ENTRY(v7_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - uaccess_disable ip @ disable userspace access - - /* - * V6 code adjusts the returned DFSR. - * New designs should not need to patch up faults. - */ - -#if defined(CONFIG_VERIFY_PERMISSION_FAULT) - /* - * Detect erroneous permission failures and fix - */ - ldr r3, =0x40d @ On permission fault - and r3, r1, r3 - cmp r3, #0x0d - bne do_DataAbort - - mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR - isb - mrc p15, 0, ip, c7, c4, 0 @ Read the PAR - and r3, ip, #0x7b @ On translation fault - cmp r3, #0x0b - bne do_DataAbort - bic r1, r1, #0xf @ Fix up FSR FS[5:0] - and ip, ip, #0x7e - orr r1, r1, ip, LSR #1 -#endif - - b do_DataAbort -ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S deleted file mode 100644 index fbd60a120f6684c56c63cea10b00200765473f1d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-lv4t.S +++ /dev/null @@ -1,237 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v4t_late_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4-r5, r9-r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ -ENTRY(v4t_late_abort) - tst r5, #PSR_T_BIT @ check for thumb mode -#ifdef CONFIG_CPU_CP15_MMU - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR -#else - mov r0, #0 @ clear r0, r1 (no FSR/FAR) - mov r1, #0 -#endif - bne .data_thumb_abort - ldr r8, [r4] @ read arm instruction - uaccess_disable ip @ disable userspace access - tst r8, #1 << 20 @ L = 1 -> write? - orreq r1, r1, #1 << 11 @ yes. 
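The next two instructions, "and r7, r8, #15 << 24" followed by "add pc, pc, r7, lsr #22", form a classic ARM computed branch: bits [27:24] of the faulting instruction select one of the sixteen branch slots listed after them. The lsr #22 turns the nibble at bit 24 into a word offset (shift right by 24, then left by 2), and because pc reads as the address of the current instruction plus 8 in ARM state, exactly one padding nop separates the add from slot 0. The idiom as a standalone sketch, with hypothetical slot labels:

	and	r7, r8, #15 << 24	@ isolate selector bits [27:24]
	add	pc, pc, r7, lsr #22	@ advance (selector >> 24) * 4 bytes into the table
	nop				@ covers pc reading as current + 8
	b	slot0			@ selector 0 lands here
	b	slot1			@ selector 1 lands here
	@ ... one branch per selector value, sixteen slots in all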
- and r7, r8, #15 << 24 - add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine - nop - -/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm -/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm] -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m -/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] -/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm -/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] -/* 8 */ b .data_arm_ldmstm @ ldm*a rn, -/* 9 */ b .data_arm_ldmstm @ ldm*b rn, -/* a */ b .data_unknown -/* b */ b .data_unknown -/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ b do_DataAbort @ ldc rd, [rn, #m] -/* e */ b .data_unknown -/* f */ b .data_unknown - -.data_unknown_r9: - ldr r9, [sp], #4 -.data_unknown: @ Part of jumptable - mov r0, r4 - mov r1, r8 - b baddataabort - -.data_arm_ldmstm: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup - str r9, [sp, #-4]! - mov r7, #0x11 - orr r7, r7, #0x1100 - and r6, r8, r7 - and r9, r8, r7, lsl #1 - add r6, r6, r9, lsr #1 - and r9, r8, r7, lsl #2 - add r6, r6, r9, lsr #2 - and r9, r8, r7, lsl #3 - add r6, r6, r9, lsr #3 - add r6, r6, r6, lsr #8 - add r6, r6, r6, lsr #4 - and r6, r6, #15 @ r6 = no. of registers to transfer. - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsl #2 @ Undo increment - addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrhpre: - tst r8, #1 << 21 @ Check writeback bit - beq do_DataAbort @ No writeback -> no fixup -.data_arm_lateldrhpost: - str r9, [sp, #-4]! - and r9, r8, #0x00f @ get Rm / low nibble of immediate value - tst r8, #1 << 22 @ if (immediate offset) - andne r6, r8, #0xf00 @ { immediate high nibble - orrne r6, r9, r6, lsr #4 @ combine nibbles } else - ldreq r6, [r2, r9, lsl #2] @ { load Rm value } -.data_arm_apply_r6_and_rn: - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6 @ Undo increment - addeq r7, r7, r6 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrpreconst: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup -.data_arm_lateldrpostconst: - movs r6, r8, lsl #20 @ Get offset - beq do_DataAbort @ zero -> no fixup - str r9, [sp, #-4]! - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsr #20 @ Undo increment - addeq r7, r7, r6, lsr #20 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrprereg: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup -.data_arm_lateldrpostreg: - and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' - str r9, [sp, #-4]! - mov r9, r8, lsr #7 @ get shift count - ands r9, r9, #31 - and r7, r8, #0x70 @ get shift type - orreq r7, r7, #8 @ shift count = 0 - add pc, pc, r7 - nop - - mov r6, r6, lsl r9 @ 0: LSL #!0 - b .data_arm_apply_r6_and_rn - b .data_arm_apply_r6_and_rn @ 1: LSL #0 - nop - b .data_unknown_r9 @ 2: MUL? - nop - b .data_unknown_r9 @ 3: MUL? 
- nop - mov r6, r6, lsr r9 @ 4: LSR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, lsr #32 @ 5: LSR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ 6: MUL? - nop - b .data_unknown_r9 @ 7: MUL? - nop - mov r6, r6, asr r9 @ 8: ASR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, asr #32 @ 9: ASR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ A: MUL? - nop - b .data_unknown_r9 @ B: MUL? - nop - mov r6, r6, ror r9 @ C: ROR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, rrx @ D: RRX - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ E: MUL? - nop - b .data_unknown_r9 @ F: MUL? - -.data_thumb_abort: - ldrh r8, [r4] @ read instruction - uaccess_disable ip @ disable userspace access - tst r8, #1 << 11 @ L = 1 -> write? - orreq r1, r1, #1 << 8 @ yes - and r7, r8, #15 << 12 - add pc, pc, r7, lsr #10 @ lookup in table - nop - -/* 0 */ b .data_unknown -/* 1 */ b .data_unknown -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_unknown -/* 5 */ b .data_thumb_reg -/* 6 */ b do_DataAbort -/* 7 */ b do_DataAbort -/* 8 */ b do_DataAbort -/* 9 */ b do_DataAbort -/* A */ b .data_unknown -/* B */ b .data_thumb_pushpop -/* C */ b .data_thumb_ldmstm -/* D */ b .data_unknown -/* E */ b .data_unknown -/* F */ b .data_unknown - -.data_thumb_reg: - tst r8, #1 << 9 - beq do_DataAbort - tst r8, #1 << 10 @ If 'S' (signed) bit is set - movne r1, #0 @ it must be a load instr - b do_DataAbort - -.data_thumb_pushpop: - tst r8, #1 << 10 - beq .data_unknown - str r9, [sp, #-4]! - and r6, r8, #0x55 @ hweight8(r8) + R bit - and r9, r8, #0xaa - add r6, r6, r9, lsr #1 - and r9, r6, #0xcc - and r6, r6, #0x33 - add r6, r6, r9, lsr #2 - movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) - adc r6, r6, r6, lsr #4 @ high + low nibble + R bit - and r6, r6, #15 @ number of regs to transfer - ldr r7, [r2, #13 << 2] - tst r8, #1 << 11 - addeq r7, r7, r6, lsl #2 @ increment SP if PUSH - subne r7, r7, r6, lsl #2 @ decrement SP if POP - str r7, [r2, #13 << 2] - ldr r9, [sp], #4 - b do_DataAbort - -.data_thumb_ldmstm: - str r9, [sp, #-4]! - and r6, r8, #0x55 @ hweight8(r8) - and r9, r8, #0xaa - add r6, r6, r9, lsr #1 - and r9, r6, #0xcc - and r6, r6, #0x33 - add r6, r6, r9, lsr #2 - add r6, r6, r6, lsr #4 - and r9, r8, #7 << 8 - ldr r7, [r2, r9, lsr #6] - and r6, r6, #15 @ number of regs to transfer - sub r7, r7, r6, lsl #2 @ always decrement - str r7, [r2, r9, lsr #6] - ldr r9, [sp], #4 - b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S deleted file mode 100644 index bacf53fd0b70c6307e74ef8601d8dcc7db292700..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-macro.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb) - * differently than every other instruction, so it is set to 0 (write) - * even though the instructions are read instructions. This means that - * during an abort the instructions will be treated as a write and the - * handler will raise a signal from unwriteable locations if they - * fault. We have to specifically check for these instructions - * from the abort handlers to treat them properly. - * - */ - - .macro do_thumb_abort, fsr, pc, psr, tmp - tst \psr, #PSR_T_BIT - beq not_thumb - ldrh \tmp, [\pc] @ Read aborted Thumb instruction - uaccess_disable ip @ disable userspace access - and \tmp, \tmp, # 0xfe00 @ Mask opcode field - cmp \tmp, # 0x5600 @ Is it ldrsb? 
- orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes - tst \tmp, #1 << 11 @ L = 0 -> write - orreq \fsr, \fsr, #1 << 11 @ yes. - b do_DataAbort -not_thumb: - .endm - -/* - * We check for the following instruction encoding for LDRD. - * - * [27:25] == 000 - * [7:4] == 1101 - * [20] == 0 - */ - .macro teq_ldrd, tmp, insn - mov \tmp, #0x0e100000 - orr \tmp, #0x000000f0 - and \tmp, \insn, \tmp - teq \tmp, #0x000000d0 - .endm diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S deleted file mode 100644 index 6e2366a263219b379f4fdb43cc8e5413fb36e52a..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-nommu.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: nommu_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. - * Just fill zero into the registers. - */ - .align 5 -ENTRY(nommu_early_abort) - mov r0, #0 @ clear r0, r1 (no FSR/FAR) - mov r1, #0 - b do_DataAbort -ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S deleted file mode 100644 index 3a464d1649b4b25b8e718be84d1d42d7093646e7..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-fa.S +++ /dev/null @@ -1,247 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-fa.S - * - * Copyright (C) 2005 Faraday Corp. - * Copyright (C) 2008-2009 Paulius Zaleckas - * - * Based on cache-v4wb.S: - * Copyright (C) 1997-2002 Russell king - * - * Processors: FA520 FA526 FA626 - */ -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 16 - -/* - * The total size of the data cache. - */ -#ifdef CONFIG_ARCH_GEMINI -#define CACHE_DSIZE 8192 -#else -#define CACHE_DSIZE 16384 -#endif - -/* FIXME: put optimal value here. Current one is just estimation */ -#define CACHE_DLIMIT (CACHE_DSIZE * 2) - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(fa_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(fa_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular address - * space. - */ -ENTRY(fa_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(fa_flush_kern_cache_all) - mov ip, #0 - mov r2, #VM_EXEC -__flush_whole_cache: - mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(fa_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT @ total size >= limit? 
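The CACHE_DLIMIT comparison just above is a cost cutoff: the per-line loop that follows issues one maintenance operation per CACHE_DLINESIZE (16-byte) chunk, so once a range reaches CACHE_DLIMIT (twice the data cache size) the loop would touch at least twice as many lines as the cache even holds, and a single whole-cache clean/invalidate wins. Worked through for the two configurations defined earlier, as a rough check on the threshold (the file's own FIXME notes that the factor of two is only an estimate):

	@ CONFIG_ARCH_GEMINI: CACHE_DSIZE = 8192  -> CACHE_DLIMIT = 16384 -> 16384 / 16 = 1024 line operations
	@ otherwise:          CACHE_DSIZE = 16384 -> CACHE_DLIMIT = 32768 -> 32768 / 16 = 2048 line operations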
- bhs __flush_whole_cache @ flush whole D cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(fa_coherent_kern_range) - /* fall through */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(fa_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. - * - * - addr - kernel address - * - size - size of region - */ -ENTRY(fa_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -fa_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry - tst r1, #CACHE_DLINESIZE - 1 - bic r1, r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -fa_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(fa_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(fa_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq fa_dma_clean_range - bcs fa_dma_inv_range - b fa_dma_flush_range -ENDPROC(fa_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(fa_dma_unmap_area) - ret lr -ENDPROC(fa_dma_unmap_area) - - .globl fa_flush_kern_cache_louis - .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions fa diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S deleted file mode 100644 index 72d939ef87985ca40cc3a27e47360dac52daa6c2..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-nop.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#include -#include -#include - -#include "proc-macros.S" - -ENTRY(nop_flush_icache_all) - ret lr -ENDPROC(nop_flush_icache_all) - - .globl nop_flush_kern_cache_all - .equ nop_flush_kern_cache_all, nop_flush_icache_all - - .globl nop_flush_kern_cache_louis - .equ nop_flush_kern_cache_louis, nop_flush_icache_all - - .globl nop_flush_user_cache_all - .equ nop_flush_user_cache_all, nop_flush_icache_all - - .globl nop_flush_user_cache_range - .equ nop_flush_user_cache_range, nop_flush_icache_all - - .globl nop_coherent_kern_range - .equ nop_coherent_kern_range, nop_flush_icache_all - -ENTRY(nop_coherent_user_range) - mov r0, 0 - ret lr -ENDPROC(nop_coherent_user_range) - - .globl nop_flush_kern_dcache_area - .equ nop_flush_kern_dcache_area, nop_flush_icache_all - - .globl nop_dma_flush_range - .equ nop_dma_flush_range, nop_flush_icache_all - - .globl nop_dma_map_area - .equ nop_dma_map_area, nop_flush_icache_all - - .globl nop_dma_unmap_area - .equ nop_dma_unmap_area, nop_flush_icache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions nop diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S deleted file mode 100644 index 7787057e4990fbba9005999ea59da0f4b912c838..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4.S +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4.S - * - * Copyright (C) 1997-2002 Russell king - */ -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4_flush_icache_all) - ret lr -ENDPROC(v4_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
- * - * - mm - mm_struct describing address space - */ -ENTRY(v4_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4_flush_kern_cache_all) -#ifdef CONFIG_CPU_CP15 - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ flush ID cache - ret lr -#else - /* FALLTHROUGH */ -#endif - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4_flush_user_cache_range) -#ifdef CONFIG_CPU_CP15 - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ flush ID cache - ret lr -#else - /* FALLTHROUGH */ -#endif - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_coherent_user_range) - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_dma_flush_range) -#ifdef CONFIG_CPU_CP15 - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ flush ID cache -#endif - ret lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v4_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4_dma_map_area) - ret lr -ENDPROC(v4_dma_unmap_area) -ENDPROC(v4_dma_map_area) - - .globl v4_flush_kern_cache_louis - .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S deleted file mode 100644 index 905ac2fa2b1ea27b918f1d3c06960425403ac8fb..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4wb.S +++ /dev/null @@ -1,259 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4wb.S - * - * Copyright (C) 1997-2002 Russell king - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The total size of the data cache. 
- */ -#if defined(CONFIG_CPU_SA110) -# define CACHE_DSIZE 16384 -#elif defined(CONFIG_CPU_SA1100) -# define CACHE_DSIZE 8192 -#else -# error Unknown cache size -#endif - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - * - * Size Clean (ticks) Dirty (ticks) - * 4096 21 20 21 53 55 54 - * 8192 40 41 40 106 100 102 - * 16384 77 77 76 140 140 138 - * 32768 150 149 150 214 216 212 <--- - * 65536 296 297 296 351 358 361 - * 131072 591 591 591 656 657 651 - * Whole 132 136 132 221 217 207 <--- - */ -#define CACHE_DLIMIT (CACHE_DSIZE * 4) - - .data - .align 2 -flush_base: - .long FLUSH_BASE - .text - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4wb_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(v4wb_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular address - * space. - */ -ENTRY(v4wb_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4wb_flush_kern_cache_all) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache -__flush_whole_cache: - ldr r3, =flush_base - ldr r1, [r3, #0] - eor r1, r1, #CACHE_DSIZE - str r1, [r3, #0] - add r2, r1, #CACHE_DSIZE -1: ldr r3, [r1], #32 - cmp r1, r2 - blo 1b -#ifdef FLUSH_BASE_MINICACHE - add r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE - sub r1, r2, #512 @ only 512 bytes -1: ldr r3, [r1], #32 - cmp r1, r2 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4wb_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - tst r2, #VM_EXEC @ executable region? - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - - cmp r3, #CACHE_DLIMIT @ total size >= limit? - bhs __flush_whole_cache @ flush whole D cache - -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4wb_flush_kern_dcache_area) - add r1, r0, r1 - /* fall through */ - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wb_coherent_kern_range) - /* fall through */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wb_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wb_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wb_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * This is actually the same as v4wb_coherent_kern_range() - */ - .globl v4wb_dma_flush_range - .set v4wb_dma_flush_range, v4wb_coherent_kern_range - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wb_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq v4wb_dma_clean_range - bcs v4wb_dma_inv_range - b v4wb_dma_flush_range -ENDPROC(v4wb_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wb_dma_unmap_area) - ret lr -ENDPROC(v4wb_dma_unmap_area) - - .globl v4wb_flush_kern_cache_louis - .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S deleted file mode 100644 index 0b290c25a99dd6c522d9371e0b542bc054894e5f..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4wt.S +++ /dev/null @@ -1,203 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4wt.S - * - * Copyright (C) 1997-2002 Russell king - * - * ARMv4 write through cache operations support. - * - * We assume that the write buffer is not enabled. - */ -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 8 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. 
- * - * *** This needs benchmarking - */ -#define CACHE_DLIMIT 16384 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4wt_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(v4wt_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(v4wt_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4wt_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4wt_flush_user_cache_range) - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4wt_flush_kern_dcache_area) - mov r2, #0 - mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache - add r1, r0, r1 - /* fallthrough */ - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wt_dma_inv_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ - .globl v4wt_dma_flush_range - .equ v4wt_dma_flush_range, v4wt_dma_inv_range - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wt_dma_unmap_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v4wt_dma_inv_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wt_dma_map_area) - ret lr -ENDPROC(v4wt_dma_unmap_area) -ENDPROC(v4wt_dma_map_area) - - .globl v4wt_flush_kern_cache_louis - .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S deleted file mode 100644 index f0f65eb073e481e082e216f846c7aebf20d0e279..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v6.S +++ /dev/null @@ -1,332 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v6.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv6 processor support. - */ -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define HARVARD_CACHE -#define CACHE_LINE_SIZE 32 -#define D_CACHE_LINE_SIZE 32 -#define BTB_FLUSH_SIZE 8 - -/* - * v6_flush_icache_all() - * - * Flush the whole I-cache. - * - * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. - * This erratum is present in 1136, 1156 and 1176. It does not affect the - * MPCore. - * - * Registers: - * r0 - set to 0 - * r1 - corrupted - */ -ENTRY(v6_flush_icache_all) - mov r0, #0 -#ifdef CONFIG_ARM_ERRATA_411920 - mrs r1, cpsr - cpsid ifa @ disable interrupts - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - msr cpsr_cx, r1 @ restore interrupts - .rept 11 @ ARM Ltd recommends at least - nop @ 11 NOPs - .endr -#else - mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache -#endif - ret lr -ENDPROC(v6_flush_icache_all) - -/* - * v6_flush_kern_cache_all() - * - * Flush the entire cache. - */ -ENTRY(v6_flush_kern_cache_all) - mov r0, #0 -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate -#ifndef CONFIG_ARM_ERRATA_411920 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate -#else - b v6_flush_icache_all -#endif -#else - mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate -#endif - ret lr - -/* - * v6_flush_user_cache_all() - * - * Flush all cache entries in a particular address space - * - * - mm - mm_struct describing address space - */ -ENTRY(v6_flush_user_cache_all) - /*FALLTHROUGH*/ - -/* - * v6_flush_user_cache_range(start, end, flags) - * - * Flush a range of cache entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vm_area_struct flags describing address space - * - * It is assumed that: - * - we have a VIPT cache. - */ -ENTRY(v6_flush_user_cache_range) - ret lr - -/* - * v6_coherent_kern_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. 
This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v6_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * v6_coherent_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v6_coherent_user_range) - UNWIND(.fnstart ) -#ifdef HARVARD_CACHE - bic r0, r0, #CACHE_LINE_SIZE - 1 -1: - USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line - add r0, r0, #CACHE_LINE_SIZE - cmp r0, r1 - blo 1b -#endif - mov r0, #0 -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer -#ifndef CONFIG_ARM_ERRATA_411920 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate -#else - b v6_flush_icache_all -#endif -#else - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB -#endif - ret lr - -/* - * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, fail with -EFAULT. - */ -9001: - mov r0, #-EFAULT - ret lr - UNWIND(.fnend ) -ENDPROC(v6_coherent_user_range) -ENDPROC(v6_coherent_kern_range) - -/* - * v6_flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v6_flush_kern_dcache_area) - add r1, r0, r1 - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line -#else - mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 - blo 1b -#ifdef HARVARD_CACHE - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 -#endif - ret lr - - -/* - * v6_dma_inv_range(start,end) - * - * Invalidate the data cache within the specified region; we will - * be performing a DMA operation in this region and we want to - * purge old data in the cache. 
- * - * - start - virtual start address of region - * - end - virtual end address of region - */ -v6_dma_inv_range: -#ifdef CONFIG_DMA_CACHE_RWFO - ldrb r2, [r0] @ read for ownership - strb r2, [r0] @ write for ownership -#endif - tst r0, #D_CACHE_LINE_SIZE - 1 - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -#ifdef HARVARD_CACHE - mcrne p15, 0, r0, c7, c10, 1 @ clean D line -#else - mcrne p15, 0, r0, c7, c11, 1 @ clean unified line -#endif - tst r1, #D_CACHE_LINE_SIZE - 1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrbne r2, [r1, #-1] @ read for ownership - strbne r2, [r1, #-1] @ write for ownership -#endif - bic r1, r1, #D_CACHE_LINE_SIZE - 1 -#ifdef HARVARD_CACHE - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line -#else - mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line -#endif -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D line -#else - mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrlo r2, [r0] @ read for ownership - strlo r2, [r0] @ write for ownership -#endif - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * v6_dma_clean_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -v6_dma_clean_range: - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership -#endif -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c10, 1 @ clean D line -#else - mcr p15, 0, r0, c7, c11, 1 @ clean unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * v6_dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(v6_dma_flush_range) -#ifdef CONFIG_DMA_CACHE_RWFO - ldrb r2, [r0] @ read for ownership - strb r2, [r0] @ write for ownership -#endif - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line -#else - mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrblo r2, [r0] @ read for ownership - strblo r2, [r0] @ write for ownership -#endif - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v6_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_FROM_DEVICE - beq v6_dma_inv_range -#ifndef CONFIG_DMA_CACHE_RWFO - b v6_dma_clean_range -#else - teq r2, #DMA_TO_DEVICE - beq v6_dma_clean_range - b v6_dma_flush_range -#endif -ENDPROC(v6_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v6_dma_unmap_area) -#ifndef CONFIG_DMA_CACHE_RWFO - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v6_dma_inv_range -#endif - ret lr -ENDPROC(v6_dma_unmap_area) - - .globl v6_flush_kern_cache_louis - .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S deleted file mode 100644 index 0ee8fc4b4672c6d2123c231a47b61cb870ec6052..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v7.S +++ /dev/null @@ -1,484 +0,0 
@@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2005 ARM Ltd. - * - * This is the "shell" of the ARMv7 processor support. - */ -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND -.globl icache_size - .data - .align 2 -icache_size: - .long 64 - .text -#endif -/* - * The secondary kernel init calls v7_flush_dcache_all before it enables - * the L1; however, the L1 comes out of reset in an undefined state, so - * the clean + invalidate performed by v7_flush_dcache_all causes a bunch - * of cache lines with uninitialized data and uninitialized tags to get - * written out to memory, which does really unpleasant things to the main - * processor. We fix this by performing an invalidate, rather than a - * clean + invalidate, before jumping into the kernel. - * - * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs - * to be called for both secondary cores startup and primary core resume - * procedures. - */ -ENTRY(v7_invalidate_l1) - mov r0, #0 - mcr p15, 2, r0, c0, c0, 0 - mrc p15, 1, r0, c0, c0, 0 - - movw r1, #0x7fff - and r2, r1, r0, lsr #13 - - movw r1, #0x3ff - - and r3, r1, r0, lsr #3 @ NumWays - 1 - add r2, r2, #1 @ NumSets - - and r0, r0, #0x7 - add r0, r0, #4 @ SetShift - - clz r1, r3 @ WayShift - add r4, r3, #1 @ NumWays -1: sub r2, r2, #1 @ NumSets-- - mov r3, r4 @ Temp = NumWays -2: subs r3, r3, #1 @ Temp-- - mov r5, r3, lsl r1 - mov r6, r2, lsl r0 - orr r5, r5, r6 @ Reg = (Temp<> 4) @ ID of ARM Cortex A9 r0p? - movt r1, #:upper16:(0x410fc090 >> 4) - teq r1, r2, lsr #4 @ test for errata affected core and if so... - moveq r3, #1 << 1 @ fix LoUIS value - beq start_flush_levels @ start flushing cache levels -#endif - ret lr -ENDPROC(v7_flush_dcache_louis) - -/* - * v7_flush_dcache_all() - * - * Flush the whole D-cache. 
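The set/way walk that follows is the standard ARMv7 algorithm: decode line size, way count and set count from CCSIDR, then issue one clean+invalidate per (set, way) pair with the fields packed into the operand register. A C sketch for a single, already selected cache level; the accessors are hypothetical, and it assumes a set-associative level since __builtin_clz(0) is undefined in C (the assembly's clz handles that case naturally):

    #include <stdint.h>

    extern uint32_t read_ccsidr(void);        /* hypothetical CP15 accessor  */
    extern void dccisw(uint32_t setway);      /* clean+invalidate by set/way */

    /* One level of the v7_flush_dcache_all walk; 'level' is the 0-based
     * cache level already written to CSSELR. */
    static void flush_level_by_set_way(uint32_t level)
    {
        uint32_t ccsidr    = read_ccsidr();
        uint32_t set_shift = (ccsidr & 0x7) + 4;        /* log2(line bytes)   */
        uint32_t max_way   = (ccsidr >> 3) & 0x3ff;     /* NumWays - 1        */
        uint32_t max_set   = (ccsidr >> 13) & 0x7fff;   /* NumSets - 1        */
        uint32_t way_shift = __builtin_clz(max_way);    /* the clz in the asm */

        for (uint32_t set = 0; set <= max_set; set++)
            for (uint32_t way = 0; way <= max_way; way++)
                dccisw((way << way_shift) | (set << set_shift) | (level << 1));
    }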
- * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) - * - * - mm - mm_struct describing address space - */ -ENTRY(v7_flush_dcache_all) - dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - mov r3, r0, lsr #23 @ move LoC into position - ands r3, r3, #7 << 1 @ extract LoC*2 from clidr - beq finished @ if loc is 0, then no need to clean -start_flush_levels: - mov r10, #0 @ start clean at cache level 0 -flush_levels: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT - save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic -#endif - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - isb @ isb to sych the new cssr&csidr - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr -#ifdef CONFIG_PREEMPT - restore_irqs_notrace r9 -#endif - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - movw r4, #0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - movw r7, #0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop1: - mov r9, r7 @ create working copy of max index -loop2: - ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r4, r5 ) - THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r9, r2 ) - THUMB( orr r11, r11, r6 ) @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the index - bge loop2 - subs r4, r4, #1 @ decrement the way - bge loop1 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 -#ifdef CONFIG_ARM_ERRATA_814220 - dsb -#endif - bgt flush_levels -finished: - mov r10, #0 @ switch back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - dsb st - isb - ret lr -ENDPROC(v7_flush_dcache_all) - -/* - * v7_flush_cache_all() - * - * Flush the entire cache system. - * The data cache flush is now achieved using atomic clean / invalidates - * working outwards from L1 cache. This is done using Set/Way based cache - * maintenance instructions. - * The instruction cache can still be invalidated back to the point of - * unification in a single instruction. - * - */ -ENTRY(v7_flush_kern_cache_all) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) - bl v7_flush_dcache_all - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - ret lr -ENDPROC(v7_flush_kern_cache_all) - - /* - * v7_flush_kern_cache_louis(void) - * - * Flush the data cache up to Level of Unification Inner Shareable. - * Invalidate the I-cache to the point of unification. 
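The "louis" variant differs from the full flush only in its stopping level: both fall into the same start_flush_levels loop, but one walks down to the Level of Coherency while the other stops at the Level of Unification Inner Shareable. A sketch of the CLIDR field extraction, with the accessor hypothetical and the bit positions as given in the ARMv7 ARM:

    #include <stdint.h>

    extern uint32_t read_clidr(void);   /* hypothetical: mrc p15, 1, <rt>, c0, c0, 1 */

    static uint32_t flush_limit(int whole_hierarchy)
    {
        uint32_t clidr = read_clidr();

        return whole_hierarchy ? (clidr >> 24) & 0x7    /* LoC,   CLIDR[26:24] */
                               : (clidr >> 21) & 0x7;   /* LoUIS, CLIDR[23:21] */
    }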
- */ -ENTRY(v7_flush_kern_cache_louis) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) - bl v7_flush_dcache_louis - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - ret lr -ENDPROC(v7_flush_kern_cache_louis) - -/* - * v7_flush_cache_all() - * - * Flush all TLB entries in a particular address space - * - * - mm - mm_struct describing address space - */ -ENTRY(v7_flush_user_cache_all) - /*FALLTHROUGH*/ - -/* - * v7_flush_cache_range(start, end, flags) - * - * Flush a range of TLB entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vm_area_struct flags describing address space - * - * It is assumed that: - * - we have a VIPT cache. - */ -ENTRY(v7_flush_user_cache_range) - ret lr -ENDPROC(v7_flush_user_cache_all) -ENDPROC(v7_flush_user_cache_range) - -/* - * v7_coherent_kern_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v7_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * v7_coherent_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v7_coherent_user_range) - UNWIND(.fnstart ) - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r12, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification - add r12, r12, r2 - cmp r12, r1 - blo 1b - dsb ishst -#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND - ldr r3, =icache_size - ldr r2, [r3, #0] -#else - icache_line_size r2, r3 -#endif - sub r3, r2, #1 - bic r12, r0, r3 -2: - USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line - add r12, r12, r2 - cmp r12, r1 - blo 2b - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB - dsb ishst - isb - ret lr - -/* - * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, fail with -EFAULT. - */ -9001: -#ifdef CONFIG_ARM_ERRATA_775420 - dsb -#endif - mov r0, #-EFAULT - ret lr - UNWIND(.fnend ) -ENDPROC(v7_coherent_kern_range) -ENDPROC(v7_coherent_user_range) - -/* - * v7_flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. 
- * - * - addr - kernel address - * - size - region size - */ -ENTRY(v7_flush_kern_dcache_area) - dcache_line_size r2, r3 - add r1, r0, r1 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_flush_kern_dcache_area) - -/* - * v7_dma_inv_range(start,end) - * - * Invalidate the data cache within the specified region; we will - * be performing a DMA operation in this region and we want to - * purge old data in the cache. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -v7_dma_inv_range: - dcache_line_size r2, r3 - sub r3, r2, #1 - tst r0, r3 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line - addne r0, r0, r2 - - tst r1, r3 - bic r1, r1, r3 - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line - cmp r0, r1 -1: - mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line - addlo r0, r0, r2 - cmplo r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_inv_range) - -/* - * v7_dma_clean_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -v7_dma_clean_range: - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c10, 1 @ clean D / U line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_clean_range) - -/* - * v7_dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(v7_dma_flush_range) - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_flush_range) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v7_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_FROM_DEVICE - beq v7_dma_inv_range - b v7_dma_clean_range -ENDPROC(v7_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v7_dma_unmap_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v7_dma_inv_range - ret lr -ENDPROC(v7_dma_unmap_area) - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v7 - - /* The Broadcom Brahma-B15 read-ahead cache requires some modifications - * to the v7_cache_fns, we only override the ones we need - */ -#ifndef CONFIG_CACHE_B15_RAC - globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all -#endif - globl_equ b15_flush_icache_all, v7_flush_icache_all - globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis - globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all - globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range - globl_equ b15_coherent_kern_range, v7_coherent_kern_range - globl_equ b15_coherent_user_range, v7_coherent_user_range - globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area - - globl_equ b15_dma_map_area, v7_dma_map_area - globl_equ b15_dma_unmap_area, v7_dma_unmap_area - globl_equ 
b15_dma_flush_range, v7_dma_flush_range - - define_cache_functions b15 diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S deleted file mode 100644 index a0035c426ce635b3ceacc647d712aabb61d0e63e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v7m.S +++ /dev/null @@ -1,454 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v7m.S - * - * Based on linux/arch/arm/mm/cache-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2005 ARM Ltd. - * - * This is the "shell" of the ARMv7M processor support. - */ -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* Generic V7M read/write macros for memory mapped cache operations */ -.macro v7m_cache_read, rt, reg - movw \rt, #:lower16:BASEADDR_V7M_SCB + \reg - movt \rt, #:upper16:BASEADDR_V7M_SCB + \reg - ldr \rt, [\rt] -.endm - -.macro v7m_cacheop, rt, tmp, op, c = al - movw\c \tmp, #:lower16:BASEADDR_V7M_SCB + \op - movt\c \tmp, #:upper16:BASEADDR_V7M_SCB + \op - str\c \rt, [\tmp] -.endm - - -.macro read_ccsidr, rt - v7m_cache_read \rt, V7M_SCB_CCSIDR -.endm - -.macro read_clidr, rt - v7m_cache_read \rt, V7M_SCB_CLIDR -.endm - -.macro write_csselr, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR -.endm - -/* - * dcisw: Invalidate data cache by set/way - */ -.macro dcisw, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCISW -.endm - -/* - * dccisw: Clean and invalidate data cache by set/way - */ -.macro dccisw, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW -.endm - -/* - * dccimvac: Clean and invalidate data cache line by MVA to PoC. - */ -.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo -.macro dccimvac\c, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c -.endm -.endr - -/* - * dcimvac: Invalidate data cache line by MVA to PoC - */ -.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo -.macro dcimvac\c, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c -.endm -.endr - -/* - * dccmvau: Clean data cache line by MVA to PoU - */ -.macro dccmvau, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU -.endm - -/* - * dccmvac: Clean data cache line by MVA to PoC - */ -.macro dccmvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC -.endm - -/* - * icimvau: Invalidate instruction caches by MVA to PoU - */ -.macro icimvau, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU -.endm - -/* - * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP. - * rt data ignored by ICIALLU(IS), so can be used for the address - */ -.macro invalidate_icache, rt - v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU - mov \rt, #0 -.endm - -/* - * Invalidate the BTB, inner shareable if SMP. 
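Unlike the CP15-based ARMv7-A code above, ARMv7-M drives every cache operation through memory-mapped System Control Block registers, which is exactly what the v7m_cacheop macro builds. A C sketch of the same idea; the SCB register addresses below are quoted from memory of the ARMv7-M ARM and should be treated as an assumption:

    #include <stdint.h>

    /* Assumed SCB cache-maintenance register addresses (ARMv7-M ARM). */
    #define V7M_SCB_ICIALLU  (*(volatile uint32_t *)0xE000EF50) /* I-cache invalidate all */
    #define V7M_SCB_DCCMVAC  (*(volatile uint32_t *)0xE000EF68) /* D clean by MVA to PoC  */

    static inline void v7m_clean_dcache_line(uint32_t mva)
    {
        V7M_SCB_DCCMVAC = mva;      /* the str \rt, [\tmp] in v7m_cacheop */
    }

    static inline void v7m_invalidate_icache(void)
    {
        V7M_SCB_ICIALLU = 0;        /* written data is ignored */
    }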
- * rt data ignored by BPIALL, so it can be used for the address - */ -.macro invalidate_bp, rt - v7m_cacheop \rt, \rt, V7M_SCB_BPIALL - mov \rt, #0 -.endm - -ENTRY(v7m_invalidate_l1) - mov r0, #0 - - write_csselr r0, r1 - read_ccsidr r0 - - movw r1, #0x7fff - and r2, r1, r0, lsr #13 - - movw r1, #0x3ff - - and r3, r1, r0, lsr #3 @ NumWays - 1 - add r2, r2, #1 @ NumSets - - and r0, r0, #0x7 - add r0, r0, #4 @ SetShift - - clz r1, r3 @ WayShift - add r4, r3, #1 @ NumWays -1: sub r2, r2, #1 @ NumSets-- - mov r3, r4 @ Temp = NumWays -2: subs r3, r3, #1 @ Temp-- - mov r5, r3, lsl r1 - mov r6, r2, lsl r0 - orr r5, r5, r6 @ Reg = (Temp< and proc-macros.S) - define_cache_functions v7m diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S deleted file mode 100644 index fc01f1b18523653a4378421b6f89dd877bbb53fd..0000000000000000000000000000000000000000 --- a/arch/arm/mm/l2c-l2x0-resume.S +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * L2C-310 early resume code. This can be used by platforms to restore - * the settings of their L2 cache controller before restoring the - * processor state. - * - * This code can only be used to if you are running in the secure world. - */ -#include -#include -#include - - .text - -ENTRY(l2c310_early_resume) - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} - @ r1 = phys address of L2C-310 controller - @ r2 = aux_ctrl - @ r3 = tag_latency - @ r4 = data_latency - @ r5 = filter_start - @ r6 = filter_end - @ r7 = prefetch_ctrl - @ r8 = pwr_ctrl - - @ Check that the address has been initialised - teq r1, #0 - reteq lr - - @ The prefetch and power control registers are revision dependent - @ and can be written whether or not the L2 cache is enabled - ldr r0, [r1, #L2X0_CACHE_ID] - and r0, r0, #L2X0_CACHE_ID_RTL_MASK - cmp r0, #L310_CACHE_ID_RTL_R2P0 - strcs r7, [r1, #L310_PREFETCH_CTRL] - cmp r0, #L310_CACHE_ID_RTL_R3P0 - strcs r8, [r1, #L310_POWER_CTRL] - - @ Don't setup the L2 cache if it is already enabled - ldr r0, [r1, #L2X0_CTRL] - tst r0, #L2X0_CTRL_EN - retne lr - - str r3, [r1, #L310_TAG_LATENCY_CTRL] - str r4, [r1, #L310_DATA_LATENCY_CTRL] - str r6, [r1, #L310_ADDR_FILTER_END] - str r5, [r1, #L310_ADDR_FILTER_START] - - str r2, [r1, #L2X0_AUX_CTRL] - mov r9, #L2X0_CTRL_EN - str r9, [r1, #L2X0_CTRL] - ret lr -ENDPROC(l2c310_early_resume) - - .align -1: .long l2x0_saved_regs - . diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S deleted file mode 100644 index b2ffce4201062e3ec2045364ddc454cf706bab8d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-legacy.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: legacy_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. 
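All three prefetch-abort stubs in the files that follow funnel into the same C handler; they differ only in where the fault address and status come from. A sketch of the calling convention (do_PrefetchAbort is the kernel's real C entry point in arch/arm/mm/fault.c; the wrapper shown is illustrative only):

    struct pt_regs;   /* kernel register-frame type, opaque here */

    extern void do_PrefetchAbort(unsigned long addr, unsigned int ifsr,
                                 struct pt_regs *regs);

    /* Pre-v6 cores have no IFSR register to read, so legacy_pabort reports
     * a fixed status of 5 (section translation fault); v6 reads IFSR from
     * c5, c0, 1, and v7 additionally reads the address from IFAR. */
    static void legacy_pabort_sketch(unsigned long aborted_pc, struct pt_regs *regs)
    {
        do_PrefetchAbort(aborted_pc, 5, regs);
    }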
- */ - - .align 5 -ENTRY(legacy_pabort) - mov r0, r4 - mov r1, #5 - b do_PrefetchAbort -ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S deleted file mode 100644 index 8686265dc9418b29381942bfd87a937a3234d46e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-v6.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: v6_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. - */ - - .align 5 -ENTRY(v6_pabort) - mov r0, r4 - mrc p15, 0, r1, c5, c0, 1 @ get IFSR - b do_PrefetchAbort -ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S deleted file mode 100644 index 9c70b1a21dc9204f24524df9905fbc077a82f2dc..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-v7.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: v7_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. - */ - - .align 5 -ENTRY(v7_pabort) - mrc p15, 0, r0, c6, c0, 2 @ get IFAR - mrc p15, 0, r1, c5, c0, 1 @ get IFSR - b do_PrefetchAbort -ENDPROC(v7_pabort) diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S deleted file mode 100644 index 4fa5371bc6624ce63be9963edd268280662d1566..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1020.S +++ /dev/null @@ -1,515 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020 - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm1020. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1020_proc_init() - */ -ENTRY(cpu_arm1020_proc_init) - ret lr - -/* - * cpu_arm1020_proc_fin() - */ -ENTRY(cpu_arm1020_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1020_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
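CACHE_DLIMIT, defined above, drives a cost cutoff in the range-flush entry points that follow: past 32 KiB it is cheaper to clean the whole D-cache by set/way than to walk the range line by line. A minimal C sketch of that check, with hypothetical helpers (MVA-based cache ops ignore the low address bits, so no explicit alignment is needed, matching the assembly):

    #define CACHE_DLIMIT     32768
    #define CACHE_DLINESIZE  32

    extern void flush_whole_dcache(void);                        /* hypothetical */
    extern void clean_and_invalidate_dcache_line(unsigned long); /* hypothetical */

    static void flush_user_range_sketch(unsigned long start, unsigned long end)
    {
        if (end - start >= CACHE_DLIMIT) {      /* the bhs __flush_whole_cache */
            flush_whole_dcache();
            return;
        }
        for (unsigned long mva = start; mva < end; mva += CACHE_DLINESIZE)
            clean_and_invalidate_dcache_line(mva);
    }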
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1020_reset) - .popsection - -/* - * cpu_arm1020_do_idle() - */ - .align 5 -ENTRY(cpu_arm1020_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1020_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1020_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1020_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1020_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - mcr p15, 0, ip, c7, c10, 4 @ drain WB - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1020_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_coherent_kern_range) - /* FALLTRHOUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcr p15, 0, ip, c7, c10, 4 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1020_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, ip, c7, c10, 4 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, ip, c7, c10, 4 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - mcrne p15, 0, ip, c7, c10, 4 @ drain WB -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
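The map-area entry point that follows these three routines picks among them by DMA direction with a cmp/beq/bcs chain. The same dispatch in C, using the kernel's dma_data_direction values:

    enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1,
                              DMA_FROM_DEVICE = 2 };   /* kernel values */

    extern void dma_clean_range(unsigned long start, unsigned long end);
    extern void dma_inv_range(unsigned long start, unsigned long end);
    extern void dma_flush_range(unsigned long start, unsigned long end);

    static void dma_map_area_sketch(unsigned long start, unsigned long size,
                                    enum dma_data_direction dir)
    {
        unsigned long end = start + size;

        switch (dir) {
        case DMA_TO_DEVICE:      /* CPU wrote, device reads: push dirty lines */
            dma_clean_range(start, end);
            break;
        case DMA_FROM_DEVICE:    /* device writes: discard stale CPU copies */
            dma_inv_range(start, end);
            break;
        default:                 /* DMA_BIDIRECTIONAL: clean and invalidate */
            dma_flush_range(start, end);
            break;
        }
    }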
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 - mcr p15, 0, ip, c7, c10, 4 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1020_dma_clean_range - bcs arm1020_dma_inv_range - b arm1020_dma_flush_range -ENDPROC(arm1020_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020_dma_unmap_area) - ret lr -ENDPROC(arm1020_dma_unmap_area) - - .globl arm1020_flush_kern_cache_louis - .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1020 - - .align 5 -ENTRY(cpu_arm1020_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1020_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1020_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r3, c7, c10, 4 - mov r1, #0xF @ 16 segments -1: mov r3, #0x3F @ 64 entries -2: mov ip, r3, LSL #26 @ shift up entry - orr ip, ip, r1, LSL #5 @ shift in/up index - mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry - mov ip, #0 - mcr p15, 0, ip, c7, c10, 4 - subs r3, r3, #1 - cmp r3, #0 - bge 2b @ entries 3F to 0 - subs r1, r1, #1 - cmp r1, #0 - bge 1b @ segments 15 to 0 - -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif /* CONFIG_MMU */ - ret lr - -/* - * cpu_arm1020_set_pte(ptep, pte) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1020_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 4 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1020_setup, #function -__arm1020_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, arm1020_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1020_setup, . 
- __arm1020_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - */ - .type arm1020_crval, #object -arm1020_crval: - crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort - - - .section ".rodata" - - string cpu_arch_name, "armv5t" - string cpu_elf_name, "v5" - - .type cpu_arm1020_name, #object -cpu_arm1020_name: - .ascii "ARM1020" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif -#ifndef CONFIG_CPU_BPREDICT_DISABLE - .ascii "B" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif - .ascii "\0" - .size cpu_arm1020_name, . - cpu_arm1020_name - - .align - - .section ".proc.info.init", #alloc - - .type __arm1020_proc_info,#object -__arm1020_proc_info: - .long 0x4104a200 @ ARM 1020T (Architecture v5T) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1020_setup, __arm1020_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm1020_name - .long arm1020_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1020_cache_fns - .size __arm1020_proc_info, . - __arm1020_proc_info diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S deleted file mode 100644 index 5d8a8339e09a4ea7c90093f007a37a3f54bba99e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1020e.S +++ /dev/null @@ -1,475 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020 - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm1020e. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1020e_proc_init() - */ -ENTRY(cpu_arm1020e_proc_init) - ret lr - -/* - * cpu_arm1020e_proc_fin() - */ -ENTRY(cpu_arm1020e_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1020e_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
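The crval record above encodes the control-register recipe used by __arm1020_setup: a mask of bits to force clear, plus the value to set for MMU ("mmuset") and MMU-less ("ucset") configurations. The bic/orr pair reduces to two C operations:

    #include <stdint.h>

    /* clear = bits to force off, set = bits to force on (mmuset or ucset). */
    static uint32_t setup_control_register(uint32_t ctrl, uint32_t clear,
                                           uint32_t set)
    {
        ctrl &= ~clear;   /* bic r0, r0, r5 */
        ctrl |= set;      /* orr r0, r0, r6 */
        return ctrl;
    }

For the ARM1020 with the MMU enabled this is setup_control_register(ctrl, 0x0000593f, 0x00003935), using the constants from arm1020_crval above.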
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020e_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1020e_reset) - .popsection - -/* - * cpu_arm1020e_do_idle() - */ - .align 5 -ENTRY(cpu_arm1020e_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1020e_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1020e_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1020e_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1020e_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1020e_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_coherent_kern_range) - /* FALLTHROUGH */ -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1020e_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020e_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020e_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020e_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1020e_dma_clean_range - bcs arm1020e_dma_inv_range - b arm1020e_dma_flush_range -ENDPROC(arm1020e_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020e_dma_unmap_area) - ret lr -ENDPROC(arm1020e_dma_unmap_area) - - .globl arm1020e_flush_kern_cache_louis - .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1020e - - .align 5 -ENTRY(cpu_arm1020e_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1020e_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1020e_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r3, c7, c10, 4 - mov r1, #0xF @ 16 segments -1: mov r3, #0x3F @ 64 entries -2: mov ip, r3, LSL #26 @ shift up entry - orr ip, ip, r1, LSL #5 @ shift in/up index - mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry - mov ip, #0 - subs r3, r3, #1 - cmp r3, #0 - bge 2b @ entries 3F to 0 - subs r1, r1, #1 - cmp r1, #0 - bge 1b @ segments 15 to 0 - -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1020e_set_pte(ptep, pte) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1020e_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1020e_setup, #function -__arm1020e_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm1020e_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1020e_setup, . 
- __arm1020e_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - */ - .type arm1020e_crval, #object -arm1020e_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_arm1020e_name, "ARM1020E" - - .align - - .section ".proc.info.init", #alloc - - .type __arm1020e_proc_info,#object -__arm1020e_proc_info: - .long 0x4105a200 @ ARM 1020TE (Architecture v5TE) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1020e_setup, __arm1020e_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP - .long cpu_arm1020e_name - .long arm1020e_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1020e_cache_fns - .size __arm1020e_proc_info, . - __arm1020e_proc_info diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S deleted file mode 100644 index b3dd95c345e482f20ac898f1610bdcb2ceb6a815..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1022.S +++ /dev/null @@ -1,469 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM1022E. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1022_proc_init() - */ -ENTRY(cpu_arm1022_proc_init) - ret lr - -/* - * cpu_arm1022_proc_fin() - */ -ENTRY(cpu_arm1022_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1022_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1022_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1022_reset) - .popsection - -/* - * cpu_arm1022_do_idle() - */ - .align 5 -ENTRY(cpu_arm1022_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1022_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1022_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1022_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1022_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1022_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1022_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1022_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1022_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1022_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1022_dma_clean_range - bcs arm1022_dma_inv_range - b arm1022_dma_flush_range -ENDPROC(arm1022_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1022_dma_unmap_area) - ret lr -ENDPROC(arm1022_dma_unmap_area) - - .globl arm1022_flush_kern_cache_louis - .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1022 - - .align 5 -ENTRY(cpu_arm1022_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1022_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1022_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1022_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1022_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1022_setup, #function -__arm1022_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm1022_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.............. -#endif - ret lr - .size __arm1022_setup, . 
- __arm1022_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - * - */ - .type arm1022_crval, #object -arm1022_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_arm1022_name, "ARM1022" - - .align - - .section ".proc.info.init", #alloc - - .type __arm1022_proc_info,#object -__arm1022_proc_info: - .long 0x4105a220 @ ARM 1022E (v5TE) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1022_setup, __arm1022_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP - .long cpu_arm1022_name - .long arm1022_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1022_cache_fns - .size __arm1022_proc_info, . - __arm1022_proc_info diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S deleted file mode 100644 index ac5afde12f35cfe09eccd8fa449f0ae4b7057962..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1026.S +++ /dev/null @@ -1,463 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM1026EJ-S. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1026_proc_init() - */ -ENTRY(cpu_arm1026_proc_init) - ret lr - -/* - * cpu_arm1026_proc_fin() - */ -ENTRY(cpu_arm1026_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1026_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
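The crval record above (arm1022_crval) pairs with the bic/orr sequence at the end of __arm1022_setup: `clear` is the mask of CP15 control-register bits the setup code claims, and `mmuset` (or `ucset` for uncached/MMU-less configurations) is the value forced into those bits. In C terms, a sketch with the constants copied from the record above:

```c
#include <stdint.h>

/* Constants from arm1022_crval above. */
#define CR_CLEAR  0x00007f3fu /* control bits owned by the setup code */
#define CR_MMUSET 0x00003935u /* value forced in for MMU kernels      */
#define CR_UCSET  0x00001930u /* value for uncached / no-MMU builds   */

/* What "bic r0, r0, r5; orr r0, r0, r6" computes on the control reg. */
static inline uint32_t arm1022_ctrl_value(uint32_t cr, int mmu)
{
	cr &= ~CR_CLEAR;                  /* bic: drop every owned bit  */
	cr |= mmu ? CR_MMUSET : CR_UCSET; /* orr: force the chosen set  */
	return cr;
}
```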
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1026_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1026_reset) - .popsection - -/* - * cpu_arm1026_do_idle() - */ - .align 5 -ENTRY(cpu_arm1026_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1026_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1026_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1026_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1026_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate - bne 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1026_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_coherent_kern_range) - /* FALLTHROUGH */ -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
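arm1026_flush_user_cache_range above opens by comparing the span against CACHE_DLIMIT and branching to __flush_whole_cache when the range is large, the idea being that past roughly 32 KiB of line-by-line maintenance a whole-cache operation is cheaper. A sketch of that dispatch, with hypothetical helpers standing in for the assembler paths:

```c
#include <stdint.h>

#define CACHE_DLIMIT    32768 /* from the #define earlier in this file */
#define CACHE_DLINESIZE 32

void flush_whole_dcache(void);              /* hypothetical shims for */
void dcache_clean_inv_line(uintptr_t addr); /* the assembler paths    */

static void flush_user_range_sketch(uintptr_t start, uintptr_t end)
{
	/* "sub r3, r1, r0; cmp r3, #CACHE_DLIMIT; bhs __flush_whole_cache" */
	if (end - start >= CACHE_DLIMIT) {
		flush_whole_dcache();
		return;
	}
	for (uintptr_t p = start; p < end; p += CACHE_DLINESIZE)
		dcache_clean_inv_line(p);
}
```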
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1026_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1026_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1026_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
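Each of these proc files ends its DMA block with the same dma_map_area dispatcher (arm1022's appeared earlier; arm1026's follows below): a single cmp against DMA_TO_DEVICE routes the call to clean, invalidate, or full flush based on the direction argument. Equivalent C, assuming the conventional ordering of the kernel's DMA direction codes (bidirectional = 0, to-device = 1, from-device = 2):

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative direction codes, mirroring enum dma_data_direction. */
enum dma_dir { DIR_BIDIRECTIONAL = 0, DIR_TO_DEVICE = 1, DIR_FROM_DEVICE = 2 };

void dma_clean_range(uintptr_t s, uintptr_t e); /* hypothetical shims    */
void dma_inv_range(uintptr_t s, uintptr_t e);   /* over the asm routines */
void dma_flush_range(uintptr_t s, uintptr_t e);

static void dma_map_area_sketch(uintptr_t start, size_t size, enum dma_dir dir)
{
	uintptr_t end = start + size;        /* "add r1, r1, r0"            */

	if (dir == DIR_TO_DEVICE)            /* beq ..._dma_clean_range     */
		dma_clean_range(start, end);
	else if (dir > DIR_TO_DEVICE)        /* bcs: unsigned >=, here only */
		dma_inv_range(start, end);   /* from-device reaches this    */
	else                                 /* b ..._dma_flush_range       */
		dma_flush_range(start, end); /* bidirectional               */
}
```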
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1026_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1026_dma_clean_range - bcs arm1026_dma_inv_range - b arm1026_dma_flush_range -ENDPROC(arm1026_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1026_dma_unmap_area) - ret lr -ENDPROC(arm1026_dma_unmap_area) - - .globl arm1026_flush_kern_cache_louis - .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1026 - - .align 5 -ENTRY(cpu_arm1026_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1026_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1026_switch_mm) -#ifdef CONFIG_MMU - mov r1, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate - bne 1b -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1026_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1026_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1026_setup, #function -__arm1026_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 - mcr p15, 0, r4, c2, c0 @ load page table pointer -#endif -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ explicitly disable writeback - mcr p15, 7, r0, c15, c0, 0 -#endif - adr r5, arm1026_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1026_setup, . 
- __arm1026_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - * - */ - .type arm1026_crval, #object -arm1026_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort - - .section .rodata - - string cpu_arch_name, "armv5tej" - string cpu_elf_name, "v5" - .align - string cpu_arm1026_name, "ARM1026EJ-S" - .align - - .section ".proc.info.init", #alloc - - .type __arm1026_proc_info,#object -__arm1026_proc_info: - .long 0x4106a260 @ ARM 1026EJ-S (v5TEJ) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1026_setup, __arm1026_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA - .long cpu_arm1026_name - .long arm1026_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1026_cache_fns - .size __arm1026_proc_info, . - __arm1026_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S deleted file mode 100644 index c99d24363f32ee64754068b971b059be36ee8f05..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm720.S +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720 - * - * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) - * Rob Scott (rscott@mtrob.fdns.net) - * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2004. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM720T. The ARM720T has a writethrough IDC - * cache, so we don't need to clean it. - * - * Changelog: - * 05-09-2000 SJH Created by moving 720 specific functions - * out of 'proc-arm6,7.S' per RMK discussion - * 07-25-2000 SJH Added idle function. - * 08-25-2000 DBS Updated for integration of ARM Ltd version. - * 04-20-2004 HSC modified for non-paged memory management mode. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * Function: arm720_proc_init (void) - * : arm720_proc_fin (void) - * - * Notes : This processor does not require these - */ -ENTRY(cpu_arm720_dcache_clean_area) -ENTRY(cpu_arm720_proc_init) - ret lr - -ENTRY(cpu_arm720_proc_fin) - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * Function: arm720_proc_do_idle(void) - * Params : r0 = unused - * Purpose : put the processor in proper idle mode - */ -ENTRY(cpu_arm720_do_idle) - ret lr - -/* - * Function: arm720_switch_mm(unsigned long pgd_phys) - * Params : pgd_phys Physical address of page table - * Purpose : Perform a task switch, saving the old process' state and restoring - * the new. 
- */ -ENTRY(cpu_arm720_switch_mm) -#ifdef CONFIG_MMU - mov r1, #0 - mcr p15, 0, r1, c7, c7, 0 @ invalidate cache - mcr p15, 0, r0, c2, c0, 0 @ update page table ptr - mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) -#endif - ret lr - -/* - * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) - * Params : r0 = Address to set - * : r1 = value to set - * Purpose : Set a PTE and flush it out of any WB cache - */ - .align 5 -ENTRY(cpu_arm720_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 -#endif - ret lr - -/* - * Function: arm720_reset - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm720_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate cache -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) -#endif - mrc p15, 0, ip, c1, c0, 0 @ get ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x2100 @ ..v....s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm720_reset) - .popsection - - .type __arm710_setup, #function -__arm710_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ invalidate caches -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) -#endif - mrc p15, 0, r0, c1, c0 @ get control register - ldr r5, arm710_cr1_clear - bic r0, r0, r5 - ldr r5, arm710_cr1_set - orr r0, r0, r5 - ret lr @ __ret (head.S) - .size __arm710_setup, . - __arm710_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .... 0001 ..11 1101 - * - */ - .type arm710_cr1_clear, #object - .type arm710_cr1_set, #object -arm710_cr1_clear: - .word 0x0f3f -arm710_cr1_set: - .word 0x013d - - .type __arm720_setup, #function -__arm720_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ invalidate caches -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) -#endif - adr r5, arm720_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr @ __ret (head.S) - .size __arm720_setup, . - __arm720_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..1. 1001 ..11 1101 - * - */ - .type arm720_crval, #object -arm720_crval: - crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm710_name, "ARM710T" - string cpu_arm720_name, "ARM720T" - - .align - -/* - * See for a definition of this structure. - */ - - .section ".proc.info.init", #alloc - -.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn \cpu_flush, __\name\()_proc_info @ cpu_flush - .long cpu_arch_name @ arch_name - .long cpu_elf_name @ elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long \cpu_name - .long arm720_processor_functions - .long v4_tlb_fns - .long v4wt_user_fns - .long v4_cache_fns - .size __\name\()_proc_info, . 
- __\name\()_proc_info
-.endm
-
-	arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup
-	arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
deleted file mode 100644
index 1b4a3838393fbda090d9528d834947bfae96d4ae..0000000000000000000000000000000000000000
--- a/arch/arm/mm/proc-arm740.S
+++ /dev/null
@@ -1,147 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/mm/arm740.S: utility functions for ARM740
- *
- * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com)
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "proc-macros.S"
-
-	.text
-/*
- * cpu_arm740_proc_init()
- * cpu_arm740_do_idle()
- * cpu_arm740_dcache_clean_area()
- * cpu_arm740_switch_mm()
- *
- * These are not required.
- */
-ENTRY(cpu_arm740_proc_init)
-ENTRY(cpu_arm740_do_idle)
-ENTRY(cpu_arm740_dcache_clean_area)
-ENTRY(cpu_arm740_switch_mm)
-	ret	lr
-
-/*
- * cpu_arm740_proc_fin()
- */
-ENTRY(cpu_arm740_proc_fin)
-	mrc	p15, 0, r0, c1, c0, 0
-	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
-	bic	r0, r0, #0x0000000c		@ w-buffer/cache
-	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ret	lr
-
-/*
- * cpu_arm740_reset(loc)
- * Params  : r0 = address to jump to
- * Notes   : This sets up everything for a reset
- */
-	.pushsection	.idmap.text, "ax"
-ENTRY(cpu_arm740_reset)
-	mov	ip, #0
-	mcr	p15, 0, ip, c7, c0, 0		@ invalidate cache
-	mrc	p15, 0, ip, c1, c0, 0		@ get ctrl register
-	bic	ip, ip, #0x0000000c		@ ............wc..
-	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	ret	r0
-ENDPROC(cpu_arm740_reset)
-	.popsection
-
-	.type	__arm740_setup, #function
-__arm740_setup:
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c0, 0		@ invalidate caches
-
-	mcr	p15, 0, r0, c6, c3		@ disable area 3~7
-	mcr	p15, 0, r0, c6, c4
-	mcr	p15, 0, r0, c6, c5
-	mcr	p15, 0, r0, c6, c6
-	mcr	p15, 0, r0, c6, c7
-
-	mov	r0, #0x0000003F			@ base = 0, size = 4GB
-	mcr	p15, 0, r0, c6, c0		@ set area 0, default
-
-	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r3, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r4, #10				@ 11 is the minimum (4KB)
-1:	add	r4, r4, #1			@ area size *= 2
-	movs	r3, r3, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r4, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6, c1		@ set area 1, RAM
-
-	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r3, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	cmp	r3, #0
-	moveq	r0, #0
-	beq	2f
-	mov	r4, #10				@ 11 is the minimum (4KB)
-1:	add	r4, r4, #1			@ area size *= 2
-	movs	r3, r3, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r4, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-2:	mcr	p15, 0, r0, c6, c2		@ set area 2, ROM/FLASH
-
-	mov	r0, #0x06
-	mcr	p15, 0, r0, c2, c0		@ Region 1&2 cacheable
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mov	r0, #0x00			@ disable whole write buffer
-#else
-	mov	r0, #0x02			@ Region 1 write buffered
-#endif
-	mcr	p15, 0, r0, c3, c0
-
-	mov	r0, #0x10000
-	sub	r0, r0, #1			@ r0 = 0xffff
-	mcr	p15, 0, r0, c5, c0		@ all read/write access
-
-	mrc	p15, 0, r0, c1, c0		@ get control register
-	bic	r0, r0, #0x3F000000		@ set to standard caching mode
-						@ need some benchmark
-	orr	r0, r0, #0x0000000d		@ MPU/Cache/WB
-
-	ret	lr
-
-	.size	__arm740_setup, . 
- __arm740_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_arm740_name, "ARM740T" - - .align - - .section ".proc.info.init", #alloc - .type __arm740_proc_info,#object -__arm740_proc_info: - .long 0x41807400 - .long 0xfffffff0 - .long 0 - .long 0 - initfn __arm740_setup, __arm740_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT - .long cpu_arm740_name - .long arm740_processor_functions - .long 0 - .long 0 - .long v4_cache_fns @ cache model - .size __arm740_proc_info, . - __arm740_proc_info diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S deleted file mode 100644 index 17a4687065c7f9cc6a7b2fad4d0605e2767fbca6..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm7tdmi.S +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI - * - * Copyright (C) 2003-2006 Hyok S. Choi - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - - .text -/* - * cpu_arm7tdmi_proc_init() - * cpu_arm7tdmi_do_idle() - * cpu_arm7tdmi_dcache_clean_area() - * cpu_arm7tdmi_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm7tdmi_proc_init) -ENTRY(cpu_arm7tdmi_do_idle) -ENTRY(cpu_arm7tdmi_dcache_clean_area) -ENTRY(cpu_arm7tdmi_switch_mm) - ret lr - -/* - * cpu_arm7tdmi_proc_fin() - */ -ENTRY(cpu_arm7tdmi_proc_fin) - ret lr - -/* - * Function: cpu_arm7tdmi_reset(loc) - * Params : loc(r0) address to jump to - * Purpose : Sets up everything for a reset and jump to the location for soft reset. - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm7tdmi_reset) - ret r0 -ENDPROC(cpu_arm7tdmi_reset) - .popsection - - .type __arm7tdmi_setup, #function -__arm7tdmi_setup: - ret lr - .size __arm7tdmi_setup, . - __arm7tdmi_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm7tdmi_name, "ARM7TDMI" - string cpu_triscenda7_name, "Triscend-A7x" - string cpu_at91_name, "Atmel-AT91M40xxx" - string cpu_s3c3410_name, "Samsung-S3C3410" - string cpu_s3c44b0x_name, "Samsung-S3C44B0x" - string cpu_s3c4510b_name, "Samsung-S3C4510B" - string cpu_s3c4530_name, "Samsung-S3C4530" - string cpu_netarm_name, "NETARM" - - .align - - .section ".proc.info.init", #alloc - -.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ - extra_hwcaps=0 - .type __\name\()_proc_info, #object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long 0 - .long 0 - initfn __arm7tdmi_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) - .long \cpu_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __\name\()_proc_info, . 
- __\name\()_proc_info -.endm - - arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ - cpu_arm7tdmi_name - arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ - cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ - cpu_at91_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ - cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ - cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ - cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ - cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S deleted file mode 100644 index 298c76b47749f962ce8ba46072df1af7dfefecac..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm920.S +++ /dev/null @@ -1,466 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm920. - * - * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 8 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 65536 - - - .text -/* - * cpu_arm920_proc_init() - */ -ENTRY(cpu_arm920_proc_init) - ret lr - -/* - * cpu_arm920_proc_fin() - */ -ENTRY(cpu_arm920_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm920_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm920_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm920_reset) - .popsection - -/* - * cpu_arm920_do_idle() - */ - .align 5 -ENTRY(cpu_arm920_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - - -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm920_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm920_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
- */ -ENTRY(arm920_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm920_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for address space - */ -ENTRY(arm920_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm920_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm920_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm920_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm920_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm920_dma_clean_range - bcs arm920_dma_inv_range - b arm920_dma_flush_range -ENDPROC(arm920_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm920_dma_unmap_area) - ret lr -ENDPROC(arm920_dma_unmap_area) - - .globl arm920_flush_kern_cache_louis - .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm920 -#endif - - -ENTRY(cpu_arm920_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm920_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm920_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -@ && Re-written to use Index Ops. 
-@ && Uses registers r1, r3 and ip - - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm920_set_pte(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm920_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ -.globl cpu_arm920_suspend_size -.equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm920_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm920_do_suspend) - -ENTRY(cpu_arm920_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_arm920_do_resume) -#endif - - .type __arm920_setup, #function -__arm920_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm920_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __arm920_setup, . - __arm920_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 0101 - * - */ - .type arm920_crval, #object -arm920_crval: - crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm920_name, "ARM920T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm920_proc_info,#object -__arm920_proc_info: - .long 0x41009200 - .long 0xff00fff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm920_setup, __arm920_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm920_name - .long arm920_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - .long arm920_cache_fns -#else - .long v4wt_cache_fns -#endif - .size __arm920_proc_info, . 
- __arm920_proc_info diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S deleted file mode 100644 index 824be3a0bc23820149f99bc0a6ef8f66dfbc7560..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm922.S +++ /dev/null @@ -1,444 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * Copyright (C) 2001 Altera Corporation - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm922. - * - * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 4 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. (I think this should - * be 32768). - */ -#define CACHE_DLIMIT 8192 - - - .text -/* - * cpu_arm922_proc_init() - */ -ENTRY(cpu_arm922_proc_init) - ret lr - -/* - * cpu_arm922_proc_fin() - */ -ENTRY(cpu_arm922_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm922_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm922_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm922_reset) - .popsection - -/* - * cpu_arm922_do_idle() - */ - .align 5 -ENTRY(cpu_arm922_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - - -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm922_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm922_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm922_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
- */ -ENTRY(arm922_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm922_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm922_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm922_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
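The __flush_whole_cache loops for the ARM920 and ARM922 (and the matching loop in cpu_arm920_switch_mm) walk the data cache by index rather than by address: the segment number sits in the bits starting at position 5 and the line index in the bits starting at position 26 of the word handed to the clean+invalidate-by-index operation, and both counters run down to zero. A sketch of how those index words are generated (segment count per the ARM920 #defines; the ARM922 file #defines 4 segments):

```c
#include <stdint.h>

#define CACHE_DSEGMENTS 8  /* ARM920; ARM922 uses 4 */
#define CACHE_DENTRIES  64

/* Hypothetical shim for "mcr p15, 0, rX, c7, c14, 2". */
void dcache_clean_inv_index(uint32_t index);

static void flush_whole_dcache_sketch(void)
{
	for (int seg = CACHE_DSEGMENTS - 1; seg >= 0; seg--)        /* subs/bcs 1b */
		for (int idx = CACHE_DENTRIES - 1; idx >= 0; idx--) /* subs/bcs 2b */
			dcache_clean_inv_index(((uint32_t)seg << 5) |
					       ((uint32_t)idx << 26));
}
```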
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm922_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm922_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm922_dma_clean_range - bcs arm922_dma_inv_range - b arm922_dma_flush_range -ENDPROC(arm922_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm922_dma_unmap_area) - ret lr -ENDPROC(arm922_dma_unmap_area) - - .globl arm922_flush_kern_cache_louis - .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm922 -#endif - - -ENTRY(cpu_arm922_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm922_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm922_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -@ && Re-written to use Index Ops. -@ && Uses registers r1, r3 and ip - - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm922_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm922_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif /* CONFIG_MMU */ - ret lr - - .type __arm922_setup, #function -__arm922_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm922_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __arm922_setup, . 
- __arm922_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 0101 - * - */ - .type arm922_crval, #object -arm922_crval: - crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm922_name, "ARM922T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm922_proc_info,#object -__arm922_proc_info: - .long 0x41009220 - .long 0xff00fff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm922_setup, __arm922_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm922_name - .long arm922_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - .long arm922_cache_fns -#else - .long v4wt_cache_fns -#endif - .size __arm922_proc_info, . - __arm922_proc_info diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S deleted file mode 100644 index d40cff8f102c2b5c7d603f74ceb9215693a33010..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm925.S +++ /dev/null @@ -1,509 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/arm925.S: MMU functions for ARM925 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * Copyright (C) 2002 RidgeRun, Inc. - * Copyright (C) 2002-2003 MontaVista Software, Inc. - * - * Update for Linux-2.6 and cache flush improvements - * Copyright (C) 2004 Nokia Corporation by Tony Lindgren - * - * hacked for non-paged-MM by Hyok S. Choi, 2004. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm925. - * - * CONFIG_CPU_ARM925_CPU_IDLE -> nohlt - * - * Some additional notes based on deciphering the TI TRM on OMAP-5910: - * - * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush - * entry mode" must be 0 to flush the entries in both segments - * at once. This is the default value. See TRM 2-20 and 2-24 for - * more information. - * - * NOTE2: Default is the "D-cache clean and flush entry mode". It looks - * like the "Transparent mode" must be on for partial cache flushes - * to work in this mode. This mode only works with 16-bit external - * memory. See TRM 2-24 for more information. - * - * NOTE3: Write-back cache flushing seems to be flakey with devices using - * direct memory access, such as USB OHCI. The workaround is to use - * write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is - * the default for OMAP-1510). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 16 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 2 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 256 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. 
- */ -#define CACHE_DLIMIT 8192 - - .text -/* - * cpu_arm925_proc_init() - */ -ENTRY(cpu_arm925_proc_init) - ret lr - -/* - * cpu_arm925_proc_fin() - */ -ENTRY(cpu_arm925_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm925_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm925_reset) - /* Send software reset to MPU and DSP */ - mov ip, #0xff000000 - orr ip, ip, #0x00fe0000 - orr ip, ip, #0x0000ce00 - mov r4, #1 - strh r4, [ip, #0x10] -ENDPROC(cpu_arm925_reset) - .popsection - - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 - -/* - * cpu_arm925_do_idle() - * - * Called with IRQs disabled - */ - .align 10 -ENTRY(cpu_arm925_do_idle) - mov r0, #0 - mrc p15, 0, r1, c1, c0, 0 @ Read control register - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - bic r2, r1, #1 << 12 - mcr p15, 0, r2, c1, c0, 0 @ Disable I cache - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm925_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm925_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm925_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm925_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else - /* Flush entries in both segments at once, see NOTE1 above */ - mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 4 - bcs 2b @ entries 255 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. 
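In the range-flush routine below, a single "tst r2, #VM_EXEC" sets the flags once per pass and the following mcrne instructions then skip the I-cache work for non-executable mappings; the loop body is also unrolled two lines per iteration. The control flow, as a C sketch (the VM_EXEC value is shown for illustration; it comes from the mm headers):

```c
#include <stdint.h>

#define VM_EXEC         0x00000004ul /* illustrative; defined in the mm headers */
#define CACHE_DLINESIZE 16           /* the ARM925 uses 16-byte D-cache lines   */

void dcache_clean_inv_line(uintptr_t addr); /* hypothetical shims */
void icache_inv_line(uintptr_t addr);
void drain_write_buffer(void);

static void flush_user_range_sketch(uintptr_t start, uintptr_t end,
				    unsigned long vm_flags)
{
	for (uintptr_t p = start; p < end; p += CACHE_DLINESIZE) {
		dcache_clean_inv_line(p);
		if (vm_flags & VM_EXEC)     /* tst r2, #VM_EXEC / mcrne */
			icache_inv_line(p);
	}
	if (vm_flags & VM_EXEC)
		drain_write_buffer();       /* mcrne ... c7, c10, 4 */
}
```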
- * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm925_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm925_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm925_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm925_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm925_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
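Per NOTE3 earlier in this file, these routines are often built with CONFIG_CPU_DCACHE_WRITETHROUGH (the OMAP-1510 default). With a write-through D-cache no line is ever dirty, so in the routine that follows the clean loop compiles away entirely and only the write-buffer drain remains. A sketch of that shape:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 16

void dcache_clean_line(uintptr_t addr); /* hypothetical shim */
void drain_write_buffer(void);

static void dma_clean_range_sketch(uintptr_t start, uintptr_t end)
{
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
	/* Write-back build only: push dirty lines out to memory. */
	for (uintptr_t p = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
	     p < end; p += CACHE_DLINESIZE)
		dcache_clean_line(p);
#endif
	drain_write_buffer(); /* still needed either way */
}
```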
- *
- * - start - virtual start address
- * - end - virtual end address
- *
- * (same as v4wb)
- */
-arm925_dma_clean_range:
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bic	r0, r0, #CACHE_DLINESIZE - 1
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-	add	r0, r0, #CACHE_DLINESIZE
-	cmp	r0, r1
-	blo	1b
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/*
- * dma_flush_range(start, end)
- *
- * Clean and invalidate the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(arm925_dma_flush_range)
-	bic	r0, r0, #CACHE_DLINESIZE - 1
-1:
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
-#else
-	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
-#endif
-	add	r0, r0, #CACHE_DLINESIZE
-	cmp	r0, r1
-	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/*
- * dma_map_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-ENTRY(arm925_dma_map_area)
-	add	r1, r1, r0
-	cmp	r2, #DMA_TO_DEVICE
-	beq	arm925_dma_clean_range
-	bcs	arm925_dma_inv_range
-	b	arm925_dma_flush_range
-ENDPROC(arm925_dma_map_area)
-
-/*
- * dma_unmap_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-ENTRY(arm925_dma_unmap_area)
-	ret	lr
-ENDPROC(arm925_dma_unmap_area)
-
-	.globl	arm925_flush_kern_cache_louis
-	.equ	arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
-
-	@ define struct cpu_cache_fns (see and proc-macros.S)
-	define_cache_functions arm925
-
-ENTRY(cpu_arm925_dcache_clean_area)
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-	add	r0, r0, #CACHE_DLINESIZE
-	subs	r1, r1, #CACHE_DLINESIZE
-	bhi	1b
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/* =============================== PageTable ============================== */
-
-/*
- * cpu_arm925_switch_mm(pgd)
- *
- * Set the translation base pointer to be as described by pgd.
- *
- * pgd: new page tables
- */
-	.align	5
-ENTRY(cpu_arm925_switch_mm)
-#ifdef CONFIG_MMU
-	mov	ip, #0
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
-#else
-	/* Flush entries in both segments at once, see NOTE1 above */
-	mov	r3, #(CACHE_DENTRIES - 1) << 4	@ 256 entries in segment
-2:	mcr	p15, 0, r3, c7, c14, 2		@ clean & invalidate D index
-	subs	r3, r3, #1 << 4
-	bcs	2b				@ entries 255 to 0
-#endif
-	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
-	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
-#endif
-	ret	lr
-
-/*
- * cpu_arm925_set_pte_ext(ptep, pte, ext)
- *
- * Set a PTE and flush it out
- */
-	.align	5
-ENTRY(cpu_arm925_set_pte_ext)
-#ifdef CONFIG_MMU
-	armv3_set_pte_ext
-	mov	r0, r0
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-#endif /* CONFIG_MMU */
-	ret	lr
-
-	.type	__arm925_setup, #function
-__arm925_setup:
-	mov	r0, #0
-
-	/* Transparent on, D-cache clean & flush mode. 
See NOTE2 above */ - orr r0,r0,#1 << 1 @ transparent mode on - mcr p15, 0, r0, c15, c1, 0 @ write TI config register - - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ disable write-back on caches explicitly - mcr p15, 7, r0, c15, c0, 0 -#endif - - adr r5, arm925_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .1.. .... .... .... -#endif - ret lr - .size __arm925_setup, . - __arm925_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 0001 ..11 1101 - * - */ - .type arm925_crval, #object -arm925_crval: - crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm925_name, "ARM925T" - - .align - - .section ".proc.info.init", #alloc - -.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm925_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm925_name - .long arm925_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm925_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name - arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S deleted file mode 100644 index f3cd08f353f00a92e186a075ee7ab17ec41f65a1..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm926.S +++ /dev/null @@ -1,488 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S - * - * Copyright (C) 1999-2001 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm926. - * - * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define CACHE_DLIMIT 16384 - -/* - * the cache line size of the I and D cache - */ -#define CACHE_DLINESIZE 32 - - .text -/* - * cpu_arm926_proc_init() - */ -ENTRY(cpu_arm926_proc_init) - ret lr - -/* - * cpu_arm926_proc_fin() - */ -ENTRY(cpu_arm926_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. 
- mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm926_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm926_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm926_reset) - .popsection - -/* - * cpu_arm926_do_idle() - * - * Called with IRQs disabled - */ - .align 10 -ENTRY(cpu_arm926_do_idle) - mov r0, #0 - mrc p15, 0, r1, c1, c0, 0 @ Read control register - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - bic r2, r1, #1 << 12 - mrs r3, cpsr @ Disable FIQs while Icache - orr ip, r3, #PSR_F_BIT @ is disabled - msr cpsr_c, ip - mcr p15, 0, r2, c1, c0, 0 @ Disable I cache - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - msr cpsr_c, r3 @ Restore FIQ state - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm926_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm926_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm926_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm926_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm926_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. 
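The arm9xx coherent_*_range implementations here all share one by-MVA pattern: align the start address down to a cache line, then clean the D-cache entry and invalidate the I-cache entry for each line, and finally drain the write buffer. A minimal C model of that loop follows, as a reading aid only; cp15_clean_dline(), cp15_inv_iline() and cp15_drain_wb() are hypothetical stand-ins for the mcr instructions, which C cannot express directly.

#include <stdint.h>

#define CACHE_DLINESIZE 32u  /* line size assumed by proc-arm926.S */

static void cp15_clean_dline(uintptr_t mva) { (void)mva; } /* mcr p15, 0, Rd, c7, c10, 1 */
static void cp15_inv_iline(uintptr_t mva)   { (void)mva; } /* mcr p15, 0, Rd, c7, c5, 1  */
static void cp15_drain_wb(void)             { }            /* mcr p15, 0, Rd, c7, c10, 4 */

/* Make [start, end) coherent between the D-cache and the I-cache. */
static void coherent_range(uintptr_t start, uintptr_t end)
{
    start &= ~(uintptr_t)(CACHE_DLINESIZE - 1);  /* bic r0, r0, #CACHE_DLINESIZE - 1 */
    do {
        cp15_clean_dline(start);   /* push freshly written code out to memory */
        cp15_inv_iline(start);     /* force the I-cache to refetch it */
        start += CACHE_DLINESIZE;
    } while (start < end);         /* cmp r0, r1; blo 1b */
    cp15_drain_wb();               /* complete the writes before returning */
}

The clean-before-invalidate order is the point: on a Harvard VIVT cache, code written through the D-cache is invisible to instruction fetch until it reaches memory and the stale I-cache lines are discarded.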
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm926_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm926_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm926_dma_clean_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
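All of the *_dma_map_area entry points in these files dispatch on the DMA direction with the same cmp/beq/bcs ladder. Below is a hedged C sketch of that dispatch, using the direction values from mainline linux/dma-direction.h; the three range helpers are empty stubs standing in for the per-CPU routines.

#include <stddef.h>

enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2 };

/* stand-ins for e.g. arm926_dma_clean_range and friends */
static void dma_clean_range(char *s, char *e) { (void)s; (void)e; }
static void dma_inv_range(char *s, char *e)   { (void)s; (void)e; }
static void dma_flush_range(char *s, char *e) { (void)s; (void)e; }

static void dma_map_area(char *start, size_t size, enum dma_data_direction dir)
{
    char *end = start + size;         /* add r1, r1, r0 */
    if (dir == DMA_TO_DEVICE)         /* cmp r2, #DMA_TO_DEVICE; beq */
        dma_clean_range(start, end);  /* CPU wrote the buffer: push it out */
    else if (dir > DMA_TO_DEVICE)     /* bcs, i.e. DMA_FROM_DEVICE */
        dma_inv_range(start, end);    /* device will write: discard stale lines */
    else                              /* DMA_BIDIRECTIONAL falls through */
        dma_flush_range(start, end);  /* clean and invalidate */
}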
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry -#else - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm926_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm926_dma_clean_range - bcs arm926_dma_inv_range - b arm926_dma_flush_range -ENDPROC(arm926_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm926_dma_unmap_area) - ret lr -ENDPROC(arm926_dma_unmap_area) - - .globl arm926_flush_kern_cache_louis - .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm926 - -ENTRY(cpu_arm926_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm926_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm926_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm926_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm926_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ -.globl cpu_arm926_suspend_size -.equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm926_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm926_do_suspend) - -ENTRY(cpu_arm926_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_arm926_do_resume) -#endif - - .type __arm926_setup, #function -__arm926_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - -#ifdef 
CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ disable write-back on caches explicitly - mcr p15, 7, r0, c15, c0, 0 -#endif - - adr r5, arm926_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .1.. .... .... .... -#endif - ret lr - .size __arm926_setup, . - __arm926_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 0001 ..11 0101 - * - */ - .type arm926_crval, #object -arm926_crval: - crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5tej" - string cpu_elf_name, "v5" - string cpu_arm926_name, "ARM926EJ-S" - - .align - - .section ".proc.info.init", #alloc - - .type __arm926_proc_info,#object -__arm926_proc_info: - .long 0x41069260 @ ARM926EJ-S (v5TEJ) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm926_setup, __arm926_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA - .long cpu_arm926_name - .long arm926_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm926_cache_fns - .size __arm926_proc_info, . - __arm926_proc_info diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S deleted file mode 100644 index 1c26d991386d7d6f92abd9c3c6c77dcb89ab7392..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm940.S +++ /dev/null @@ -1,360 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/arm940.S: utility functions for ARM940T - * - * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) - */ -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ -#define CACHE_DLINESIZE 16 -#define CACHE_DSEGMENTS 4 -#define CACHE_DENTRIES 64 - - .text -/* - * cpu_arm940_proc_init() - * cpu_arm940_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm940_proc_init) -ENTRY(cpu_arm940_switch_mm) - ret lr - -/* - * cpu_arm940_proc_fin() - */ -ENTRY(cpu_arm940_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x00001000 @ i-cache - bic r0, r0, #0x00000004 @ d-cache - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm940_reset(loc) - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm940_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ flush I cache - mcr p15, 0, ip, c7, c6, 0 @ flush D cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x00000005 @ .............c.p - bic ip, ip, #0x00001000 @ i-cache - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm940_reset) - .popsection - -/* - * cpu_arm940_do_idle() - */ - .align 5 -ENTRY(cpu_arm940_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. 
- */ -ENTRY(arm940_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm940_flush_icache_all) - -/* - * flush_user_cache_all() - */ -ENTRY(arm940_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm940_flush_kern_cache_all) - mov r2, #VM_EXEC - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * There is no efficient way to flush a range of cache entries - * in the specified address range. Thus, flushes all. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm940_flush_user_cache_range) - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ flush D cache -#else - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_coherent_user_range) - /* FALLTHROUGH */ - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm940_flush_kern_dcache_area) - mov r0, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * There is no efficient way to invalidate a specified virtual - * address range. Thus, invalidates all. - * - * - start - virtual start address - * - end - virtual end address - */ -arm940_dma_inv_range: - mov ip, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c6, 2 @ flush D entry - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * There is no efficient way to clean a specified virtual - * address range. Thus, cleans all. 
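Because the ARM940T cannot clean an arbitrary virtual range, every routine above falls back to walking the whole D-cache by set/way. A small C model of that walk; cp15_clean_inv_dindex() is a hypothetical stand-in for the c7, c14, 2 index-based operation, and the bit positions come straight from the asm.

#include <stdint.h>

/* Geometry from proc-arm940.S: 4KB D-cache, 4 segments of 64 16-byte lines */
#define CACHE_DSEGMENTS 4u
#define CACHE_DENTRIES  64u

static void cp15_clean_inv_dindex(uint32_t val) { (void)val; }

/* The index register packs the segment into bits [5:4] and the
 * entry into bits [31:26], exactly as the nested asm loops do. */
static void clean_inv_whole_dcache(void)
{
    for (uint32_t seg = CACHE_DSEGMENTS; seg-- > 0; )        /* segments 3 to 0 */
        for (uint32_t entry = CACHE_DENTRIES; entry-- > 0; ) /* entries 63 to 0 */
            cp15_clean_inv_dindex((entry << 26) | (seg << 4));
}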
- * - * - start - virtual start address - * - end - virtual end address - */ -arm940_dma_clean_range: -ENTRY(cpu_arm940_dcache_clean_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c10, 2 @ clean D entry - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * There is no efficient way to clean and invalidate a specified - * virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_dma_flush_range) - mov ip, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r3, c7, c14, 2 @ clean/flush D entry -#else - mcr p15, 0, r3, c7, c6, 2 @ invalidate D entry -#endif - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm940_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm940_dma_clean_range - bcs arm940_dma_inv_range - b arm940_dma_flush_range -ENDPROC(arm940_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm940_dma_unmap_area) - ret lr -ENDPROC(arm940_dma_unmap_area) - - .globl arm940_flush_kern_cache_louis - .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm940 - - .type __arm940_setup, #function -__arm940_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c6, c3, 0 @ disable data area 3~7 - mcr p15, 0, r0, c6, c4, 0 - mcr p15, 0, r0, c6, c5, 0 - mcr p15, 0, r0, c6, c6, 0 - mcr p15, 0, r0, c6, c7, 0 - - mcr p15, 0, r0, c6, c3, 1 @ disable instruction area 3~7 - mcr p15, 0, r0, c6, c4, 1 - mcr p15, 0, r0, c6, c5, 1 - mcr p15, 0, r0, c6, c6, 1 - mcr p15, 0, r0, c6, c7, 1 - - mov r0, #0x0000003F @ base = 0, size = 4GB - mcr p15, 0, r0, c6, c0, 0 @ set area 0, default - mcr p15, 0, r0, c6, c0, 1 - - ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r7, =CONFIG_DRAM_SIZE >> 12 @ size of RAM (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c1, 0 @ set area 1, RAM - mcr p15, 0, r3, c6, c1, 1 - - ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c2, 0 @ set area 2, ROM/FLASH - mcr p15, 0, r3, c6, c2, 1 - - mov r0, #0x06 - mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable - mcr p15, 0, r0, c2, c0, 1 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #0x00 @ disable whole write buffer -#else - mov r0, #0x02 @ Region 1 write buffered -#endif - mcr p15, 0, r0, c3, c0, 0 - - mov r0, #0x10000 - sub r0, r0, #1 @ r0 = 0xffff - mcr p15, 0, r0, c5, c0, 0 @ all read/write access - mcr p15, 0, r0, c5, c0, 1 - - mrc p15, 0, r0, c1, c0 @ get control register - orr r0, r0, #0x00001000 @ I-cache - orr r0, r0, 
#0x00000005 @ MPU/D-cache - - ret lr - - .size __arm940_setup, . - __arm940_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm940_name, "ARM940T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm940_proc_info,#object -__arm940_proc_info: - .long 0x41009400 - .long 0xff00fff0 - .long 0 - initfn __arm940_setup, __arm940_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm940_name - .long arm940_processor_functions - .long 0 - .long 0 - .long arm940_cache_fns - .size __arm940_proc_info, . - __arm940_proc_info - diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S deleted file mode 100644 index 2dc1c75a4fd4a8131de6db7a77be2ed227053e0e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm946.S +++ /dev/null @@ -1,415 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S - * - * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) - * - * (Many of cache codes are from proc-arm926.S) - */ -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, - * comprising 256 lines of 32 bytes (8 words). - */ -#define CACHE_DSIZE (CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */ -#define CACHE_DLINESIZE 32 /* fixed */ -#define CACHE_DSEGMENTS 4 /* fixed */ -#define CACHE_DENTRIES (CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE) -#define CACHE_DLIMIT (CACHE_DSIZE * 4) /* benchmark needed */ - - .text -/* - * cpu_arm946_proc_init() - * cpu_arm946_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm946_proc_init) -ENTRY(cpu_arm946_switch_mm) - ret lr - -/* - * cpu_arm946_proc_fin() - */ -ENTRY(cpu_arm946_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x00001000 @ i-cache - bic r0, r0, #0x00000004 @ d-cache - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm946_reset(loc) - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm946_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ flush I cache - mcr p15, 0, ip, c7, c6, 0 @ flush D cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x00000005 @ .............c.p - bic ip, ip, #0x00001000 @ i-cache - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm946_reset) - .popsection - -/* - * cpu_arm946_do_idle() - */ - .align 5 -ENTRY(cpu_arm946_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm946_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm946_flush_icache_all) - -/* - * flush_user_cache_all() - */ -ENTRY(arm946_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
- */ -ENTRY(arm946_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ flush D cache -#else - mov r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 4 - bcs 2b @ entries n to 0 - subs r1, r1, #1 << 29 - bcs 1b @ segments 3 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ flush I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - * (same as arm926) - */ -ENTRY(arm946_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm946_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * (same as arm926) - */ -ENTRY(arm946_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - * (same as arm926) - */ -ENTRY(arm946_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
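The unaligned-edge handling that the comment above demands is easy to miss in the asm, so here is a C model of it; the cp15_* helpers are hypothetical stand-ins for the mcr instructions in arm946_dma_inv_range below.

#include <stdint.h>

#define CACHE_DLINESIZE 32u  /* fixed on ARM946E-S */

static void cp15_clean_dline(uintptr_t mva) { (void)mva; }
static void cp15_inv_dline(uintptr_t mva)   { (void)mva; }
static void cp15_drain_wb(void)             { }

/*
 * Invalidate [start, end) before a device writes into it. A boundary
 * line that is only partly inside the buffer also holds neighbouring
 * data, so it is cleaned first; invalidating it outright would throw
 * away dirty bytes that do not belong to the buffer.
 */
static void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHE_DLINESIZE - 1))   /* tst r0, #CACHE_DLINESIZE - 1 */
        cp15_clean_dline(start);
    if (end & (CACHE_DLINESIZE - 1))     /* tst r1, #CACHE_DLINESIZE - 1 */
        cp15_clean_dline(end);
    for (start &= ~(uintptr_t)(CACHE_DLINESIZE - 1); start < end;
         start += CACHE_DLINESIZE)
        cp15_inv_dline(start);
    cp15_drain_wb();
}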
- * - * - start - virtual start address - * - end - virtual end address - * (same as arm926) - */ -arm946_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -arm946_dma_clean_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -ENTRY(arm946_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry -#else - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm946_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm946_dma_clean_range - bcs arm946_dma_inv_range - b arm946_dma_flush_range -ENDPROC(arm946_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm946_dma_unmap_area) - ret lr -ENDPROC(arm946_dma_unmap_area) - - .globl arm946_flush_kern_cache_louis - .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm946 - -ENTRY(cpu_arm946_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .type __arm946_setup, #function -__arm946_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c6, c3, 0 @ disable memory region 3~7 - mcr p15, 0, r0, c6, c4, 0 - mcr p15, 0, r0, c6, c5, 0 - mcr p15, 0, r0, c6, c6, 0 - mcr p15, 0, r0, c6, c7, 0 - - mov r0, #0x0000003F @ base = 0, size = 4GB - mcr p15, 0, r0, c6, c0, 0 @ set region 0, default - - ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r7, =CONFIG_DRAM_SIZE @ size of RAM (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c1, 0 - - ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c2, 0 - - mov r0, #0x06 - mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable - mcr p15, 0, r0, c2, c0, 1 @ region 1,2 i-cacheable -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #0x00 @ disable whole write buffer -#else - mov r0, 
#0x02 @ region 1 write buffered -#endif - mcr p15, 0, r0, c3, c0, 0 - -/* - * Access Permission Settings for future permission control by PU. - * - * priv. user - * region 0 (whole) rw -- : b0001 - * region 1 (RAM) rw rw : b0011 - * region 2 (FLASH) rw r- : b0010 - * region 3~7 (none) -- -- : b0000 - */ - mov r0, #0x00000031 - orr r0, r0, #0x00000200 - mcr p15, 0, r0, c5, c0, 2 @ set data access permission - mcr p15, 0, r0, c5, c0, 3 @ set inst. access permission - - mrc p15, 0, r0, c1, c0 @ get control register - orr r0, r0, #0x00001000 @ I-cache - orr r0, r0, #0x00000005 @ MPU/D-cache -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x00004000 @ .1.. .... .... .... -#endif - ret lr - - .size __arm946_setup, . - __arm946_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5t" - string cpu_arm946_name, "ARM946E-S" - - .align - - .section ".proc.info.init", #alloc - .type __arm946_proc_info,#object -__arm946_proc_info: - .long 0x41009460 - .long 0xff00fff0 - .long 0 - .long 0 - initfn __arm946_setup, __arm946_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm946_name - .long arm946_processor_functions - .long 0 - .long 0 - .long arm946_cache_fns - .size __arm946_proc_info, . - __arm946_proc_info - diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S deleted file mode 100644 index 913c06e590af516c438a9a20c7d254bae38f1ac7..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm9tdmi.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI - * - * Copyright (C) 2003-2006 Hyok S. Choi - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - - .text -/* - * cpu_arm9tdmi_proc_init() - * cpu_arm9tdmi_do_idle() - * cpu_arm9tdmi_dcache_clean_area() - * cpu_arm9tdmi_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm9tdmi_proc_init) -ENTRY(cpu_arm9tdmi_do_idle) -ENTRY(cpu_arm9tdmi_dcache_clean_area) -ENTRY(cpu_arm9tdmi_switch_mm) - ret lr - -/* - * cpu_arm9tdmi_proc_fin() - */ -ENTRY(cpu_arm9tdmi_proc_fin) - ret lr - -/* - * Function: cpu_arm9tdmi_reset(loc) - * Params : loc(r0) address to jump to - * Purpose : Sets up everything for a reset and jump to the location for soft reset. - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm9tdmi_reset) - ret r0 -ENDPROC(cpu_arm9tdmi_reset) - .popsection - - .type __arm9tdmi_setup, #function -__arm9tdmi_setup: - ret lr - .size __arm9tdmi_setup, . 
- __arm9tdmi_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm9tdmi_name, "ARM9TDMI" - string cpu_p2001_name, "P2001" - - .align - - .section ".proc.info.init", #alloc - -.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req - .type __\name\()_proc_info, #object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long 0 - .long 0 - initfn __arm9tdmi_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long \cpu_name - .long arm9tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name - arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S deleted file mode 100644 index 8120b6f4dbb83ec2dbff229060018e66e7317562..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-fa526.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 - * - * Written by : Luke Lee - * Copyright (C) 2005 Faraday Corp. - * Copyright (C) 2008-2009 Paulius Zaleckas - * - * These are the low level assembler for performing cache and TLB - * functions on the fa526. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define CACHE_DLINESIZE 16 - - .text -/* - * cpu_fa526_proc_init() - */ -ENTRY(cpu_fa526_proc_init) - ret lr - -/* - * cpu_fa526_proc_fin() - */ -ENTRY(cpu_fa526_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - nop - nop - ret lr - -/* - * cpu_fa526_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 4 - .pushsection .idmap.text, "ax" -ENTRY(cpu_fa526_reset) -/* TODO: Use CP8 if possible... */ - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - bic ip, ip, #0x0800 @ BTB off - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - nop - nop - ret r0 -ENDPROC(cpu_fa526_reset) - .popsection - -/* - * cpu_fa526_do_idle() - */ - .align 4 -ENTRY(cpu_fa526_do_idle) - ret lr - - -ENTRY(cpu_fa526_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_fa526_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. 
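As with the other VIVT cores in this file set, the interesting part of fa526's switch_mm is the ordering of the maintenance operations. A C-shaped sketch of that ordering follows; every helper name here is invented as a stand-in for one cp15 write in the routine below, and real code must of course do this in asm.

#include <stdint.h>

static void inv_dcache(void)         { } /* or clean+invalidate when write-back */
static void inv_icache(void)         { }
static void inv_btb(void)            { } /* branch targets belong to the old mm */
static void drain_write_buffer(void) { }
static void prefetch_flush(void)     { }
static void set_ttb(uintptr_t pgd)   { (void)pgd; } /* mcr p15, 0, r0, c2, c0, 0 */
static void inv_utlb(void)           { }

/* Caches, branch predictor and TLB are all tagged with the old
 * address space, so everything is scrubbed and the barriers issued
 * before the new translation base goes live. */
static void switch_mm(uintptr_t new_pgd)
{
    inv_dcache();
    inv_icache();
    inv_btb();
    drain_write_buffer();
    prefetch_flush();
    set_ttb(new_pgd);
    inv_utlb();  /* stale translations are dropped last */
}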
- * - * pgd: new page tables - */ - .align 4 -ENTRY(cpu_fa526_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else - mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB -#endif - ret lr - -/* - * cpu_fa526_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 4 -ENTRY(cpu_fa526_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - - .type __fa526_setup, #function -__fa526_setup: - /* On return of this routine, r0 must carry correct flags for CFG register */ - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM - - mov r0, #1 - mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR - - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - - mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client - mcr p15, 0, r0, c3, c0 @ load domain access register - - mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, fa526_cr1_clear - bic r0, r0, r5 - ldr r5, fa526_cr1_set - orr r0, r0, r5 - ret lr - .size __fa526_setup, . - __fa526_setup - - /* - * .RVI ZFRS BLDP WCAM - * ..11 1001 .111 1101 - * - */ - .type fa526_cr1_clear, #object - .type fa526_cr1_set, #object -fa526_cr1_clear: - .word 0x3f3f -fa526_cr1_set: - .word 0x397D - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_fa526_name, "FA526" - - .align - - .section ".proc.info.init", #alloc - - .type __fa526_proc_info,#object -__fa526_proc_info: - .long 0x66015261 - .long 0xff01fff1 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __fa526_setup, __fa526_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF - .long cpu_fa526_name - .long fa526_processor_functions - .long fa_tlb_fns - .long fa_user_fns - .long fa_cache_fns - .size __fa526_proc_info, . 
- __fa526_proc_info diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S deleted file mode 100644 index bb6dc34d42a374298c2b50fccb191b02504b402b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-feroceon.S +++ /dev/null @@ -1,613 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon - * - * Heavily based on proc-arm926.S - * Maintainer: Assaf Hoffman - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define CACHE_DLIMIT 16384 - -/* - * the cache line size of the I and D cache - */ -#define CACHE_DLINESIZE 32 - - .bss - .align 3 -__cache_params_loc: - .space 8 - - .text -__cache_params: - .word __cache_params_loc - -/* - * cpu_feroceon_proc_init() - */ -ENTRY(cpu_feroceon_proc_init) - mrc p15, 0, r0, c0, c0, 1 @ read cache type register - ldr r1, __cache_params - mov r2, #(16 << 5) - tst r0, #(1 << 16) @ get way - mov r0, r0, lsr #18 @ get cache size order - movne r3, #((4 - 1) << 30) @ 4-way - and r0, r0, #0xf - moveq r3, #0 @ 1-way - mov r2, r2, lsl r0 @ actual cache size - movne r2, r2, lsr #2 @ turned into # of sets - sub r2, r2, #(1 << 5) - stmia r1, {r2, r3} - ret lr - -/* - * cpu_feroceon_proc_fin() - */ -ENTRY(cpu_feroceon_proc_fin) -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mov r0, #0 - mcr p15, 1, r0, c15, c9, 0 @ clean L2 - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_feroceon_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_feroceon_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_feroceon_reset) - .popsection - -/* - * cpu_feroceon_do_idle() - * - * Called with IRQs disabled - */ - .align 5 -ENTRY(cpu_feroceon_do_idle) - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(feroceon_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(feroceon_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ - .align 5 -ENTRY(feroceon_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
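cpu_feroceon_proc_init above sizes the D-cache at boot and parks the result in __cache_params_loc for the set/way loops that follow. Here is a C model of that probe; it mirrors the asm bit arithmetic rather than re-deriving the ARMv5 cache type register layout, and the struct name is invented for the sketch.

#include <stdint.h>

struct feroceon_cache_params {
    uint32_t max_set;  /* highest set index, pre-shifted by 5 (32-byte lines) */
    uint32_t max_way;  /* highest way index, pre-shifted into bits [31:30]   */
};

static struct feroceon_cache_params decode_dcache(uint32_t cache_type_reg)
{
    struct feroceon_cache_params p;
    /* mov r2, #(16 << 5); mov r0, r0, lsr #18; and r0, r0, #0xf; lsl */
    uint32_t size = (16u << 5) << ((cache_type_reg >> 18) & 0xf);

    if (cache_type_reg & (1u << 16)) {        /* 4-way set associative */
        p.max_way = 3u << 30;                 /* movne r3, #((4 - 1) << 30) */
        p.max_set = (size >> 2) - (1u << 5);  /* sets = size / 4 ways / 32B */
    } else {                                  /* direct mapped */
        p.max_way = 0;
        p.max_set = size - (1u << 5);
    }
    return p;
}

__flush_whole_cache below then ORs max_set and max_way together and counts both fields down to zero, one clean+invalidate per set/way pair.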
- */ -ENTRY(feroceon_flush_kern_cache_all) - mov r2, #VM_EXEC - -__flush_whole_cache: - ldr r1, __cache_params - ldmia r1, {r1, r3} -1: orr ip, r1, r3 -2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way - subs ip, ip, #(1 << 30) @ next way - bcs 2b - subs r1, r1, #(1 << 5) @ next set - bcs 1b - - tst r2, #VM_EXEC - mov ip, #0 - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ - .align 5 -ENTRY(feroceon_flush_user_cache_range) - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mov ip, #0 - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ - .align 5 -ENTRY(feroceon_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(feroceon_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ - .align 5 -ENTRY(feroceon_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -ENTRY(feroceon_range_flush_kern_dcache_area) - mrs r2, cpsr - add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start - mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top - msr cpsr_c, r2 @ restore interrupts - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ - .align 5 -feroceon_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -feroceon_range_dma_inv_range: - mrs r2, cpsr - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c14, 0 @ D inv range start - mcr p15, 5, r1, c15, c14, 1 @ D inv range top - msr cpsr_c, r2 @ restore interrupts - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ - .align 5 -feroceon_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -feroceon_range_dma_clean_range: - mrs r2, cpsr - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c13, 0 @ D clean range start - mcr p15, 5, r1, c15, c13, 1 @ D clean range top - msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
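The feroceon_range_* variants above and below trade the per-line loop for a single start/top hardware range operation. A hedged C sketch of the calling convention; the cp15_range_* and irq_* helpers are hypothetical stand-ins for the p15, 5 opcodes and the cpsr save/restore in the asm.

#include <stdint.h>

static void cp15_range_start(uintptr_t mva) { (void)mva; } /* mcr p15, 5, Rd, c15, c15, 0 */
static void cp15_range_top(uintptr_t mva)   { (void)mva; } /* mcr p15, 5, Rd, c15, c15, 1 */
static void cp15_drain_wb(void)             { }
static unsigned long irq_save(void)         { return 0; }
static void irq_restore(unsigned long f)    { (void)f; }

/*
 * Clean and invalidate [start, end) in one hardware range operation.
 * The top register takes an inclusive address, hence end - 1, and
 * the start/top pair must not be split by an interrupt handler that
 * could itself issue a range operation.
 */
static void range_dma_flush(uintptr_t start, uintptr_t end)
{
    unsigned long flags = irq_save();  /* orr r3, r2, #PSR_I_BIT; msr cpsr_c, r3 */
    cp15_range_start(start);
    cp15_range_top(end - 1);           /* subne r1, r1, #1 in the asm */
    irq_restore(flags);
    cp15_drain_wb();
}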
- * - * - start - virtual start address - * - end - virtual end address - */ - .align 5 -ENTRY(feroceon_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -ENTRY(feroceon_range_dma_flush_range) - mrs r2, cpsr - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start - mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top - msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq feroceon_dma_clean_range - bcs feroceon_dma_inv_range - b feroceon_dma_flush_range -ENDPROC(feroceon_dma_map_area) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_range_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq feroceon_range_dma_clean_range - bcs feroceon_range_dma_inv_range - b feroceon_range_dma_flush_range -ENDPROC(feroceon_range_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_dma_unmap_area) - ret lr -ENDPROC(feroceon_dma_unmap_area) - - .globl feroceon_flush_kern_cache_louis - .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions feroceon - -.macro range_alias basename - .globl feroceon_range_\basename - .type feroceon_range_\basename , %function - .equ feroceon_range_\basename , feroceon_\basename -.endm - -/* - * Most of the cache functions are unchanged for this case. - * Export suitable alias symbols for the unchanged functions: - */ - range_alias flush_icache_all - range_alias flush_user_cache_all - range_alias flush_kern_cache_all - range_alias flush_kern_cache_louis - range_alias flush_user_cache_range - range_alias coherent_kern_range - range_alias coherent_user_range - range_alias dma_unmap_area - - define_cache_functions feroceon_range - - .align 5 -ENTRY(cpu_feroceon_dcache_clean_area) -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mov r2, r0 - mov r3, r1 -#endif -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) -1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry - add r2, r2, #CACHE_DLINESIZE - subs r3, r3, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_feroceon_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_feroceon_switch_mm) -#ifdef CONFIG_MMU - /* - * Note: we wish to call __flush_whole_cache but we need to preserve - * lr to do so. 
The only way without touching main memory is to - * use r2 which is normally used to test the VM_EXEC flag, and - * compensate locally for the skipped ops if it is not set. - */ - mov r2, lr @ abuse r2 to preserve lr - bl __flush_whole_cache - @ if r2 contains the VM_EXEC bit then the next 2 ops are done already - tst r2, #VM_EXEC - mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcreq p15, 0, ip, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret r2 -#else - ret lr -#endif - -/* - * cpu_feroceon_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_feroceon_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ -.globl cpu_feroceon_suspend_size -.equ cpu_feroceon_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_feroceon_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_feroceon_do_suspend) - -ENTRY(cpu_feroceon_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_feroceon_do_resume) -#endif - - .type __feroceon_setup, #function -__feroceon_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, feroceon_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __feroceon_setup, . 
- __feroceon_setup - - /* - * B - * R P - * .RVI UFRS BLDP WCAM - * .011 .001 ..11 0101 - * - */ - .type feroceon_crval, #object -feroceon_crval: - crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_feroceon_name, "Feroceon" - string cpu_88fr531_name, "Feroceon 88FR531-vd" - string cpu_88fr571_name, "Feroceon 88FR571-vd" - string cpu_88fr131_name, "Feroceon 88FR131" - - .align - - .section ".proc.info.init", #alloc - -.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __feroceon_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long \cpu_name - .long feroceon_processor_functions - .long v4wbi_tlb_fns - .long feroceon_user_fns - .long \cache - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - -#ifdef CONFIG_CPU_FEROCEON_OLD_ID - feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ - cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns -#endif - - feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ - cache=feroceon_cache_fns - feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ - cache=feroceon_range_cache_fns - feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ - cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S deleted file mode 100644 index 60ac7c5999a98eb3046d32e17299cc42dfd980ec..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-macros.S +++ /dev/null @@ -1,388 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * We need constants.h for: - * VMA_VM_MM - * VMA_VM_FLAGS - * VM_EXEC - */ -#include -#include -#include - -#ifdef CONFIG_CPU_V7M -#include -#endif - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * vma_vm_flags - get vma->vm_flags - */ - .macro vma_vm_flags, rd, rn - ldr \rd, [\rn, #VMA_VM_FLAGS] - .endm - -/* - * act_mm - get current->active_mm - */ - .macro act_mm, rd - bic \rd, sp, #(THREAD_SIZE - 1) & ~63 - bic \rd, \rd, #63 - ldr \rd, [\rd, #TI_TASK] - .if (TSK_ACTIVE_MM > IMM12_MASK) - add \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK - .endif - ldr \rd, [\rd, #TSK_ACTIVE_MM & IMM12_MASK] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - * note, this field is 64bit, so in big-endian the two words are swapped too. - */ - .macro mmid, rd, rn -#ifdef __ARMEB__ - ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ] -#else - ldr \rd, [\rn, #MM_CONTEXT_ID] -#endif - .endm - -/* - * mask_asid - mask the ASID from the context ID - */ - .macro asid, rd, rn - and \rd, \rn, #255 - .endm - - .macro crval, clear, mmuset, ucset -#ifdef CONFIG_MMU - .word \clear - .word \mmuset -#else - .word \clear - .word \ucset -#endif - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register - * on ARMv7. 
- */ - .macro dcache_line_size, reg, tmp -#ifdef CONFIG_CPU_V7M - movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR - movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR - ldr \tmp, [\tmp] -#else - mrc p15, 0, \tmp, c0, c0, 1 @ read ctr -#endif - lsr \tmp, \tmp, #16 - and \tmp, \tmp, #0xf @ cache line size encoding - mov \reg, #4 @ bytes per word - mov \reg, \reg, lsl \tmp @ actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register - * on ARMv7. - */ - .macro icache_line_size, reg, tmp -#ifdef CONFIG_CPU_V7M - movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR - movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR - ldr \tmp, [\tmp] -#else - mrc p15, 0, \tmp, c0, c0, 1 @ read ctr -#endif - and \tmp, \tmp, #0xf @ cache line size encoding - mov \reg, #4 @ bytes per word - mov \reg, \reg, lsl \tmp @ actual cache line size - .endm - -/* - * Sanity check the PTE configuration for the code below - which makes - * certain assumptions about how these bits are laid out. - */ -#ifdef CONFIG_MMU -#if L_PTE_SHARED != PTE_EXT_SHARED -#error PTE shared bit mismatch -#endif -#if !defined (CONFIG_ARM_LPAE) && \ - (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ - L_PTE_PRESENT) > L_PTE_SHARED -#error Invalid Linux PTE bit settings -#endif -#endif /* CONFIG_MMU */ - -/* - * The ARMv6 and ARMv7 set_pte_ext translation function. - * - * Permission translation: - * YUWD APX AP1 AP0 SVC User - * 0xxx 0 0 0 no acc no acc - * 100x 1 0 1 r/o no acc - * 10x0 1 0 1 r/o no acc - * 1011 0 0 1 r/w no acc - * 110x 1 1 1 r/o r/o - * 11x0 1 1 1 r/o r/o - * 1111 0 1 1 r/w r/w - */ - .macro armv6_mt_table pfx -\pfx\()_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE - .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long 0x00 @ L_PTE_MT_MINICACHE (not present) - .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC - .long 0x00 @ unused - .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS - .endm - - .macro armv6_set_pte_ext pfx - str r1, [r0], #2048 @ linux version - - bic r3, r1, #0x000003fc - bic r3, r3, #PTE_TYPE_MASK - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - adr ip, \pfx\()_mt_table - and r2, r1, #L_PTE_MT_MASK - ldr r2, [ip, r2] - - eor r1, r1, #L_PTE_DIRTY - tst r1, #L_PTE_DIRTY|L_PTE_RDONLY - orrne r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - tstne r3, #PTE_EXT_APX - - @ user read-only -> kernel read-only - bicne r3, r3, #PTE_EXT_AP0 - - tst r1, #L_PTE_XN - orrne r3, r3, #PTE_EXT_XN - - eor r3, r3, r2 - - tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT - moveq r3, #0 - tstne r1, #L_PTE_NONE - movne r3, #0 - - str r3, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte - .endm - - -/* - * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, - * covering most CPUs except Xscale and Xscale 3. 
- * - * Permission translation: - * YUWD AP SVC User - * 0xxx 0x00 no acc no acc - * 100x 0x00 r/o no acc - * 10x0 0x00 r/o no acc - * 1011 0x55 r/w no acc - * 110x 0xaa r/w r/o - * 11x0 0xaa r/w r/o - * 1111 0xff r/w r/w - */ - .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #2048 @ linux version - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r3, #L_PTE_USER @ user? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? - movne r2, #0 - - .if \wc_disable -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r2, #PTE_CACHEABLE - bicne r2, r2, #PTE_BUFFERABLE -#endif - .endif - str r2, [r0] @ hardware version - .endm - - -/* - * Xscale set_pte_ext translation, split into two halves to cope - * with work-arounds. r3 must be preserved by code between these - * two macros. - * - * Permission translation: - * YUWD AP SVC User - * 0xxx 00 no acc no acc - * 100x 00 r/o no acc - * 10x0 00 r/o no acc - * 1011 01 r/w no acc - * 110x 10 r/w r/o - * 11x0 10 r/w r/o - * 1111 11 r/w r/w - */ - .macro xscale_set_pte_ext_prologue - str r1, [r0] @ linux version - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits - orr r2, r2, #PTE_TYPE_EXT @ extended page - - tst r3, #L_PTE_USER @ user? - orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - - tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? - orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w - @ combined with user -> user r/w - .endm - - .macro xscale_set_pte_ext_epilogue - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? - movne r2, #0 @ no -> fault - - str r2, [r0, #2048]! @ hardware version - mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - .endm - -.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 -/* - * If we are building for big.Little with branch predictor hardening, - * we need the processor function tables to remain available after boot. - */ -#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) - .section ".rodata" -#endif - .type \name\()_processor_functions, #object - .align 2 -ENTRY(\name\()_processor_functions) - .word \dabort - .word \pabort - .word cpu_\name\()_proc_init - .word \bugs - .word cpu_\name\()_proc_fin - .word cpu_\name\()_reset - .word cpu_\name\()_do_idle - .word cpu_\name\()_dcache_clean_area - .word cpu_\name\()_switch_mm - - .if \nommu - .word 0 - .else - .word cpu_\name\()_set_pte_ext - .endif - - .if \suspend - .word cpu_\name\()_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - .word cpu_\name\()_do_suspend - .word cpu_\name\()_do_resume -#else - .word 0 - .word 0 -#endif - .else - .word 0 - .word 0 - .word 0 - .endif - - .size \name\()_processor_functions, . 
- \name\()_processor_functions -#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) - .previous -#endif -.endm - -.macro define_cache_functions name:req - .align 2 - .type \name\()_cache_fns, #object -ENTRY(\name\()_cache_fns) - .long \name\()_flush_icache_all - .long \name\()_flush_kern_cache_all - .long \name\()_flush_kern_cache_louis - .long \name\()_flush_user_cache_all - .long \name\()_flush_user_cache_range - .long \name\()_coherent_kern_range - .long \name\()_coherent_user_range - .long \name\()_flush_kern_dcache_area - .long \name\()_dma_map_area - .long \name\()_dma_unmap_area - .long \name\()_dma_flush_range - .size \name\()_cache_fns, . - \name\()_cache_fns -.endm - -.macro define_tlb_functions name:req, flags_up:req, flags_smp - .type \name\()_tlb_fns, #object -ENTRY(\name\()_tlb_fns) - .long \name\()_flush_user_tlb_range - .long \name\()_flush_kern_tlb_range - .ifnb \flags_smp - ALT_SMP(.long \flags_smp ) - ALT_UP(.long \flags_up ) - .else - .long \flags_up - .endif - .size \name\()_tlb_fns, . - \name\()_tlb_fns -.endm - -.macro globl_equ x, y - .globl \x - .equ \x, \y -.endm - -.macro initfn, func, base - .long \func - \base -.endm - - /* - * Macro to calculate the log2 size for the protection region - * registers. This calculates rd = log2(size) - 1. tmp must - * not be the same register as rd. - */ -.macro pr_sz, rd, size, tmp - mov \tmp, \size, lsr #12 - mov \rd, #11 -1: movs \tmp, \tmp, lsr #1 - addne \rd, \rd, #1 - bne 1b -.endm - - /* - * Macro to generate a protection region register value - * given a pre-masked address, size, and enable bit. - * Corrupts size. - */ -.macro pr_val, dest, addr, size, enable - pr_sz \dest, \size, \size @ calculate log2(size) - 1 - orr \dest, \addr, \dest, lsl #1 @ mask in the region size - orr \dest, \dest, \enable -.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S deleted file mode 100644 index f083085788857b8999a00c62025f240cb74b0a66..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-mohawk.S +++ /dev/null @@ -1,444 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core - * - * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. - * - * Heavily based on proc-arm926.S and proc-xsc3.S - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. If the - * area is larger than this, then we flush the whole cache. - */ -#define CACHE_DLIMIT 32768 - -/* - * The cache line size of the L1 D cache. - */ -#define CACHE_DLINESIZE 32 - -/* - * cpu_mohawk_proc_init() - */ -ENTRY(cpu_mohawk_proc_init) - ret lr - -/* - * cpu_mohawk_proc_fin() - */ -ENTRY(cpu_mohawk_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...iz........... - bic r0, r0, #0x0006 @ .............ca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_mohawk_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
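A note on the CACHE_DLIMIT cutoff defined above: per-line maintenance costs one coprocessor operation per 32-byte line, so once a range grows past 32 KiB it is cheaper to clean and invalidate the whole D-cache — exactly the "cmp r3, #CACHE_DLIMIT / bgt __flush_whole_cache" test in the range-flush entry below. A C sketch of the decision, with the two primitives as hypothetical stand-ins:

```c
#include <stdint.h>

#define CACHE_DLIMIT    32768u  /* past this, whole-cache ops win */
#define CACHE_DLINESIZE 32u

void clean_inv_dcache_line(uintptr_t va);  /* one 32-byte line */
void clean_inv_whole_dcache(void);         /* entire D-cache   */

void flush_user_range(uintptr_t start, uintptr_t end)
{
    if (end - start > CACHE_DLIMIT) {  /* large range: give up on lines */
        clean_inv_whole_dcache();
        return;
    }
    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE)
        clean_inv_dcache_line(va);
}
```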
- * - * loc: location to jump to for soft reset - * - * (same as arm926) - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_mohawk_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x0007 @ .............cam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_mohawk_reset) - .popsection - -/* - * cpu_mohawk_do_idle() - * - * Called with IRQs disabled - */ - .align 5 -ENTRY(cpu_mohawk_do_idle) - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(mohawk_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(mohawk_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(mohawk_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(mohawk_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - * - * (same as arm926) - */ -ENTRY(mohawk_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(mohawk_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. 
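The coherent_kern_range/coherent_user_range contract spelled out above — make freshly written instructions visible to a non-snooping Harvard I-cache — reduces to a short per-line sequence: clean the D line, invalidate the matching I line, then drain the write buffer. A C rendering of the loop, with hypothetical per-line helpers:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 32u

void clean_dcache_line(uintptr_t va);  /* push D-side data to memory */
void inv_icache_line(uintptr_t va);    /* drop the stale I-side copy */
void drain_write_buffer(void);

/* After writing code (module load, signal trampoline), walk the range
 * line by line so the I-side refetch sees the new bytes. */
void coherent_range(uintptr_t start, uintptr_t end)
{
    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE) {
        clean_dcache_line(va);
        inv_icache_line(va);
    }
    drain_write_buffer();
}
```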
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -ENTRY(mohawk_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(mohawk_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -mohawk_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -mohawk_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(mohawk_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(mohawk_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq mohawk_dma_clean_range - bcs mohawk_dma_inv_range - b mohawk_dma_flush_range -ENDPROC(mohawk_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(mohawk_dma_unmap_area) - ret lr -ENDPROC(mohawk_dma_unmap_area) - - .globl mohawk_flush_kern_cache_louis - .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions mohawk - -ENTRY(cpu_mohawk_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * cpu_mohawk_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. 
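One subtlety in mohawk_dma_inv_range above deserves a callout: a cache line only partially covered by the range may hold unrelated dirty data, so the two "tst ... mcrne" pairs clean the unaligned head and tail lines before the loop discards whole lines. In C, under the same hypothetical helpers as the sketches above:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 32u

void clean_dcache_line(uintptr_t va);
void inv_dcache_line(uintptr_t va);
void drain_write_buffer(void);

void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHE_DLINESIZE - 1))
        clean_dcache_line(start);  /* partial line at the head */
    if (end & (CACHE_DLINESIZE - 1))
        clean_dcache_line(end);    /* partial line at the tail */

    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE)
        inv_dcache_line(va);       /* whole lines: safe to discard */

    drain_write_buffer();
}
```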
- * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_mohawk_switch_mm) - mov ip, #0 - mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - orr r0, r0, #0x18 @ cache the page table in L2 - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret lr - -/* - * cpu_mohawk_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_mohawk_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr -#endif - -.globl cpu_mohawk_suspend_size -.equ cpu_mohawk_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_mohawk_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_mohawk_do_suspend) - -ENTRY(cpu_mohawk_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer - mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. - mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - orr r1, r1, #0x18 @ cache the page table in L2 - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_mohawk_do_resume) -#endif - - .type __mohawk_setup, #function -__mohawk_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs - orr r4, r4, #0x18 @ cache the page table in L2 - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - - mov r0, #0 @ don't allow CP access - mcr p15, 0, r0, c15, c1, 0 @ write CP access register - - adr r5, mohawk_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - - .size __mohawk_setup, . 
- __mohawk_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..00 0101 - * - */ - .type mohawk_crval, #object -mohawk_crval: - crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_mohawk_name, "Marvell 88SV331x" - - .align - - .section ".proc.info.init", #alloc - - .type __88sv331x_proc_info,#object -__88sv331x_proc_info: - .long 0x56158000 @ Marvell 88SV331x (MOHAWK) - .long 0xfffff000 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __mohawk_setup, __88sv331x_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_mohawk_name - .long mohawk_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long mohawk_cache_fns - .size __88sv331x_proc_info, . - __88sv331x_proc_info diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S deleted file mode 100644 index d5bc5d70256399723f29065b0ab09cdb20a32a9b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-sa110.S +++ /dev/null @@ -1,222 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-sa110.S - * - * Copyright (C) 1997-2002 Russell King - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * MMU functions for SA110 - * - * These are the low level assembler for performing cache and TLB - * functions on the StrongARM-110. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * the cache line size of the I and D cache - */ -#define DCACHELINESIZE 32 - - .text - -/* - * cpu_sa110_proc_init() - */ -ENTRY(cpu_sa110_proc_init) - mov r0, #0 - mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - ret lr - -/* - * cpu_sa110_proc_fin() - */ -ENTRY(cpu_sa110_proc_fin) - mov r0, #0 - mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_sa110_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_sa110_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ 
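The crval records (mohawk_crval above, sa110_crval below) feed one computation in every __xxx_setup: read the CP15 control register, clear the clear mask, OR in the set mask, and return the result for head.S to write back. The crval macro in the proc-macros.S hunk earlier selects mmuset or ucset depending on CONFIG_MMU. A C sketch with an assumed register accessor:

```c
#include <stdint.h>

struct crval {
    uint32_t clear;  /* control bits forced to zero    */
    uint32_t set;    /* mmuset (MMU) or ucset (no-MMU) */
};

uint32_t read_cp15_control(void);  /* hypothetical mrc p15 accessor */

/* The "bic r0, r0, r5 / orr r0, r0, r6" step at the end of each
 * __xxx_setup, expressed over a crval record. */
uint32_t setup_control(const struct crval *cv)
{
    uint32_t ctl = read_cp15_control();

    ctl &= ~cv->clear;
    ctl |= cv->set;
    return ctl;  /* head.S writes this back into CP15 c1 */
}
```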
- mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_sa110_reset) - .popsection - -/* - * cpu_sa110_do_idle(type) - * - * Cause the processor to idle - * - * type: call type: - * 0 = slow idle - * 1 = fast idle - * 2 = switch to slow processor clock - * 3 = switch to fast processor clock - */ - .align 5 - -ENTRY(cpu_sa110_do_idle) - mcr p15, 0, ip, c15, c2, 2 @ disable clock switching - ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc - ldr r1, [r1, #0] @ force switch to MCLK - mov r0, r0 @ safety - mov r0, r0 @ safety - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned - mov r0, r0 @ safety - mov r0, r0 @ safety - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - ret lr - -/* ================================= CACHE ================================ */ - -/* - * cpu_sa110_dcache_clean_area(addr,sz) - * - * Clean the specified entry of any caches such that the MMU - * translation fetches will obtain correct data. - * - * addr: cache-unaligned virtual address - */ - .align 5 -ENTRY(cpu_sa110_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #DCACHELINESIZE - subs r1, r1, #DCACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_sa110_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_sa110_switch_mm) -#ifdef CONFIG_MMU - str lr, [sp, #-4]! - bl v4wb_flush_kern_cache_all @ clears IP - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ldr pc, [sp], #4 -#else - ret lr -#endif - -/* - * cpu_sa110_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_sa110_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - - .type __sa110_setup, #function -__sa110_setup: - mov r10, #0 - mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, sa110_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __sa110_setup, . - __sa110_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..01 0001 ..11 1101 - * - */ - .type sa110_crval, #object -sa110_crval: - crval clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_sa110_name, "StrongARM-110" - - .align - - .section ".proc.info.init", #alloc - - .type __sa110_proc_info,#object -__sa110_proc_info: - .long 0x4401a100 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __sa110_setup, __sa110_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa110_name - .long sa110_processor_functions - .long v4wb_tlb_fns - .long v4wb_user_fns - .long v4wb_cache_fns - .size __sa110_proc_info, . 
- __sa110_proc_info diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S deleted file mode 100644 index be7b611c76c76ada0a8f5d48737a49e70f4be56e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-sa1100.S +++ /dev/null @@ -1,270 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-sa1100.S - * - * Copyright (C) 1997-2002 Russell King - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * MMU functions for SA110 - * - * These are the low level assembler for performing cache and TLB - * functions on the StrongARM-1100 and StrongARM-1110. - * - * Note that SA1100 and SA1110 share everything but their name and CPU ID. - * - * 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl): - * Flush the read buffer at context switches - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * the cache line size of the I and D cache - */ -#define DCACHELINESIZE 32 - - .section .text - -/* - * cpu_sa1100_proc_init() - */ -ENTRY(cpu_sa1100_proc_init) - mov r0, #0 - mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland - ret lr - -/* - * cpu_sa1100_proc_fin() - * - * Prepare the CPU for reset: - * - Disable interrupts - * - Clean and turn off caches. - */ -ENTRY(cpu_sa1100_proc_fin) - mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_sa1100_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_sa1100_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_sa1100_reset) - .popsection - -/* - * cpu_sa1100_do_idle(type) - * - * Cause the processor to idle - * - * type: call type: - * 0 = slow idle - * 1 = fast idle - * 2 = switch to slow processor clock - * 3 = switch to fast processor clock - */ - .align 5 -ENTRY(cpu_sa1100_do_idle) - mov r0, r0 @ 4 nop padding - mov r0, r0 - mov r0, r0 - mov r0, r0 @ 4 nop padding - mov r0, r0 - mov r0, r0 - mov r0, #0 - ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address - @ --- aligned to a cache line - mcr p15, 0, r0, c15, c2, 2 @ disable clock switching - ldr r1, [r1, #0] @ force switch to MCLK - mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - ret lr - -/* ================================= CACHE ================================ */ - -/* - * cpu_sa1100_dcache_clean_area(addr,sz) - * - * Clean the specified entry of any caches such that the MMU - * translation fetches will obtain correct data. 
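cpu_sa1100_dcache_clean_area, whose body follows, uses the same "1: mcr ... / add / subs / bhi 1b" idiom as its siblings: advance one line, decrement the remaining byte count, and keep looping while it is still positive, so a trailing partial line is still cleaned. As a C sketch:

```c
#include <stddef.h>
#include <stdint.h>

#define DCACHELINESIZE 32u

void clean_dcache_line(uintptr_t va);  /* hypothetical per-line clean */

void dcache_clean_area(uintptr_t addr, size_t size)
{
    long remaining = (long)size;

    /* "subs r1, r1, #DCACHELINESIZE / bhi 1b": loop while bytes remain,
     * so a final partial line is cleaned before the count goes negative. */
    while (remaining > 0) {
        clean_dcache_line(addr);
        addr += DCACHELINESIZE;
        remaining -= DCACHELINESIZE;
    }
}
```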
- * - * addr: cache-unaligned virtual address - */ - .align 5 -ENTRY(cpu_sa1100_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #DCACHELINESIZE - subs r1, r1, #DCACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_sa1100_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_sa1100_switch_mm) -#ifdef CONFIG_MMU - str lr, [sp, #-4]! - bl v4wb_flush_kern_cache_all @ clears IP - mcr p15, 0, ip, c9, c0, 0 @ invalidate RB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ldr pc, [sp], #4 -#else - ret lr -#endif - -/* - * cpu_sa1100_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_sa1100_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -.globl cpu_sa1100_suspend_size -.equ cpu_sa1100_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_sa1100_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c3, c0, 0 @ domain ID - mrc p15, 0, r5, c13, c0, 0 @ PID - mrc p15, 0, r6, c1, c0, 0 @ control reg - stmia r0, {r4 - r6} @ store cp regs - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_sa1100_do_suspend) - -ENTRY(cpu_sa1100_do_resume) - ldmia r0, {r4 - r6} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache - mcr p15, 0, ip, c9, c0, 0 @ invalidate RB - mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB - - mcr p15, 0, r4, c3, c0, 0 @ domain ID - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r5, c13, c0, 0 @ PID - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_sa1100_do_resume) -#endif - - .type __sa1100_setup, #function -__sa1100_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, sa1100_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __sa1100_setup, . 
- __sa1100_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 1101 - * - */ - .type sa1100_crval, #object -sa1100_crval: - crval clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130 - - __INITDATA - -/* - * SA1100 and SA1110 share the same function calls - */ - - @ define struct processor (see and proc-macros.S) - define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_sa1100_name, "StrongARM-1100" - string cpu_sa1110_name, "StrongARM-1110" - - .align - - .section ".proc.info.init", #alloc - -.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __sa1100_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long \cpu_name - .long sa1100_processor_functions - .long v4wb_tlb_fns - .long v4_mc_user_fns - .long v4wb_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name - sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S deleted file mode 100644 index c1c85eb3484f319d853b854b96a272cd6f3c6e06..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v6.S +++ /dev/null @@ -1,297 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v6.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Modified by Catalin Marinas for noMMU support - * - * This is the "shell" of the ARMv6 processor support. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define D_CACHE_LINE_SIZE 32 - -#define TTB_C (1 << 0) -#define TTB_S (1 << 1) -#define TTB_IMP (1 << 2) -#define TTB_RGN_NC (0 << 3) -#define TTB_RGN_WBWA (1 << 3) -#define TTB_RGN_WT (2 << 3) -#define TTB_RGN_WB (3 << 3) - -#define TTB_FLAGS_UP TTB_RGN_WBWA -#define PMD_FLAGS_UP PMD_SECT_WB -#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S -#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S - -ENTRY(cpu_v6_proc_init) - ret lr - -ENTRY(cpu_v6_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x0006 @ .............ca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_v6_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_v6_reset) - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x1 @ ...............m - mcr p15, 0, r1, c1, c0, 0 @ disable MMU - mov r1, #0 - mcr p15, 0, r1, c7, c5, 4 @ ISB - ret r0 -ENDPROC(cpu_v6_reset) - .popsection - -/* - * cpu_v6_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. 
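The TTB_FLAGS_UP/TTB_FLAGS_SMP values defined above exploit the fact that the pgd base is well aligned, so the low TTBR0 bits are free to carry page-table-walk attributes; cpu_v6_switch_mm below ORs them in before writing the register. A sketch of the composition, with bit names following the defines above:

```c
#include <stdint.h>

#define TTB_C        (1u << 0)  /* inner-cacheable table walks      */
#define TTB_S        (1u << 1)  /* shareable                        */
#define TTB_RGN_WBWA (1u << 3)  /* outer write-back, write-allocate */

#define TTB_FLAGS_UP  TTB_RGN_WBWA
#define TTB_FLAGS_SMP (TTB_RGN_WBWA | TTB_S)

/* The pgd base is 16 KiB aligned, so the low TTBR0 bits are free to
 * carry walk attributes - the "orr r0, r0, #TTB_FLAGS_*" step. */
static inline uint32_t ttbr0_value(uint32_t pgd_phys, int smp)
{
    return pgd_phys | (smp ? TTB_FLAGS_SMP : TTB_FLAGS_UP);
}
```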
- */ -ENTRY(cpu_v6_do_idle) - mov r1, #0 - mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode - mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt - ret lr - -ENTRY(cpu_v6_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #D_CACHE_LINE_SIZE - subs r1, r1, #D_CACHE_LINE_SIZE - bhi 1b - ret lr - -/* - * cpu_v6_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys - * - * - pgd_phys - physical address of new TTB - * - * It is assumed that: - * - we are not using split page tables - */ -ENTRY(cpu_v6_switch_mm) -#ifdef CONFIG_MMU - mov r2, #0 - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB - mcr p15, 0, r2, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrc p15, 0, r2, c13, c0, 1 @ read current context ID - bic r2, r2, #0xff @ extract the PID - and r1, r1, #0xff - orr r1, r1, r2 @ insert into new context ID -#endif - mcr p15, 0, r1, c13, c0, 1 @ set context ID -#endif - ret lr - -/* - * cpu_v6_set_pte_ext(ptep, pte, ext) - * - * Set a level 2 translation table entry. - * - * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) - * - pte - PTE value to store - * - ext - value for extended PTE bits - */ - armv6_mt_table cpu_v6 - -ENTRY(cpu_v6_set_pte_ext) -#ifdef CONFIG_MMU - armv6_set_pte_ext cpu_v6 -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ -.globl cpu_v6_suspend_size -.equ cpu_v6_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v6_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID -#ifdef CONFIG_MMU - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 -#endif - mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register - mrc p15, 0, r8, c1, c0, 2 @ co-processor access control - mrc p15, 0, r9, c1, c0, 0 @ control register - stmia r0, {r4 - r9} - ldmfd sp!, {r4- r9, pc} -ENDPROC(cpu_v6_do_suspend) - -ENTRY(cpu_v6_do_resume) - mov ip, #0 - mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate cache - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID - ldmia r0, {r4 - r9} - mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID -#ifdef CONFIG_MMU - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) - ALT_UP(orr r1, r1, #TTB_FLAGS_UP) - mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 - mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 - mcr p15, 0, ip, c2, c0, 2 @ TTB control register -#endif - mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register - mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c7, c5, 4 @ ISB - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_v6_do_resume) -#endif - - string cpu_v6_name, "ARMv6-compatible processor" - - .align - -/* - * __v6_setup - * - * Initialise TLB, Caches, and MMU state ready to switch the MMU - * on. Return in r0 the new CP15 C1 control register setting. - * - * We automatically detect if we have a Harvard cache, and use the - * Harvard cache control instructions insead of the unified cache - * control instructions. - * - * This should be able to cover all ARMv6 cores. 
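The CONFIG_PID_IN_CONTEXTIDR block in cpu_v6_switch_mm above is easy to misread: CONTEXTIDR holds a trace-visible PROCID in bits [31:8] and the ASID in bits [7:0], and the bic/and/orr triplet keeps the live PROCID while swapping in only the new ASID. In C:

```c
#include <stdint.h>

#define ASID_MASK 0xffu  /* CONTEXTIDR[7:0]; PROCID sits in [31:8] */

/* The "bic r2, r2, #0xff / and r1, r1, #0xff / orr r1, r1, r2"
 * triplet: keep the live PROCID, replace only the ASID. */
static inline uint32_t next_contextidr(uint32_t live, uint32_t new_ctx)
{
    return (live & ~ASID_MASK) | (new_ctx & ASID_MASK);
}
```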
- * - * It is assumed that: - * - cache type register is implemented - */ -__v6_setup: -#ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode - ALT_UP(nop) - orr r0, r0, #0x20 - ALT_SMP(mcr p15, 0, r0, c1, c0, 1) - ALT_UP(nop) -#endif - - mov r0, #0 - mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs - mcr p15, 0, r0, c2, c0, 2 @ TTB control register - ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) - ALT_UP(orr r4, r4, #TTB_FLAGS_UP) - ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) - ALT_UP(orr r8, r8, #TTB_FLAGS_UP) - mcr p15, 0, r8, c2, c0, 1 @ load TTB1 -#endif /* CONFIG_MMU */ - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer and - @ complete invalidations - adr r5, v6_crval - ldmia r5, {r5, r6} - ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables - mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r5 @ clear bits them - orr r0, r0, r6 @ set them -#ifdef CONFIG_ARM_ERRATA_364296 - /* - * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data - * corruption with hit-under-miss enabled). The conditional code below - * (setting the undocumented bit 31 in the auxiliary control register - * and the FI bit in the control register) disables hit-under-miss - * without putting the processor into full low interrupt latency mode. - */ - ldr r6, =0x4107b362 @ id for ARM1136 r0p2 - mrc p15, 0, r5, c0, c0, 0 @ get processor id - teq r5, r6 @ check for the faulty core - mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg - orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 - mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg - orreq r0, r0, #(1 << 21) @ low interrupt latency configuration -#endif - ret lr @ return to head.S:__ret - - /* - * V X F I D LR - * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM - * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 0 110 0011 1.00 .111 1101 < we want - */ - .type v6_crval, #object -v6_crval: - crval clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv6" - string cpu_elf_name, "v6" - .align - - .section ".proc.info.init", #alloc - - /* - * Match any ARMv6 processor core. - */ - .type __v6_proc_info, #object -__v6_proc_info: - .long 0x0007b000 - .long 0x0007f000 - ALT_SMP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_SMP) - ALT_UP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_UP) - .long PMD_TYPE_SECT | \ - PMD_SECT_XN | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __v6_setup, __v6_proc_info - .long cpu_arch_name - .long cpu_elf_name - /* See also feat_v6_fixup() for HWCAP_TLS */ - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS - .long cpu_v6_name - .long v6_processor_functions - .long v6wbi_tlb_fns - .long v6_user_fns - .long v6_cache_fns - .size __v6_proc_info, . 
- __v6_proc_info diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S deleted file mode 100644 index 5db029c8f9876c2b5f0bffa4d6139e9e93c2ed7b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7-2level.S +++ /dev/null @@ -1,162 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mm/proc-v7-2level.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - */ - -#define TTB_S (1 << 1) -#define TTB_RGN_NC (0 << 3) -#define TTB_RGN_OC_WBWA (1 << 3) -#define TTB_RGN_OC_WT (2 << 3) -#define TTB_RGN_OC_WB (3 << 3) -#define TTB_NOS (1 << 5) -#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) -#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) -#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) -#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) - -/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB -#define PMD_FLAGS_UP PMD_SECT_WB - -/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA -#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S - -/* - * cpu_v7_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys - * - * - pgd_phys - physical address of new TTB - * - * It is assumed that: - * - we are not using split page tables - * - * Note that we always need to flush BTAC/BTB if IBE is set - * even on Cortex-A8 revisions not affected by 430973. - * If IBE is not set, the flush BTAC/BTB won't do anything. - */ -ENTRY(cpu_v7_switch_mm) -#ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrc p15, 0, r2, c13, c0, 1 @ read current context ID - lsr r2, r2, #8 @ extract the PID - bfi r1, r2, #8, #24 @ insert into new context ID -#endif -#ifdef CONFIG_ARM_ERRATA_754322 - dsb -#endif - mcr p15, 0, r1, c13, c0, 1 @ set context ID - isb - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 - isb -#endif - bx lr -ENDPROC(cpu_v7_switch_mm) - -/* - * cpu_v7_set_pte_ext(ptep, pte) - * - * Set a level 2 translation table entry. - * - * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at +2048 bytes) - * - pte - PTE value to store - * - ext - value for extended PTE bits - */ -ENTRY(cpu_v7_set_pte_ext) -#ifdef CONFIG_MMU - str r1, [r0] @ linux version - - bic r3, r1, #0x000003f0 - bic r3, r3, #PTE_TYPE_MASK - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - tst r1, #1 << 4 - orrne r3, r3, #PTE_EXT_TEX(1) - - eor r1, r1, #L_PTE_DIRTY - tst r1, #L_PTE_RDONLY | L_PTE_DIRTY - orrne r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - - tst r1, #L_PTE_XN - orrne r3, r3, #PTE_EXT_XN - - tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_VALID - eorne r1, r1, #L_PTE_NONE - tstne r1, #L_PTE_NONE - moveq r3, #0 - - ARM( str r3, [r0, #2048]! 
) - THUMB( add r0, r0, #2048 ) - THUMB( str r3, [r0] ) - ALT_SMP(W(nop)) - ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte -#endif - bx lr -ENDPROC(cpu_v7_set_pte_ext) - - /* - * Memory region attributes with SCTLR.TRE=1 - * - * n = TEX[0],C,B - * TR = PRRR[2n+1:2n] - memory type - * IR = NMRR[2n+1:2n] - inner cacheable property - * OR = NMRR[2n+17:2n+16] - outer cacheable property - * - * n TR IR OR - * UNCACHED 000 00 - * BUFFERABLE 001 10 00 00 - * WRITETHROUGH 010 10 10 10 - * WRITEBACK 011 10 11 11 - * reserved 110 - * WRITEALLOC 111 10 01 01 - * DEV_SHARED 100 01 - * DEV_NONSHARED 100 01 - * DEV_WC 001 10 - * DEV_CACHED 011 10 - * - * Other attributes: - * - * DS0 = PRRR[16] = 0 - device shareable property - * DS1 = PRRR[17] = 1 - device shareable property - * NS0 = PRRR[18] = 0 - normal shareable property - * NS1 = PRRR[19] = 1 - normal shareable property - * NOS = PRRR[24+n] = 1 - not outer shareable - */ -.equ PRRR, 0xff0a81a8 -.equ NMRR, 0x40e040e0 - - /* - * Macro for setting up the TTBRx and TTBCR registers. - * - \ttb0 and \ttb1 updated with the corresponding flags. - */ - .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp - mcr p15, 0, \zero, c2, c0, 2 @ TTB control register - ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) - ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) - mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 - .endm - - /* AT - * TFR EV X F I D LR S - * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM - * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 01 0 110 0011 1100 .111 1101 < we want - */ - .align 2 - .type v7_crval, #object -v7_crval: - crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S deleted file mode 100644 index 131984462d0d5e1f057abab392bf2bca6832400d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7-3level.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mm/proc-v7-3level.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2011 ARM Ltd. - * Author: Catalin Marinas - * based on arch/arm/mm/proc-v7-2level.S - */ -#include - -#define TTB_IRGN_NC (0 << 8) -#define TTB_IRGN_WBWA (1 << 8) -#define TTB_IRGN_WT (2 << 8) -#define TTB_IRGN_WB (3 << 8) -#define TTB_RGN_NC (0 << 10) -#define TTB_RGN_OC_WBWA (1 << 10) -#define TTB_RGN_OC_WT (2 << 10) -#define TTB_RGN_OC_WB (3 << 10) -#define TTB_S (3 << 12) -#define TTB_EAE (1 << 31) - -/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) -#define PMD_FLAGS_UP (PMD_SECT_WB) - -/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) -#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) - -#ifndef __ARMEB__ -# define rpgdl r0 -# define rpgdh r1 -#else -# define rpgdl r1 -# define rpgdh r0 -#endif - -/* - * cpu_v7_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys (physical address of - * the new TTB). 
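The switch_mm body that follows packs the ASID into the 64-bit LPAE TTBR0: "orr rpgdh, rpgdh, r2, lsl #(48 - 32)" lands the 8-bit ASID in bits [55:48] of the register, alongside the table base in the low bits. A one-function C sketch:

```c
#include <stdint.h>

/* LPAE TTBR0 is 64 bits: translation table base in the low word,
 * 8-bit ASID in bits [55:48] - the "lsl #(48 - 32)" on the high word. */
static inline uint64_t lpae_ttbr0(uint64_t pgd_phys, uint8_t asid)
{
    return pgd_phys | ((uint64_t)asid << 48);
}
```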
- */ -ENTRY(cpu_v7_switch_mm) -#ifdef CONFIG_MMU - mmid r2, r2 - asid r2, r2 - orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd - mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 - isb -#endif - ret lr -ENDPROC(cpu_v7_switch_mm) - -#ifdef __ARMEB__ -#define rl r3 -#define rh r2 -#else -#define rl r2 -#define rh r3 -#endif - -/* - * cpu_v7_set_pte_ext(ptep, pte) - * - * Set a level 2 translation table entry. - * - ptep - pointer to level 3 translation table entry - * - pte - PTE value to store (64-bit in r2 and r3) - */ -ENTRY(cpu_v7_set_pte_ext) -#ifdef CONFIG_MMU - tst rl, #L_PTE_VALID - beq 1f - tst rh, #1 << (57 - 32) @ L_PTE_NONE - bicne rl, #L_PTE_VALID - bne 1f - - eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to - @ test for !L_PTE_DIRTY || L_PTE_RDONLY - tst ip, #1 << (55 - 32) | 1 << (58 - 32) - orrne rl, #PTE_AP2 - biceq rl, #PTE_AP2 - -1: strd r2, r3, [r0] - ALT_SMP(W(nop)) - ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte -#endif - ret lr -ENDPROC(cpu_v7_set_pte_ext) - - /* - * Memory region attributes for LPAE (defined in pgtable-3level.h): - * - * n = AttrIndx[2:0] - * - * n MAIR - * UNCACHED 000 00000000 - * BUFFERABLE 001 01000100 - * DEV_WC 001 01000100 - * WRITETHROUGH 010 10101010 - * WRITEBACK 011 11101110 - * DEV_CACHED 011 11101110 - * DEV_SHARED 100 00000100 - * DEV_NONSHARED 100 00000100 - * unused 101 - * unused 110 - * WRITEALLOC 111 11111111 - */ -.equ PRRR, 0xeeaa4400 @ MAIR0 -.equ NMRR, 0xff000004 @ MAIR1 - - /* - * Macro for setting up the TTBRx and TTBCR registers. - * - \ttbr1 updated. - */ - .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp - ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? - mov \tmp, #TTB_EAE @ for TTB control egister - ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) - ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) - ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) - ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) - /* - * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), - * otherwise booting secondary CPUs would end up using TTBR1 for the - * identity mapping set up in TTBR0. - */ - orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ - mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR - mov \tmp, \ttbr1, lsr #20 - mov \ttbr1, \ttbr1, lsl #12 - addls \ttbr1, \ttbr1, #TTBR1_OFFSET - mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 - .endm - - /* - * AT - * TFR EV X F IHD LR S - * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM - * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 11 0 110 0 0011 1100 .111 1101 < we want - */ - .align 2 - .type v7_crval, #object -v7_crval: - crval clear=0x0122c302, mmuset=0x30c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S deleted file mode 100644 index c4e8006a1a8cdd13ae3e622007e7c5b34883393a..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7.S +++ /dev/null @@ -1,826 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv7 processor support. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#ifdef CONFIG_ARM_LPAE -#include "proc-v7-3level.S" -#else -#include "proc-v7-2level.S" -#endif - -ENTRY(cpu_v7_proc_init) - ret lr -ENDPROC(cpu_v7_proc_init) - -ENTRY(cpu_v7_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x0006 @ .............ca. 
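Decoding the two bic masks in cpu_v7_proc_fin above: 0x1000 is the I bit (bit 12, I-cache enable) and 0x0006 covers C (bit 2, D-cache) and A (bit 1, alignment checking), matching the "...i..." and "...ca." dot diagrams in the comments. A sketch of the resulting SCTLR transform:

```c
#include <stdint.h>

#define SCTLR_A (1u << 1)   /* alignment checking */
#define SCTLR_C (1u << 2)   /* D-cache enable     */
#define SCTLR_I (1u << 12)  /* I-cache enable     */

/* cpu_v7_proc_fin leaves the MMU on but turns both caches (and
 * alignment checking) off before control is handed over. */
static inline uint32_t proc_fin_sctlr(uint32_t sctlr)
{
    return sctlr & ~(SCTLR_I | SCTLR_C | SCTLR_A);
}
```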
- mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr -ENDPROC(cpu_v7_proc_fin) - -/* - * cpu_v7_reset(loc, hyp) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - * - hyp - indicate if restart occurs in HYP mode - * - * This code must be executed using a flat identity mapping with - * caches disabled. - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_v7_reset) - mrc p15, 0, r2, c1, c0, 0 @ ctrl register - bic r2, r2, #0x1 @ ...............m - THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) - mcr p15, 0, r2, c1, c0, 0 @ disable MMU - isb -#ifdef CONFIG_ARM_VIRT_EXT - teq r1, #0 - bne __hyp_soft_restart -#endif - bx r0 -ENDPROC(cpu_v7_reset) - .popsection - -/* - * cpu_v7_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. - */ -ENTRY(cpu_v7_do_idle) - dsb @ WFI may enter a low-power mode - wfi - ret lr -ENDPROC(cpu_v7_do_idle) - -ENTRY(cpu_v7_dcache_clean_area) - ALT_SMP(W(nop)) @ MP extensions imply L1 PTW - ALT_UP_B(1f) - ret lr -1: dcache_line_size r2, r3 -2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, r2 - subs r1, r1, r2 - bhi 2b - dsb ishst - ret lr -ENDPROC(cpu_v7_dcache_clean_area) - -#ifdef CONFIG_ARM_PSCI - .arch_extension sec -ENTRY(cpu_v7_smc_switch_mm) - stmfd sp!, {r0 - r3} - movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldmfd sp!, {r0 - r3} - b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) - .arch_extension virt -ENTRY(cpu_v7_hvc_switch_mm) - stmfd sp!, {r0 - r3} - movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - hvc #0 - ldmfd sp!, {r0 - r3} - b cpu_v7_switch_mm -ENDPROC(cpu_v7_hvc_switch_mm) -#endif -ENTRY(cpu_v7_iciallu_switch_mm) - mov r3, #0 - mcr p15, 0, r3, c7, c5, 0 @ ICIALLU - b cpu_v7_switch_mm -ENDPROC(cpu_v7_iciallu_switch_mm) -ENTRY(cpu_v7_bpiall_switch_mm) - mov r3, #0 - mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB - b cpu_v7_switch_mm -ENDPROC(cpu_v7_bpiall_switch_mm) - - string cpu_v7_name, "ARMv7 Processor" - .align - -/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ -.globl cpu_v7_suspend_size -.equ cpu_v7_suspend_size, 4 * 9 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7_do_suspend) - stmfd sp!, {r4 - r11, lr} - mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID - mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID - stmia r0!, {r4 - r5} -#ifdef CONFIG_MMU - mrc p15, 0, r6, c3, c0, 0 @ Domain ID -#ifdef CONFIG_ARM_LPAE - mrrc p15, 1, r5, r7, c2 @ TTB 1 -#else - mrc p15, 0, r7, c2, c0, 1 @ TTB 1 -#endif - mrc p15, 0, r11, c2, c0, 2 @ TTB control register -#endif - mrc p15, 0, r8, c1, c0, 0 @ Control register - mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register - mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control - stmia r0, {r5 - r11} - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_v7_do_suspend) - -ENTRY(cpu_v7_do_resume) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID - ldmia r0!, {r4 - r5} - mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID - mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID - ldmia r0, {r5 - r11} -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs - mcr p15, 0, r6, c3, c0, 0 @ Domain ID -#ifdef CONFIG_ARM_LPAE - mcrr p15, 0, r1, ip, c2 @ TTB 0 - mcrr p15, 1, r5, r7, c2 @ TTB 1 -#else - ALT_SMP(orr r1, r1, 
#TTB_FLAGS_SMP) - ALT_UP(orr r1, r1, #TTB_FLAGS_UP) - mcr p15, 0, r1, c2, c0, 0 @ TTB 0 - mcr p15, 0, r7, c2, c0, 1 @ TTB 1 -#endif - mcr p15, 0, r11, c2, c0, 2 @ TTB control register - ldr r4, =PRRR @ PRRR - ldr r5, =NMRR @ NMRR - mcr p15, 0, r4, c10, c2, 0 @ write PRRR - mcr p15, 0, r5, c10, c2, 1 @ write NMRR -#endif /* CONFIG_MMU */ - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control - isb - dsb - mov r0, r8 @ control register - b cpu_resume_mmu -ENDPROC(cpu_v7_do_resume) -#endif - -.globl cpu_ca9mp_suspend_size -.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_ca9mp_do_suspend) - stmfd sp!, {r4 - r5} - mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register - mrc p15, 0, r5, c15, c0, 0 @ Power register - stmia r0!, {r4 - r5} - ldmfd sp!, {r4 - r5} - b cpu_v7_do_suspend -ENDPROC(cpu_ca9mp_do_suspend) - -ENTRY(cpu_ca9mp_do_resume) - ldmia r0!, {r4 - r5} - mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register - teq r4, r10 @ Already restored? - mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it - mrc p15, 0, r10, c15, c0, 0 @ Read Power register - teq r5, r10 @ Already restored? - mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it - b cpu_v7_do_resume -ENDPROC(cpu_ca9mp_do_resume) -#endif - -#ifdef CONFIG_CPU_PJ4B - globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm - globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init - globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin - globl_equ cpu_pj4b_reset, cpu_v7_reset -#ifdef CONFIG_PJ4B_ERRATA_4742 -ENTRY(cpu_pj4b_do_idle) - dsb @ WFI may enter a low-power mode - wfi - dsb @barrier - ret lr -ENDPROC(cpu_pj4b_do_idle) -#else - globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle -#endif - globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_pj4b_do_suspend) - stmfd sp!, {r6 - r10} - mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features - mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 - mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 - mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 - mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC - stmia r0!, {r6 - r10} - ldmfd sp!, {r6 - r10} - b cpu_v7_do_suspend -ENDPROC(cpu_pj4b_do_suspend) - -ENTRY(cpu_pj4b_do_resume) - ldmia r0!, {r6 - r10} - mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features - mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 - mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2 - mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 - mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC - b cpu_v7_do_resume -ENDPROC(cpu_pj4b_do_resume) -#endif -.globl cpu_pj4b_suspend_size -.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 - -#endif - -/* - * __v7_setup - * - * Initialise TLB, Caches, and MMU state ready to switch the MMU - * on. Return in r0 the new CP15 C1 control register setting. - * - * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack - * r4: TTBR0 (low word) - * r5: TTBR0 (high word if LPAE) - * r8: TTBR1 - * r9: Main ID register - * - * This should be able to cover all ARMv7 cores. 
- * - * It is assumed that: - * - cache type register is implemented - */ -__v7_ca5mp_setup: -__v7_ca9mp_setup: -__v7_cr7mp_setup: -__v7_cr8mp_setup: - mov r10, #(1 << 0) @ Cache/TLB ops broadcasting - b 1f -__v7_ca7mp_setup: -__v7_ca12mp_setup: -__v7_ca15mp_setup: -__v7_b15mp_setup: -__v7_ca17mp_setup: - mov r10, #0 -1: adr r0, __v7_setup_stack_ptr - ldr r12, [r0] - add r12, r12, r0 @ the local stack - stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 - bl v7_invalidate_l1 - ldmia r12, {r1-r6, lr} -#ifdef CONFIG_SMP - orr r10, r10, #(1 << 6) @ Enable SMP/nAMP mode - ALT_SMP(mrc p15, 0, r0, c1, c0, 1) - ALT_UP(mov r0, r10) @ fake it for UP - orr r10, r10, r0 @ Set required bits - teq r10, r0 @ Were they already set? - mcrne p15, 0, r10, c1, c0, 1 @ No, update register -#endif - b __v7_setup_cont - -/* - * Errata: - * r0, r10 available for use - * r1, r2, r4, r5, r9, r13: must be preserved - * r3: contains MIDR rX number in bits 23-20 - * r6: contains MIDR rXpY as 8-bit XY number - * r9: MIDR - */ -__ca8_errata: -#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) - teq r3, #0x00100000 @ only present in r1p* - mrceq p15, 0, r0, c1, c0, 1 @ read aux control register - orreq r0, r0, #(1 << 6) @ set IBE to 1 - mcreq p15, 0, r0, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_458693 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 0, r0, c1, c0, 1 @ read aux control register - orreq r0, r0, #(1 << 5) @ set L1NEON to 1 - orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 - mcreq p15, 0, r0, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_460075 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register - tsteq r0, #1 << 22 - orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit - mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register -#endif - b __errata_finish - -__ca9_errata: -#ifdef CONFIG_ARM_ERRATA_742230 - cmp r6, #0x22 @ only present up to r2p2 - mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register - orrle r0, r0, #1 << 4 @ set bit #4 - mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_742231 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 - mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register - orreq r0, r0, #1 << 12 @ set bit #12 - orreq r0, r0, #1 << 22 @ set bit #22 - mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_743622 - teq r3, #0x00200000 @ only present in r2p* - mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register - orreq r0, r0, #1 << 6 @ set bit #6 - mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) - ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 - ALT_UP_B(1f) - mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register - orrlt r0, r0, #1 << 11 @ set bit #11 - mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register -1: -#endif - b __errata_finish - -__ca15_errata: -#ifdef CONFIG_ARM_ERRATA_773022 - cmp r6, #0x4 @ only present up to r0p4 - mrcle p15, 0, r0, c1, c0, 1 @ read aux control register - orrle r0, r0, #1 << 1 @ disable loop buffer - mcrle p15, 0, r0, c1, c0, 1 @ write aux control register -#endif - b __errata_finish - -__ca12_errata: -#ifdef CONFIG_ARM_ERRATA_818325_852422 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #1 << 12 @ set bit #12 - mcr p15, 0, r10, c15, c0, 1 @ write 
diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_821420 - mrc p15, 0, r10, c15, c0, 2 @ read internal feature reg - orr r10, r10, #1 << 1 @ set bit #1 - mcr p15, 0, r10, c15, c0, 2 @ write internal feature reg -#endif -#ifdef CONFIG_ARM_ERRATA_825619 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #1 << 24 @ set bit #24 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_857271 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #3 << 10 @ set bits #10 and #11 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif - b __errata_finish - -__ca17_errata: -#ifdef CONFIG_ARM_ERRATA_852421 - cmp r6, #0x12 @ only present up to r1p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 24 @ set bit #24 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_852423 - cmp r6, #0x12 @ only present up to r1p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 12 @ set bit #12 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_857272 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #3 << 10 @ set bits #10 and #11 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif - b __errata_finish - -__v7_pj4b_setup: -#ifdef CONFIG_CPU_PJ4B - -/* Auxiliary Debug Modes Control 1 Register */ -#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ -#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ -#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ - -/* Auxiliary Debug Modes Control 2 Register */ -#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */ -#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */ -#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */ -#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */ -#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */ -#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\ - PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR) - -/* Auxiliary Functional Modes Control Register 0 */ -#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. 
Join the coherency fabric */ -#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */ -#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */ - -/* Auxiliary Debug Modes Control 0 Register */ -#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */ - - /* Auxiliary Debug Modes Control 1 Register */ - mrc p15, 1, r0, c15, c1, 1 - orr r0, r0, #PJ4B_CLEAN_LINE - orr r0, r0, #PJ4B_INTER_PARITY - bic r0, r0, #PJ4B_STATIC_BP - mcr p15, 1, r0, c15, c1, 1 - - /* Auxiliary Debug Modes Control 2 Register */ - mrc p15, 1, r0, c15, c1, 2 - bic r0, r0, #PJ4B_FAST_LDR - orr r0, r0, #PJ4B_AUX_DBG_CTRL2 - mcr p15, 1, r0, c15, c1, 2 - - /* Auxiliary Functional Modes Control Register 0 */ - mrc p15, 1, r0, c15, c2, 0 -#ifdef CONFIG_SMP - orr r0, r0, #PJ4B_SMP_CFB -#endif - orr r0, r0, #PJ4B_L1_PAR_CHK - orr r0, r0, #PJ4B_BROADCAST_CACHE - mcr p15, 1, r0, c15, c2, 0 - - /* Auxiliary Debug Modes Control 0 Register */ - mrc p15, 1, r0, c15, c1, 0 - orr r0, r0, #PJ4B_WFI_WFE - mcr p15, 1, r0, c15, c1, 0 - -#endif /* CONFIG_CPU_PJ4B */ - -__v7_setup: - adr r0, __v7_setup_stack_ptr - ldr r12, [r0] - add r12, r12, r0 @ the local stack - stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 - bl v7_invalidate_l1 - ldmia r12, {r1-r6, lr} - -__v7_setup_cont: - and r0, r9, #0xff000000 @ ARM? - teq r0, #0x41000000 - bne __errata_finish - and r3, r9, #0x00f00000 @ variant - and r6, r9, #0x0000000f @ revision - orr r6, r6, r3, lsr #20-4 @ combine variant and revision - ubfx r0, r9, #4, #12 @ primary part number - - /* Cortex-A8 Errata */ - ldr r10, =0x00000c08 @ Cortex-A8 primary part number - teq r0, r10 - beq __ca8_errata - - /* Cortex-A9 Errata */ - ldr r10, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r10 - beq __ca9_errata - - /* Cortex-A12 Errata */ - ldr r10, =0x00000c0d @ Cortex-A12 primary part number - teq r0, r10 - beq __ca12_errata - - /* Cortex-A17 Errata */ - ldr r10, =0x00000c0e @ Cortex-A17 primary part number - teq r0, r10 - beq __ca17_errata - - /* Cortex-A15 Errata */ - ldr r10, =0x00000c0f @ Cortex-A15 primary part number - teq r0, r10 - beq __ca15_errata - -__errata_finish: - mov r10, #0 - mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate -#ifdef CONFIG_MMU - mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup - ldr r3, =PRRR @ PRRR - ldr r6, =NMRR @ NMRR - mcr p15, 0, r3, c10, c2, 0 @ write PRRR - mcr p15, 0, r6, c10, c2, 1 @ write NMRR -#endif - dsb @ Complete invalidations -#ifndef CONFIG_ARM_THUMBEE - mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE - and r0, r0, #(0xf << 12) @ ThumbEE enabled field - teq r0, #(1 << 12) @ check if ThumbEE is present - bne 1f - mov r3, #0 - mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 - mrc p14, 6, r0, c0, c0, 0 @ load TEECR - orr r0, r0, #1 @ set the 1st bit in order to - mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access -1: -#endif - adr r3, v7_crval - ldmia r3, {r3, r6} - ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables -#ifdef CONFIG_SWP_EMULATE - orr r3, r3, #(1 << 10) @ set SW bit in "clear" - bic r6, r6, #(1 << 10) @ clear it in "mmuset" -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r3 @ clear bits them - orr r0, r0, r6 @ set them - THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions - ret lr @ return to head.S:__ret - - .align 2 -__v7_setup_stack_ptr: - .word PHYS_RELATIVE(__v7_setup_stack, .) 
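
The variant/revision combining in __v7_setup_cont above is what lets the per-core errata paths compare against compact rXpY values such as #0x20 for r2p0 or #0x22 for r2p2. A small C model of the MIDR decode (the sample MIDR value is only illustrative):

#include <stdint.h>
#include <stdio.h>

/* Decode a Main ID Register the way __v7_setup_cont does: implementer
 * in [31:24], variant in [23:20], primary part number in [15:4],
 * revision in [3:0].  The rXpY byte combines variant and revision,
 * e.g. r2p1 -> 0x21, matching the immediates in the errata checks. */
static void decode_midr(uint32_t midr)
{
    uint32_t implementer = midr >> 24;
    uint32_t variant     = (midr >> 20) & 0xf;
    uint32_t part        = (midr >> 4) & 0xfff;
    uint32_t revision    = midr & 0xf;
    uint32_t rXpY        = (variant << 4) | revision;

    printf("impl=%#x part=%#x r%up%u (rXpY=%#x)\n",
           implementer, part, variant, revision, rXpY);
}

int main(void)
{
    decode_midr(0x412fc091); /* illustrative Cortex-A9 r2p1 value */
    return 0;
}
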
-ENDPROC(__v7_setup) - - .bss - .align 2 -__v7_setup_stack: - .space 4 * 7 @ 7 registers - - __INITDATA - - .weak cpu_v7_bugs_init - - @ define struct processor (see and proc-macros.S) - define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - @ generic v7 bpiall on context switch - globl_equ cpu_v7_bpiall_proc_init, cpu_v7_proc_init - globl_equ cpu_v7_bpiall_proc_fin, cpu_v7_proc_fin - globl_equ cpu_v7_bpiall_reset, cpu_v7_reset - globl_equ cpu_v7_bpiall_do_idle, cpu_v7_do_idle - globl_equ cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_v7_bpiall_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_v7_bpiall_suspend_size, cpu_v7_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - globl_equ cpu_v7_bpiall_do_suspend, cpu_v7_do_suspend - globl_equ cpu_v7_bpiall_do_resume, cpu_v7_do_resume -#endif - define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init - -#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions -#else -#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions -#endif - -#ifndef CONFIG_ARM_LPAE - @ Cortex-A8 - always needs bpiall switch_mm implementation - globl_equ cpu_ca8_proc_init, cpu_v7_proc_init - globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca8_reset, cpu_v7_reset - globl_equ cpu_ca8_do_idle, cpu_v7_do_idle - globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_ca8_switch_mm, cpu_v7_bpiall_switch_mm - globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend - globl_equ cpu_ca8_do_resume, cpu_v7_do_resume -#endif - define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe - - @ Cortex-A9 - needs more registers preserved across suspend/resume - @ and bpiall switch_mm for hardening - globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init - globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca9mp_reset, cpu_v7_reset - globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle - globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - globl_equ cpu_ca9mp_switch_mm, cpu_v7_bpiall_switch_mm -#else - globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm -#endif - globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext - define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init -#endif - - @ Cortex-A15 - needs iciallu switch_mm for hardening - globl_equ cpu_ca15_proc_init, cpu_v7_proc_init - globl_equ cpu_ca15_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca15_reset, cpu_v7_reset - globl_equ cpu_ca15_do_idle, cpu_v7_do_idle - globl_equ cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - globl_equ cpu_ca15_switch_mm, cpu_v7_iciallu_switch_mm -#else - globl_equ cpu_ca15_switch_mm, cpu_v7_switch_mm -#endif - globl_equ cpu_ca15_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_ca15_suspend_size, cpu_v7_suspend_size - globl_equ cpu_ca15_do_suspend, cpu_v7_do_suspend - globl_equ cpu_ca15_do_resume, cpu_v7_do_resume - define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe -#ifdef CONFIG_CPU_PJ4B - define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 -#endif - - .section ".rodata" - - string 
cpu_arch_name, "armv7" - string cpu_elf_name, "v7" - .align - - .section ".proc.info.init", #alloc - - /* - * Standard v7 proc info content - */ -.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns - ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ - PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) - ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ - PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) - .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags - initfn \initfunc, \name - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ - HWCAP_EDSP | HWCAP_TLS | \hwcaps - .long cpu_v7_name - .long \proc_fns - .long v7wbi_tlb_fns - .long v6_user_fns - .long \cache_fns -.endm - -#ifndef CONFIG_ARM_LPAE - /* - * ARM Ltd. Cortex A5 processor. - */ - .type __v7_ca5mp_proc_info, #object -__v7_ca5mp_proc_info: - .long 0x410fc050 - .long 0xff0ffff0 - __v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup - .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info - - /* - * ARM Ltd. Cortex A9 processor. - */ - .type __v7_ca9mp_proc_info, #object -__v7_ca9mp_proc_info: - .long 0x410fc090 - .long 0xff0ffff0 - __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions - .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info - - /* - * ARM Ltd. Cortex A8 processor. - */ - .type __v7_ca8_proc_info, #object -__v7_ca8_proc_info: - .long 0x410fc080 - .long 0xff0ffff0 - __v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions - .size __v7_ca8_proc_info, . - __v7_ca8_proc_info - -#endif /* CONFIG_ARM_LPAE */ - - /* - * Marvell PJ4B processor. - */ -#ifdef CONFIG_CPU_PJ4B - .type __v7_pj4b_proc_info, #object -__v7_pj4b_proc_info: - .long 0x560f5800 - .long 0xff0fff00 - __v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions - .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info -#endif - - /* - * ARM Ltd. Cortex R7 processor. - */ - .type __v7_cr7mp_proc_info, #object -__v7_cr7mp_proc_info: - .long 0x410fc170 - .long 0xff0ffff0 - __v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup - .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info - - /* - * ARM Ltd. Cortex R8 processor. - */ - .type __v7_cr8mp_proc_info, #object -__v7_cr8mp_proc_info: - .long 0x410fc180 - .long 0xff0ffff0 - __v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup - .size __v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info - - /* - * ARM Ltd. Cortex A7 processor. - */ - .type __v7_ca7mp_proc_info, #object -__v7_ca7mp_proc_info: - .long 0x410fc070 - .long 0xff0ffff0 - __v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup - .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info - - /* - * ARM Ltd. Cortex A12 processor. - */ - .type __v7_ca12mp_proc_info, #object -__v7_ca12mp_proc_info: - .long 0x410fc0d0 - .long 0xff0ffff0 - __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info - - /* - * ARM Ltd. Cortex A15 processor. - */ - .type __v7_ca15mp_proc_info, #object -__v7_ca15mp_proc_info: - .long 0x410fc0f0 - .long 0xff0ffff0 - __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions - .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info - - /* - * Broadcom Corporation Brahma-B15 processor. 
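
Each .proc.info.init entry above opens with a required-ID/mask word pair; at boot the first entry whose masked MIDR equals the required value is selected. The usual 0xff0ffff0 mask zeroes the variant and revision fields, so every stepping of a core matches the same entry, while the final 0x000f0000/0x000f0000 entry catches any core reporting the CPUID scheme. A pared-down C model (the struct is illustrative, not the kernel's proc_info_list layout):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct proc_info {
    uint32_t cpu_val;
    uint32_t cpu_mask;
    const char *name;
};

/* Values mirror entries in this file; order matters, most specific first. */
static const struct proc_info table[] = {
    { 0x410fc090, 0xff0ffff0, "Cortex-A9" },
    { 0x410fc0f0, 0xff0ffff0, "Cortex-A15" },
    { 0x000f0000, 0x000f0000, "generic ARMv7" }, /* catch-all */
};

static const struct proc_info *lookup(uint32_t midr)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if ((midr & table[i].cpu_mask) == table[i].cpu_val)
            return &table[i];
    return NULL;
}

int main(void)
{
    printf("%s\n", lookup(0x412fc091)->name); /* any A9 stepping -> Cortex-A9 */
    return 0;
}
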
- */ - .type __v7_b15mp_proc_info, #object -__v7_b15mp_proc_info: - .long 0x420f00f0 - .long 0xff0ffff0 - __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns - .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info - - /* - * ARM Ltd. Cortex A17 processor. - */ - .type __v7_ca17mp_proc_info, #object -__v7_ca17mp_proc_info: - .long 0x410fc0e0 - .long 0xff0ffff0 - __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info - - /* ARM Ltd. Cortex A73 processor */ - .type __v7_ca73_proc_info, #object -__v7_ca73_proc_info: - .long 0x410fd090 - .long 0xff0ffff0 - __v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca73_proc_info, . - __v7_ca73_proc_info - - /* ARM Ltd. Cortex A75 processor */ - .type __v7_ca75_proc_info, #object -__v7_ca75_proc_info: - .long 0x410fd0a0 - .long 0xff0ffff0 - __v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca75_proc_info, . - __v7_ca75_proc_info - - /* - * Qualcomm Inc. Krait processors. - */ - .type __krait_proc_info, #object -__krait_proc_info: - .long 0x510f0400 @ Required ID value - .long 0xff0ffc00 @ Mask for ID - /* - * Some Krait processors don't indicate support for SDIV and UDIV - * instructions in the ARM instruction set, even though they actually - * do support them. They also don't indicate support for fused multiply - * instructions even though they actually do support them. - */ - __v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 - .size __krait_proc_info, . - __krait_proc_info - - /* - * Match any ARMv7 processor core. - */ - .type __v7_proc_info, #object -__v7_proc_info: - .long 0x000f0000 @ Required ID value - .long 0x000f0000 @ Mask for ID - __v7_proc __v7_proc_info, __v7_setup - .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S deleted file mode 100644 index 1a49d503eafc80b461d256f4f068e9a54c6d85f6..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7m.S +++ /dev/null @@ -1,235 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v7m.S - * - * Copyright (C) 2008 ARM Ltd. - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv7-M processor support. - */ -#include -#include -#include -#include -#include "proc-macros.S" - -ENTRY(cpu_v7m_proc_init) - ret lr -ENDPROC(cpu_v7m_proc_init) - -ENTRY(cpu_v7m_proc_fin) - ret lr -ENDPROC(cpu_v7m_proc_fin) - -/* - * cpu_v7m_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - */ - .align 5 -ENTRY(cpu_v7m_reset) - ret r0 -ENDPROC(cpu_v7m_reset) - -/* - * cpu_v7m_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. - */ -ENTRY(cpu_v7m_do_idle) - wfi - ret lr -ENDPROC(cpu_v7m_do_idle) - -ENTRY(cpu_v7m_dcache_clean_area) - ret lr -ENDPROC(cpu_v7m_dcache_clean_area) - -/* - * There is no MMU, so here is nothing to do. 
- */ -ENTRY(cpu_v7m_switch_mm) - ret lr -ENDPROC(cpu_v7m_switch_mm) - -.globl cpu_v7m_suspend_size -.equ cpu_v7m_suspend_size, 0 - -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7m_do_suspend) - ret lr -ENDPROC(cpu_v7m_do_suspend) - -ENTRY(cpu_v7m_do_resume) - ret lr -ENDPROC(cpu_v7m_do_resume) -#endif - -ENTRY(cpu_cm7_dcache_clean_area) - dcache_line_size r2, r3 - movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC - movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC - -1: str r0, [r3] @ clean D entry - add r0, r0, r2 - subs r1, r1, r2 - bhi 1b - dsb - ret lr -ENDPROC(cpu_cm7_dcache_clean_area) - -ENTRY(cpu_cm7_proc_fin) - movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) - movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) - ldr r0, [r2] - bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC) - str r0, [r2] - ret lr -ENDPROC(cpu_cm7_proc_fin) - - .section ".init.text", #alloc, #execinstr - -__v7m_cm7_setup: - mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) - b __v7m_setup_cont -/* - * __v7m_setup - * - * This should be able to cover all ARMv7-M cores. - */ -__v7m_setup: - mov r8, 0 - -__v7m_setup_cont: - @ Configure the vector table base address - ldr r0, =BASEADDR_V7M_SCB - ldr r12, =vector_table - str r12, [r0, V7M_SCB_VTOR] - - @ enable UsageFault, BusFault and MemManage fault. - ldr r5, [r0, #V7M_SCB_SHCSR] - orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) - str r5, [r0, #V7M_SCB_SHCSR] - - @ Lower the priority of the SVC and PendSV exceptions - mov r5, #0x80000000 - str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority - mov r5, #0x00800000 - str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority - - @ SVC to switch to handler mode. Notice that this requires sp to - @ point to writeable memory because the processor saves - @ some registers to the stack. - badr r1, 1f - ldr r5, [r12, #11 * 4] @ read the SVC vector entry - str r1, [r12, #11 * 4] @ write the temporary SVC vector entry - dsb - mov r6, lr @ save LR - ldr sp, =init_thread_union + THREAD_START_SP - cpsie i - svc #0 -1: cpsid i - /* Calculate exc_ret */ - orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK - ldmia sp, {r0-r3, r12} - str r5, [r12, #11 * 4] @ restore the original SVC vector entry - mov lr, r6 @ restore LR - - @ Special-purpose control register - mov r1, #1 - msr control, r1 @ Thread mode has unpriviledged access - - @ Configure caches (if implemented) - teq r8, #0 - stmiane sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 - blne v7m_invalidate_l1 - teq r8, #0 @ re-evalutae condition - ldmiane sp, {r0-r6, lr} - - @ Configure the System Control Register to ensure 8-byte stack alignment - @ Note the STKALIGN bit is either RW or RAO. 
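
Unlike the CP15-based v7-A routines elsewhere in this patch, cpu_cm7_dcache_clean_area above drives the cache through memory-mapped System Control Block registers, since v7-M has no coprocessor interface. A hedged C equivalent of its shape (the DCCMVAC address and line size are assumptions from the Cortex-M7 SCB layout, not values taken from this file, and it only runs on an ARM target):

#include <stddef.h>
#include <stdint.h>

#define SCB_DCCMVAC  (*(volatile uint32_t *)0xe000ef68u) /* assumed address */
#define DCACHE_LINE  32u                                 /* assumed line size */

/* Clean [addr, addr + size) by writing each line-aligned address to
 * DCCMVAC ("clean D-cache line by MVA"), then order completion with a
 * DSB, mirroring the loop in cpu_cm7_dcache_clean_area. */
static void cm7_dcache_clean_area(uintptr_t addr, size_t size)
{
    uintptr_t end = addr + size;

    for (addr &= ~(uintptr_t)(DCACHE_LINE - 1); addr < end;
         addr += DCACHE_LINE)
        SCB_DCCMVAC = (uint32_t)addr;
    __asm__ volatile("dsb" ::: "memory");
}
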
- ldr r0, [r0, V7M_SCB_CCR] @ system control register - orr r0, #V7M_SCB_CCR_STKALIGN - orr r0, r0, r8 - - ret lr -ENDPROC(__v7m_setup) - -/* - * Cortex-M7 processor functions - */ - globl_equ cpu_cm7_proc_init, cpu_v7m_proc_init - globl_equ cpu_cm7_reset, cpu_v7m_reset - globl_equ cpu_cm7_do_idle, cpu_v7m_do_idle - globl_equ cpu_cm7_switch_mm, cpu_v7m_switch_mm - - define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - string cpu_arch_name, "armv7m" - string cpu_elf_name "v7m" - string cpu_v7m_name "ARMv7-M" - - .section ".proc.info.init", #alloc - -.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions - .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ - .long 0 /* proc_info_list.__cpu_io_mmu_flags */ - initfn \initfunc, \name - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps - .long cpu_v7m_name - .long \proc_fns - .long 0 /* proc_info_list.tlb */ - .long 0 /* proc_info_list.user */ - .long \cache_fns -.endm - - /* - * Match ARM Cortex-M7 processor. - */ - .type __v7m_cm7_proc_info, #object -__v7m_cm7_proc_info: - .long 0x410fc270 /* ARM Cortex-M7 0xC27 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions - .size __v7m_cm7_proc_info, . - __v7m_cm7_proc_info - - /* - * Match ARM Cortex-M4 processor. - */ - .type __v7m_cm4_proc_info, #object -__v7m_cm4_proc_info: - .long 0x410fc240 /* ARM Cortex-M4 0xC24 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP - .size __v7m_cm4_proc_info, . - __v7m_cm4_proc_info - - /* - * Match ARM Cortex-M3 processor. - */ - .type __v7m_cm3_proc_info, #object -__v7m_cm3_proc_info: - .long 0x410fc230 /* ARM Cortex-M3 0xC23 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm3_proc_info, __v7m_setup - .size __v7m_cm3_proc_info, . - __v7m_cm3_proc_info - - /* - * Match any ARMv7-M processor core. - */ - .type __v7m_proc_info, #object -__v7m_proc_info: - .long 0x000f0000 @ Required ID value - .long 0x000f0000 @ Mask for ID - __v7m_proc __v7m_proc_info, __v7m_setup - .size __v7m_proc_info, . - __v7m_proc_info - diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S deleted file mode 100644 index 1ac0fbbe9f127f3524eee28503a9ded19790cc98..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-xsc3.S +++ /dev/null @@ -1,529 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-xsc3.S - * - * Original Author: Matthew Gilbert - * Current Maintainer: Lennert Buytenhek - * - * Copyright 2004 (C) Intel Corp. - * Copyright 2005 (C) MontaVista Software, Inc. - * - * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is - * an extension to Intel's original XScale core that adds the following - * features: - * - * - ARMv6 Supersections - * - Low Locality Reference pages (replaces mini-cache) - * - 36-bit addressing - * - L2 cache - * - Cache coherency if chipset supports it - * - * Based on original XScale code by Nicolas Pitre. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. 
If the - * area is larger than this, then we flush the whole cache. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The cache line size of the L1 I, L1 D and unified L2 cache. - */ -#define CACHELINESIZE 32 - -/* - * The size of the L1 D cache. - */ -#define CACHESIZE 32768 - -/* - * This macro is used to wait for a CP15 write and is needed when we - * have to ensure that the last operation to the coprocessor was - * completed before continuing with operation. - */ - .macro cpwait_ret, lr, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - sub pc, \lr, \rd, LSR #32 @ wait for completion and - @ flush instruction pipeline - .endm - -/* - * This macro cleans and invalidates the entire L1 D cache. - */ - - .macro clean_d_cache rd, rs - mov \rd, #0x1f00 - orr \rd, \rd, #0x00e0 -1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line - adds \rd, \rd, #0x40000000 - bcc 1b - subs \rd, \rd, #0x20 - bpl 1b - .endm - - .text - -/* - * cpu_xsc3_proc_init() - * - * Nothing too exciting at the moment - */ -ENTRY(cpu_xsc3_proc_init) - ret lr - -/* - * cpu_xsc3_proc_fin() - */ -ENTRY(cpu_xsc3_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...IZ........... - bic r0, r0, #0x0006 @ .............CA. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_xsc3_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_xsc3_reset) - mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r1 @ reset CPSR - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x3900 @ ..VIZ..S........ - bic r1, r1, #0x0086 @ ........B....CA. - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB - bic r1, r1, #0x0001 @ ...............M - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - @ CAUTION: MMU turned off from this point. We count on the pipeline - @ already containing those two last instructions to survive. - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - ret r0 -ENDPROC(cpu_xsc3_reset) - .popsection - -/* - * cpu_xsc3_do_idle() - * - * Cause the processor to idle - * - * For now we do nothing but go to idle mode for every case - * - * XScale supports clock switching, but using idle mode support - * allows external hardware to react to system state changes. - */ - .align 5 - -ENTRY(cpu_xsc3_do_idle) - mov r0, #1 - mcr p14, 0, r0, c7, c0, 0 @ go to idle - ret lr - -/* ================================= CACHE ================================ */ - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(xsc3_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(xsc3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(xsc3_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
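
xsc3_flush_user_cache_range, just below, applies the MAX_AREA_SIZE rule described above: a range of 32 KiB or more falls back to a whole-cache clean, since walking it line by line would cost more than cleaning everything. The decision reduced to C (cache primitives stubbed out; a sketch, not kernel code):

#include <stdint.h>

#define MAX_AREA_SIZE 32768u
#define CACHELINESIZE 32u

static void clean_inv_line(uintptr_t a) { (void)a; } /* c7, c14, 1 */
static void flush_whole_cache(void)     { }

static void flush_cache_range(uintptr_t start, uintptr_t end)
{
    if (end - start >= MAX_AREA_SIZE) { /* bhs: unsigned >= */
        flush_whole_cache();
        return;
    }
    for (; start < end; start += CACHELINESIZE)
        clean_inv_line(start);
}

int main(void)
{
    flush_cache_range(0x8000, 0x9000); /* small range: per-line path */
    return 0;
}
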
- */ -ENTRY(xsc3_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - clean_d_cache r0, r1 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_user_cache_range(start, end, vm_flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_area_struct describing address space - */ - .align 5 -ENTRY(xsc3_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #MAX_AREA_SIZE - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line - mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the I cache and the D cache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * - * Note: single I-cache line invalidation isn't used here since - * it also trashes the mini I-cache used by JTAG debuggers. - */ -ENTRY(xsc3_coherent_kern_range) -/* FALLTHROUGH */ -ENTRY(xsc3_coherent_user_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache. - * - * - addr - kernel address - * - size - region size - */ -ENTRY(xsc3_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -xsc3_dma_inv_range: - tst r0, #CACHELINESIZE - 1 - bic r0, r0, #CACHELINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line - tst r1, #CACHELINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -xsc3_dma_clean_range: - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xsc3_dma_flush_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xsc3_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq xsc3_dma_clean_range - bcs xsc3_dma_inv_range - b xsc3_dma_flush_range -ENDPROC(xsc3_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xsc3_dma_unmap_area) - ret lr -ENDPROC(xsc3_dma_unmap_area) - - .globl xsc3_flush_kern_cache_louis - .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xsc3 - -ENTRY(cpu_xsc3_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - subs r1, r1, #CACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_xsc3_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_xsc3_switch_mm) - clean_d_cache r1, r2 - mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - orr r0, r0, #0x18 @ cache the page table in L2 - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - cpwait_ret lr, ip - -/* - * cpu_xsc3_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ -cpu_xsc3_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE - .long PTE_EXT_TEX(5) | PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long 0x00 @ L_PTE_MT_MINICACHE (not present) - .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) - .long 0x00 @ unused - .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long 0x00 @ unused - - .align 5 -ENTRY(cpu_xsc3_set_pte_ext) - xscale_set_pte_ext_prologue - - tst r1, #L_PTE_SHARED @ shared? 
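
xsc3_dma_map_area above turns the DMA direction into one of the three range operations, and xsc3_dma_inv_range (earlier) first cleans any line that only partly overlaps the buffer so the bytes outside it are not discarded. A C model of both (direction values as in the kernel's dma_data_direction; the line operations are stubs):

#include <stddef.h>
#include <stdint.h>

#define CACHELINESIZE 32u

enum dma_data_direction {              /* values as in the kernel */
    DMA_BIDIRECTIONAL = 0,
    DMA_TO_DEVICE     = 1,
    DMA_FROM_DEVICE   = 2,
};

static void clean_line(uintptr_t a)     { (void)a; } /* c7, c10, 1 */
static void clean_inv_line(uintptr_t a) { (void)a; } /* c7, c14, 1 */
static void inv_line(uintptr_t a)       { (void)a; } /* c7, c6, 1 */

/* Partially covered lines at either edge are cleaned first; the loop
 * then invalidates them, which nets out to a flush of just those lines. */
static void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHELINESIZE - 1)) {
        start &= ~(uintptr_t)(CACHELINESIZE - 1);
        clean_line(start);
    }
    if (end & (CACHELINESIZE - 1))
        clean_line(end & ~(uintptr_t)(CACHELINESIZE - 1));
    for (; start < end; start += CACHELINESIZE)
        inv_line(start);
}

static void dma_clean_range(uintptr_t start, uintptr_t end)
{
    for (start &= ~(uintptr_t)(CACHELINESIZE - 1); start < end;
         start += CACHELINESIZE)
        clean_line(start);
}

static void dma_flush_range(uintptr_t start, uintptr_t end)
{
    for (start &= ~(uintptr_t)(CACHELINESIZE - 1); start < end;
         start += CACHELINESIZE)
        clean_inv_line(start);
}

/* The three-way branch in xsc3_dma_map_area (beq / bcs / fallthrough). */
static void dma_map_area(uintptr_t start, size_t size,
                         enum dma_data_direction dir)
{
    uintptr_t end = start + size;

    if (dir == DMA_TO_DEVICE)
        dma_clean_range(start, end);
    else if (dir > DMA_TO_DEVICE)      /* DMA_FROM_DEVICE */
        dma_inv_range(start, end);
    else                               /* DMA_BIDIRECTIONAL */
        dma_flush_range(start, end);
}

int main(void)
{
    dma_map_area(0x1008, 64, DMA_FROM_DEVICE); /* unaligned start */
    return 0;
}
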
- and r1, r1, #L_PTE_MT_MASK - adr ip, cpu_xsc3_mt_table - ldr ip, [ip, r1] - orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit - bic r2, r2, #0x0c @ clear old C,B bits - orr r2, r2, ip - - xscale_set_pte_ext_epilogue - ret lr - - .ltorg - .align - -.globl cpu_xsc3_suspend_size -.equ cpu_xsc3_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xsc3_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_xsc3_do_suspend) - -ENTRY(cpu_xsc3_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer - mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. - mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - orr r1, r1, #0x18 @ cache the page table in L2 - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_xsc3_do_resume) -#endif - - .type __xsc3_setup, #function -__xsc3_setup: - mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - orr r4, r4, #0x18 @ cache the page table in L2 - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - - mov r0, #1 << 6 @ cp6 access for early sched_clock - mcr p15, 0, r0, c15, c1, 0 @ write CP access register - - mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg - and r0, r0, #2 @ preserve bit P bit setting - orr r0, r0, #(1 << 10) @ enable L2 for LLR cache - mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg - - adr r5, xsc3_crval - ldmia r5, {r5, r6} - -#ifdef CONFIG_CACHE_XSC3L2 - mrc p15, 1, r0, c0, c0, 1 @ get L2 present information - ands r0, r0, #0xf8 - orrne r6, r6, #(1 << 26) @ enable L2 if present -#endif - - mrc p15, 0, r0, c1, c0, 0 @ get control register - bic r0, r0, r5 @ ..V. ..R. .... ..A. - orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) - @ ...I Z..S .... .... (uc) - ret lr - - .size __xsc3_setup, . 
- __xsc3_setup - - .type xsc3_crval, #object -xsc3_crval: - crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_xsc3_name, "XScale-V3 based processor" - - .align - - .section ".proc.info.init", #alloc - -.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __xsc3_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_xsc3_name - .long xsc3_processor_functions - .long v4wbi_tlb_fns - .long xsc3_mc_user_fns - .long xsc3_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - xsc3_proc_info xsc3, 0x69056000, 0xffffe000 - -/* Note: PXA935 changed its implementor ID from Intel to Marvell */ - xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S deleted file mode 100644 index bdb2b7749b0393dec09fc39236c32d72cdfa1f06..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-xscale.S +++ /dev/null @@ -1,658 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-xscale.S - * - * Author: Nicolas Pitre - * Created: November 2000 - * Copyright: (C) 2000, 2001 MontaVista Software Inc. - * - * MMU functions for the Intel XScale CPUs - * - * 2001 Aug 21: - * some contributions by Brett Gaines - * Copyright 2001 by Intel Corp. - * - * 2001 Sep 08: - * Completely revisited, many important fixes - * Nicolas Pitre - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. If the area - * is larger than this, then we flush the whole cache - */ -#define MAX_AREA_SIZE 32768 - -/* - * the cache line size of the I and D cache - */ -#define CACHELINESIZE 32 - -/* - * the size of the data cache - */ -#define CACHESIZE 32768 - -/* - * Virtual address used to allocate the cache when flushed - * - * This must be an address range which is _never_ used. It should - * apparently have a mapping in the corresponding page table for - * compatibility with future CPUs that _could_ require it. For instance we - * don't care. - * - * This must be aligned on a 2*CACHESIZE boundary. The code selects one of - * the 2 areas in alternance each time the clean_d_cache macro is used. - * Without this the XScale core exhibits cache eviction problems and no one - * knows why. - * - * Reminder: the vector table is located at 0xffff0000-0xffff0fff. - */ -#define CLEAN_ADDR 0xfffe0000 - -/* - * This macro is used to wait for a CP15 write and is needed - * when we have to ensure that the last operation to the co-pro - * was completed before continuing with operation. 
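
The crval triple used by xsc3_crval above (and by the other proc-*.S files in this patch) encodes the control-register update done at the end of __xsc3_setup: new = (old & ~clear) | set, with mmuset for the MMU-on case and ucset for the uncached one. A standalone check using the xsc3_crval constants:

#include <assert.h>
#include <stdint.h>

#define XSC3_CLEAR  0x04002202u
#define XSC3_MMUSET 0x00003905u

/* The bic/orr pair at the end of __xsc3_setup, as one expression. */
static uint32_t apply_crval(uint32_t ctrl, uint32_t clear, uint32_t set)
{
    return (ctrl & ~clear) | set;
}

int main(void)
{
    /* Whatever the register held before, every "set" bit ends up set
     * and every "clear" bit not re-set by mmuset ends up clear. */
    uint32_t v = apply_crval(0xffffffffu, XSC3_CLEAR, XSC3_MMUSET);

    assert((v & XSC3_MMUSET) == XSC3_MMUSET);
    assert((v & (XSC3_CLEAR & ~XSC3_MMUSET)) == 0);
    return 0;
}
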
- */ - .macro cpwait, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - mov \rd, \rd @ wait for completion - sub pc, pc, #4 @ flush instruction pipeline - .endm - - .macro cpwait_ret, lr, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - sub pc, \lr, \rd, LSR #32 @ wait for completion and - @ flush instruction pipeline - .endm - -/* - * This macro cleans the entire dcache using line allocate. - * The main loop has been unrolled to reduce loop overhead. - * rd and rs are two scratch registers. - */ - .macro clean_d_cache, rd, rs - ldr \rs, =clean_addr - ldr \rd, [\rs] - eor \rd, \rd, #CACHESIZE - str \rd, [\rs] - add \rs, \rd, #CACHESIZE -1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - teq \rd, \rs - bne 1b - .endm - - .data - .align 2 -clean_addr: .word CLEAN_ADDR - - .text - -/* - * cpu_xscale_proc_init() - * - * Nothing too exciting at the moment - */ -ENTRY(cpu_xscale_proc_init) - @ enable write buffer coalescing. Some bootloader disable it - mrc p15, 0, r1, c1, c0, 1 - bic r1, r1, #1 - mcr p15, 0, r1, c1, c0, 1 - ret lr - -/* - * cpu_xscale_proc_fin() - */ -ENTRY(cpu_xscale_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...IZ........... - bic r0, r0, #0x0006 @ .............CA. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_xscale_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - * - * Beware PXA270 erratum E7. - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_xscale_reset) - mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r1 @ reset CPSR - mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB - mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x0086 @ ........B....CA. - bic r1, r1, #0x3900 @ ..VIZ..S........ - sub pc, pc, #4 @ flush pipeline - @ *** cache line aligned *** - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x0001 @ ...............M - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - @ CAUTION: MMU turned off from this point. We count on the pipeline - @ already containing those two last instructions to survive. - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret r0 -ENDPROC(cpu_xscale_reset) - .popsection - -/* - * cpu_xscale_do_idle() - * - * Cause the processor to idle - * - * For now we do nothing but go to idle mode for every case - * - * XScale supports clock switching, but using idle mode support - * allows external hardware to react to system state changes. - */ - .align 5 - -ENTRY(cpu_xscale_do_idle) - mov r0, #1 - mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE - ret lr - -/* ================================= CACHE ================================ */ - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(xscale_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(xscale_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
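
The clean_d_cache macro above implements the CLEAN_ADDR scheme from the file header: rather than iterating by set/way, it line-allocates one full cache's worth of lines from a dedicated unused virtual window, which evicts (and therefore cleans) every dirty line, and it alternates between the two halves of the 2*CACHESIZE region on each call. The same logic in C (alloc_line stands in for the c7, c2, 5 line-allocate op):

#include <stdint.h>

#define CACHESIZE     32768u
#define CACHELINESIZE 32u
#define CLEAN_ADDR    0xfffe0000u /* dedicated, otherwise-unused window */

static uintptr_t clean_addr = CLEAN_ADDR;

static void alloc_line(uintptr_t addr) { (void)addr; }

static void clean_d_cache(void)
{
    uintptr_t addr, end;

    clean_addr ^= CACHESIZE;      /* alternate between the two areas */
    addr = clean_addr;
    end  = addr + CACHESIZE;
    while (addr != end) {
        alloc_line(addr);         /* allocating evicts a (dirty) line */
        addr += CACHELINESIZE;
    }
}

int main(void)
{
    clean_d_cache();              /* uses 0xfffe8000 */
    clean_d_cache();              /* uses 0xfffe0000 */
    return 0;
}
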
- */ -ENTRY(xscale_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(xscale_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - clean_d_cache r0, r1 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * flush_user_cache_range(start, end, vm_flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_area_struct describing address space - */ - .align 5 -ENTRY(xscale_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #MAX_AREA_SIZE - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line - mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line - mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * - * Note: single I-cache line invalidation isn't used here since - * it also trashes the mini I-cache used by JTAG debuggers. - */ -ENTRY(xscale_coherent_kern_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xscale_coherent_user_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(xscale_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
- * - * - start - virtual start address - * - end - virtual end address - */ -xscale_dma_inv_range: - tst r0, #CACHELINESIZE - 1 - bic r0, r0, #CACHELINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHELINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -xscale_dma_clean_range: - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xscale_dma_flush_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq xscale_dma_clean_range - bcs xscale_dma_inv_range - b xscale_dma_flush_range -ENDPROC(xscale_dma_map_area) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_80200_A0_A1_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - beq xscale_dma_clean_range - b xscale_dma_flush_range -ENDPROC(xscale_80200_A0_A1_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_dma_unmap_area) - ret lr -ENDPROC(xscale_dma_unmap_area) - - .globl xscale_flush_kern_cache_louis - .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xscale - -/* - * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't - * clear the dirty bits, which means that if we invalidate a dirty line, - * the dirty data can still be written back to external memory later on. - * - * The recommended workaround is to always do a clean D-cache line before - * doing an invalidate D-cache line, so on the affected processors, - * dma_inv_range() is implemented as dma_flush_range(). - * - * See erratum #25 of "Intel 80200 Processor Specification Update", - * revision January 22, 2003, available at: - * http://www.intel.com/design/iio/specupdt/273415.htm - */ -.macro a0_alias basename - .globl xscale_80200_A0_A1_\basename - .type xscale_80200_A0_A1_\basename , %function - .equ xscale_80200_A0_A1_\basename , xscale_\basename -.endm - -/* - * Most of the cache functions are unchanged for these processor revisions. 
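
The a0_alias list that follows builds a second cpu_cache_fns for 80200 A0/A1 in which only the DMA mapping path differs: because per-line invalidate cannot be trusted on those steppings (erratum #25 above), the invalidate role is effectively served by the clean-and-invalidate routine. An illustrative C rendering of the idea (the struct is a stand-in, not the kernel's cpu_cache_fns layout):

#include <stdint.h>

struct dma_cache_ops {
    void (*inv_range)(uintptr_t, uintptr_t);
    void (*clean_range)(uintptr_t, uintptr_t);
    void (*flush_range)(uintptr_t, uintptr_t);
};

static void xscale_inv(uintptr_t s, uintptr_t e)   { (void)s; (void)e; }
static void xscale_clean(uintptr_t s, uintptr_t e) { (void)s; (void)e; }
static void xscale_flush(uintptr_t s, uintptr_t e) { (void)s; (void)e; }

/* Normal XScale cores get the real invalidate. */
static const struct dma_cache_ops xscale_ops = {
    .inv_range   = xscale_inv,
    .clean_range = xscale_clean,
    .flush_range = xscale_flush,
};

/* 80200 A0/A1: invalidating a dirty line does not clear its dirty bit,
 * so "invalidate" must clean first, i.e. behave as a flush. */
static const struct dma_cache_ops xscale_80200_a0_a1_ops = {
    .inv_range   = xscale_flush,
    .clean_range = xscale_clean,
    .flush_range = xscale_flush,
};

int main(void)
{
    xscale_80200_a0_a1_ops.inv_range(0x1000, 0x2000); /* really a flush */
    (void)xscale_ops;
    return 0;
}
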
- * Export suitable alias symbols for the unchanged functions: - */ - a0_alias flush_icache_all - a0_alias flush_user_cache_all - a0_alias flush_kern_cache_all - a0_alias flush_kern_cache_louis - a0_alias flush_user_cache_range - a0_alias coherent_kern_range - a0_alias coherent_user_range - a0_alias flush_kern_dcache_area - a0_alias dma_flush_range - a0_alias dma_unmap_area - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xscale_80200_A0_A1 - -ENTRY(cpu_xscale_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - subs r1, r1, #CACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_xscale_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_xscale_switch_mm) - clean_d_cache r1, r2 - mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - cpwait_ret lr, ip - -/* - * cpu_xscale_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - * - * Errata 40: must set memory to write-through for user read-only pages. - */ -cpu_xscale_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE - .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE - .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC - .long 0x00 @ unused - .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long 0x00 @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long 0x00 @ unused - - .align 5 -ENTRY(cpu_xscale_set_pte_ext) - xscale_set_pte_ext_prologue - - @ - @ Erratum 40: must set memory to write-through for user read-only pages - @ - and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) - teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY - - moveq r1, #L_PTE_MT_WRITETHROUGH - and r1, r1, #L_PTE_MT_MASK - adr ip, cpu_xscale_mt_table - ldr ip, [ip, r1] - bic r2, r2, #0x0c - orr r2, r2, ip - - xscale_set_pte_ext_epilogue - ret lr - - .ltorg - .align - -.globl cpu_xscale_suspend_size -.equ cpu_xscale_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xscale_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmfd sp!, {r4 - r9, pc} -ENDPROC(cpu_xscale_do_suspend) - -ENTRY(cpu_xscale_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. 
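
The erratum 40 fixup inside cpu_xscale_set_pte_ext above downgrades a user-readable, read-only, write-back page to write-through before the memory type is looked up. In C terms (the L_PTE_* bit values follow the 2-level layout but should be treated as illustrative constants):

#include <stdint.h>
#include <stdio.h>

#define L_PTE_MT_WRITETHROUGH (2u << 2)
#define L_PTE_MT_WRITEBACK    (3u << 2)
#define L_PTE_MT_MASK         (15u << 2)
#define L_PTE_RDONLY          (1u << 7)
#define L_PTE_USER            (1u << 8)

/* Masking out bit (4 << 2) first, as the assembly does, makes
 * write-alloc (111) compare equal to write-back (011), so write-alloc
 * user read-only pages are downgraded too. */
static uint32_t memtype(uint32_t pte)
{
    uint32_t key = pte & ((L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY)
                          & ~(4u << 2));

    if (key == (L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY))
        return L_PTE_MT_WRITETHROUGH;
    return pte & L_PTE_MT_MASK;
}

int main(void)
{
    printf("%#x\n", memtype(L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY));
    return 0; /* prints 0x8, i.e. write-through */
}
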
- mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_xscale_do_resume) -#endif - - .type __xscale_setup, #function -__xscale_setup: - mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs - mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde - orr r0, r0, #1 << 13 @ Its undefined whether this - mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes - - adr r5, xscale_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0, 0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __xscale_setup, . - __xscale_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 1.01 .... .101 - * - */ - .type xscale_crval, #object -xscale_crval: - crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - - string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" - string cpu_80200_name, "XScale-80200" - string cpu_80219_name, "XScale-80219" - string cpu_8032x_name, "XScale-IOP8032x Family" - string cpu_8033x_name, "XScale-IOP8033x Family" - string cpu_pxa250_name, "XScale-PXA250" - string cpu_pxa210_name, "XScale-PXA210" - string cpu_ixp42x_name, "XScale-IXP42x Family" - string cpu_ixp43x_name, "XScale-IXP43x Family" - string cpu_ixp46x_name, "XScale-IXP46x Family" - string cpu_ixp2400_name, "XScale-IXP2400" - string cpu_ixp2800_name, "XScale-IXP2800" - string cpu_pxa255_name, "XScale-PXA255" - string cpu_pxa270_name, "XScale-PXA270" - - .align - - .section ".proc.info.init", #alloc - -.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __xscale_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long \cpu_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .ifb \cache - .long xscale_cache_fns - .else - .long \cache - .endif - .size __\name\()_proc_info, . 
- __\name\()_proc_info -.endm - - xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ - cache=xscale_80200_A0_A1_cache_fns - xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name - xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name - xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name - xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name - xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name - xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name - xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name - xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name - xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name - xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name - xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name - xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name - xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S deleted file mode 100644 index 769778928356e01e50d023f4924e3df57944999b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pv-fixup-asm.S +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Russell King - * - * This assembly is required to safely remap the physical address space - * for Keystone 2 - */ -#include -#include -#include -#include -#include - - .section ".idmap.text", "ax" - -#define L1_ORDER 3 -#define L2_ORDER 3 - -ENTRY(lpae_pgtables_remap_asm) - stmfd sp!, {r4-r8, lr} - - mrc p15, 0, r8, c1, c0, 0 @ read control reg - bic ip, r8, #CR_M @ disable caches and MMU - mcr p15, 0, ip, c1, c0, 0 - dsb - isb - - /* Update level 2 entries covering the kernel */ - ldr r6, =(_end - 1) - add r7, r2, #0x1000 - add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER - add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) -1: ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L2_ORDER - cmp r7, r6 - bls 1b - - /* Update level 2 entries for the boot data */ - add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 - ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L2_ORDER - ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7] - - /* Update level 1 entries */ - mov r6, #4 - mov r7, r2 -2: ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L1_ORDER - subs r6, r6, #1 - bne 2b - - mrrc p15, 0, r4, r5, c2 @ read TTBR0 - adds r4, r4, r0 @ update physical address - adc r5, r5, r1 - mcrr p15, 0, r4, r5, c2 @ write back TTBR0 - mrrc p15, 1, r4, r5, c2 @ read TTBR1 - adds r4, r4, r0 @ update physical address - adc r5, r5, r1 - mcrr p15, 1, r4, r5, c2 @ write back TTBR1 - - dsb - - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate - mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() - dsb - isb - - mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU - dsb - isb - - ldmfd sp!, {r4-r8, pc} -ENDPROC(lpae_pgtables_remap_asm) diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S deleted file mode 100644 index def6161ec4523d5579ba9f99e1be3a1589af0081..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-fa.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-fa.S - * - * Copyright (C) 2005 Faraday Corp. 
- * Copyright (C) 2008-2009 Paulius Zaleckas - * - * Based on tlb-v4wbi.S: - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4, Faraday variation. - * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) - * - * Processors: FA520 FA526 FA626 - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - -/* - * flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 4 -ENTRY(fa_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r3, c7, c10, 4 @ data write barrier - ret lr - - -ENTRY(fa_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r3, c7, c10, 4 @ data write barrier - mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S deleted file mode 100644 index b962b4e751584f7080c6ca4e55b61ed8b39dd622..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 TLB handling functions. - * These assume a split I/D TLBs, and no write buffer. - * - * Processors: ARM720T - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - .align 5 -/* - * v4_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified user address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything -.v4_flush_kern_tlb_range: - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -/* - * v4_flush_kern_tlb_range(start, end) - * - * Invalidate a range of TLB entries in the specified kernel - * address range. - * - * - start - virtual address (may not be aligned) - * - end - virtual address (may not be aligned) - */ -.globl v4_flush_kern_tlb_range -.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S deleted file mode 100644 index 9348bba7586a0e6e8e256aa9ae511a6446548c67..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4wb.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4wb.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 TLB handling functions. 
- * These assume a split I/D TLBs w/o I TLB entry, with a write buffer. - * - * Processors: SA110 SA1100 SA1110 - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - .align 5 -/* - * v4wb_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4wb_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything - vma_vm_flags r2, r2 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - tst r2, #VM_EXEC - mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -/* - * v4_flush_kern_tlb_range(start, end) - * - * Invalidate a range of TLB entries in the specified kernel - * address range. - * - * - start - virtual address (may not be aligned) - * - end - virtual address (may not be aligned) - */ -ENTRY(v4wb_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 - mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB -1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4wb, v4wb_tlb_flags diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S deleted file mode 100644 index d4f9040a4111c180861de6feb7db275ad8a0a417..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4wbi.S +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4wbi.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 and version 5 TLB handling functions. - * These assume a split I/D TLBs, with a write buffer. - * - * Processors: ARM920 ARM922 ARM925 ARM926 XScale - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * v4wb_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4wbi_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? 
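Each of these flush_user_tlb_range implementations follows the same shape; in C it is roughly the sketch below. The types and tlb_inv_entry() helper are placeholders for the per-core MCR sequences, not real kernel interfaces.

    /* Sketch of the common user-range TLB flush pattern. */
    #define PAGE_SIZE 4096UL

    struct mm_struct;
    extern struct mm_struct *current_active_mm(void);
    extern void tlb_inv_entry(unsigned long mva);   /* one c8 MCR */

    static void flush_user_tlb_range_model(unsigned long start,
                                           unsigned long end,
                                           struct mm_struct *vma_mm)
    {
        if (vma_mm != current_active_mm())
            return;                 /* retne lr: not the live mm */

        start &= ~(PAGE_SIZE - 1);  /* the bic 0x0ff/0xf00 pair */
        while (start < end) {       /* cmp/blo loop */
            tlb_inv_entry(start);
            start += PAGE_SIZE;
        }
    }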
- retne lr @ no, we dont do anything - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - vma_vm_flags r2, r2 - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry - mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -ENTRY(v4wbi_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry - mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4wbi, v4wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S deleted file mode 100644 index 5335b9687297fa476f9b0d91fff1f8241d0e69af..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v6.S +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-v6.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 6 TLB handling functions. - * These assume a split I/D TLB. - */ -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -#define HARVARD_TLB - -/* - * v6wbi_flush_user_tlb_range(start, end, vma) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_struct describing address range - * - * It is assumed that: - * - the "Invalidate single entry" instruction will invalidate - * both the I and the D TLBs on Harvard-style TLBs - */ -ENTRY(v6wbi_flush_user_tlb_range) - vma_vm_mm r3, r2 @ get vma->vm_mm - mov ip, #0 - mmid r3, r3 @ get vm_mm->context.id - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - asid r3, r3 @ mask ASID - orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA - mov r1, r1, lsl #PAGE_SHIFT - vma_vm_flags r2, r2 @ get vma->vm_flags -1: -#ifdef HARVARD_TLB - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) - tst r2, #VM_EXEC @ Executable area ? 
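On ARMv6 the invalidate-by-MVA operand carries the ASID in its low bits; the lsr/lsl/orr sequence above builds it as in this sketch (an 8-bit ASID field is assumed here):

    /* Sketch of the MVA+ASID operand construction. */
    #define PAGE_SHIFT 12
    #define ASID_MASK  0xffUL   /* assumed ASID width for the sketch */

    static unsigned long mva_asid(unsigned long va, unsigned long asid)
    {
        return ((va >> PAGE_SHIFT) << PAGE_SHIFT)   /* page-align VA */
             | (asid & ASID_MASK);                  /* tag with ASID */
    }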
- mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA (was 1) -#endif - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier - ret lr - -/* - * v6wbi_flush_kern_tlb_range(start,end) - * - * Invalidate a range of kernel TLB entries - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - */ -ENTRY(v6wbi_flush_kern_tlb_range) - mov r2, #0 - mcr p15, 0, r2, c7, c10, 4 @ drain write buffer - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - mov r0, r0, lsl #PAGE_SHIFT - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef HARVARD_TLB - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA - mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA -#endif - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier - mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) - ret lr - - __INIT - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v6wbi, v6wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S deleted file mode 100644 index 1bb28d7db5670551d22035c785544066fbeb1e2c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v7.S +++ /dev/null @@ -1,92 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-v7.S - * - * Copyright (C) 1997-2002 Russell King - * Modified for ARMv7 by Catalin Marinas - * - * ARM architecture version 6 TLB handling functions. - * These assume a split I/D TLB. - */ -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * v7wbi_flush_user_tlb_range(start, end, vma) - * - * Invalidate a range of TLB entries in the specified address space. 
- * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_struct describing address range - * - * It is assumed that: - * - the "Invalidate single entry" instruction will invalidate - * both the I and the D TLBs on Harvard-style TLBs - */ -ENTRY(v7wbi_flush_user_tlb_range) - vma_vm_mm r3, r2 @ get vma->vm_mm - mmid r3, r3 @ get vm_mm->context.id - dsb ish - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - asid r3, r3 @ mask ASID -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(W(mov) r3, #0 ) - ALT_UP(W(nop) ) -#endif - orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) -#else - ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) -#endif - ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA - - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - dsb ish - ret lr -ENDPROC(v7wbi_flush_user_tlb_range) - -/* - * v7wbi_flush_kern_tlb_range(start,end) - * - * Invalidate a range of kernel TLB entries - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - */ -ENTRY(v7wbi_flush_kern_tlb_range) - dsb ish - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - mov r0, r0, lsl #PAGE_SHIFT - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) -#else - ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) -#endif - ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - dsb ish - isb - ret lr -ENDPROC(v7wbi_flush_kern_tlb_range) - - __INIT - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S deleted file mode 100644 index d8f9915566e1577334c334fd4a348d3dcce9df71..0000000000000000000000000000000000000000 --- a/arch/arm/nwfpe/entry.S +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - NetWinder Floating Point Emulator - (c) Rebel.COM, 1998 - (c) 1998, 1999 Philip Blundell - - Direct questions, comments to Scott Bambrough - -*/ -#include -#include - -/* This is the kernel's entry point into the floating point emulator. -It is called from the kernel with code similar to this: - - sub r4, r5, #4 - ldrt r0, [r4] @ r0 = instruction - adrsvc al, r9, ret_from_exception @ r9 = normal FP return - adrsvc al, lr, fpundefinstr @ lr = undefined instr return - - get_current_task r10 - mov r8, #1 - strb r8, [r10, #TSK_USED_MATH] @ set current->used_math - add r10, r10, #TSS_FPESAVE @ r10 = workspace - ldr r4, .LC2 - ldr pc, [r4] @ Call FP emulator entry point - -The kernel expects the emulator to return via one of two possible -points of return it passes to the emulator. The emulator, if -successful in its emulation, jumps to ret_from_exception (passed in -r9) and the kernel takes care of returning control from the trap to -the user code. If the emulator is unable to emulate the instruction, -it returns via _fpundefinstr (passed via lr) and the kernel halts the -user program with a core dump. - -On entry to the emulator r10 points to an area of private FP workspace -reserved in the thread structure for this process. 
This is where the -emulator saves its registers across calls. The first word of this area -is used as a flag to detect the first time a process uses floating point, -so that the emulator startup cost can be avoided for tasks that don't -want it. - -This routine does three things: - -1) The kernel has created a struct pt_regs on the stack and saved the -user registers into it. See /usr/include/asm/proc/ptrace.h for details. - -2) It calls EmulateAll to emulate a floating point instruction. -EmulateAll returns 1 if the emulation was successful, or 0 if not. - -3) If an instruction has been emulated successfully, it looks ahead at -the next instruction. If it is a floating point instruction, it -executes the instruction, without returning to user space. In this -way it repeatedly looks ahead and executes floating point instructions -until it encounters a non floating point instruction, at which time it -returns via _fpreturn. - -This is done to reduce the effect of the trap overhead on each -floating point instructions. GCC attempts to group floating point -instructions to allow the emulator to spread the cost of the trap over -several floating point instructions. */ - -#include - - .globl nwfpe_enter -nwfpe_enter: - mov r4, lr @ save the failure-return addresses - mov sl, sp @ we access the registers via 'sl' - - ldr r5, [sp, #S_PC] @ get contents of PC; - mov r6, r0 @ save the opcode -emulate: - ldr r1, [sp, #S_PSR] @ fetch the PSR - bl arm_check_condition @ check the condition - cmp r0, #ARM_OPCODE_CONDTEST_PASS @ condition passed? - - @ if condition code failed to match, next insn - bne next @ get the next instruction; - - mov r0, r6 @ prepare for EmulateAll() - bl EmulateAll @ emulate the instruction - cmp r0, #0 @ was emulation successful - reteq r4 @ no, return failure - -next: - uaccess_enable r3 -.Lx1: ldrt r6, [r5], #4 @ get the next instruction and - @ increment PC - uaccess_disable r3 - and r2, r6, #0x0F000000 @ test for FP insns - teq r2, #0x0C000000 - teqne r2, #0x0D000000 - teqne r2, #0x0E000000 - retne r9 @ return ok if not a fp insn - - str r5, [sp, #S_PC] @ update PC copy in regs - - mov r0, r6 @ save a copy - b emulate @ check condition and emulate - - @ We need to be prepared for the instructions at .Lx1 and .Lx2 - @ to fault. Emit the appropriate exception gunk to fix things up. - @ ??? For some reason, faults can happen at .Lx2 even with a - @ plain LDR instruction. Weird, but it seems harmless. - .pushsection .text.fixup,"ax" - .align 2 -.Lfix: ret r9 @ let the user eat segfaults - .popsection - - .pushsection __ex_table,"a" - .align 3 - .long .Lx1, .Lfix - .popsection diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S deleted file mode 100644 index 09d9fc30c8cabce736a89fe026539454b971a551..0000000000000000000000000000000000000000 --- a/arch/arm/plat-versatile/headsmp.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/plat-versatile/headsmp.S - * - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include -#include - -/* - * Realview/Versatile Express specific entry point for secondary CPUs. - * This provides a "holding pen" into which all secondary cores are held - * until we're ready for them to initialise. 
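The look-ahead behaviour described in points 2 and 3 can be restated as C pseudocode. EmulateAll is the real entry point named above; the other names are descriptive placeholders for the ldrt fetch and the coprocessor-number test in the assembly.

    /* Sketch of the nwfpe_enter look-ahead loop. */
    typedef unsigned int u32;

    extern int EmulateAll(u32 insn);          /* 1 = emulated OK */
    extern u32 load_next_user_insn(void);     /* the ldrt at .Lx1 */
    extern int is_fp_insn(u32 insn);          /* coproc 0xC/0xD/0xE test */

    static void nwfpe_loop_model(u32 insn)
    {
        for (;;) {
            if (!EmulateAll(insn))
                break;                  /* bounce: undefined instruction */
            insn = load_next_user_insn();
            if (!is_fp_insn(insn))
                break;                  /* return to user code via r9 */
            /* else emulate the next FP insn without re-trapping */
        }
    }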
- */ -ENTRY(versatile_secondary_startup) - ARM_BE8(setend be) - mrc p15, 0, r0, c0, c0, 5 - bic r0, #0xff000000 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup - - .align -1: .long . - .long versatile_cpu_release -ENDPROC(versatile_secondary_startup) diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S deleted file mode 100644 index 9cd73b725d9fb89c2b9bb1d1178cce705baff38b..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/datapage.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - - .align 2 -.L_vdso_data_ptr: - .long _start - . - VDSO_DATA_SIZE - -ENTRY(__get_datapage) - .fnstart - adr r0, .L_vdso_data_ptr - ldr r1, [r0] - add r0, r0, r1 - bx lr - .fnend -ENDPROC(__get_datapage) diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S deleted file mode 100644 index 65f2e6f863baf1d2b6f992a9d8474290acb0d1ac..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/vdso.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Adapted from arm64 version. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include -#include -#include - - .globl vdso_start, vdso_end - .section .data..ro_after_init - .balign PAGE_SIZE -vdso_start: - .incbin "arch/arm/vdso/vdso.so" - .balign PAGE_SIZE -vdso_end: - - .previous diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S deleted file mode 100644 index 73cf205b003ea94acf3ca1b4bddd35945b70db7e..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/vdso.lds.S +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Adapted from arm64 version. - * - * GNU linker script for the VDSO library. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * Heavily based on the vDSO linker scripts for other archs. - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") -OUTPUT_ARCH(arm) - -SECTIONS -{ - PROVIDE(_start = .); - - . = SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - .text : { *(.text*) } :text =0xe7f001f2 - - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. 
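Stepping back to versatile_secondary_startup above: the holding pen is, in effect, the spin loop below. This is a sketch; versatile_cpu_release is the real symbol the assembly polls, while the other helpers are placeholders for the MPIDR read and the PC-relative address fixup.

    /* Sketch of the secondary-CPU holding pen. */
    extern volatile unsigned long versatile_cpu_release;
    extern unsigned long this_cpu_id(void);      /* MPIDR low byte */
    extern void secondary_startup(void);

    static void holding_pen_model(void)
    {
        unsigned long cpu = this_cpu_id();

        while (versatile_cpu_release != cpu)
            ;                            /* spin until released */
        secondary_startup();
    }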
- */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -VERSION -{ - LINUX_2.6 { - global: - __vdso_clock_gettime; - __vdso_gettimeofday; - local: *; - }; -} diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S deleted file mode 100644 index 27b0a1f27fbdf392e882d049045fd8102fe31b6e..0000000000000000000000000000000000000000 --- a/arch/arm/vfp/entry.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/vfp/entry.S - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - */ -#include -#include -#include -#include -#include -#include - -@ VFP entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r2 = PC value to resume execution after successful emulation -@ r9 = normal "successful" return address -@ r10 = this threads thread_info structure -@ lr = unrecognised instruction return address -@ IRQs enabled. -@ -ENTRY(do_vfp) - inc_preempt_count r10, r4 - ldr r4, .LCvfp - ldr r11, [r10, #TI_CPU] @ CPU number - add r10, r10, #TI_VFPSTATE @ r10 = workspace - ldr pc, [r4] @ call VFP entry point -ENDPROC(do_vfp) - -ENTRY(vfp_null_entry) - dec_preempt_count_ti r10, r4 - ret lr -ENDPROC(vfp_null_entry) - - .align 2 -.LCvfp: - .word vfp_vector diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S deleted file mode 100644 index b530db8f2c6c8bea795902c13e72aa0e05a43b31..0000000000000000000000000000000000000000 --- a/arch/arm/vfp/vfphw.S +++ /dev/null @@ -1,315 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/vfp/vfphw.S - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - * - * This code is called from the kernel's undefined instruction trap. - * r9 holds the return address for successful handling. - * lr holds the return address for unrecognised instructions. - * r10 points at the start of the private FP workspace in the thread structure - * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h) - */ -#include -#include -#include -#include -#include -#include -#include - - .macro DBGSTR, str -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - .macro DBGSTR1, str, arg -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r1, \arg - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - .macro DBGSTR3, str, arg1, arg2, arg3 -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r3, \arg3 - mov r2, \arg2 - mov r1, \arg1 - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - -@ VFP hardware support entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r2 = PC value to resume execution after successful emulation -@ r9 = normal "successful" return address -@ r10 = vfp_state union -@ r11 = CPU number -@ lr = unrecognised instruction return address -@ IRQs enabled. -ENTRY(vfp_support_entry) - DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 - - VFPFMRX r1, FPEXC @ Is the VFP enabled? 
- DBGSTR1 "fpexc %08x", r1 - tst r1, #FPEXC_EN - bne look_for_VFP_exceptions @ VFP is already enabled - - DBGSTR1 "enable %x", r10 - ldr r3, vfp_current_hw_state_address - orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set - ldr r4, [r3, r11, lsl #2] @ vfp_current_hw_state pointer - bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled - cmp r4, r10 @ this thread owns the hw context? -#ifndef CONFIG_SMP - @ For UP, checking that this thread owns the hw context is - @ sufficient to determine that the hardware state is valid. - beq vfp_hw_state_valid - - @ On UP, we lazily save the VFP context. As a different - @ thread wants ownership of the VFP hardware, save the old - @ state if there was a previous (valid) owner. - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it - - DBGSTR1 "save old state %p", r4 - cmp r4, #0 @ if the vfp_current_hw_state is NULL - beq vfp_reload_hw @ then the hw state needs reloading - VFPFSTMIA r4, r5 @ save the working registers - VFPFMRX r5, FPSCR @ current status -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present) -1: -#endif - stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 -vfp_reload_hw: - -#else - @ For SMP, if this thread does not own the hw context, then we - @ need to reload it. No need to save the old state as on SMP, - @ we always save the state when we switch away from a thread. - bne vfp_reload_hw - - @ This thread has ownership of the current hardware context. - @ However, it may have been migrated to another CPU, in which - @ case the saved state is newer than the hardware context. - @ Check this by looking at the CPU number which the state was - @ last loaded onto. - ldr ip, [r10, #VFP_CPU] - teq ip, r11 - beq vfp_hw_state_valid - -vfp_reload_hw: - @ We're loading this threads state into the VFP hardware. Update - @ the CPU number which contains the most up to date VFP context. - str r11, [r10, #VFP_CPU] - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it -#endif - - DBGSTR1 "load state %p", r10 - str r10, [r3, r11, lsl #2] @ update the vfp_current_hw_state pointer - @ Load the saved state back into the VFP - VFPFLDMIA r10, r5 @ reload the working registers while - @ FPEXC is in a safe state - ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to restore? - beq 1f - VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to write? - beq 1f - VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present) -1: -#endif - VFPFMXR FPSCR, r5 @ restore status - -@ The context stored in the VFP hardware is up to date with this thread -vfp_hw_state_valid: - tst r1, #FPEXC_EX - bne process_exception @ might as well handle the pending - @ exception before retrying branch - @ out before setting an FPEXC that - @ stops us reading stuff - VFPFMXR FPEXC, r1 @ Restore FPEXC last - sub r2, r2, #4 @ Retry current instruction - if Thumb - str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, - @ else it's one 32-bit instruction, so - @ always subtract 4 from the following - @ instruction address. 
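The UP ownership logic above collapses to a short decision tree in C. This is a sketch only: the array and helpers are illustrative stand-ins for the vfp_current_hw_state pointer array and the VFPFSTMIA/VFPFLDMIA sequences.

    /* Sketch of the UP lazy VFP context switch. */
    struct vfp_state;
    extern struct vfp_state *vfp_current_hw_state[];  /* per CPU */
    extern void vfp_save_hw(struct vfp_state *old);
    extern void vfp_load_hw(struct vfp_state *new_state);

    static void vfp_up_entry_model(struct vfp_state *thread_state, int cpu)
    {
        struct vfp_state *owner = vfp_current_hw_state[cpu];

        if (owner == thread_state)
            return;                     /* vfp_hw_state_valid */
        if (owner)
            vfp_save_hw(owner);         /* lazily save previous owner */
        vfp_load_hw(thread_state);      /* vfp_reload_hw */
        vfp_current_hw_state[cpu] = thread_state;
    }

On SMP the same reload happens whenever the ownership check or the saved VFP_CPU number fails to match, and the save step is skipped because state is always saved when switching away from a thread.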
- dec_preempt_count_ti r10, r4 - ret r9 @ we think we have handled things - - -look_for_VFP_exceptions: - @ Check for synchronous or asynchronous exception - tst r1, #FPEXC_EX | FPEXC_DEX - bne process_exception - @ On some implementations of the VFP subarch 1, setting FPSCR.IXE - @ causes all the CDP instructions to be bounced synchronously without - @ setting the FPEXC.EX bit - VFPFMRX r5, FPSCR - tst r5, #FPSCR_IXE - bne process_exception - - tst r5, #FPSCR_LENGTH_MASK - beq skip - orr r1, r1, #FPEXC_DEX - b process_exception -skip: - - @ Fall into hand on to next handler - appropriate coproc instr - @ not recognised by VFP - - DBGSTR "not VFP" - dec_preempt_count_ti r10, r4 - ret lr - -process_exception: - DBGSTR "bounce" - mov r2, sp @ nothing stacked - regdump is at TOS - mov lr, r9 @ setup for a return to the user code. - - @ Now call the C code to package up the bounce to the support code - @ r0 holds the trigger instruction - @ r1 holds the FPEXC value - @ r2 pointer to register dump - b VFP_bounce @ we have handled this - the support - @ code will raise an exception if - @ required. If not, the user code will - @ retry the faulted instruction -ENDPROC(vfp_support_entry) - -ENTRY(vfp_save_state) - @ Save the current VFP state - @ r0 - save location - @ r1 - FPEXC - DBGSTR1 "save VFP state %p", r0 - VFPFSTMIA r0, r2 @ save the working registers - VFPFMRX r2, FPSCR @ current status - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r3, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present) -1: - stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 - ret lr -ENDPROC(vfp_save_state) - - .align -vfp_current_hw_state_address: - .word vfp_current_hw_state - - .macro tbl_branch, base, tmp, shift -#ifdef CONFIG_THUMB2_KERNEL - adr \tmp, 1f - add \tmp, \tmp, \base, lsl \shift - ret \tmp -#else - add pc, pc, \base, lsl \shift - mov r0, r0 -#endif -1: - .endm - -ENTRY(vfp_get_float) - tbl_branch r0, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 - ret lr - .org 1b + 8 -1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 - ret lr - .org 1b + 8 - .endr -ENDPROC(vfp_get_float) - -ENTRY(vfp_put_float) - tbl_branch r1, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 - ret lr - .org 1b + 8 -1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 - ret lr - .org 1b + 8 - .endr -ENDPROC(vfp_put_float) - -ENTRY(vfp_get_double) - tbl_branch r0, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmrrd r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#ifdef CONFIG_VFPv3 - @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#endif - - @ virtual register 16 (or 32 if VFPv3) for compare with zero - mov r0, #0 - mov r1, #0 - ret lr -ENDPROC(vfp_get_double) - -ENTRY(vfp_put_double) - tbl_branch r2, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmdrr d\dr, r0, r1 - ret lr - .org 1b + 8 - .endr -#ifdef CONFIG_VFPv3 - @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#endif -ENDPROC(vfp_put_double) diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S deleted file mode 100644 index 
b11bba542faccc4889abe8b5571b583e83ee0479..0000000000000000000000000000000000000000 --- a/arch/arm/xen/hypercall.S +++ /dev/null @@ -1,122 +0,0 @@ -/****************************************************************************** - * hypercall.S - * - * Xen hypercall wrappers - * - * Stefano Stabellini , Citrix, 2012 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * The Xen hypercall calling convention is very similar to the ARM - * procedure calling convention: the first paramter is passed in r0, the - * second in r1, the third in r2 and the fourth in r3. Considering that - * Xen hypercalls have 5 arguments at most, the fifth paramter is passed - * in r4, differently from the procedure calling convention of using the - * stack for that case. - * - * The hypercall number is passed in r12. - * - * The return value is in r0. - * - * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM - * hypercall tag. - */ - -#include -#include -#include -#include - - -#define XEN_IMM 0xEA1 - -#define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - mov r12, #__HYPERVISOR_##hypercall; \ - __HVC(XEN_IMM); \ - ret lr; \ -ENDPROC(HYPERVISOR_##hypercall) - -#define HYPERCALL0 HYPERCALL_SIMPLE -#define HYPERCALL1 HYPERCALL_SIMPLE -#define HYPERCALL2 HYPERCALL_SIMPLE -#define HYPERCALL3 HYPERCALL_SIMPLE -#define HYPERCALL4 HYPERCALL_SIMPLE - -#define HYPERCALL5(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - stmdb sp!, {r4} \ - ldr r4, [sp, #4] \ - mov r12, #__HYPERVISOR_##hypercall; \ - __HVC(XEN_IMM); \ - ldm sp!, {r4} \ - ret lr \ -ENDPROC(HYPERVISOR_##hypercall) - - .text - -HYPERCALL2(xen_version); -HYPERCALL3(console_io); -HYPERCALL3(grant_table_op); -HYPERCALL2(sched_op); -HYPERCALL2(event_channel_op); -HYPERCALL2(hvm_op); -HYPERCALL2(memory_op); -HYPERCALL2(physdev_op); -HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); -HYPERCALL1(platform_op_raw); -HYPERCALL2(multicall); -HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); - -ENTRY(privcmd_call) - stmdb sp!, {r4} - mov r12, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - ldr r3, [sp, #8] - /* - * Privcmd calls are issued by the userspace. 
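The calling convention described at the top of this file can be mimicked from C with register-pinned variables. The sketch below is ARM-only and assumes a GCC-style compiler whose assembler accepts the hvc mnemonic; it is what each HYPERCALL_SIMPLE wrapper does for a two-argument call.

    /* Sketch of a two-argument Xen hypercall from C. */
    static long xen_hypercall2_model(unsigned int nr,
                                     unsigned long a1, unsigned long a2)
    {
        register unsigned long r0 asm("r0") = a1;   /* arg 1 / result */
        register unsigned long r1 asm("r1") = a2;   /* arg 2 */
        register unsigned long r12 asm("r12") = nr; /* hypercall number */

        asm volatile("hvc #0xEA1"                   /* XEN_IMM tag */
                     : "+r" (r0)
                     : "r" (r1), "r" (r12)
                     : "memory");
        return r0;                                  /* return value */
    }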
We need to allow the - * kernel to access the userspace memory before issuing the hypercall. - */ - uaccess_enable r4 - - /* r4 is loaded now as we use it as scratch register before */ - ldr r4, [sp, #4] - __HVC(XEN_IMM) - - /* - * Disable userspace access from kernel. This is fine to do it - * unconditionally as no set_fs(KERNEL_DS) is called before. - */ - uaccess_disable r4 - - ldm sp!, {r4} - ret lr -ENDPROC(privcmd_call); diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi deleted file mode 120000 index 68fd0f8f1dee8e9b7ff0c0229c888d1ec1d03b05..0000000000000000000000000000000000000000 --- a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi +++ /dev/null @@ -1 +0,0 @@ -../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..68fd0f8f1dee8e9b7ff0c0229c888d1ec1d03b05 --- /dev/null +++ b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi \ No newline at end of file diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S deleted file mode 100644 index 9add9bbc48d8b84a9454f6b29db41d403a47125f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ /dev/null @@ -1,221 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - /* - * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, - * u32 *macp, u8 const rk[], u32 rounds); - */ -ENTRY(ce_aes_ccm_auth_data) - ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.16b}, [x0] /* load mac */ - cbz w8, 1f - sub w8, w8, #16 - eor v1.16b, v1.16b, v1.16b -0: ldrb w7, [x1], #1 /* get 1 byte of input */ - subs w2, w2, #1 - add w8, w8, #1 - ins v1.b[0], w7 - ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ - beq 8f /* out of input? */ - cbnz w8, 0b - eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.4s}, [x4] /* load first round key */ - prfm pldl1strm, [x1] - cmp w5, #12 /* which key size? */ - add x6, x4, #16 - sub w7, w5, #2 /* modified # of rounds */ - bmi 2f - bne 5f - mov v5.16b, v3.16b - b 4f -2: mov v4.16b, v3.16b - ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ -3: aese v0.16b, v4.16b - aesmc v0.16b, v0.16b -4: ld1 {v3.4s}, [x6], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b -5: ld1 {v4.4s}, [x6], #16 /* load next round key */ - subs w7, w7, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - ld1 {v5.4s}, [x6], #16 /* load next round key */ - bpl 3b - aese v0.16b, v4.16b - subs w2, w2, #16 /* last data? 
*/ - eor v0.16b, v0.16b, v5.16b /* final round */ - bmi 6f - ld1 {v1.16b}, [x1], #16 /* load next input block */ - eor v0.16b, v0.16b, v1.16b /* xor with mac */ - bne 1b -6: st1 {v0.16b}, [x0] /* store mac */ - beq 10f - adds w2, w2, #16 - beq 10f - mov w8, w2 -7: ldrb w7, [x1], #1 - umov w6, v0.b[0] - eor w6, w6, w7 - strb w6, [x0], #1 - subs w2, w2, #1 - beq 10f - ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ - b 7b -8: cbz w8, 91f - mov w7, w8 - add w8, w8, #16 -9: ext v1.16b, v1.16b, v1.16b, #1 - adds w7, w7, #1 - bne 9b -91: eor v0.16b, v0.16b, v1.16b - st1 {v0.16b}, [x0] -10: str w8, [x3] - ret -ENDPROC(ce_aes_ccm_auth_data) - - /* - * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], - * u32 rounds); - */ -ENTRY(ce_aes_ccm_final) - ld1 {v3.4s}, [x2], #16 /* load first round key */ - ld1 {v0.16b}, [x0] /* load mac */ - cmp w3, #12 /* which key size? */ - sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.16b}, [x1] /* load 1st ctriv */ - bmi 0f - bne 3f - mov v5.16b, v3.16b - b 2f -0: mov v4.16b, v3.16b -1: ld1 {v5.4s}, [x2], #16 /* load next round key */ - aese v0.16b, v4.16b - aesmc v0.16b, v0.16b - aese v1.16b, v4.16b - aesmc v1.16b, v1.16b -2: ld1 {v3.4s}, [x2], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b - aese v1.16b, v5.16b - aesmc v1.16b, v1.16b -3: ld1 {v4.4s}, [x2], #16 /* load next round key */ - subs w3, w3, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - aese v1.16b, v3.16b - aesmc v1.16b, v1.16b - bpl 1b - aese v0.16b, v4.16b - aese v1.16b, v4.16b - /* final round key cancels out */ - eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.16b}, [x0] /* store result */ - ret -ENDPROC(ce_aes_ccm_final) - - .macro aes_ccm_do_crypt,enc - ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.16b}, [x5] /* load mac */ -CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ -0: /* outer loop */ - ld1 {v1.8b}, [x6] /* load upper ctr */ - prfm pldl1strm, [x1] - add x8, x8, #1 - rev x9, x8 - cmp w4, #12 /* which key size? */ - sub w7, w4, #2 /* get modified # of rounds */ - ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.4s}, [x3] /* load first round key */ - add x10, x3, #16 - bmi 1f - bne 4f - mov v5.16b, v3.16b - b 3f -1: mov v4.16b, v3.16b - ld1 {v5.4s}, [x10], #16 /* load 2nd round key */ -2: /* inner loop: 3 rounds, 2x interleaved */ - aese v0.16b, v4.16b - aesmc v0.16b, v0.16b - aese v1.16b, v4.16b - aesmc v1.16b, v1.16b -3: ld1 {v3.4s}, [x10], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b - aese v1.16b, v5.16b - aesmc v1.16b, v1.16b -4: ld1 {v4.4s}, [x10], #16 /* load next round key */ - subs w7, w7, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - aese v1.16b, v3.16b - aesmc v1.16b, v1.16b - ld1 {v5.4s}, [x10], #16 /* load next round key */ - bpl 2b - aese v0.16b, v4.16b - aese v1.16b, v4.16b - subs w2, w2, #16 - bmi 6f /* partial block? 
*/ - ld1 {v2.16b}, [x1], #16 /* load next input block */ - .if \enc == 1 - eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ - eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ - .else - eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ - eor v1.16b, v2.16b, v5.16b /* final round enc */ - .endif - eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ - st1 {v1.16b}, [x0], #16 /* write output block */ - bne 0b -CPU_LE( rev x8, x8 ) - st1 {v0.16b}, [x5] /* store mac */ - str x8, [x6, #8] /* store lsb end of ctr (BE) */ -5: ret - -6: eor v0.16b, v0.16b, v5.16b /* final round mac */ - eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.16b}, [x5] /* store mac */ - add w2, w2, #16 /* process partial tail block */ -7: ldrb w9, [x1], #1 /* get 1 byte of input */ - umov w6, v1.b[0] /* get top crypted ctr byte */ - umov w7, v0.b[0] /* get top mac byte */ - .if \enc == 1 - eor w7, w7, w9 - eor w9, w9, w6 - .else - eor w9, w9, w6 - eor w7, w7, w9 - .endif - strb w9, [x0], #1 /* store out byte */ - strb w7, [x5], #1 /* store mac byte */ - subs w2, w2, #1 - beq 5b - ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ - ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ - b 7b - .endm - - /* - * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, - * u8 const rk[], u32 rounds, u8 mac[], - * u8 ctr[]); - * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, - * u8 const rk[], u32 rounds, u8 mac[], - * u8 ctr[]); - */ -ENTRY(ce_aes_ccm_encrypt) - aes_ccm_do_crypt 1 -ENDPROC(ce_aes_ccm_encrypt) - -ENTRY(ce_aes_ccm_decrypt) - aes_ccm_do_crypt 0 -ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S deleted file mode 100644 index 76a30fe4ba8b140ecfa0fc2e1feb5f3cc94fc19f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce-core.S +++ /dev/null @@ -1,84 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - - .arch armv8-a+crypto - -ENTRY(__aes_ce_encrypt) - sub w3, w3, #2 - ld1 {v0.16b}, [x2] - ld1 {v1.4s}, [x0], #16 - cmp w3, #10 - bmi 0f - bne 3f - mov v3.16b, v1.16b - b 2f -0: mov v2.16b, v1.16b - ld1 {v3.4s}, [x0], #16 -1: aese v0.16b, v2.16b - aesmc v0.16b, v0.16b -2: ld1 {v1.4s}, [x0], #16 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b -3: ld1 {v2.4s}, [x0], #16 - subs w3, w3, #3 - aese v0.16b, v1.16b - aesmc v0.16b, v0.16b - ld1 {v3.4s}, [x0], #16 - bpl 1b - aese v0.16b, v2.16b - eor v0.16b, v0.16b, v3.16b - st1 {v0.16b}, [x1] - ret -ENDPROC(__aes_ce_encrypt) - -ENTRY(__aes_ce_decrypt) - sub w3, w3, #2 - ld1 {v0.16b}, [x2] - ld1 {v1.4s}, [x0], #16 - cmp w3, #10 - bmi 0f - bne 3f - mov v3.16b, v1.16b - b 2f -0: mov v2.16b, v1.16b - ld1 {v3.4s}, [x0], #16 -1: aesd v0.16b, v2.16b - aesimc v0.16b, v0.16b -2: ld1 {v1.4s}, [x0], #16 - aesd v0.16b, v3.16b - aesimc v0.16b, v0.16b -3: ld1 {v2.4s}, [x0], #16 - subs w3, w3, #3 - aesd v0.16b, v1.16b - aesimc v0.16b, v0.16b - ld1 {v3.4s}, [x0], #16 - bpl 1b - aesd v0.16b, v2.16b - eor v0.16b, v0.16b, v3.16b - st1 {v0.16b}, [x1] - ret -ENDPROC(__aes_ce_decrypt) - -/* - * __aes_ce_sub() - use the aese instruction to perform the AES sbox - * substitution on each byte in 'input' - */ -ENTRY(__aes_ce_sub) - dup v1.4s, w0 - movi v0.16b, #0 - aese v0.16b, v1.16b - umov w0, v0.s[0] - ret -ENDPROC(__aes_ce_sub) - -ENTRY(__aes_ce_invert) - ld1 {v0.4s}, [x1] - aesimc v1.16b, v0.16b - st1 {v1.4s}, [x0] - ret -ENDPROC(__aes_ce_invert) diff --git a/arch/arm64/crypto/aes-ce.S 
b/arch/arm64/crypto/aes-ce.S deleted file mode 100644 index c132c49c89a8c4427fd2a252ddd4874a5ad44dfa..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with - * Crypto Extensions - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - -#define AES_ENTRY(func) ENTRY(ce_ ## func) -#define AES_ENDPROC(func) ENDPROC(ce_ ## func) - - .arch armv8-a+crypto - - xtsmask .req v16 - cbciv .req v16 - vctr .req v16 - - .macro xts_reload_mask, tmp - .endm - - .macro xts_cts_skip_tw, reg, lbl - .endm - - /* preload all round keys */ - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] - .endm - - /* prepare for encryption with key in rk[] */ - .macro enc_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - /* prepare for encryption (again) but with new key in rk[] */ - .macro enc_switch_key, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - /* prepare for decryption with key in rk[] */ - .macro dec_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 - aes\de \i0\().16b, \k\().16b - aes\mc \i0\().16b, \i0\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - aes\mc \i1\().16b, \i1\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\mc \i2\().16b, \i2\().16b - aes\de \i3\().16b, \k\().16b - aes\mc \i3\().16b, \i3\().16b - .ifnb \i4 - aes\de \i4\().16b, \k\().16b - aes\mc \i4\().16b, \i4\().16b - .endif - .endif - .endif - .endm - - /* up to 5 interleaved encryption rounds with the same round key */ - .macro round_Nx, enc, k, i0, i1, i2, i3, i4 - .ifc \enc, e - do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4 - .else - do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4 - .endif - .endm - - /* up to 5 interleaved final rounds */ - .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4 - aes\de \i0\().16b, \k\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\de \i3\().16b, \k\().16b - .ifnb \i4 - aes\de \i4\().16b, \k\().16b - .endif - .endif - .endif - eor \i0\().16b, \i0\().16b, \k2\().16b - .ifnb \i1 - eor \i1\().16b, \i1\().16b, \k2\().16b - .ifnb \i3 - eor \i2\().16b, \i2\().16b, \k2\().16b - eor \i3\().16b, \i3\().16b, \k2\().16b - .ifnb \i4 - eor \i4\().16b, \i4\().16b, \k2\().16b - .endif - .endif - .endif - .endm - - /* up to 5 interleaved blocks */ - .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 - round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 -1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 - round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 - round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 - .endr - fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 - .endm - - .macro encrypt_block, in, rounds, t0, t1, t2 - do_block_Nx e, \rounds, \in - .endm - - .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 - do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 - .endm - - .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 - 
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4 - .endm - - .macro decrypt_block, in, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \in - .endm - - .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 - .endm - - .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4 - .endm - -#define MAX_STRIDE 5 - -#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S deleted file mode 100644 index 423d0aebc570f0350b8a6055b769161c559f2afc..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-cipher-core.S +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Scalar AES core transform - * - * Copyright (C) 2017 Linaro Ltd - */ - -#include -#include -#include - - .text - - rk .req x0 - out .req x1 - in .req x2 - rounds .req x3 - tt .req x2 - - .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift - .ifc \op\shift, b0 - ubfiz \reg0, \in0, #2, #8 - ubfiz \reg1, \in1e, #2, #8 - .else - ubfx \reg0, \in0, #\shift, #8 - ubfx \reg1, \in1e, #\shift, #8 - .endif - - /* - * AArch64 cannot do byte size indexed loads from a table containing - * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a - * valid instruction. So perform the shift explicitly first for the - * high bytes (the low byte is shifted implicitly by using ubfiz rather - * than ubfx above) - */ - .ifnc \op, b - ldr \reg0, [tt, \reg0, uxtw #2] - ldr \reg1, [tt, \reg1, uxtw #2] - .else - .if \shift > 0 - lsl \reg0, \reg0, #2 - lsl \reg1, \reg1, #2 - .endif - ldrb \reg0, [tt, \reg0, uxtw] - ldrb \reg1, [tt, \reg1, uxtw] - .endif - .endm - - .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift - ubfx \reg0, \in0, #\shift, #8 - ubfx \reg1, \in1d, #\shift, #8 - ldr\op \reg0, [tt, \reg0, uxtw #\sz] - ldr\op \reg1, [tt, \reg1, uxtw #\sz] - .endm - - .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op - ldp \out0, \out1, [rk], #8 - - __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0 - __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8 - __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16 - __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24 - - eor \out0, \out0, w12 - eor \out1, \out1, w13 - eor \out0, \out0, w14, ror #24 - eor \out1, \out1, w15, ror #24 - eor \out0, \out0, w16, ror #16 - eor \out1, \out1, w17, ror #16 - eor \out0, \out0, \t0, ror #8 - eor \out1, \out1, \t1, ror #8 - .endm - - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op - .endm - - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op - .endm - - .macro do_crypt, round, ttab, ltab, bsz - ldp w4, w5, [in] - ldp w6, w7, [in, #8] - ldp w8, w9, [rk], #16 - ldp w10, w11, [rk, #-8] - -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - - eor w4, w4, w8 - eor w5, w5, w9 - eor w6, w6, w10 - eor w7, w7, w11 - - adr_l tt, \ttab - - tbnz rounds, #1, 1f - -0: \round w8, w9, w10, w11, w4, w5, w6, w7 - \round w4, w5, w6, w7, w8, w9, w10, w11 - -1: subs rounds, rounds, #4 - \round w8, w9, w10, w11, w4, w5, w6, w7 - b.ls 3f -2: \round w4, w5, w6, w7, w8, w9, w10, w11 - b 0b -3: adr_l tt, \ltab - 
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b - -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - - stp w4, w5, [out] - stp w6, w7, [out, #8] - ret - .endm - -ENTRY(__aes_arm64_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm64_encrypt) - - .align 5 -ENTRY(__aes_arm64_decrypt) - do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S deleted file mode 100644 index 131618389f1fda7fd3744ce0584a33ce5cc20388..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-modes.S +++ /dev/null @@ -1,679 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -/* included by aes-ce.S and aes-neon.S */ - - .text - .align 4 - -#ifndef MAX_STRIDE -#define MAX_STRIDE 4 -#endif - -#if MAX_STRIDE == 4 -#define ST4(x...) x -#define ST5(x...) -#else -#define ST4(x...) -#define ST5(x...) x -#endif - -aes_encrypt_block4x: - encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - ret -ENDPROC(aes_encrypt_block4x) - -aes_decrypt_block4x: - decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - ret -ENDPROC(aes_decrypt_block4x) - -#if MAX_STRIDE == 5 -aes_encrypt_block5x: - encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 - ret -ENDPROC(aes_encrypt_block5x) - -aes_decrypt_block5x: - decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 - ret -ENDPROC(aes_decrypt_block5x) -#endif - - /* - * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - -AES_ENTRY(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - enc_prepare w3, x2, x5 - -.LecbencloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lecbenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ -ST4( bl aes_encrypt_block4x ) -ST5( ld1 {v4.16b}, [x1], #16 ) -ST5( bl aes_encrypt_block5x ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LecbencloopNx -.Lecbenc1x: - adds w4, w4, #MAX_STRIDE - beq .Lecbencout -.Lecbencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - encrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lecbencloop -.Lecbencout: - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_ecb_encrypt) - - -AES_ENTRY(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! 
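The Nx/1x structure of these ECB loops is easier to follow in C; a sketch with a stand-in block helper (aes_enc_blocks is a placeholder for the encrypt_block/encrypt_block4x/encrypt_block5x macros):

    /* Sketch of the strided ECB loop structure. */
    #define AES_BLOCK 16

    extern void aes_enc_blocks(unsigned char *dst, const unsigned char *src,
                               int nblocks);    /* 1, 4 or 5 at a time */

    static void ecb_encrypt_model(unsigned char *out, const unsigned char *in,
                                  int blocks, int stride /* MAX_STRIDE */)
    {
        while (blocks >= stride) {              /* .LecbencloopNx */
            aes_enc_blocks(out, in, stride);
            in += stride * AES_BLOCK;
            out += stride * AES_BLOCK;
            blocks -= stride;
        }
        while (blocks-- > 0) {                  /* .Lecbencloop tail */
            aes_enc_blocks(out, in, 1);
            in += AES_BLOCK;
            out += AES_BLOCK;
        }
    }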
- mov x29, sp - - dec_prepare w3, x2, x5 - -.LecbdecloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lecbdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ -ST4( bl aes_decrypt_block4x ) -ST5( ld1 {v4.16b}, [x1], #16 ) -ST5( bl aes_decrypt_block5x ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LecbdecloopNx -.Lecbdec1x: - adds w4, w4, #MAX_STRIDE - beq .Lecbdecout -.Lecbdecloop: - ld1 {v0.16b}, [x1], #16 /* get next ct block */ - decrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lecbdecloop -.Lecbdecout: - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_ecb_decrypt) - - - /* - * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], - * int rounds, int blocks, u8 iv[], - * u32 const rk2[]); - * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], - * int rounds, int blocks, u8 iv[], - * u32 const rk2[]); - */ - -AES_ENTRY(aes_essiv_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - - mov w8, #14 /* AES-256: 14 rounds */ - enc_prepare w8, x6, x7 - encrypt_block v4, w8, x6, x7, w9 - enc_switch_key w3, x2, x6 - b .Lcbcencloop4x - -AES_ENTRY(aes_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 - -.Lcbcencloop4x: - subs w4, w4, #4 - bmi .Lcbcenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x6, w7 - eor v1.16b, v1.16b, v0.16b - encrypt_block v1, w3, x2, x6, w7 - eor v2.16b, v2.16b, v1.16b - encrypt_block v2, w3, x2, x6, w7 - eor v3.16b, v3.16b, v2.16b - encrypt_block v3, w3, x2, x6, w7 - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v3.16b - b .Lcbcencloop4x -.Lcbcenc1x: - adds w4, w4, #4 - beq .Lcbcencout -.Lcbcencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ - encrypt_block v4, w3, x2, x6, w7 - st1 {v4.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lcbcencloop -.Lcbcencout: - st1 {v4.16b}, [x5] /* return iv */ - ret -AES_ENDPROC(aes_cbc_encrypt) -AES_ENDPROC(aes_essiv_cbc_encrypt) - -AES_ENTRY(aes_essiv_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - ld1 {cbciv.16b}, [x5] /* get iv */ - - mov w8, #14 /* AES-256: 14 rounds */ - enc_prepare w8, x6, x7 - encrypt_block cbciv, w8, x6, x7, w9 - b .Lessivcbcdecstart - -AES_ENTRY(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! 
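CBC encryption above is inherently serial: each ciphertext block becomes the IV of the next, so the four-at-a-time load in .Lcbcencloop4x only batches the memory traffic, not the cipher invocations. The dataflow, as a C sketch with a hypothetical in-place single-block primitive:

    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void cbc_encrypt_sketch(uint8_t *out, const uint8_t *in,
                            int blocks, uint8_t iv[16])
    {
        while (blocks-- > 0) {
            for (int i = 0; i < 16; i++)
                iv[i] ^= in[i];          /* xor plaintext into the chain */
            aes_enc_block(iv);           /* iv now holds this ct block */
            memcpy(out, iv, 16);         /* ...and is the next block's iv */
            in  += 16;
            out += 16;
        }
    }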
- mov x29, sp - - ld1 {cbciv.16b}, [x5] /* get iv */ -.Lessivcbcdecstart: - dec_prepare w3, x2, x6 - -.LcbcdecloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lcbcdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ -#if MAX_STRIDE == 5 - ld1 {v4.16b}, [x1], #16 /* get 1 ct block */ - mov v5.16b, v0.16b - mov v6.16b, v1.16b - mov v7.16b, v2.16b - bl aes_decrypt_block5x - sub x1, x1, #32 - eor v0.16b, v0.16b, cbciv.16b - eor v1.16b, v1.16b, v5.16b - ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */ - ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ - eor v2.16b, v2.16b, v6.16b - eor v3.16b, v3.16b, v7.16b - eor v4.16b, v4.16b, v5.16b -#else - mov v4.16b, v0.16b - mov v5.16b, v1.16b - mov v6.16b, v2.16b - bl aes_decrypt_block4x - sub x1, x1, #16 - eor v0.16b, v0.16b, cbciv.16b - eor v1.16b, v1.16b, v4.16b - ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ - eor v2.16b, v2.16b, v5.16b - eor v3.16b, v3.16b, v6.16b -#endif - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LcbcdecloopNx -.Lcbcdec1x: - adds w4, w4, #MAX_STRIDE - beq .Lcbcdecout -.Lcbcdecloop: - ld1 {v1.16b}, [x1], #16 /* get next ct block */ - mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x6, w7 - eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */ - mov cbciv.16b, v1.16b /* ct is next iv */ - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lcbcdecloop -.Lcbcdecout: - st1 {cbciv.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_cbc_decrypt) -AES_ENDPROC(aes_essiv_cbc_decrypt) - - - /* - * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - */ - -AES_ENTRY(aes_cbc_cts_encrypt) - adr_l x8, .Lcts_permute_table - sub x4, x4, #16 - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - ld1 {v3.16b}, [x8] - ld1 {v4.16b}, [x9] - - ld1 {v0.16b}, [x1], x4 /* overlapping loads */ - ld1 {v1.16b}, [x1] - - ld1 {v5.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 - - eor v0.16b, v0.16b, v5.16b /* xor with iv */ - tbl v1.16b, {v1.16b}, v4.16b - encrypt_block v0, w3, x2, x6, w7 - - eor v1.16b, v1.16b, v0.16b - tbl v0.16b, {v0.16b}, v3.16b - encrypt_block v1, w3, x2, x6, w7 - - add x4, x0, x4 - st1 {v0.16b}, [x4] /* overlapping stores */ - st1 {v1.16b}, [x0] - ret -AES_ENDPROC(aes_cbc_cts_encrypt) - -AES_ENTRY(aes_cbc_cts_decrypt) - adr_l x8, .Lcts_permute_table - sub x4, x4, #16 - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - ld1 {v3.16b}, [x8] - ld1 {v4.16b}, [x9] - - ld1 {v0.16b}, [x1], x4 /* overlapping loads */ - ld1 {v1.16b}, [x1] - - ld1 {v5.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x6 - - decrypt_block v0, w3, x2, x6, w7 - tbl v2.16b, {v0.16b}, v3.16b - eor v2.16b, v2.16b, v1.16b - - tbx v0.16b, {v1.16b}, v4.16b - decrypt_block v0, w3, x2, x6, w7 - eor v0.16b, v0.16b, v5.16b /* xor with iv */ - - add x4, x0, x4 - st1 {v2.16b}, [x4] /* overlapping stores */ - st1 {v0.16b}, [x0] - ret -AES_ENDPROC(aes_cbc_cts_decrypt) - - .section ".rodata", "a" - .align 6 -.Lcts_permute_table: - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .previous - - - /* - * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 
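The cts routines above do ciphertext stealing without byte loops by combining overlapping loads/stores with tbl/tbx through .Lcts_permute_table. What aes_cbc_cts_encrypt computes on the final two blocks (16 + tail bytes, tail in 1..16), as a C sketch using the same hypothetical in-place primitive as before:

    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void cbc_cts_encrypt_tail(uint8_t *out, const uint8_t *in, int bytes,
                              const uint8_t iv[16])
    {
        uint8_t prev[16], last[16] = { 0 };
        int tail = bytes - 16;           /* bytes is 17..32 here */

        for (int i = 0; i < 16; i++)
            prev[i] = in[i] ^ iv[i];
        aes_enc_block(prev);             /* full next-to-last ct block */

        memcpy(last, in + 16, tail);     /* short final pt, zero padded */
        for (int i = 0; i < 16; i++)
            last[i] ^= prev[i];
        aes_enc_block(last);

        memcpy(out, last, 16);           /* the two blocks swap places */
        memcpy(out + 16, prev, tail);    /* truncated next-to-last block */
    }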
ctr[]) - */ - -AES_ENTRY(aes_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - enc_prepare w3, x2, x6 - ld1 {vctr.16b}, [x5] - - umov x6, vctr.d[1] /* keep swabbed ctr in reg */ - rev x6, x6 - cmn w6, w4 /* 32 bit overflow? */ - bcs .Lctrloop -.LctrloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lctr1x - add w7, w6, #1 - mov v0.16b, vctr.16b - add w8, w6, #2 - mov v1.16b, vctr.16b - add w9, w6, #3 - mov v2.16b, vctr.16b - add w9, w6, #3 - rev w7, w7 - mov v3.16b, vctr.16b - rev w8, w8 -ST5( mov v4.16b, vctr.16b ) - mov v1.s[3], w7 - rev w9, w9 -ST5( add w10, w6, #4 ) - mov v2.s[3], w8 -ST5( rev w10, w10 ) - mov v3.s[3], w9 -ST5( mov v4.s[3], w10 ) - ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ -ST4( bl aes_encrypt_block4x ) -ST5( bl aes_encrypt_block5x ) - eor v0.16b, v5.16b, v0.16b -ST4( ld1 {v5.16b}, [x1], #16 ) - eor v1.16b, v6.16b, v1.16b -ST5( ld1 {v5.16b-v6.16b}, [x1], #32 ) - eor v2.16b, v7.16b, v2.16b - eor v3.16b, v5.16b, v3.16b -ST5( eor v4.16b, v6.16b, v4.16b ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - add x6, x6, #MAX_STRIDE - rev x7, x6 - ins vctr.d[1], x7 - cbz w4, .Lctrout - b .LctrloopNx -.Lctr1x: - adds w4, w4, #MAX_STRIDE - beq .Lctrout -.Lctrloop: - mov v0.16b, vctr.16b - encrypt_block v0, w3, x2, x8, w7 - - adds x6, x6, #1 /* increment BE ctr */ - rev x7, x6 - ins vctr.d[1], x7 - bcs .Lctrcarry /* overflow? */ - -.Lctrcarrydone: - subs w4, w4, #1 - bmi .Lctrtailblock /* blocks <0 means tail block */ - ld1 {v3.16b}, [x1], #16 - eor v3.16b, v0.16b, v3.16b - st1 {v3.16b}, [x0], #16 - bne .Lctrloop - -.Lctrout: - st1 {vctr.16b}, [x5] /* return next CTR value */ - ldp x29, x30, [sp], #16 - ret - -.Lctrtailblock: - st1 {v0.16b}, [x0] - b .Lctrout - -.Lctrcarry: - umov x7, vctr.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins vctr.d[0], x7 - b .Lctrcarrydone -AES_ENDPROC(aes_ctr_encrypt) - - - /* - * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int bytes, u8 const rk2[], u8 iv[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int bytes, u8 const rk2[], u8 iv[], int first) - */ - - .macro next_tweak, out, in, tmp - sshr \tmp\().2d, \in\().2d, #63 - and \tmp\().16b, \tmp\().16b, xtsmask.16b - add \out\().2d, \in\().2d, \in\().2d - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \out\().16b, \out\().16b, \tmp\().16b - .endm - - .macro xts_load_mask, tmp - movi xtsmask.2s, #0x1 - movi \tmp\().2s, #0x87 - uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s - .endm - -AES_ENTRY(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! 
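The CTR path above keeps the low 64 bits of the big-endian counter byte-swapped in x6, so the common-case increment is a plain add and only the .Lctrcarry path touches the high half. The same bookkeeping in C, assuming a little-endian host and the GCC/Clang bswap builtin:

    #include <stdint.h>

    /* ctr[0] is the high half, ctr[1] the low half, stored big endian
     * as in the vctr register. */
    static void ctr128_inc(uint64_t ctr[2])
    {
        uint64_t lo = __builtin_bswap64(ctr[1]) + 1;

        ctr[1] = __builtin_bswap64(lo);
        if (lo == 0)   /* carry out of the low 64 bits */
            ctr[0] = __builtin_bswap64(__builtin_bswap64(ctr[0]) + 1);
    }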
- mov x29, sp - - ld1 {v4.16b}, [x6] - xts_load_mask v8 - cbz w7, .Lxtsencnotfirst - - enc_prepare w3, x5, x8 - xts_cts_skip_tw w7, .LxtsencNx - encrypt_block v4, w3, x5, x8, w7 /* first tweak */ - enc_switch_key w3, x2, x8 - b .LxtsencNx - -.Lxtsencnotfirst: - enc_prepare w3, x2, x8 -.LxtsencloopNx: - next_tweak v4, v4, v8 -.LxtsencNx: - subs w4, w4, #64 - bmi .Lxtsenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - next_tweak v5, v4, v8 - eor v0.16b, v0.16b, v4.16b - next_tweak v6, v5, v8 - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - next_tweak v7, v6, v8 - eor v3.16b, v3.16b, v7.16b - bl aes_encrypt_block4x - eor v3.16b, v3.16b, v7.16b - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v7.16b - cbz w4, .Lxtsencret - xts_reload_mask v8 - b .LxtsencloopNx -.Lxtsenc1x: - adds w4, w4, #64 - beq .Lxtsencout - subs w4, w4, #16 - bmi .LxtsencctsNx -.Lxtsencloop: - ld1 {v0.16b}, [x1], #16 -.Lxtsencctsout: - eor v0.16b, v0.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v4.16b - cbz w4, .Lxtsencout - subs w4, w4, #16 - next_tweak v4, v4, v8 - bmi .Lxtsenccts - st1 {v0.16b}, [x0], #16 - b .Lxtsencloop -.Lxtsencout: - st1 {v0.16b}, [x0] -.Lxtsencret: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 - ret - -.LxtsencctsNx: - mov v0.16b, v3.16b - sub x0, x0, #16 -.Lxtsenccts: - adr_l x8, .Lcts_permute_table - - add x1, x1, w4, sxtw /* rewind input pointer */ - add w4, w4, #16 /* # bytes in final block */ - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - add x4, x0, x4 /* output address of final block */ - - ld1 {v1.16b}, [x1] /* load final block */ - ld1 {v2.16b}, [x8] - ld1 {v3.16b}, [x9] - - tbl v2.16b, {v0.16b}, v2.16b - tbx v0.16b, {v1.16b}, v3.16b - st1 {v2.16b}, [x4] /* overlapping stores */ - mov w4, wzr - b .Lxtsencctsout -AES_ENDPROC(aes_xts_encrypt) - -AES_ENTRY(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! 
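The next_tweak macro above is multiplication by x in GF(2^128) under the XTS polynomial x^128 + x^7 + x^2 + x + 1: shift the 128-bit tweak left one bit and xor 0x87 back in when the top bit falls out. The sshr/and/ext/add/eor sequence computes exactly this without leaving the vector unit; in scalar C on a little-endian tweak:

    #include <stdint.h>

    static void xts_next_tweak(uint64_t t[2])   /* t[0] = low, t[1] = high */
    {
        uint64_t carry_lo = t[0] >> 63;         /* bit 63 -> bit 64 */
        uint64_t carry_hi = t[1] >> 63;         /* bit 127 -> reduction */

        t[1] = (t[1] << 1) | carry_lo;
        t[0] = (t[0] << 1) ^ (carry_hi ? 0x87 : 0);
    }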
- mov x29, sp - - /* subtract 16 bytes if we are doing CTS */ - sub w8, w4, #0x10 - tst w4, #0xf - csel w4, w4, w8, eq - - ld1 {v4.16b}, [x6] - xts_load_mask v8 - xts_cts_skip_tw w7, .Lxtsdecskiptw - cbz w7, .Lxtsdecnotfirst - - enc_prepare w3, x5, x8 - encrypt_block v4, w3, x5, x8, w7 /* first tweak */ -.Lxtsdecskiptw: - dec_prepare w3, x2, x8 - b .LxtsdecNx - -.Lxtsdecnotfirst: - dec_prepare w3, x2, x8 -.LxtsdecloopNx: - next_tweak v4, v4, v8 -.LxtsdecNx: - subs w4, w4, #64 - bmi .Lxtsdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ - next_tweak v5, v4, v8 - eor v0.16b, v0.16b, v4.16b - next_tweak v6, v5, v8 - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - next_tweak v7, v6, v8 - eor v3.16b, v3.16b, v7.16b - bl aes_decrypt_block4x - eor v3.16b, v3.16b, v7.16b - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v7.16b - cbz w4, .Lxtsdecout - xts_reload_mask v8 - b .LxtsdecloopNx -.Lxtsdec1x: - adds w4, w4, #64 - beq .Lxtsdecout - subs w4, w4, #16 -.Lxtsdecloop: - ld1 {v0.16b}, [x1], #16 - bmi .Lxtsdeccts -.Lxtsdecctsout: - eor v0.16b, v0.16b, v4.16b - decrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - cbz w4, .Lxtsdecout - subs w4, w4, #16 - next_tweak v4, v4, v8 - b .Lxtsdecloop -.Lxtsdecout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 - ret - -.Lxtsdeccts: - adr_l x8, .Lcts_permute_table - - add x1, x1, w4, sxtw /* rewind input pointer */ - add w4, w4, #16 /* # bytes in final block */ - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - add x4, x0, x4 /* output address of final block */ - - next_tweak v5, v4, v8 - - ld1 {v1.16b}, [x1] /* load final block */ - ld1 {v2.16b}, [x8] - ld1 {v3.16b}, [x9] - - eor v0.16b, v0.16b, v5.16b - decrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v5.16b - - tbl v2.16b, {v0.16b}, v2.16b - tbx v0.16b, {v1.16b}, v3.16b - - st1 {v2.16b}, [x4] /* overlapping stores */ - mov w4, wzr - b .Lxtsdecctsout -AES_ENDPROC(aes_xts_decrypt) - - /* - * aes_mac_update(u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 dg[], int enc_before, int enc_after) - */ -AES_ENTRY(aes_mac_update) - frame_push 6 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x6 - - ld1 {v0.16b}, [x23] /* get dg */ - enc_prepare w2, x1, x7 - cbz w5, .Lmacloop4x - - encrypt_block v0, w2, x1, x7, w8 - -.Lmacloop4x: - subs w22, w22, #4 - bmi .Lmac1x - ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ - eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v2.16b - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v3.16b - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v4.16b - cmp w22, wzr - csinv x5, x24, xzr, eq - cbz w5, .Lmacout - encrypt_block v0, w21, x20, x7, w8 - st1 {v0.16b}, [x23] /* return dg */ - cond_yield_neon .Lmacrestart - b .Lmacloop4x -.Lmac1x: - add w22, w22, #4 -.Lmacloop: - cbz w22, .Lmacout - ld1 {v1.16b}, [x19], #16 /* get next pt block */ - eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - - subs w22, w22, #1 - csinv x5, x24, xzr, eq - cbz w5, .Lmacout - -.Lmacenc: - encrypt_block v0, w21, x20, x7, w8 - b .Lmacloop - -.Lmacout: - st1 {v0.16b}, [x23] /* return dg */ - frame_pop - ret - -.Lmacrestart: - ld1 {v0.16b}, [x23] /* get dg */ - enc_prepare w21, x20, x0 - b .Lmacloop4x -AES_ENDPROC(aes_mac_update) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S deleted file mode 100644 
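aes_mac_update above is a CBC-MAC core for the MAC glue code: xor each message block into the running digest and encrypt, with the enc_before/enc_after flags letting the caller defer the final encryption when a message arrives in several calls. In C, with the same hypothetical in-place block primitive:

    #include <stdint.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void mac_update_sketch(const uint8_t *in, int blocks, uint8_t dg[16],
                           int enc_before, int enc_after)
    {
        if (enc_before)
            aes_enc_block(dg);

        while (blocks-- > 0) {
            for (int i = 0; i < 16; i++)
                dg[i] ^= in[i];
            if (blocks > 0 || enc_after)   /* last block only on request */
                aes_enc_block(dg);
            in += 16;
        }
    }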
index 22d9b110cf78b1838ea1ef5c7643a9817e737756..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-neon.S +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON - * - * Copyright (C) 2013 - 2017 Linaro Ltd. - */ - -#include -#include - -#define AES_ENTRY(func) ENTRY(neon_ ## func) -#define AES_ENDPROC(func) ENDPROC(neon_ ## func) - - xtsmask .req v7 - cbciv .req v7 - vctr .req v4 - - .macro xts_reload_mask, tmp - xts_load_mask \tmp - .endm - - /* special case for the neon-bs driver calling into this one for CTS */ - .macro xts_cts_skip_tw, reg, lbl - tbnz \reg, #1, \lbl - .endm - - /* multiply by polynomial 'x' in GF(2^8) */ - .macro mul_by_x, out, in, temp, const - sshr \temp, \in, #7 - shl \out, \in, #1 - and \temp, \temp, \const - eor \out, \out, \temp - .endm - - /* multiply by polynomial 'x^2' in GF(2^8) */ - .macro mul_by_x2, out, in, temp, const - ushr \temp, \in, #6 - shl \out, \in, #2 - pmul \temp, \temp, \const - eor \out, \out, \temp - .endm - - /* preload the entire Sbox */ - .macro prepare, sbox, shiftrows, temp - movi v12.16b, #0x1b - ldr_l q13, \shiftrows, \temp - ldr_l q14, .Lror32by8, \temp - adr_l \temp, \sbox - ld1 {v16.16b-v19.16b}, [\temp], #64 - ld1 {v20.16b-v23.16b}, [\temp], #64 - ld1 {v24.16b-v27.16b}, [\temp], #64 - ld1 {v28.16b-v31.16b}, [\temp] - .endm - - /* do preload for encryption */ - .macro enc_prepare, ignore0, ignore1, temp - prepare crypto_aes_sbox, .LForward_ShiftRows, \temp - .endm - - .macro enc_switch_key, ignore0, ignore1, temp - /* do nothing */ - .endm - - /* do preload for decryption */ - .macro dec_prepare, ignore0, ignore1, temp - prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp - .endm - - /* apply SubBytes transformation using the the preloaded Sbox */ - .macro sub_bytes, in - sub v9.16b, \in\().16b, v15.16b - tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b - sub v10.16b, v9.16b, v15.16b - tbx \in\().16b, {v20.16b-v23.16b}, v9.16b - sub v11.16b, v10.16b, v15.16b - tbx \in\().16b, {v24.16b-v27.16b}, v10.16b - tbx \in\().16b, {v28.16b-v31.16b}, v11.16b - .endm - - /* apply MixColumns transformation */ - .macro mix_columns, in, enc - .if \enc == 0 - /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b - eor \in\().16b, \in\().16b, v8.16b - rev32 v8.8h, v8.8h - eor \in\().16b, \in\().16b, v8.16b - .endif - - mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b - rev32 v8.8h, \in\().8h - eor v8.16b, v8.16b, v9.16b - eor \in\().16b, \in\().16b, v8.16b - tbl \in\().16b, {\in\().16b}, v14.16b - eor \in\().16b, \in\().16b, v8.16b - .endm - - .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.4s}, [\rk] - add \rkp, \rk, #16 - mov \i, \rounds -1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ - movi v15.16b, #0x40 - tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ - sub_bytes \in - subs \i, \i, #1 - ld1 {v15.4s}, [\rkp], #16 - beq 2222f - mix_columns \in, \enc - b 1111b -2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ - .endm - - .macro encrypt_block, in, rounds, rk, rkp, i - do_block 1, \in, \rounds, \rk, \rkp, \i - .endm - - .macro decrypt_block, in, rounds, rk, rkp, i - do_block 0, \in, \rounds, \rk, \rkp, \i - .endm - - /* - * Interleaved versions: functionally equivalent to the - * ones above, but applied to AES states in parallel. 
- */ - - .macro sub_bytes_4x, in0, in1, in2, in3 - sub v8.16b, \in0\().16b, v15.16b - tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b - sub v9.16b, \in1\().16b, v15.16b - tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, \in2\().16b, v15.16b - tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b - sub v11.16b, \in3\().16b, v15.16b - tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b - tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b - tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v8.16b, v15.16b - tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b - sub v9.16b, v9.16b, v15.16b - tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b - sub v10.16b, v10.16b, v15.16b - tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b - sub v11.16b, v11.16b, v15.16b - tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b - sub v8.16b, v8.16b, v15.16b - tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b - sub v9.16b, v9.16b, v15.16b - tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b - sub v10.16b, v10.16b, v15.16b - tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b - sub v11.16b, v11.16b, v15.16b - tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b - tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b - tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b - .endm - - .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const - sshr \tmp0\().16b, \in0\().16b, #7 - shl \out0\().16b, \in0\().16b, #1 - sshr \tmp1\().16b, \in1\().16b, #7 - and \tmp0\().16b, \tmp0\().16b, \const\().16b - shl \out1\().16b, \in1\().16b, #1 - and \tmp1\().16b, \tmp1\().16b, \const\().16b - eor \out0\().16b, \out0\().16b, \tmp0\().16b - eor \out1\().16b, \out1\().16b, \tmp1\().16b - .endm - - .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const - ushr \tmp0\().16b, \in0\().16b, #6 - shl \out0\().16b, \in0\().16b, #2 - ushr \tmp1\().16b, \in1\().16b, #6 - pmul \tmp0\().16b, \tmp0\().16b, \const\().16b - shl \out1\().16b, \in1\().16b, #2 - pmul \tmp1\().16b, \tmp1\().16b, \const\().16b - eor \out0\().16b, \out0\().16b, \tmp0\().16b - eor \out1\().16b, \out1\().16b, \tmp1\().16b - .endm - - .macro mix_columns_2x, in0, in1, enc - .if \enc == 0 - /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 - eor \in0\().16b, \in0\().16b, v8.16b - rev32 v8.8h, v8.8h - eor \in1\().16b, \in1\().16b, v9.16b - rev32 v9.8h, v9.8h - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - .endif - - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 - rev32 v10.8h, \in0\().8h - rev32 v11.8h, \in1\().8h - eor v10.16b, v10.16b, v8.16b - eor v11.16b, v11.16b, v9.16b - eor \in0\().16b, \in0\().16b, v10.16b - eor \in1\().16b, \in1\().16b, v11.16b - tbl \in0\().16b, {\in0\().16b}, v14.16b - tbl \in1\().16b, {\in1\().16b}, v14.16b - eor \in0\().16b, \in0\().16b, v10.16b - eor \in1\().16b, \in1\().16b, v11.16b - .endm - - .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.4s}, [\rk] - add \rkp, \rk, #16 - mov \i, \rounds -1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ - eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - movi v15.16b, #0x40 - tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ - tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ - tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - sub_bytes_4x \in0, \in1, \in2, \in3 - subs \i, \i, #1 - ld1 {v15.4s}, [\rkp], #16 - beq 2222f - mix_columns_2x \in0, \in1, \enc - 
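The mul_by_x/mul_by_x2 macros above (and their _2x variants) implement multiplication by x and x^2 in GF(2^8) modulo the AES polynomial x^8 + x^4 + x^3 + x + 1; 0x1b is the reduction constant kept in v12, and pmul handles the two-bit case as a carryless multiply. Scalar C equivalents:

    #include <stdint.h>

    static uint8_t mul_by_x(uint8_t b)
    {
        return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1b));
    }

    static uint8_t mul_by_x2(uint8_t b)
    {
        uint8_t hi = b >> 6;   /* the two bits shifted out of the byte */

        /* carryless multiply of hi by 0x1b, as the pmul above does */
        return (uint8_t)((b << 2) ^ ((hi & 1) * 0x1b) ^ ((hi >> 1) * 0x36));
    }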
mix_columns_2x \in2, \in3, \enc - b 1111b -2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ - eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - .endm - - .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i - do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i - .endm - - .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i - do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i - .endm - -#include "aes-modes.S" - - .section ".rodata", "a" - .align 4 -.LForward_ShiftRows: - .octa 0x0b06010c07020d08030e09040f0a0500 - -.LReverse_ShiftRows: - .octa 0x0306090c0f0205080b0e0104070a0d00 - -.Lror32by8: - .octa 0x0c0f0e0d080b0a090407060500030201 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S deleted file mode 100644 index 65982039fa3653faf9d9dc369c8b163b5ae95cc1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ /dev/null @@ -1,1005 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Bit sliced AES using NEON instructions - * - * Copyright (C) 2016 Linaro Ltd - */ - -/* - * The algorithm implemented here is described in detail by the paper - * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and - * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) - * - * This implementation is based primarily on the OpenSSL implementation - * for 32-bit ARM written by Andy Polyakov - */ - -#include -#include - - .text - - rounds .req x11 - bskey .req x12 - - .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - eor \b2, \b2, \b1 - eor \b5, \b5, \b6 - eor \b3, \b3, \b0 - eor \b6, \b6, \b2 - eor \b5, \b5, \b0 - eor \b6, \b6, \b3 - eor \b3, \b3, \b7 - eor \b7, \b7, \b5 - eor \b3, \b3, \b4 - eor \b4, \b4, \b5 - eor \b2, \b2, \b7 - eor \b3, \b3, \b1 - eor \b1, \b1, \b5 - .endm - - .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - eor \b0, \b0, \b6 - eor \b1, \b1, \b4 - eor \b4, \b4, \b6 - eor \b2, \b2, \b0 - eor \b6, \b6, \b1 - eor \b1, \b1, \b5 - eor \b5, \b5, \b3 - eor \b3, \b3, \b7 - eor \b7, \b7, \b5 - eor \b2, \b2, \b5 - eor \b4, \b4, \b7 - .endm - - .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 - eor \b1, \b1, \b7 - eor \b4, \b4, \b7 - eor \b7, \b7, \b5 - eor \b1, \b1, \b3 - eor \b2, \b2, \b5 - eor \b3, \b3, \b7 - eor \b6, \b6, \b1 - eor \b2, \b2, \b0 - eor \b5, \b5, \b3 - eor \b4, \b4, \b6 - eor \b0, \b0, \b6 - eor \b1, \b1, \b4 - .endm - - .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 - eor \b1, \b1, \b5 - eor \b2, \b2, \b7 - eor \b3, \b3, \b1 - eor \b4, \b4, \b5 - eor \b7, \b7, \b5 - eor \b3, \b3, \b4 - eor \b5, \b5, \b0 - eor \b3, \b3, \b7 - eor \b6, \b6, \b2 - eor \b2, \b2, \b1 - eor \b6, \b6, \b3 - eor \b3, \b3, \b0 - eor \b5, \b5, \b6 - .endm - - .macro mul_gf4, x0, x1, y0, y1, t0, t1 - eor \t0, \y0, \y1 - and \t0, \t0, \x0 - eor \x0, \x0, \x1 - and \t1, \x1, \y0 - and \x0, \x0, \y1 - eor \x1, \t1, \t0 - eor \x0, \x0, \t1 - .endm - - .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 - eor \t0, \y0, \y1 - eor \t1, \y2, \y3 - and \t0, \t0, \x0 - and \t1, \t1, \x2 - eor \x0, \x0, \x1 - eor \x2, \x2, \x3 - and \x1, \x1, \y0 - and \x3, \x3, \y2 - and \x0, \x0, \y1 - and \x2, \x2, \y3 - eor \x1, \x1, \x0 - eor \x2, \x2, \x3 - eor \x0, \x0, \t0 - eor \x3, \x3, \t1 - .endm - - .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, y1, y2, y3, t0, t1, t2, t3 - eor \t0, \x0, \x2 - eor \t1, \x1, \x3 - mul_gf4 \x0, \x1, 
\y0, \y1, \t2, \t3 - eor \y0, \y0, \y2 - eor \y1, \y1, \y3 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 - eor \x0, \x0, \t0 - eor \x2, \x2, \t0 - eor \x1, \x1, \t1 - eor \x3, \x3, \t1 - eor \t0, \x4, \x6 - eor \t1, \x5, \x7 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 - eor \y0, \y0, \y2 - eor \y1, \y1, \y3 - mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 - eor \x4, \x4, \t0 - eor \x6, \x6, \t0 - eor \x5, \x5, \t1 - eor \x7, \x7, \t1 - .endm - - .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - eor \t3, \x4, \x6 - eor \t0, \x5, \x7 - eor \t1, \x1, \x3 - eor \s1, \x7, \x6 - eor \s0, \x0, \x2 - eor \s3, \t3, \t0 - orr \t2, \t0, \t1 - and \s2, \t3, \s0 - orr \t3, \t3, \s0 - eor \s0, \s0, \t1 - and \t0, \t0, \t1 - eor \t1, \x3, \x2 - and \s3, \s3, \s0 - and \s1, \s1, \t1 - eor \t1, \x4, \x5 - eor \s0, \x1, \x0 - eor \t3, \t3, \s1 - eor \t2, \t2, \s1 - and \s1, \t1, \s0 - orr \t1, \t1, \s0 - eor \t3, \t3, \s3 - eor \t0, \t0, \s1 - eor \t2, \t2, \s2 - eor \t1, \t1, \s3 - eor \t0, \t0, \s2 - and \s0, \x7, \x3 - eor \t1, \t1, \s2 - and \s1, \x6, \x2 - and \s2, \x5, \x1 - orr \s3, \x4, \x0 - eor \t3, \t3, \s0 - eor \t1, \t1, \s2 - eor \s0, \t0, \s3 - eor \t2, \t2, \s1 - and \s2, \t3, \t1 - eor \s1, \t2, \s2 - eor \s3, \s0, \s2 - bsl \s1, \t1, \s0 - not \t0, \s0 - bsl \s0, \s1, \s3 - bsl \t0, \s1, \s3 - bsl \s3, \t3, \t2 - eor \t3, \t3, \t2 - and \s2, \s0, \s3 - eor \t1, \t1, \t0 - eor \s2, \s2, \t3 - mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - .endm - - .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ - \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b - inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ - \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ - \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ - \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b - out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ - \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b - .endm - - .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ - \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b - inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \ - \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ - \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ - \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b - inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ - \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b - .endm - - .macro enc_next_rk - ldp q16, q17, [bskey], #128 - ldp q18, q19, [bskey, #-96] - ldp q20, q21, [bskey, #-64] - ldp q22, q23, [bskey, #-32] - .endm - - .macro dec_next_rk - ldp q16, q17, [bskey, #-128]! 
- ldp q18, q19, [bskey, #32] - ldp q20, q21, [bskey, #64] - ldp q22, q23, [bskey, #96] - .endm - - .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 - eor \x0\().16b, \x0\().16b, v16.16b - eor \x1\().16b, \x1\().16b, v17.16b - eor \x2\().16b, \x2\().16b, v18.16b - eor \x3\().16b, \x3\().16b, v19.16b - eor \x4\().16b, \x4\().16b, v20.16b - eor \x5\().16b, \x5\().16b, v21.16b - eor \x6\().16b, \x6\().16b, v22.16b - eor \x7\().16b, \x7\().16b, v23.16b - .endm - - .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask - tbl \x0\().16b, {\x0\().16b}, \mask\().16b - tbl \x1\().16b, {\x1\().16b}, \mask\().16b - tbl \x2\().16b, {\x2\().16b}, \mask\().16b - tbl \x3\().16b, {\x3\().16b}, \mask\().16b - tbl \x4\().16b, {\x4\().16b}, \mask\().16b - tbl \x5\().16b, {\x5\().16b}, \mask\().16b - tbl \x6\().16b, {\x6\().16b}, \mask\().16b - tbl \x7\().16b, {\x7\().16b}, \mask\().16b - .endm - - .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7, inv - ext \t0\().16b, \x0\().16b, \x0\().16b, #12 - ext \t1\().16b, \x1\().16b, \x1\().16b, #12 - eor \x0\().16b, \x0\().16b, \t0\().16b - ext \t2\().16b, \x2\().16b, \x2\().16b, #12 - eor \x1\().16b, \x1\().16b, \t1\().16b - ext \t3\().16b, \x3\().16b, \x3\().16b, #12 - eor \x2\().16b, \x2\().16b, \t2\().16b - ext \t4\().16b, \x4\().16b, \x4\().16b, #12 - eor \x3\().16b, \x3\().16b, \t3\().16b - ext \t5\().16b, \x5\().16b, \x5\().16b, #12 - eor \x4\().16b, \x4\().16b, \t4\().16b - ext \t6\().16b, \x6\().16b, \x6\().16b, #12 - eor \x5\().16b, \x5\().16b, \t5\().16b - ext \t7\().16b, \x7\().16b, \x7\().16b, #12 - eor \x6\().16b, \x6\().16b, \t6\().16b - eor \t1\().16b, \t1\().16b, \x0\().16b - eor \x7\().16b, \x7\().16b, \t7\().16b - ext \x0\().16b, \x0\().16b, \x0\().16b, #8 - eor \t2\().16b, \t2\().16b, \x1\().16b - eor \t0\().16b, \t0\().16b, \x7\().16b - eor \t1\().16b, \t1\().16b, \x7\().16b - ext \x1\().16b, \x1\().16b, \x1\().16b, #8 - eor \t5\().16b, \t5\().16b, \x4\().16b - eor \x0\().16b, \x0\().16b, \t0\().16b - eor \t6\().16b, \t6\().16b, \x5\().16b - eor \x1\().16b, \x1\().16b, \t1\().16b - ext \t0\().16b, \x4\().16b, \x4\().16b, #8 - eor \t4\().16b, \t4\().16b, \x3\().16b - ext \t1\().16b, \x5\().16b, \x5\().16b, #8 - eor \t7\().16b, \t7\().16b, \x6\().16b - ext \x4\().16b, \x3\().16b, \x3\().16b, #8 - eor \t3\().16b, \t3\().16b, \x2\().16b - ext \x5\().16b, \x7\().16b, \x7\().16b, #8 - eor \t4\().16b, \t4\().16b, \x7\().16b - ext \x3\().16b, \x6\().16b, \x6\().16b, #8 - eor \t3\().16b, \t3\().16b, \x7\().16b - ext \x6\().16b, \x2\().16b, \x2\().16b, #8 - eor \x7\().16b, \t1\().16b, \t5\().16b - .ifb \inv - eor \x2\().16b, \t0\().16b, \t4\().16b - eor \x4\().16b, \x4\().16b, \t3\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x3\().16b, \x3\().16b, \t6\().16b - eor \x6\().16b, \x6\().16b, \t2\().16b - .else - eor \t3\().16b, \t3\().16b, \x4\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x2\().16b, \x3\().16b, \t6\().16b - eor \x3\().16b, \t0\().16b, \t4\().16b - eor \x4\().16b, \x6\().16b, \t2\().16b - mov \x6\().16b, \t3\().16b - .endif - .endm - - .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7 - ext \t0\().16b, \x0\().16b, \x0\().16b, #8 - ext \t6\().16b, \x6\().16b, \x6\().16b, #8 - ext \t7\().16b, \x7\().16b, \x7\().16b, #8 - eor \t0\().16b, \t0\().16b, \x0\().16b - ext \t1\().16b, \x1\().16b, \x1\().16b, #8 - eor \t6\().16b, \t6\().16b, \x6\().16b - ext \t2\().16b, \x2\().16b, \x2\().16b, #8 - eor \t7\().16b, \t7\().16b, \x7\().16b - ext \t3\().16b, 
\x3\().16b, \x3\().16b, #8 - eor \t1\().16b, \t1\().16b, \x1\().16b - ext \t4\().16b, \x4\().16b, \x4\().16b, #8 - eor \t2\().16b, \t2\().16b, \x2\().16b - ext \t5\().16b, \x5\().16b, \x5\().16b, #8 - eor \t3\().16b, \t3\().16b, \x3\().16b - eor \t4\().16b, \t4\().16b, \x4\().16b - eor \t5\().16b, \t5\().16b, \x5\().16b - eor \x0\().16b, \x0\().16b, \t6\().16b - eor \x1\().16b, \x1\().16b, \t6\().16b - eor \x2\().16b, \x2\().16b, \t0\().16b - eor \x4\().16b, \x4\().16b, \t2\().16b - eor \x3\().16b, \x3\().16b, \t1\().16b - eor \x1\().16b, \x1\().16b, \t7\().16b - eor \x2\().16b, \x2\().16b, \t7\().16b - eor \x4\().16b, \x4\().16b, \t6\().16b - eor \x5\().16b, \x5\().16b, \t3\().16b - eor \x3\().16b, \x3\().16b, \t6\().16b - eor \x6\().16b, \x6\().16b, \t4\().16b - eor \x4\().16b, \x4\().16b, \t7\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x7\().16b, \x7\().16b, \t5\().16b - mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 - .endm - - .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 - ushr \t0\().2d, \b0\().2d, #\n - ushr \t1\().2d, \b1\().2d, #\n - eor \t0\().16b, \t0\().16b, \a0\().16b - eor \t1\().16b, \t1\().16b, \a1\().16b - and \t0\().16b, \t0\().16b, \mask\().16b - and \t1\().16b, \t1\().16b, \mask\().16b - eor \a0\().16b, \a0\().16b, \t0\().16b - shl \t0\().2d, \t0\().2d, #\n - eor \a1\().16b, \a1\().16b, \t1\().16b - shl \t1\().2d, \t1\().2d, #\n - eor \b0\().16b, \b0\().16b, \t0\().16b - eor \b1\().16b, \b1\().16b, \t1\().16b - .endm - - .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 - movi \t0\().16b, #0x55 - movi \t1\().16b, #0x33 - swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 - swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 - movi \t0\().16b, #0x0f - swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 - swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 - swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 - swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 - .endm - - - .align 6 -M0: .octa 0x0004080c0105090d02060a0e03070b0f - -M0SR: .octa 0x0004080c05090d010a0e02060f03070b -SR: .octa 0x0f0e0d0c0a09080b0504070600030201 -SRM0: .octa 0x01060b0c0207080d0304090e00050a0f - -M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 -ISR: .octa 0x0f0e0d0c080b0a090504070602010003 -ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f - - /* - * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) - */ -ENTRY(aesbs_convert_key) - ld1 {v7.4s}, [x1], #16 // load round 0 key - ld1 {v17.4s}, [x1], #16 // load round 1 key - - movi v8.16b, #0x01 // bit masks - movi v9.16b, #0x02 - movi v10.16b, #0x04 - movi v11.16b, #0x08 - movi v12.16b, #0x10 - movi v13.16b, #0x20 - movi v14.16b, #0x40 - movi v15.16b, #0x80 - ldr q16, M0 - - sub x2, x2, #1 - str q7, [x0], #16 // save round 0 key - -.Lkey_loop: - tbl v7.16b ,{v17.16b}, v16.16b - ld1 {v17.4s}, [x1], #16 // load next round key - - cmtst v0.16b, v7.16b, v8.16b - cmtst v1.16b, v7.16b, v9.16b - cmtst v2.16b, v7.16b, v10.16b - cmtst v3.16b, v7.16b, v11.16b - cmtst v4.16b, v7.16b, v12.16b - cmtst v5.16b, v7.16b, v13.16b - cmtst v6.16b, v7.16b, v14.16b - cmtst v7.16b, v7.16b, v15.16b - not v0.16b, v0.16b - not v1.16b, v1.16b - not v5.16b, v5.16b - not v6.16b, v6.16b - - subs x2, x2, #1 - stp q0, q1, [x0], #128 - stp q2, q3, [x0, #-96] - stp q4, q5, [x0, #-64] - stp q6, q7, [x0, #-32] - b.ne .Lkey_loop - - movi v7.16b, #0x63 // compose .L63 - eor v17.16b, v17.16b, v7.16b - str q17, [x0] - ret -ENDPROC(aesbs_convert_key) - - .align 4 -aesbs_encrypt8: - ldr q9, [bskey], #16 // round 0 key 
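The bitslice macro above is built from the classic swapmove step: exchange the bit groups selected by a mask between two registers, one of them at a bit offset n, using three xors and two shifts and no conditionals. Applied with the 0x55/0x33/0x0f masks it transposes eight AES states into the one-bit-plane-per-register form the bit-sliced sbox expects. A 64-bit scalar rendering:

    #include <stdint.h>

    static void swapmove(uint64_t *a, uint64_t *b, unsigned int n,
                         uint64_t mask)
    {
        uint64_t t = ((*b >> n) ^ *a) & mask;

        *a ^= t;
        *b ^= t << n;
    }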
- ldr q8, M0SR - ldr q24, SR - - eor v10.16b, v0.16b, v9.16b // xor with round0 key - eor v11.16b, v1.16b, v9.16b - tbl v0.16b, {v10.16b}, v8.16b - eor v12.16b, v2.16b, v9.16b - tbl v1.16b, {v11.16b}, v8.16b - eor v13.16b, v3.16b, v9.16b - tbl v2.16b, {v12.16b}, v8.16b - eor v14.16b, v4.16b, v9.16b - tbl v3.16b, {v13.16b}, v8.16b - eor v15.16b, v5.16b, v9.16b - tbl v4.16b, {v14.16b}, v8.16b - eor v10.16b, v6.16b, v9.16b - tbl v5.16b, {v15.16b}, v8.16b - eor v11.16b, v7.16b, v9.16b - tbl v6.16b, {v10.16b}, v8.16b - tbl v7.16b, {v11.16b}, v8.16b - - bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 - - sub rounds, rounds, #1 - b .Lenc_sbox - -.Lenc_loop: - shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 -.Lenc_sbox: - sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ - v13, v14, v15 - subs rounds, rounds, #1 - b.cc .Lenc_done - - enc_next_rk - - mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ - v13, v14, v15 - - add_round_key v0, v1, v2, v3, v4, v5, v6, v7 - - b.ne .Lenc_loop - ldr q24, SRM0 - b .Lenc_loop - -.Lenc_done: - ldr q12, [bskey] // last round key - - bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 - - eor v0.16b, v0.16b, v12.16b - eor v1.16b, v1.16b, v12.16b - eor v4.16b, v4.16b, v12.16b - eor v6.16b, v6.16b, v12.16b - eor v3.16b, v3.16b, v12.16b - eor v7.16b, v7.16b, v12.16b - eor v2.16b, v2.16b, v12.16b - eor v5.16b, v5.16b, v12.16b - ret -ENDPROC(aesbs_encrypt8) - - .align 4 -aesbs_decrypt8: - lsl x9, rounds, #7 - add bskey, bskey, x9 - - ldr q9, [bskey, #-112]! // round 0 key - ldr q8, M0ISR - ldr q24, ISR - - eor v10.16b, v0.16b, v9.16b // xor with round0 key - eor v11.16b, v1.16b, v9.16b - tbl v0.16b, {v10.16b}, v8.16b - eor v12.16b, v2.16b, v9.16b - tbl v1.16b, {v11.16b}, v8.16b - eor v13.16b, v3.16b, v9.16b - tbl v2.16b, {v12.16b}, v8.16b - eor v14.16b, v4.16b, v9.16b - tbl v3.16b, {v13.16b}, v8.16b - eor v15.16b, v5.16b, v9.16b - tbl v4.16b, {v14.16b}, v8.16b - eor v10.16b, v6.16b, v9.16b - tbl v5.16b, {v15.16b}, v8.16b - eor v11.16b, v7.16b, v9.16b - tbl v6.16b, {v10.16b}, v8.16b - tbl v7.16b, {v11.16b}, v8.16b - - bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 - - sub rounds, rounds, #1 - b .Ldec_sbox - -.Ldec_loop: - shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 -.Ldec_sbox: - inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ - v13, v14, v15 - subs rounds, rounds, #1 - b.cc .Ldec_done - - dec_next_rk - - add_round_key v0, v1, v6, v4, v2, v7, v3, v5 - - inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ - v13, v14, v15 - - b.ne .Ldec_loop - ldr q24, ISRM0 - b .Ldec_loop -.Ldec_done: - ldr q12, [bskey, #-16] // last round key - - bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 - - eor v0.16b, v0.16b, v12.16b - eor v1.16b, v1.16b, v12.16b - eor v6.16b, v6.16b, v12.16b - eor v4.16b, v4.16b, v12.16b - eor v2.16b, v2.16b, v12.16b - eor v7.16b, v7.16b, v12.16b - eor v3.16b, v3.16b, v12.16b - eor v5.16b, v5.16b, v12.16b - ret -ENDPROC(aesbs_decrypt8) - - /* - * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 5 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - -99: mov x5, #1 - lsl x5, x5, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x5, x5, xzr, mi - - ld1 {v0.16b}, [x20], #16 - tbnz x5, #1, 0f - ld1 {v1.16b}, [x20], #16 - tbnz x5, #2, 0f - 
ld1 {v2.16b}, [x20], #16 - tbnz x5, #3, 0f - ld1 {v3.16b}, [x20], #16 - tbnz x5, #4, 0f - ld1 {v4.16b}, [x20], #16 - tbnz x5, #5, 0f - ld1 {v5.16b}, [x20], #16 - tbnz x5, #6, 0f - ld1 {v6.16b}, [x20], #16 - tbnz x5, #7, 0f - ld1 {v7.16b}, [x20], #16 - -0: mov bskey, x21 - mov rounds, x22 - bl \do8 - - st1 {\o0\().16b}, [x19], #16 - tbnz x5, #1, 1f - st1 {\o1\().16b}, [x19], #16 - tbnz x5, #2, 1f - st1 {\o2\().16b}, [x19], #16 - tbnz x5, #3, 1f - st1 {\o3\().16b}, [x19], #16 - tbnz x5, #4, 1f - st1 {\o4\().16b}, [x19], #16 - tbnz x5, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x5, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x5, #7, 1f - st1 {\o7\().16b}, [x19], #16 - - cbz x23, 1f - cond_yield_neon - b 99b - -1: frame_pop - ret - .endm - - .align 4 -ENTRY(aesbs_ecb_encrypt) - __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_ecb_encrypt) - - .align 4 -ENTRY(aesbs_ecb_decrypt) - __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_ecb_decrypt) - - /* - * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - */ - .align 4 -ENTRY(aesbs_cbc_decrypt) - frame_push 6 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - -99: mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi - - ld1 {v0.16b}, [x20], #16 - mov v25.16b, v0.16b - tbnz x6, #1, 0f - ld1 {v1.16b}, [x20], #16 - mov v26.16b, v1.16b - tbnz x6, #2, 0f - ld1 {v2.16b}, [x20], #16 - mov v27.16b, v2.16b - tbnz x6, #3, 0f - ld1 {v3.16b}, [x20], #16 - mov v28.16b, v3.16b - tbnz x6, #4, 0f - ld1 {v4.16b}, [x20], #16 - mov v29.16b, v4.16b - tbnz x6, #5, 0f - ld1 {v5.16b}, [x20], #16 - mov v30.16b, v5.16b - tbnz x6, #6, 0f - ld1 {v6.16b}, [x20], #16 - mov v31.16b, v6.16b - tbnz x6, #7, 0f - ld1 {v7.16b}, [x20] - -0: mov bskey, x21 - mov rounds, x22 - bl aesbs_decrypt8 - - ld1 {v24.16b}, [x24] // load IV - - eor v1.16b, v1.16b, v25.16b - eor v6.16b, v6.16b, v26.16b - eor v4.16b, v4.16b, v27.16b - eor v2.16b, v2.16b, v28.16b - eor v7.16b, v7.16b, v29.16b - eor v0.16b, v0.16b, v24.16b - eor v3.16b, v3.16b, v30.16b - eor v5.16b, v5.16b, v31.16b - - st1 {v0.16b}, [x19], #16 - mov v24.16b, v25.16b - tbnz x6, #1, 1f - st1 {v1.16b}, [x19], #16 - mov v24.16b, v26.16b - tbnz x6, #2, 1f - st1 {v6.16b}, [x19], #16 - mov v24.16b, v27.16b - tbnz x6, #3, 1f - st1 {v4.16b}, [x19], #16 - mov v24.16b, v28.16b - tbnz x6, #4, 1f - st1 {v2.16b}, [x19], #16 - mov v24.16b, v29.16b - tbnz x6, #5, 1f - st1 {v7.16b}, [x19], #16 - mov v24.16b, v30.16b - tbnz x6, #6, 1f - st1 {v3.16b}, [x19], #16 - mov v24.16b, v31.16b - tbnz x6, #7, 1f - ld1 {v24.16b}, [x20], #16 - st1 {v5.16b}, [x19], #16 -1: st1 {v24.16b}, [x24] // store IV - - cbz x23, 2f - cond_yield_neon - b 99b - -2: frame_pop - ret -ENDPROC(aesbs_cbc_decrypt) - - .macro next_tweak, out, in, const, tmp - sshr \tmp\().2d, \in\().2d, #63 - and \tmp\().16b, \tmp\().16b, \const\().16b - add \out\().2d, \in\().2d, \in\().2d - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \out\().16b, \out\().16b, \tmp\().16b - .endm - - /* - * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - */ -__xts_crypt8: - mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi - - ld1 {v0.16b}, [x20], #16 - next_tweak v26, v25, v30, v31 - eor v0.16b, v0.16b, v25.16b - tbnz x6, #1, 0f - - ld1 
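__ecb_crypt and the other bulk entry points above size each eight-block batch with a single shift: when fewer than eight blocks remain, (1 << blocks) has exactly bit 'blocks' set, so testing bit i with tbnz after loading block i-1 cuts the load (and later store) sequence off at the right point; when eight or more remain, the csel forces the mask to zero and all eight loads run. The same idea in C:

    /* mask for a partial batch: bit k set means "stop after k blocks" */
    static int batch_mask(int blocks_left)
    {
        return blocks_left >= 8 ? 0 : 1 << blocks_left;
    }

    /* usage: load block 0; if (mask & 2) stop; load block 1; if (mask & 4) ... */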
{v1.16b}, [x20], #16 - next_tweak v27, v26, v30, v31 - eor v1.16b, v1.16b, v26.16b - tbnz x6, #2, 0f - - ld1 {v2.16b}, [x20], #16 - next_tweak v28, v27, v30, v31 - eor v2.16b, v2.16b, v27.16b - tbnz x6, #3, 0f - - ld1 {v3.16b}, [x20], #16 - next_tweak v29, v28, v30, v31 - eor v3.16b, v3.16b, v28.16b - tbnz x6, #4, 0f - - ld1 {v4.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset] - eor v4.16b, v4.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #5, 0f - - ld1 {v5.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 16] - eor v5.16b, v5.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #6, 0f - - ld1 {v6.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 32] - eor v6.16b, v6.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #7, 0f - - ld1 {v7.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 48] - eor v7.16b, v7.16b, v29.16b - next_tweak v29, v29, v30, v31 - -0: mov bskey, x21 - mov rounds, x22 - br x7 -ENDPROC(__xts_crypt8) - - .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 6, 64 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - -0: movi v30.2s, #0x1 - movi v25.2s, #0x87 - uzp1 v30.4s, v30.4s, v25.4s - ld1 {v25.16b}, [x24] - -99: adr x7, \do8 - bl __xts_crypt8 - - ldp q16, q17, [sp, #.Lframe_local_offset] - ldp q18, q19, [sp, #.Lframe_local_offset + 32] - - eor \o0\().16b, \o0\().16b, v25.16b - eor \o1\().16b, \o1\().16b, v26.16b - eor \o2\().16b, \o2\().16b, v27.16b - eor \o3\().16b, \o3\().16b, v28.16b - - st1 {\o0\().16b}, [x19], #16 - mov v25.16b, v26.16b - tbnz x6, #1, 1f - st1 {\o1\().16b}, [x19], #16 - mov v25.16b, v27.16b - tbnz x6, #2, 1f - st1 {\o2\().16b}, [x19], #16 - mov v25.16b, v28.16b - tbnz x6, #3, 1f - st1 {\o3\().16b}, [x19], #16 - mov v25.16b, v29.16b - tbnz x6, #4, 1f - - eor \o4\().16b, \o4\().16b, v16.16b - eor \o5\().16b, \o5\().16b, v17.16b - eor \o6\().16b, \o6\().16b, v18.16b - eor \o7\().16b, \o7\().16b, v19.16b - - st1 {\o4\().16b}, [x19], #16 - tbnz x6, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x6, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x6, #7, 1f - st1 {\o7\().16b}, [x19], #16 - - cbz x23, 1f - st1 {v25.16b}, [x24] - - cond_yield_neon 0b - b 99b - -1: st1 {v25.16b}, [x24] - frame_pop - ret - .endm - -ENTRY(aesbs_xts_encrypt) - __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_xts_encrypt) - -ENTRY(aesbs_xts_decrypt) - __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_xts_decrypt) - - .macro next_ctr, v - mov \v\().d[1], x8 - adds x8, x8, #1 - mov \v\().d[0], x7 - adc x7, x7, xzr - rev64 \v\().16b, \v\().16b - .endm - - /* - * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[], u8 final[]) - */ -ENTRY(aesbs_ctr_encrypt) - frame_push 8 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 - - cmp x25, #0 - cset x26, ne - add x23, x23, x26 // do one extra block if final - -98: ldp x7, x8, [x24] - ld1 {v0.16b}, [x24] -CPU_LE( rev x7, x7 ) -CPU_LE( rev x8, x8 ) - adds x8, x8, #1 - adc x7, x7, xzr - -99: mov x9, #1 - lsl x9, x9, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x9, x9, xzr, le - - tbnz x9, #1, 0f - next_ctr v1 - tbnz x9, #2, 0f - next_ctr v2 - tbnz x9, #3, 0f - next_ctr v3 - tbnz x9, #4, 0f - next_ctr v4 - tbnz x9, #5, 0f - next_ctr v5 - tbnz x9, #6, 0f - next_ctr v6 - tbnz x9, #7, 0f - next_ctr v7 - -0: mov bskey, x21 - mov rounds, x22 - bl aesbs_encrypt8 - - lsr x9, x9, x26 // disregard the extra 
block - tbnz x9, #0, 0f - - ld1 {v8.16b}, [x20], #16 - eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x19], #16 - tbnz x9, #1, 1f - - ld1 {v9.16b}, [x20], #16 - eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x19], #16 - tbnz x9, #2, 2f - - ld1 {v10.16b}, [x20], #16 - eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x19], #16 - tbnz x9, #3, 3f - - ld1 {v11.16b}, [x20], #16 - eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x19], #16 - tbnz x9, #4, 4f - - ld1 {v12.16b}, [x20], #16 - eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x19], #16 - tbnz x9, #5, 5f - - ld1 {v13.16b}, [x20], #16 - eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x19], #16 - tbnz x9, #6, 6f - - ld1 {v14.16b}, [x20], #16 - eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x19], #16 - tbnz x9, #7, 7f - - ld1 {v15.16b}, [x20], #16 - eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x19], #16 - -8: next_ctr v0 - st1 {v0.16b}, [x24] - cbz x23, .Lctr_done - - cond_yield_neon 98b - b 99b - -.Lctr_done: - frame_pop - ret - - /* - * If we are handling the tail of the input (x6 != NULL), return the - * final keystream block back to the caller. - */ -0: cbz x25, 8b - st1 {v0.16b}, [x25] - b 8b -1: cbz x25, 8b - st1 {v1.16b}, [x25] - b 8b -2: cbz x25, 8b - st1 {v4.16b}, [x25] - b 8b -3: cbz x25, 8b - st1 {v6.16b}, [x25] - b 8b -4: cbz x25, 8b - st1 {v3.16b}, [x25] - b 8b -5: cbz x25, 8b - st1 {v7.16b}, [x25] - b 8b -6: cbz x25, 8b - st1 {v2.16b}, [x25] - b 8b -7: cbz x25, 8b - st1 {v5.16b}, [x25] - b 8b -ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S deleted file mode 100644 index 706c4e10e9e294c7c5de49dbbe7a784ec7ca1458..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/chacha-neon-core.S +++ /dev/null @@ -1,860 +0,0 @@ -/* - * ChaCha/XChaCha NEON helper functions - * - * Copyright (C) 2016-2018 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Originally based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include - - .text - .align 6 - -/* - * chacha_permute - permute one block - * - * Permute one 64-byte block where the state matrix is stored in the four NEON - * registers v0-v3. It performs matrix operations on four words in parallel, - * but requires shuffling to rearrange the words after each round. - * - * The round count is given in w3. 
- * - * Clobbers: w3, x10, v4, v12 - */ -chacha_permute: - - adr_l x10, ROT8 - ld1 {v12.4s}, [x10] - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - tbl v3.16b, {v3.16b}, v12.16b - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - ext v1.16b, v1.16b, v1.16b, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - ext v3.16b, v3.16b, v3.16b, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - tbl v3.16b, {v3.16b}, v12.16b - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - ext v1.16b, v1.16b, v1.16b, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - ext v3.16b, v3.16b, v3.16b, #4 - - subs w3, w3, #2 - b.ne .Ldoubleround - - ret -ENDPROC(chacha_permute) - -ENTRY(chacha_block_xor_neon) - // x0: Input state matrix, s - // x1: 1 data block output, o - // x2: 1 data block input, i - // w3: nrounds - - stp x29, x30, [sp, #-16]! - mov x29, sp - - // x0..3 = s0..3 - ld1 {v0.4s-v3.4s}, [x0] - ld1 {v8.4s-v11.4s}, [x0] - - bl chacha_permute - - ld1 {v4.16b-v7.16b}, [x2] - - // o0 = i0 ^ (x0 + s0) - add v0.4s, v0.4s, v8.4s - eor v0.16b, v0.16b, v4.16b - - // o1 = i1 ^ (x1 + s1) - add v1.4s, v1.4s, v9.4s - eor v1.16b, v1.16b, v5.16b - - // o2 = i2 ^ (x2 + s2) - add v2.4s, v2.4s, v10.4s - eor v2.16b, v2.16b, v6.16b - - // o3 = i3 ^ (x3 + s3) - add v3.4s, v3.4s, v11.4s - eor v3.16b, v3.16b, v7.16b - - st1 {v0.16b-v3.16b}, [x1] - - ldp x29, x30, [sp], #16 - ret -ENDPROC(chacha_block_xor_neon) - -ENTRY(hchacha_block_neon) - // x0: Input state matrix, s - // x1: output (8 32-bit words) - // w2: nrounds - - stp x29, x30, [sp, #-16]! - mov x29, sp - - ld1 {v0.4s-v3.4s}, [x0] - - mov w3, w2 - bl chacha_permute - - st1 {v0.4s}, [x1], #16 - st1 {v3.4s}, [x1] - - ldp x29, x30, [sp], #16 - ret -ENDPROC(hchacha_block_neon) - - a0 .req w12 - a1 .req w13 - a2 .req w14 - a3 .req w15 - a4 .req w16 - a5 .req w17 - a6 .req w19 - a7 .req w20 - a8 .req w21 - a9 .req w22 - a10 .req w23 - a11 .req w24 - a12 .req w25 - a13 .req w26 - a14 .req w27 - a15 .req w28 - - .align 6 -ENTRY(chacha_4block_xor_neon) - frame_push 10 - - // x0: Input state matrix, s - // x1: 4 data blocks output, o - // x2: 4 data blocks input, i - // w3: nrounds - // x4: byte count - - adr_l x10, .Lpermute - and x5, x4, #63 - add x10, x10, x5 - add x11, x10, #64 - - // - // This function encrypts four consecutive ChaCha blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. 
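.Ldoubleround above is the standard ChaCha quarter-round applied to whole rows at once: add, xor, rotate by 16, 12, 8 and 7 bits, with the 16-bit rotate done as rev32 and the 8-bit one as a tbl byte shuffle through the ROT8 constant. The scalar definition it vectorises:

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, unsigned int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    static void chacha_quarter_round(uint32_t *a, uint32_t *b,
                                     uint32_t *c, uint32_t *d)
    {
        *a += *b; *d = rotl32(*d ^ *a, 16);
        *c += *d; *b = rotl32(*b ^ *c, 12);
        *a += *b; *d = rotl32(*d ^ *a, 8);
        *c += *d; *b = rotl32(*b ^ *c, 7);
    }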
For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. - // - // At the same time, a fifth block is encrypted in parallel using - // scalar registers - // - adr_l x9, CTRINC // ... and ROT8 - ld1 {v30.4s-v31.4s}, [x9] - - // x0..15[0-3] = s0..3[0..3] - add x8, x0, #16 - ld4r { v0.4s- v3.4s}, [x0] - ld4r { v4.4s- v7.4s}, [x8], #16 - ld4r { v8.4s-v11.4s}, [x8], #16 - ld4r {v12.4s-v15.4s}, [x8] - - mov a0, v0.s[0] - mov a1, v1.s[0] - mov a2, v2.s[0] - mov a3, v3.s[0] - mov a4, v4.s[0] - mov a5, v5.s[0] - mov a6, v6.s[0] - mov a7, v7.s[0] - mov a8, v8.s[0] - mov a9, v9.s[0] - mov a10, v10.s[0] - mov a11, v11.s[0] - mov a12, v12.s[0] - mov a13, v13.s[0] - mov a14, v14.s[0] - mov a15, v15.s[0] - - // x12 += counter values 1-4 - add v12.4s, v12.4s, v30.4s - -.Ldoubleround4: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - add v0.4s, v0.4s, v4.4s - add a0, a0, a4 - add v1.4s, v1.4s, v5.4s - add a1, a1, a5 - add v2.4s, v2.4s, v6.4s - add a2, a2, a6 - add v3.4s, v3.4s, v7.4s - add a3, a3, a7 - - eor v12.16b, v12.16b, v0.16b - eor a12, a12, a0 - eor v13.16b, v13.16b, v1.16b - eor a13, a13, a1 - eor v14.16b, v14.16b, v2.16b - eor a14, a14, a2 - eor v15.16b, v15.16b, v3.16b - eor a15, a15, a3 - - rev32 v12.8h, v12.8h - ror a12, a12, #16 - rev32 v13.8h, v13.8h - ror a13, a13, #16 - rev32 v14.8h, v14.8h - ror a14, a14, #16 - rev32 v15.8h, v15.8h - ror a15, a15, #16 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - add v8.4s, v8.4s, v12.4s - add a8, a8, a12 - add v9.4s, v9.4s, v13.4s - add a9, a9, a13 - add v10.4s, v10.4s, v14.4s - add a10, a10, a14 - add v11.4s, v11.4s, v15.4s - add a11, a11, a15 - - eor v16.16b, v4.16b, v8.16b - eor a4, a4, a8 - eor v17.16b, v5.16b, v9.16b - eor a5, a5, a9 - eor v18.16b, v6.16b, v10.16b - eor a6, a6, a10 - eor v19.16b, v7.16b, v11.16b - eor a7, a7, a11 - - shl v4.4s, v16.4s, #12 - shl v5.4s, v17.4s, #12 - shl v6.4s, v18.4s, #12 - shl v7.4s, v19.4s, #12 - - sri v4.4s, v16.4s, #20 - ror a4, a4, #20 - sri v5.4s, v17.4s, #20 - ror a5, a5, #20 - sri v6.4s, v18.4s, #20 - ror a6, a6, #20 - sri v7.4s, v19.4s, #20 - ror a7, a7, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - add v0.4s, v0.4s, v4.4s - add a0, a0, a4 - add v1.4s, v1.4s, v5.4s - add a1, a1, a5 - add v2.4s, v2.4s, v6.4s - add a2, a2, a6 - add v3.4s, v3.4s, v7.4s - add a3, a3, a7 - - eor v12.16b, v12.16b, v0.16b - eor a12, a12, a0 - eor v13.16b, v13.16b, v1.16b - eor a13, a13, a1 - eor v14.16b, v14.16b, v2.16b - eor a14, a14, a2 - eor v15.16b, v15.16b, v3.16b - eor a15, a15, a3 - - tbl v12.16b, {v12.16b}, v31.16b - ror a12, a12, #24 - tbl v13.16b, {v13.16b}, v31.16b - ror a13, a13, #24 - tbl v14.16b, {v14.16b}, v31.16b - ror a14, a14, #24 - tbl v15.16b, {v15.16b}, v31.16b - ror a15, a15, #24 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - add v8.4s, v8.4s, v12.4s - add a8, a8, a12 - add v9.4s, v9.4s, v13.4s - add a9, a9, a13 - add v10.4s, v10.4s, v14.4s - add a10, a10, a14 - add v11.4s, v11.4s, v15.4s - add a11, a11, a15 - - eor v16.16b, v4.16b, v8.16b 
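The four-block kernel above uses the other classic SIMD layout: one vector register per state word with one lane per block, so the quarter-round needs no in-register shuffles and only the counter word differs across lanes, while a fifth block runs in the a0-a15 scalar aliases to keep both issue pipes busy. A sketch of that layout, assuming GCC/Clang vector extensions (the struct and function names are illustrative):

    #include <stdint.h>

    typedef uint32_t v4u32 __attribute__((vector_size(16)));

    struct chacha4 {
        v4u32 x[16];   /* x[i][lane] = word i of block 'lane' */
    };

    static void chacha4_init(struct chacha4 *st, const uint32_t s[16])
    {
        for (int i = 0; i < 16; i++)
            st->x[i] = (v4u32){ s[i], s[i], s[i], s[i] };
        /* consecutive per-lane counters; the code above offsets its
         * lanes by 1..4, since the scalar block takes counter + 0 */
        st->x[12] += (v4u32){ 0, 1, 2, 3 };
    }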
- eor a4, a4, a8 - eor v17.16b, v5.16b, v9.16b - eor a5, a5, a9 - eor v18.16b, v6.16b, v10.16b - eor a6, a6, a10 - eor v19.16b, v7.16b, v11.16b - eor a7, a7, a11 - - shl v4.4s, v16.4s, #7 - shl v5.4s, v17.4s, #7 - shl v6.4s, v18.4s, #7 - shl v7.4s, v19.4s, #7 - - sri v4.4s, v16.4s, #25 - ror a4, a4, #25 - sri v5.4s, v17.4s, #25 - ror a5, a5, #25 - sri v6.4s, v18.4s, #25 - ror a6, a6, #25 - sri v7.4s, v19.4s, #25 - ror a7, a7, #25 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - add v0.4s, v0.4s, v5.4s - add a0, a0, a5 - add v1.4s, v1.4s, v6.4s - add a1, a1, a6 - add v2.4s, v2.4s, v7.4s - add a2, a2, a7 - add v3.4s, v3.4s, v4.4s - add a3, a3, a4 - - eor v15.16b, v15.16b, v0.16b - eor a15, a15, a0 - eor v12.16b, v12.16b, v1.16b - eor a12, a12, a1 - eor v13.16b, v13.16b, v2.16b - eor a13, a13, a2 - eor v14.16b, v14.16b, v3.16b - eor a14, a14, a3 - - rev32 v15.8h, v15.8h - ror a15, a15, #16 - rev32 v12.8h, v12.8h - ror a12, a12, #16 - rev32 v13.8h, v13.8h - ror a13, a13, #16 - rev32 v14.8h, v14.8h - ror a14, a14, #16 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - add v10.4s, v10.4s, v15.4s - add a10, a10, a15 - add v11.4s, v11.4s, v12.4s - add a11, a11, a12 - add v8.4s, v8.4s, v13.4s - add a8, a8, a13 - add v9.4s, v9.4s, v14.4s - add a9, a9, a14 - - eor v16.16b, v5.16b, v10.16b - eor a5, a5, a10 - eor v17.16b, v6.16b, v11.16b - eor a6, a6, a11 - eor v18.16b, v7.16b, v8.16b - eor a7, a7, a8 - eor v19.16b, v4.16b, v9.16b - eor a4, a4, a9 - - shl v5.4s, v16.4s, #12 - shl v6.4s, v17.4s, #12 - shl v7.4s, v18.4s, #12 - shl v4.4s, v19.4s, #12 - - sri v5.4s, v16.4s, #20 - ror a5, a5, #20 - sri v6.4s, v17.4s, #20 - ror a6, a6, #20 - sri v7.4s, v18.4s, #20 - ror a7, a7, #20 - sri v4.4s, v19.4s, #20 - ror a4, a4, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - add v0.4s, v0.4s, v5.4s - add a0, a0, a5 - add v1.4s, v1.4s, v6.4s - add a1, a1, a6 - add v2.4s, v2.4s, v7.4s - add a2, a2, a7 - add v3.4s, v3.4s, v4.4s - add a3, a3, a4 - - eor v15.16b, v15.16b, v0.16b - eor a15, a15, a0 - eor v12.16b, v12.16b, v1.16b - eor a12, a12, a1 - eor v13.16b, v13.16b, v2.16b - eor a13, a13, a2 - eor v14.16b, v14.16b, v3.16b - eor a14, a14, a3 - - tbl v15.16b, {v15.16b}, v31.16b - ror a15, a15, #24 - tbl v12.16b, {v12.16b}, v31.16b - ror a12, a12, #24 - tbl v13.16b, {v13.16b}, v31.16b - ror a13, a13, #24 - tbl v14.16b, {v14.16b}, v31.16b - ror a14, a14, #24 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - add v10.4s, v10.4s, v15.4s - add a10, a10, a15 - add v11.4s, v11.4s, v12.4s - add a11, a11, a12 - add v8.4s, v8.4s, v13.4s - add a8, a8, a13 - add v9.4s, v9.4s, v14.4s - add a9, a9, a14 - - eor v16.16b, v5.16b, v10.16b - eor a5, a5, a10 - eor v17.16b, v6.16b, v11.16b - eor a6, a6, a11 - eor v18.16b, v7.16b, v8.16b - eor a7, a7, a8 - eor v19.16b, v4.16b, v9.16b - eor a4, a4, a9 - - shl v5.4s, v16.4s, #7 - shl v6.4s, v17.4s, #7 - shl v7.4s, v18.4s, #7 - shl v4.4s, v19.4s, #7 - - sri v5.4s, v16.4s, #25 - ror a5, a5, #25 - sri v6.4s, v17.4s, #25 - ror a6, a6, #25 - sri v7.4s, v18.4s, #25 - ror a7, a7, #25 - sri v4.4s, v19.4s, #25 - ror a4, a4, 
#25 - - subs w3, w3, #2 - b.ne .Ldoubleround4 - - ld4r {v16.4s-v19.4s}, [x0], #16 - ld4r {v20.4s-v23.4s}, [x0], #16 - - // x12 += counter values 0-3 - add v12.4s, v12.4s, v30.4s - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] - add v0.4s, v0.4s, v16.4s - mov w6, v16.s[0] - mov w7, v17.s[0] - add v1.4s, v1.4s, v17.4s - mov w8, v18.s[0] - mov w9, v19.s[0] - add v2.4s, v2.4s, v18.4s - add a0, a0, w6 - add a1, a1, w7 - add v3.4s, v3.4s, v19.4s - add a2, a2, w8 - add a3, a3, w9 -CPU_BE( rev a0, a0 ) -CPU_BE( rev a1, a1 ) -CPU_BE( rev a2, a2 ) -CPU_BE( rev a3, a3 ) - - ld4r {v24.4s-v27.4s}, [x0], #16 - ld4r {v28.4s-v31.4s}, [x0] - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - add v4.4s, v4.4s, v20.4s - mov w6, v20.s[0] - mov w7, v21.s[0] - add v5.4s, v5.4s, v21.4s - mov w8, v22.s[0] - mov w9, v23.s[0] - add v6.4s, v6.4s, v22.4s - add a4, a4, w6 - add a5, a5, w7 - add v7.4s, v7.4s, v23.4s - add a6, a6, w8 - add a7, a7, w9 -CPU_BE( rev a4, a4 ) -CPU_BE( rev a5, a5 ) -CPU_BE( rev a6, a6 ) -CPU_BE( rev a7, a7 ) - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - add v8.4s, v8.4s, v24.4s - mov w6, v24.s[0] - mov w7, v25.s[0] - add v9.4s, v9.4s, v25.4s - mov w8, v26.s[0] - mov w9, v27.s[0] - add v10.4s, v10.4s, v26.4s - add a8, a8, w6 - add a9, a9, w7 - add v11.4s, v11.4s, v27.4s - add a10, a10, w8 - add a11, a11, w9 -CPU_BE( rev a8, a8 ) -CPU_BE( rev a9, a9 ) -CPU_BE( rev a10, a10 ) -CPU_BE( rev a11, a11 ) - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - add v12.4s, v12.4s, v28.4s - mov w6, v28.s[0] - mov w7, v29.s[0] - add v13.4s, v13.4s, v29.4s - mov w8, v30.s[0] - mov w9, v31.s[0] - add v14.4s, v14.4s, v30.4s - add a12, a12, w6 - add a13, a13, w7 - add v15.4s, v15.4s, v31.4s - add a14, a14, w8 - add a15, a15, w9 -CPU_BE( rev a12, a12 ) -CPU_BE( rev a13, a13 ) -CPU_BE( rev a14, a14 ) -CPU_BE( rev a15, a15 ) - - // interleave 32-bit words in state n, n+1 - ldp w6, w7, [x2], #64 - zip1 v16.4s, v0.4s, v1.4s - ldp w8, w9, [x2, #-56] - eor a0, a0, w6 - zip2 v17.4s, v0.4s, v1.4s - eor a1, a1, w7 - zip1 v18.4s, v2.4s, v3.4s - eor a2, a2, w8 - zip2 v19.4s, v2.4s, v3.4s - eor a3, a3, w9 - ldp w6, w7, [x2, #-48] - zip1 v20.4s, v4.4s, v5.4s - ldp w8, w9, [x2, #-40] - eor a4, a4, w6 - zip2 v21.4s, v4.4s, v5.4s - eor a5, a5, w7 - zip1 v22.4s, v6.4s, v7.4s - eor a6, a6, w8 - zip2 v23.4s, v6.4s, v7.4s - eor a7, a7, w9 - ldp w6, w7, [x2, #-32] - zip1 v24.4s, v8.4s, v9.4s - ldp w8, w9, [x2, #-24] - eor a8, a8, w6 - zip2 v25.4s, v8.4s, v9.4s - eor a9, a9, w7 - zip1 v26.4s, v10.4s, v11.4s - eor a10, a10, w8 - zip2 v27.4s, v10.4s, v11.4s - eor a11, a11, w9 - ldp w6, w7, [x2, #-16] - zip1 v28.4s, v12.4s, v13.4s - ldp w8, w9, [x2, #-8] - eor a12, a12, w6 - zip2 v29.4s, v12.4s, v13.4s - eor a13, a13, w7 - zip1 v30.4s, v14.4s, v15.4s - eor a14, a14, w8 - zip2 v31.4s, v14.4s, v15.4s - eor a15, a15, w9 - - mov x3, #64 - subs x5, x4, #128 - add x6, x5, x2 - csel x3, x3, xzr, ge - csel x2, x2, x6, ge - - // interleave 64-bit words in state n, n+2 - zip1 v0.2d, v16.2d, v18.2d - zip2 v4.2d, v16.2d, v18.2d - stp a0, a1, [x1], #64 - zip1 v8.2d, v17.2d, v19.2d - zip2 v12.2d, v17.2d, v19.2d - stp a2, a3, [x1, #-56] - ld1 {v16.16b-v19.16b}, [x2], x3 - - subs x6, x4, #192 - ccmp x3, xzr, #4, lt - add x7, x6, x2 - csel x3, x3, xzr, eq - csel x2, x2, x7, eq - - zip1 v1.2d, v20.2d, v22.2d - zip2 v5.2d, v20.2d, v22.2d - stp a4, a5, [x1, #-48] - zip1 v9.2d, v21.2d, 
v23.2d - zip2 v13.2d, v21.2d, v23.2d - stp a6, a7, [x1, #-40] - ld1 {v20.16b-v23.16b}, [x2], x3 - - subs x7, x4, #256 - ccmp x3, xzr, #4, lt - add x8, x7, x2 - csel x3, x3, xzr, eq - csel x2, x2, x8, eq - - zip1 v2.2d, v24.2d, v26.2d - zip2 v6.2d, v24.2d, v26.2d - stp a8, a9, [x1, #-32] - zip1 v10.2d, v25.2d, v27.2d - zip2 v14.2d, v25.2d, v27.2d - stp a10, a11, [x1, #-24] - ld1 {v24.16b-v27.16b}, [x2], x3 - - subs x8, x4, #320 - ccmp x3, xzr, #4, lt - add x9, x8, x2 - csel x2, x2, x9, eq - - zip1 v3.2d, v28.2d, v30.2d - zip2 v7.2d, v28.2d, v30.2d - stp a12, a13, [x1, #-16] - zip1 v11.2d, v29.2d, v31.2d - zip2 v15.2d, v29.2d, v31.2d - stp a14, a15, [x1, #-8] - ld1 {v28.16b-v31.16b}, [x2] - - // xor with corresponding input, write to output - tbnz x5, #63, 0f - eor v16.16b, v16.16b, v0.16b - eor v17.16b, v17.16b, v1.16b - eor v18.16b, v18.16b, v2.16b - eor v19.16b, v19.16b, v3.16b - st1 {v16.16b-v19.16b}, [x1], #64 - cbz x5, .Lout - - tbnz x6, #63, 1f - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1], #64 - cbz x6, .Lout - - tbnz x7, #63, 2f - eor v24.16b, v24.16b, v8.16b - eor v25.16b, v25.16b, v9.16b - eor v26.16b, v26.16b, v10.16b - eor v27.16b, v27.16b, v11.16b - st1 {v24.16b-v27.16b}, [x1], #64 - cbz x7, .Lout - - tbnz x8, #63, 3f - eor v28.16b, v28.16b, v12.16b - eor v29.16b, v29.16b, v13.16b - eor v30.16b, v30.16b, v14.16b - eor v31.16b, v31.16b, v15.16b - st1 {v28.16b-v31.16b}, [x1] - -.Lout: frame_pop - ret - - // fewer than 128 bytes of in/output -0: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - sub x2, x1, #64 - add x1, x1, x5 - ld1 {v16.16b-v19.16b}, [x2] - tbl v4.16b, {v0.16b-v3.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v5.16b, {v0.16b-v3.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v6.16b, {v0.16b-v3.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v7.16b, {v0.16b-v3.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - - // fewer than 192 bytes of in/output -1: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - add x1, x1, x6 - tbl v0.16b, {v4.16b-v7.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v1.16b, {v4.16b-v7.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v2.16b, {v4.16b-v7.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v3.16b, {v4.16b-v7.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v0.16b - eor v21.16b, v21.16b, v1.16b - eor v22.16b, v22.16b, v2.16b - eor v23.16b, v23.16b, v3.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - - // fewer than 256 bytes of in/output -2: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x7 - tbl v0.16b, {v8.16b-v11.16b}, v4.16b - tbx v24.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v8.16b-v11.16b}, v4.16b - tbx v25.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, 
v6.16b - tbl v2.16b, {v8.16b-v11.16b}, v4.16b - tbx v26.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v8.16b-v11.16b}, v4.16b - tbx v27.16b, {v20.16b-v23.16b}, v5.16b - - eor v24.16b, v24.16b, v0.16b - eor v25.16b, v25.16b, v1.16b - eor v26.16b, v26.16b, v2.16b - eor v27.16b, v27.16b, v3.16b - st1 {v24.16b-v27.16b}, [x1] - b .Lout - - // fewer than 320 bytes of in/output -3: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x8 - tbl v0.16b, {v12.16b-v15.16b}, v4.16b - tbx v28.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v12.16b-v15.16b}, v4.16b - tbx v29.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v12.16b-v15.16b}, v4.16b - tbx v30.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v12.16b-v15.16b}, v4.16b - tbx v31.16b, {v24.16b-v27.16b}, v5.16b - - eor v28.16b, v28.16b, v0.16b - eor v29.16b, v29.16b, v1.16b - eor v30.16b, v30.16b, v2.16b - eor v31.16b, v31.16b, v3.16b - st1 {v28.16b-v31.16b}, [x1] - b .Lout -ENDPROC(chacha_4block_xor_neon) - - .section ".rodata", "a", %progbits - .align L1_CACHE_SHIFT -.Lpermute: - .set .Li, 0 - .rept 192 - .byte (.Li - 64) - .set .Li, .Li + 1 - .endr - -CTRINC: .word 1, 2, 3, 4 -ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S deleted file mode 100644 index e545b42e6a468aa296f8428aaabedc01156b9395..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ /dev/null @@ -1,536 +0,0 @@ -// -// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions -// -// Copyright (C) 2016 Linaro Ltd -// Copyright (C) 2019 Google LLC -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 2 as -// published by the Free Software Foundation. -// - -// Derived from the x86 version: -// -// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -// -// Copyright (c) 2013, Intel Corporation -// -// Authors: -// Erdinc Ozturk -// Vinodh Gopal -// James Guilford -// Tim Chen -// -// This software is available to you under a choice of one of two -// licenses. You may choose to be licensed under the terms of the GNU -// General Public License (GPL) Version 2, available from the file -// COPYING in the main directory of this source tree, or the -// OpenIB.org BSD license below: -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// * Neither the name of the Intel Corporation nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
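All of the rotl32 comments threaded through the ChaCha rounds above describe the same scalar primitive; a minimal C sketch of that quarter-round (helper names here are illustrative, not from the kernel):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t v, int n)
    {
            return (v << n) | (v >> (32 - n));
    }

    /*
     * One ChaCha quarter-round, matching the "x0 += x5, x15 =
     * rotl32(x15 ^ x0, 16)" style comments above. The NEON code runs
     * four blocks of these in parallel while the scalar a0-a15
     * registers carry a fifth block.
     */
    static void chacha_quarter_round(uint32_t *a, uint32_t *b,
                                     uint32_t *c, uint32_t *d)
    {
            *a += *b; *d = rotl32(*d ^ *a, 16);
            *c += *d; *b = rotl32(*b ^ *c, 12);
            *a += *b; *d = rotl32(*d ^ *a, 8);
            *c += *d; *b = rotl32(*b ^ *c, 7);
    }

On the vector side the rotate amounts map to different tricks: 16 is a rev32 of 16-bit lanes, 8 is a tbl byte shuffle through the ROT8 mask above, and 12 and 7 fall back to shl/sri pairs.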
-// -// -// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Reference paper titled "Fast CRC Computation for Generic -// Polynomials Using PCLMULQDQ Instruction" -// URL: http://www.intel.com/content/dam/www/public/us/en/documents -// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -// - -#include -#include - - .text - .cpu generic+crypto - - init_crc .req w19 - buf .req x20 - len .req x21 - fold_consts_ptr .req x22 - - fold_consts .req v10 - - ad .req v14 - - k00_16 .req v15 - k32_48 .req v16 - - t3 .req v17 - t4 .req v18 - t5 .req v19 - t6 .req v20 - t7 .req v21 - t8 .req v22 - t9 .req v23 - - perm1 .req v24 - perm2 .req v25 - perm3 .req v26 - perm4 .req v27 - - bd1 .req v28 - bd2 .req v29 - bd3 .req v30 - bd4 .req v31 - - .macro __pmull_init_p64 - .endm - - .macro __pmull_pre_p64, bd - .endm - - .macro __pmull_init_p8 - // k00_16 := 0x0000000000000000_000000000000ffff - // k32_48 := 0x00000000ffffffff_0000ffffffffffff - movi k32_48.2d, #0xffffffff - mov k32_48.h[2], k32_48.h[0] - ushr k00_16.2d, k32_48.2d, #32 - - // prepare the permutation vectors - mov_q x5, 0x080f0e0d0c0b0a09 - movi perm4.8b, #8 - dup perm1.2d, x5 - eor perm1.16b, perm1.16b, perm4.16b - ushr perm2.2d, perm1.2d, #8 - ushr perm3.2d, perm1.2d, #16 - ushr perm4.2d, perm1.2d, #24 - sli perm2.2d, perm1.2d, #56 - sli perm3.2d, perm1.2d, #48 - sli perm4.2d, perm1.2d, #40 - .endm - - .macro __pmull_pre_p8, bd - tbl bd1.16b, {\bd\().16b}, perm1.16b - tbl bd2.16b, {\bd\().16b}, perm2.16b - tbl bd3.16b, {\bd\().16b}, perm3.16b - tbl bd4.16b, {\bd\().16b}, perm4.16b - .endm - -__pmull_p8_core: -.L__pmull_p8_core: - ext t4.8b, ad.8b, ad.8b, #1 // A1 - ext t5.8b, ad.8b, ad.8b, #2 // A2 - ext t6.8b, ad.8b, ad.8b, #3 // A3 - - pmull t4.8h, t4.8b, fold_consts.8b // F = A1*B - pmull t8.8h, ad.8b, bd1.8b // E = A*B1 - pmull t5.8h, t5.8b, fold_consts.8b // H = A2*B - pmull t7.8h, ad.8b, bd2.8b // G = A*B2 - pmull t6.8h, t6.8b, fold_consts.8b // J = A3*B - pmull t9.8h, ad.8b, bd3.8b // I = A*B3 - pmull t3.8h, ad.8b, bd4.8b // K = A*B4 - b 0f - -.L__pmull_p8_core2: - tbl t4.16b, {ad.16b}, perm1.16b // A1 - tbl t5.16b, {ad.16b}, perm2.16b // A2 - tbl t6.16b, {ad.16b}, perm3.16b // A3 - - pmull2 t4.8h, t4.16b, fold_consts.16b // F = A1*B - pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1 - pmull2 t5.8h, t5.16b, fold_consts.16b // H = A2*B - pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2 - pmull2 t6.8h, t6.16b, fold_consts.16b // J = A3*B - pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3 - pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4 - -0: eor t4.16b, t4.16b, t8.16b // L = E + F - eor t5.16b, t5.16b, t7.16b // M = G + H - eor t6.16b, t6.16b, t9.16b // N = I + J - - uzp1 t8.2d, t4.2d, t5.2d - uzp2 t4.2d, t4.2d, t5.2d - uzp1 t7.2d, t6.2d, t3.2d - uzp2 t6.2d, t6.2d, t3.2d - - // t4 = (L) (P0 + P1) << 8 - // t5 = (M) (P2 
+ P3) << 16 - eor t8.16b, t8.16b, t4.16b - and t4.16b, t4.16b, k32_48.16b - - // t6 = (N) (P4 + P5) << 24 - // t7 = (K) (P6 + P7) << 32 - eor t7.16b, t7.16b, t6.16b - and t6.16b, t6.16b, k00_16.16b - - eor t8.16b, t8.16b, t4.16b - eor t7.16b, t7.16b, t6.16b - - zip2 t5.2d, t8.2d, t4.2d - zip1 t4.2d, t8.2d, t4.2d - zip2 t3.2d, t7.2d, t6.2d - zip1 t6.2d, t7.2d, t6.2d - - ext t4.16b, t4.16b, t4.16b, #15 - ext t5.16b, t5.16b, t5.16b, #14 - ext t6.16b, t6.16b, t6.16b, #13 - ext t3.16b, t3.16b, t3.16b, #12 - - eor t4.16b, t4.16b, t5.16b - eor t6.16b, t6.16b, t3.16b - ret -ENDPROC(__pmull_p8_core) - - .macro __pmull_p8, rq, ad, bd, i - .ifnc \bd, fold_consts - .err - .endif - mov ad.16b, \ad\().16b - .ifb \i - pmull \rq\().8h, \ad\().8b, \bd\().8b // D = A*B - .else - pmull2 \rq\().8h, \ad\().16b, \bd\().16b // D = A*B - .endif - - bl .L__pmull_p8_core\i - - eor \rq\().16b, \rq\().16b, t4.16b - eor \rq\().16b, \rq\().16b, t6.16b - .endm - - // Fold reg1, reg2 into the next 32 data bytes, storing the result back - // into reg1, reg2. - .macro fold_32_bytes, p, reg1, reg2 - ldp q11, q12, [buf], #0x20 - - __pmull_\p v8, \reg1, fold_consts, 2 - __pmull_\p \reg1, \reg1, fold_consts - -CPU_LE( rev64 v11.16b, v11.16b ) -CPU_LE( rev64 v12.16b, v12.16b ) - - __pmull_\p v9, \reg2, fold_consts, 2 - __pmull_\p \reg2, \reg2, fold_consts - -CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 ) -CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) - - eor \reg1\().16b, \reg1\().16b, v8.16b - eor \reg2\().16b, \reg2\().16b, v9.16b - eor \reg1\().16b, \reg1\().16b, v11.16b - eor \reg2\().16b, \reg2\().16b, v12.16b - .endm - - // Fold src_reg into dst_reg, optionally loading the next fold constants - .macro fold_16_bytes, p, src_reg, dst_reg, load_next_consts - __pmull_\p v8, \src_reg, fold_consts - __pmull_\p \src_reg, \src_reg, fold_consts, 2 - .ifnb \load_next_consts - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - .endif - eor \dst_reg\().16b, \dst_reg\().16b, v8.16b - eor \dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b - .endm - - .macro __pmull_p64, rd, rn, rm, n - .ifb \n - pmull \rd\().1q, \rn\().1d, \rm\().1d - .else - pmull2 \rd\().1q, \rn\().2d, \rm\().2d - .endif - .endm - - .macro crc_t10dif_pmull, p - frame_push 4, 128 - - mov init_crc, w0 - mov buf, x1 - mov len, x2 - - __pmull_init_\p - - // For sizes less than 256 bytes, we can't fold 128 bytes at a time. - cmp len, #256 - b.lt .Lless_than_256_bytes_\@ - - adr_l fold_consts_ptr, .Lfold_across_128_bytes_consts - - // Load the first 128 data bytes. Byte swapping is necessary to make - // the bit order match the polynomial coefficient order. - ldp q0, q1, [buf] - ldp q2, q3, [buf, #0x20] - ldp q4, q5, [buf, #0x40] - ldp q6, q7, [buf, #0x60] - add buf, buf, #0x80 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( rev64 v1.16b, v1.16b ) -CPU_LE( rev64 v2.16b, v2.16b ) -CPU_LE( rev64 v3.16b, v3.16b ) -CPU_LE( rev64 v4.16b, v4.16b ) -CPU_LE( rev64 v5.16b, v5.16b ) -CPU_LE( rev64 v6.16b, v6.16b ) -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) -CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) -CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 ) -CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 ) -CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 ) -CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 ) -CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) - - // XOR the first 16 data *bits* with the initial CRC value. - movi v8.16b, #0 - mov v8.h[7], init_crc - eor v0.16b, v0.16b, v8.16b - - // Load the constants for folding across 128 bytes. 
- ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_pre_\p fold_consts - - // Subtract 128 for the 128 data bytes just consumed. Subtract another - // 128 to simplify the termination condition of the following loop. - sub len, len, #256 - - // While >= 128 data bytes remain (not counting v0-v7), fold the 128 - // bytes v0-v7 into them, storing the result back into v0-v7. -.Lfold_128_bytes_loop_\@: - fold_32_bytes \p, v0, v1 - fold_32_bytes \p, v2, v3 - fold_32_bytes \p, v4, v5 - fold_32_bytes \p, v6, v7 - - subs len, len, #128 - b.lt .Lfold_128_bytes_loop_done_\@ - - if_will_cond_yield_neon - stp q0, q1, [sp, #.Lframe_local_offset] - stp q2, q3, [sp, #.Lframe_local_offset + 32] - stp q4, q5, [sp, #.Lframe_local_offset + 64] - stp q6, q7, [sp, #.Lframe_local_offset + 96] - do_cond_yield_neon - ldp q0, q1, [sp, #.Lframe_local_offset] - ldp q2, q3, [sp, #.Lframe_local_offset + 32] - ldp q4, q5, [sp, #.Lframe_local_offset + 64] - ldp q6, q7, [sp, #.Lframe_local_offset + 96] - ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_init_\p - __pmull_pre_\p fold_consts - endif_yield_neon - - b .Lfold_128_bytes_loop_\@ - -.Lfold_128_bytes_loop_done_\@: - - // Now fold the 112 bytes in v0-v6 into the 16 bytes in v7. - - // Fold across 64 bytes. - add fold_consts_ptr, fold_consts_ptr, #16 - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - fold_16_bytes \p, v0, v4 - fold_16_bytes \p, v1, v5 - fold_16_bytes \p, v2, v6 - fold_16_bytes \p, v3, v7, 1 - // Fold across 32 bytes. - fold_16_bytes \p, v4, v6 - fold_16_bytes \p, v5, v7, 1 - // Fold across 16 bytes. - fold_16_bytes \p, v6, v7 - - // Add 128 to get the correct number of data bytes remaining in 0...127 - // (not counting v7), following the previous extra subtraction by 128. - // Then subtract 16 to simplify the termination condition of the - // following loop. - adds len, len, #(128-16) - - // While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7 - // into them, storing the result back into v7. - b.lt .Lfold_16_bytes_loop_done_\@ -.Lfold_16_bytes_loop_\@: - __pmull_\p v8, v7, fold_consts - __pmull_\p v7, v7, fold_consts, 2 - eor v7.16b, v7.16b, v8.16b - ldr q0, [buf], #16 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) - eor v7.16b, v7.16b, v0.16b - subs len, len, #16 - b.ge .Lfold_16_bytes_loop_\@ - -.Lfold_16_bytes_loop_done_\@: - // Add 16 to get the correct number of data bytes remaining in 0...15 - // (not counting v7), following the previous extra subtraction by 16. - adds len, len, #16 - b.eq .Lreduce_final_16_bytes_\@ - -.Lhandle_partial_segment_\@: - // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first - // 16 bytes are in v7 and the rest are the remaining data in 'buf'. To - // do this without needing a fold constant for each possible 'len', - // redivide the bytes into a first chunk of 'len' bytes and a second - // chunk of 16 bytes, then fold the first chunk into the second. - - // v0 = last 16 original data bytes - add buf, buf, len - ldr q0, [buf, #-16] -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) - - // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes. - adr_l x4, .Lbyteshift_table + 16 - sub x4, x4, len - ld1 {v2.16b}, [x4] - tbl v1.16b, {v7.16b}, v2.16b - - // v3 = first chunk: v7 right-shifted by '16-len' bytes. - movi v3.16b, #0x80 - eor v2.16b, v2.16b, v3.16b - tbl v3.16b, {v7.16b}, v2.16b - - // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes. 
- sshr v2.16b, v2.16b, #7 - - // v2 = second chunk: 'len' bytes from v0 (low-order bytes), - // then '16-len' bytes from v1 (high-order bytes). - bsl v2.16b, v1.16b, v0.16b - - // Fold the first chunk into the second chunk, storing the result in v7. - __pmull_\p v0, v3, fold_consts - __pmull_\p v7, v3, fold_consts, 2 - eor v7.16b, v7.16b, v0.16b - eor v7.16b, v7.16b, v2.16b - -.Lreduce_final_16_bytes_\@: - // Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC. - - movi v2.16b, #0 // init zero register - - // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - - // Fold the high 64 bits into the low 64 bits, while also multiplying by - // x^64. This produces a 128-bit value congruent to x^64 * M(x) and - // whose low 48 bits are 0. - ext v0.16b, v2.16b, v7.16b, #8 - __pmull_\p v7, v7, fold_consts, 2 // high bits * x^48 * (x^80 mod G(x)) - eor v0.16b, v0.16b, v7.16b // + low bits * x^64 - - // Fold the high 32 bits into the low 96 bits. This produces a 96-bit - // value congruent to x^64 * M(x) and whose low 48 bits are 0. - ext v1.16b, v0.16b, v2.16b, #12 // extract high 32 bits - mov v0.s[3], v2.s[0] // zero high 32 bits - __pmull_\p v1, v1, fold_consts // high 32 bits * x^48 * (x^48 mod G(x)) - eor v0.16b, v0.16b, v1.16b // + low bits - - // Load G(x) and floor(x^48 / G(x)). - ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_pre_\p fold_consts - - // Use Barrett reduction to compute the final CRC value. - __pmull_\p v1, v0, fold_consts, 2 // high 32 bits * floor(x^48 / G(x)) - ushr v1.2d, v1.2d, #32 // /= x^32 - __pmull_\p v1, v1, fold_consts // *= G(x) - ushr v0.2d, v0.2d, #48 - eor v0.16b, v0.16b, v1.16b // + low 16 nonzero bits - // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0. - - umov w0, v0.h[0] - frame_pop - ret - -.Lless_than_256_bytes_\@: - // Checksumming a buffer of length 16...255 bytes - - adr_l fold_consts_ptr, .Lfold_across_16_bytes_consts - - // Load the first 16 data bytes. - ldr q7, [buf], #0x10 -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) - - // XOR the first 16 data *bits* with the initial CRC value. - movi v0.16b, #0 - mov v0.h[7], init_crc - eor v7.16b, v7.16b, v0.16b - - // Load the fold-across-16-bytes constants. - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - - cmp len, #16 - b.eq .Lreduce_final_16_bytes_\@ // len == 16 - subs len, len, #32 - b.ge .Lfold_16_bytes_loop_\@ // 32 <= len <= 255 - add len, len, #16 - b .Lhandle_partial_segment_\@ // 17 <= len <= 31 - .endm - -// -// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. -// -ENTRY(crc_t10dif_pmull_p8) - crc_t10dif_pmull p8 -ENDPROC(crc_t10dif_pmull_p8) - - .align 5 -// -// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. 
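The fold and Barrett steps above are all derived from one 16-bit polynomial, G(x) = 0x18bb7 (listed with the constants below). As a cross-check, here is a bit-serial C reference of CRC-T10DIF that the PMULL paths must agree with (a sketch, not the kernel's generic implementation):

    #include <stddef.h>
    #include <stdint.h>

    /* MSB-first CRC-T10DIF: polynomial 0x8bb7, no reflection, init 0. */
    static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf,
                                   size_t len)
    {
            for (size_t i = 0; i < len; i++) {
                    crc ^= (uint16_t)buf[i] << 8;   /* feed next byte */
                    for (int bit = 0; bit < 8; bit++)
                            crc = (crc & 0x8000)
                                    ? (uint16_t)(crc << 1) ^ 0x8bb7
                                    : (uint16_t)(crc << 1);
            }
            return crc;
    }

The same G(x) appears in the Barrett constants below, together with floor(x^48 / G(x)).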
-// -ENTRY(crc_t10dif_pmull_p64) - crc_t10dif_pmull p64 -ENDPROC(crc_t10dif_pmull_p64) - - .section ".rodata", "a" - .align 4 - -// Fold constants precomputed from the polynomial 0x18bb7 -// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 -.Lfold_across_128_bytes_consts: - .quad 0x0000000000006123 // x^(8*128) mod G(x) - .quad 0x0000000000002295 // x^(8*128+64) mod G(x) -// .Lfold_across_64_bytes_consts: - .quad 0x0000000000001069 // x^(4*128) mod G(x) - .quad 0x000000000000dd31 // x^(4*128+64) mod G(x) -// .Lfold_across_32_bytes_consts: - .quad 0x000000000000857d // x^(2*128) mod G(x) - .quad 0x0000000000007acc // x^(2*128+64) mod G(x) -.Lfold_across_16_bytes_consts: - .quad 0x000000000000a010 // x^(1*128) mod G(x) - .quad 0x0000000000001faa // x^(1*128+64) mod G(x) -// .Lfinal_fold_consts: - .quad 0x1368000000000000 // x^48 * (x^48 mod G(x)) - .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x)) -// .Lbarrett_reduction_consts: - .quad 0x0000000000018bb7 // G(x) - .quad 0x00000001f65a57f8 // floor(x^48 / G(x)) - -// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - -// len] is the index vector to shift left by 'len' bytes, and is also {0x80, -// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes. -.Lbyteshift_table: - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S deleted file mode 100644 index 410e8afcf5a7dfdd3690f8e74ea6a382ac1fa02f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/ghash-ce-core.S +++ /dev/null @@ -1,575 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated GHASH implementation with ARMv8 PMULL instructions. - * - * Copyright (C) 2014 - 2018 Linaro Ltd. 
- */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - SHASH .req v0 - SHASH2 .req v1 - T1 .req v2 - T2 .req v3 - MASK .req v4 - XL .req v5 - XM .req v6 - XH .req v7 - IN1 .req v7 - - k00_16 .req v8 - k32_48 .req v9 - - t3 .req v10 - t4 .req v11 - t5 .req v12 - t6 .req v13 - t7 .req v14 - t8 .req v15 - t9 .req v16 - - perm1 .req v17 - perm2 .req v18 - perm3 .req v19 - - sh1 .req v20 - sh2 .req v21 - sh3 .req v22 - sh4 .req v23 - - ss1 .req v24 - ss2 .req v25 - ss3 .req v26 - ss4 .req v27 - - XL2 .req v8 - XM2 .req v9 - XH2 .req v10 - XL3 .req v11 - XM3 .req v12 - XH3 .req v13 - TT3 .req v14 - TT4 .req v15 - HH .req v16 - HH3 .req v17 - HH4 .req v18 - HH34 .req v19 - - .text - .arch armv8-a+crypto - - .macro __pmull_p64, rd, rn, rm - pmull \rd\().1q, \rn\().1d, \rm\().1d - .endm - - .macro __pmull2_p64, rd, rn, rm - pmull2 \rd\().1q, \rn\().2d, \rm\().2d - .endm - - .macro __pmull_p8, rq, ad, bd - ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1 - ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2 - ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3 - - __pmull_p8_\bd \rq, \ad - .endm - - .macro __pmull2_p8, rq, ad, bd - tbl t3.16b, {\ad\().16b}, perm1.16b // A1 - tbl t5.16b, {\ad\().16b}, perm2.16b // A2 - tbl t7.16b, {\ad\().16b}, perm3.16b // A3 - - __pmull2_p8_\bd \rq, \ad - .endm - - .macro __pmull_p8_SHASH, rq, ad - __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4 - .endm - - .macro __pmull_p8_SHASH2, rq, ad - __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4 - .endm - - .macro __pmull2_p8_SHASH, rq, ad - __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4 - .endm - - .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4 - pmull\t t3.8h, t3.\nb, \bd // F = A1*B - pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1 - pmull\t t5.8h, t5.\nb, \bd // H = A2*B - pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2 - pmull\t t7.8h, t7.\nb, \bd // J = A3*B - pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3 - pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4 - pmull\t \rq\().8h, \ad, \bd // D = A*B - - eor t3.16b, t3.16b, t4.16b // L = E + F - eor t5.16b, t5.16b, t6.16b // M = G + H - eor t7.16b, t7.16b, t8.16b // N = I + J - - uzp1 t4.2d, t3.2d, t5.2d - uzp2 t3.2d, t3.2d, t5.2d - uzp1 t6.2d, t7.2d, t9.2d - uzp2 t7.2d, t7.2d, t9.2d - - // t3 = (L) (P0 + P1) << 8 - // t5 = (M) (P2 + P3) << 16 - eor t4.16b, t4.16b, t3.16b - and t3.16b, t3.16b, k32_48.16b - - // t7 = (N) (P4 + P5) << 24 - // t9 = (K) (P6 + P7) << 32 - eor t6.16b, t6.16b, t7.16b - and t7.16b, t7.16b, k00_16.16b - - eor t4.16b, t4.16b, t3.16b - eor t6.16b, t6.16b, t7.16b - - zip2 t5.2d, t4.2d, t3.2d - zip1 t3.2d, t4.2d, t3.2d - zip2 t9.2d, t6.2d, t7.2d - zip1 t7.2d, t6.2d, t7.2d - - ext t3.16b, t3.16b, t3.16b, #15 - ext t5.16b, t5.16b, t5.16b, #14 - ext t7.16b, t7.16b, t7.16b, #13 - ext t9.16b, t9.16b, t9.16b, #12 - - eor t3.16b, t3.16b, t5.16b - eor t7.16b, t7.16b, t9.16b - eor \rq\().16b, \rq\().16b, t3.16b - eor \rq\().16b, \rq\().16b, t7.16b - .endm - - .macro __pmull_pre_p64 - add x8, x3, #16 - ld1 {HH.2d-HH4.2d}, [x8] - - trn1 SHASH2.2d, SHASH.2d, HH.2d - trn2 T1.2d, SHASH.2d, HH.2d - eor SHASH2.16b, SHASH2.16b, T1.16b - - trn1 HH34.2d, HH3.2d, HH4.2d - trn2 T1.2d, HH3.2d, HH4.2d - eor HH34.16b, HH34.16b, T1.16b - - movi MASK.16b, #0xe1 - shl MASK.2d, MASK.2d, #57 - .endm - - .macro __pmull_pre_p8 - ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 - eor SHASH2.16b, SHASH2.16b, SHASH.16b - - // k00_16 := 0x0000000000000000_000000000000ffff - // k32_48 := 0x00000000ffffffff_0000ffffffffffff - movi k32_48.2d, #0xffffffff - mov k32_48.h[2], k32_48.h[0] - ushr 
k00_16.2d, k32_48.2d, #32 - - // prepare the permutation vectors - mov_q x5, 0x080f0e0d0c0b0a09 - movi T1.8b, #8 - dup perm1.2d, x5 - eor perm1.16b, perm1.16b, T1.16b - ushr perm2.2d, perm1.2d, #8 - ushr perm3.2d, perm1.2d, #16 - ushr T1.2d, perm1.2d, #24 - sli perm2.2d, perm1.2d, #56 - sli perm3.2d, perm1.2d, #48 - sli T1.2d, perm1.2d, #40 - - // precompute loop invariants - tbl sh1.16b, {SHASH.16b}, perm1.16b - tbl sh2.16b, {SHASH.16b}, perm2.16b - tbl sh3.16b, {SHASH.16b}, perm3.16b - tbl sh4.16b, {SHASH.16b}, T1.16b - ext ss1.8b, SHASH2.8b, SHASH2.8b, #1 - ext ss2.8b, SHASH2.8b, SHASH2.8b, #2 - ext ss3.8b, SHASH2.8b, SHASH2.8b, #3 - ext ss4.8b, SHASH2.8b, SHASH2.8b, #4 - .endm - - // - // PMULL (64x64->128) based reduction for CPUs that can do - // it in a single instruction. - // - .macro __pmull_reduce_p64 - pmull T2.1q, XL.1d, MASK.1d - eor XM.16b, XM.16b, T1.16b - - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] - - eor XL.16b, XM.16b, T2.16b - ext T2.16b, XL.16b, XL.16b, #8 - pmull XL.1q, XL.1d, MASK.1d - .endm - - // - // Alternative reduction for CPUs that lack support for the - // 64x64->128 PMULL instruction - // - .macro __pmull_reduce_p8 - eor XM.16b, XM.16b, T1.16b - - mov XL.d[1], XM.d[0] - mov XH.d[0], XM.d[1] - - shl T1.2d, XL.2d, #57 - shl T2.2d, XL.2d, #62 - eor T2.16b, T2.16b, T1.16b - shl T1.2d, XL.2d, #63 - eor T2.16b, T2.16b, T1.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor T2.16b, T2.16b, T1.16b - - mov XL.d[1], T2.d[0] - mov XH.d[0], T2.d[1] - - ushr T2.2d, XL.2d, #1 - eor XH.16b, XH.16b, XL.16b - eor XL.16b, XL.16b, T2.16b - ushr T2.2d, T2.2d, #6 - ushr XL.2d, XL.2d, #1 - .endm - - .macro __pmull_ghash, pn - ld1 {SHASH.2d}, [x3] - ld1 {XL.2d}, [x1] - - __pmull_pre_\pn - - /* do the head block first, if supplied */ - cbz x4, 0f - ld1 {T1.2d}, [x4] - mov x4, xzr - b 3f - -0: .ifc \pn, p64 - tbnz w0, #0, 2f // skip until #blocks is a - tbnz w0, #1, 2f // round multiple of 4 - -1: ld1 {XM3.16b-TT4.16b}, [x2], #64 - - sub w0, w0, #4 - - rev64 T1.16b, XM3.16b - rev64 T2.16b, XH3.16b - rev64 TT4.16b, TT4.16b - rev64 TT3.16b, TT3.16b - - ext IN1.16b, TT4.16b, TT4.16b, #8 - ext XL3.16b, TT3.16b, TT3.16b, #8 - - eor TT4.16b, TT4.16b, IN1.16b - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 - pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0) - - eor TT3.16b, TT3.16b, XL3.16b - pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1 - pmull XL3.1q, HH.1d, XL3.1d // a0 * b0 - pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0) - - ext IN1.16b, T2.16b, T2.16b, #8 - eor XL2.16b, XL2.16b, XL3.16b - eor XH2.16b, XH2.16b, XH3.16b - eor XM2.16b, XM2.16b, XM3.16b - - eor T2.16b, T2.16b, IN1.16b - pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1 - pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0 - pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0) - - eor XL2.16b, XL2.16b, XL3.16b - eor XH2.16b, XH2.16b, XH3.16b - eor XM2.16b, XM2.16b, XM3.16b - - ext IN1.16b, T1.16b, T1.16b, #8 - ext TT3.16b, XL.16b, XL.16b, #8 - eor XL.16b, XL.16b, IN1.16b - eor T1.16b, T1.16b, TT3.16b - - pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1 - eor T1.16b, T1.16b, XL.16b - pmull XL.1q, HH4.1d, XL.1d // a0 * b0 - pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0) - - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b - eor XM.16b, XM.16b, XM2.16b - - eor T2.16b, XL.16b, XH.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor XM.16b, XM.16b, T2.16b - - __pmull_reduce_p64 - - eor T2.16b, T2.16b, XH.16b - eor XL.16b, XL.16b, T2.16b - - cbz w0, 5f - b 1b - .endif - -2: ld1 {T1.2d}, [x2], #16 - sub w0, w0, #1 - 
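The single-block multiply at label 3 below carries the same comments as the four-block path above: a1 * b1, a0 * b0 and (a1 + a0)(b1 + b0). That is a Karatsuba split of one 128x128 carryless multiply into three 64x64 PMULLs; roughly, in C, with clmul64 as a hypothetical stand-in for PMULL:

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } u128;

    /* 64x64 -> 128 carryless multiply (what PMULL computes); assumed
     * to be provided elsewhere, e.g. via intrinsics. */
    u128 clmul64(uint64_t a, uint64_t b);

    /*
     * 256-bit carryless product of a and b as limbs r[3]:r[2]:r[1]:r[0],
     * using three multiplies instead of four. GHASH then reduces this
     * mod x^128 + x^7 + x^2 + x + 1, the polynomial encoded by the
     * 0xe1 << 57 MASK above.
     */
    static void clmul128_karatsuba(uint64_t r[4], u128 a, u128 b)
    {
            u128 hi  = clmul64(a.hi, b.hi);               /* a1 * b1 */
            u128 lo  = clmul64(a.lo, b.lo);               /* a0 * b0 */
            u128 mid = clmul64(a.hi ^ a.lo, b.hi ^ b.lo);

            /* (a1 + a0)(b1 + b0) + a1*b1 + a0*b0 == a1*b0 + a0*b1 */
            mid.lo ^= hi.lo ^ lo.lo;
            mid.hi ^= hi.hi ^ lo.hi;

            r[0] = lo.lo;
            r[1] = lo.hi ^ mid.lo;
            r[2] = hi.lo ^ mid.hi;
            r[3] = hi.hi;
    }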
-3: /* multiply XL by SHASH in GF(2^128) */ -CPU_LE( rev64 T1.16b, T1.16b ) - - ext T2.16b, XL.16b, XL.16b, #8 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b - eor XL.16b, XL.16b, IN1.16b - - __pmull2_\pn XH, XL, SHASH // a1 * b1 - eor T1.16b, T1.16b, XL.16b - __pmull_\pn XL, XL, SHASH // a0 * b0 - __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0) - -4: eor T2.16b, XL.16b, XH.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor XM.16b, XM.16b, T2.16b - - __pmull_reduce_\pn - - eor T2.16b, T2.16b, XH.16b - eor XL.16b, XL.16b, T2.16b - - cbnz w0, 0b - -5: st1 {XL.2d}, [x1] - ret - .endm - - /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) - */ -ENTRY(pmull_ghash_update_p64) - __pmull_ghash p64 -ENDPROC(pmull_ghash_update_p64) - -ENTRY(pmull_ghash_update_p8) - __pmull_ghash p8 -ENDPROC(pmull_ghash_update_p8) - - KS0 .req v12 - KS1 .req v13 - INP0 .req v14 - INP1 .req v15 - - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] - .endm - - .macro enc_round, state, key - aese \state\().16b, \key\().16b - aesmc \state\().16b, \state\().16b - .endm - - .macro enc_block, state, rounds - cmp \rounds, #12 - b.lo 2222f /* 128 bits */ - b.eq 1111f /* 192 bits */ - enc_round \state, v17 - enc_round \state, v18 -1111: enc_round \state, v19 - enc_round \state, v20 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 - enc_round \state, \key - .endr - aese \state\().16b, v30.16b - eor \state\().16b, \state\().16b, v31.16b - .endm - - .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4], #16 - ld1 {HH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter - - movi MASK.16b, #0xe1 - trn1 SHASH2.2d, SHASH.2d, HH.2d - trn2 T1.2d, SHASH.2d, HH.2d -CPU_LE( rev x8, x8 ) - shl MASK.2d, MASK.2d, #57 - eor SHASH2.16b, SHASH2.16b, T1.16b - - .if \enc == 1 - ldr x10, [sp] - ld1 {KS0.16b-KS1.16b}, [x10] - .endif - - cbnz x6, 4f - -0: ld1 {INP0.16b-INP1.16b}, [x3], #32 - - rev x9, x8 - add x11, x8, #1 - add x8, x8, #2 - - .if \enc == 1 - eor INP0.16b, INP0.16b, KS0.16b // encrypt input - eor INP1.16b, INP1.16b, KS1.16b - .endif - - ld1 {KS0.8b}, [x5] // load upper counter - rev x11, x11 - sub w0, w0, #2 - mov KS1.8b, KS0.8b - ins KS0.d[1], x9 // set lower counter - ins KS1.d[1], x11 - - rev64 T1.16b, INP1.16b - - cmp w7, #12 - b.ge 2f // AES-192/256? 
- -1: enc_round KS0, v21 - ext IN1.16b, T1.16b, T1.16b, #8 - - enc_round KS1, v21 - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 - - enc_round KS0, v22 - eor T1.16b, T1.16b, IN1.16b - - enc_round KS1, v22 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 - - enc_round KS0, v23 - pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) - - enc_round KS1, v23 - rev64 T1.16b, INP0.16b - ext T2.16b, XL.16b, XL.16b, #8 - - enc_round KS0, v24 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b - - enc_round KS1, v24 - eor XL.16b, XL.16b, IN1.16b - - enc_round KS0, v25 - eor T1.16b, T1.16b, XL.16b - - enc_round KS1, v25 - pmull2 XH.1q, HH.2d, XL.2d // a1 * b1 - - enc_round KS0, v26 - pmull XL.1q, HH.1d, XL.1d // a0 * b0 - - enc_round KS1, v26 - pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0) - - enc_round KS0, v27 - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b - - enc_round KS1, v27 - eor XM.16b, XM.16b, XM2.16b - ext T1.16b, XL.16b, XH.16b, #8 - - enc_round KS0, v28 - eor T2.16b, XL.16b, XH.16b - eor XM.16b, XM.16b, T1.16b - - enc_round KS1, v28 - eor XM.16b, XM.16b, T2.16b - - enc_round KS0, v29 - pmull T2.1q, XL.1d, MASK.1d - - enc_round KS1, v29 - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] - - aese KS0.16b, v30.16b - eor XL.16b, XM.16b, T2.16b - - aese KS1.16b, v30.16b - ext T2.16b, XL.16b, XL.16b, #8 - - eor KS0.16b, KS0.16b, v31.16b - pmull XL.1q, XL.1d, MASK.1d - eor T2.16b, T2.16b, XH.16b - - eor KS1.16b, KS1.16b, v31.16b - eor XL.16b, XL.16b, T2.16b - - .if \enc == 0 - eor INP0.16b, INP0.16b, KS0.16b - eor INP1.16b, INP1.16b, KS1.16b - .endif - - st1 {INP0.16b-INP1.16b}, [x2], #32 - - cbnz w0, 0b - -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter - - .if \enc == 1 - st1 {KS0.16b-KS1.16b}, [x10] - .endif - - ret - -2: b.eq 3f // AES-192? 
- enc_round KS0, v17 - enc_round KS1, v17 - enc_round KS0, v18 - enc_round KS1, v18 -3: enc_round KS0, v19 - enc_round KS1, v19 - enc_round KS0, v20 - enc_round KS1, v20 - b 1b - -4: load_round_keys w7, x6 - b 0b - .endm - - /* - * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds, u8 ks[]) - */ -ENTRY(pmull_gcm_encrypt) - pmull_gcm_do_crypt 1 -ENDPROC(pmull_gcm_encrypt) - - /* - * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds) - */ -ENTRY(pmull_gcm_decrypt) - pmull_gcm_do_crypt 0 -ENDPROC(pmull_gcm_decrypt) - - /* - * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds) - */ -ENTRY(pmull_gcm_encrypt_block) - cbz x2, 0f - load_round_keys w3, x2 -0: ld1 {v0.16b}, [x1] - enc_block v0, w3 - st1 {v0.16b}, [x0] - ret -ENDPROC(pmull_gcm_encrypt_block) diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S deleted file mode 100644 index e05570c38de7621658313ef401fe46e708847594..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/nh-neon-core.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NH - ε-almost-universal hash function, ARM64 NEON accelerated version - * - * Copyright 2018 Google LLC - * - * Author: Eric Biggers <ebiggers@google.com> - */ - -#include <linux/linkage.h> - - KEY .req x0 - MESSAGE .req x1 - MESSAGE_LEN .req x2 - HASH .req x3 - - PASS0_SUMS .req v0 - PASS1_SUMS .req v1 - PASS2_SUMS .req v2 - PASS3_SUMS .req v3 - K0 .req v4 - K1 .req v5 - K2 .req v6 - K3 .req v7 - T0 .req v8 - T1 .req v9 - T2 .req v10 - T3 .req v11 - T4 .req v12 - T5 .req v13 - T6 .req v14 - T7 .req v15 - -.macro _nh_stride k0, k1, k2, k3 - - // Load next message stride - ld1 {T3.16b}, [MESSAGE], #16 - - // Load next key stride - ld1 {\k3\().4s}, [KEY], #16 - - // Add message words to key words - add T0.4s, T3.4s, \k0\().4s - add T1.4s, T3.4s, \k1\().4s - add T2.4s, T3.4s, \k2\().4s - add T3.4s, T3.4s, \k3\().4s - - // Multiply 32x32 => 64 and accumulate - mov T4.d[0], T0.d[1] - mov T5.d[0], T1.d[1] - mov T6.d[0], T2.d[1] - mov T7.d[0], T3.d[1] - umlal PASS0_SUMS.2d, T0.2s, T4.2s - umlal PASS1_SUMS.2d, T1.2s, T5.2s - umlal PASS2_SUMS.2d, T2.2s, T6.2s - umlal PASS3_SUMS.2d, T3.2s, T7.2s -.endm - -/* - * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) - * - * It's guaranteed that message_len % 16 == 0. 
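In scalar terms, each _nh_stride above adds one 16-byte message stride to a sliding key stride and accumulates two 32x32 -> 64 products per pass. A plain C sketch of that arithmetic (assuming the NH key layout used by NHPoly1305, with per-pass key offsets of four 32-bit words; message words are taken little-endian):

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t load_le32(const uint8_t *p)
    {
            return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
                   (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
    }

    /* Four NH passes over a message whose length is a multiple of 16,
     * as the comment above guarantees. sums[] holds one accumulator
     * per pass; the NEON code keeps each as two 64-bit lanes and only
     * pairs them with addp at the end. */
    static void nh_ref(const uint32_t *key, const uint8_t *message,
                       size_t message_len, uint64_t sums[4])
    {
            sums[0] = sums[1] = sums[2] = sums[3] = 0;

            for (; message_len >= 16;
                 message_len -= 16, message += 16, key += 4) {
                    uint32_t m0 = load_le32(message);
                    uint32_t m1 = load_le32(message + 4);
                    uint32_t m2 = load_le32(message + 8);
                    uint32_t m3 = load_le32(message + 12);

                    for (int p = 0; p < 4; p++) {
                            const uint32_t *k = key + 4 * p;

                            sums[p] += (uint64_t)(uint32_t)(m0 + k[0]) *
                                       (uint32_t)(m2 + k[2]);
                            sums[p] += (uint64_t)(uint32_t)(m1 + k[1]) *
                                       (uint32_t)(m3 + k[3]);
                    }
            }
    }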
- */ -ENTRY(nh_neon) - - ld1 {K0.4s,K1.4s}, [KEY], #32 - movi PASS0_SUMS.2d, #0 - movi PASS1_SUMS.2d, #0 - ld1 {K2.4s}, [KEY], #16 - movi PASS2_SUMS.2d, #0 - movi PASS3_SUMS.2d, #0 - - subs MESSAGE_LEN, MESSAGE_LEN, #64 - blt .Lloop4_done -.Lloop4: - _nh_stride K0, K1, K2, K3 - _nh_stride K1, K2, K3, K0 - _nh_stride K2, K3, K0, K1 - _nh_stride K3, K0, K1, K2 - subs MESSAGE_LEN, MESSAGE_LEN, #64 - bge .Lloop4 - -.Lloop4_done: - ands MESSAGE_LEN, MESSAGE_LEN, #63 - beq .Ldone - _nh_stride K0, K1, K2, K3 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K1, K2, K3, K0 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K2, K3, K0, K1 - -.Ldone: - // Sum the accumulators for each pass, then store the sums to 'hash' - addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d - addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d - st1 {T0.16b,T1.16b}, [HASH] - ret -ENDPROC(nh_neon) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S deleted file mode 100644 index c2ce1f820706f3ac535ffab1809e327577d3dca1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha1-ce-core.S +++ /dev/null @@ -1,163 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - k0 .req v0 - k1 .req v1 - k2 .req v2 - k3 .req v3 - - t0 .req v4 - t1 .req v5 - - dga .req q6 - dgav .req v6 - dgb .req s7 - dgbv .req v7 - - dg0q .req q12 - dg0s .req s12 - dg0v .req v12 - dg1s .req s13 - dg1v .req v13 - dg2s .req s14 - - .macro add_only, op, ev, rc, s0, dg1 - .ifc \ev, ev - add t1.4s, v\s0\().4s, \rc\().4s - sha1h dg2s, dg0s - .ifnb \dg1 - sha1\op dg0q, \dg1, t0.4s - .else - sha1\op dg0q, dg1s, t0.4s - .endif - .else - .ifnb \s0 - add t0.4s, v\s0\().4s, \rc\().4s - .endif - sha1h dg1s, dg0s - sha1\op dg0q, dg2s, t1.4s - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1 v\s0\().4s, v\s3\().4s - .endm - - .macro loadrc, k, val, tmp - movz \tmp, :abs_g0_nc:\val - movk \tmp, :abs_g1:\val - dup \k, \tmp - .endm - - /* - * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - * int blocks) - */ -ENTRY(sha1_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load round constants */ -0: loadrc k0.4s, 0x5a827999, w6 - loadrc k1.4s, 0x6ed9eba1, w6 - loadrc k2.4s, 0x8f1bbcdc, w6 - loadrc k3.4s, 0xca62c1d6, w6 - - /* load state */ - ld1 {dgav.4s}, [x19] - ldr dgb, [x19, #16] - - /* load sha1_ce_state::finalize */ - ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x19, x4] - - /* load input */ -1: ld1 {v8.4s-v11.4s}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev32 v8.16b, v8.16b ) -CPU_LE( rev32 v9.16b, v9.16b ) -CPU_LE( rev32 v10.16b, v10.16b ) -CPU_LE( rev32 v11.16b, v11.16b ) - -2: add t0.4s, v8.4s, k0.4s - mov dg0v.16b, dgav.16b - - add_update c, ev, k0, 8, 9, 10, 11, dgb - add_update c, od, k0, 9, 10, 11, 8 - add_update c, ev, k0, 10, 11, 8, 9 - add_update c, od, k0, 11, 8, 9, 10 - add_update c, ev, k1, 8, 9, 10, 11 - - add_update p, od, k1, 9, 10, 11, 8 - add_update p, ev, k1, 10, 11, 8, 9 - add_update p, od, k1, 11, 8, 9, 10 - add_update p, ev, k1, 8, 9, 10, 11 - add_update p, od, k2, 9, 10, 11, 8 - - add_update m, ev, k2, 10, 11, 8, 9 - add_update m, od, k2, 11, 8, 9, 10 - add_update m, ev, k2, 8, 9, 10, 11 - add_update m, od, k2, 9, 10, 11, 8 - add_update m, ev, k3, 10, 11, 8, 
9 - - add_update p, od, k3, 11, 8, 9, 10 - add_only p, ev, k3, 9 - add_only p, od, k3, 10 - add_only p, ev, k3, 11 - add_only p, od - - /* update state */ - add dgbv.2s, dgbv.2s, dg1v.2s - add dgav.4s, dgav.4s, dg0v.4s - - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -3: cbz x4, 4f - ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x19, x4] - movi v9.2d, #0 - mov x8, #0x80000000 - movi v10.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d8, x8 - mov x4, #0 - mov v11.d[0], xzr - mov v11.d[1], x7 - b 2b - - /* store new state */ -4: st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - frame_pop - ret -ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S deleted file mode 100644 index 6f728a41900937d48326f7a26caea3d58d0ff5e1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha2-ce-core.S +++ /dev/null @@ -1,169 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions - * - * Copyright (C) 2014 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - dga .req q20 - dgav .req v20 - dgb .req q21 - dgbv .req v21 - - t0 .req v22 - t1 .req v23 - - dg0q .req q24 - dg0v .req v24 - dg1q .req q25 - dg1v .req v25 - dg2q .req q26 - dg2v .req v26 - - .macro add_only, ev, rc, s0 - mov dg2v.16b, dg0v.16b - .ifeq \ev - add t1.4s, v\s0\().4s, \rc\().4s - sha256h dg0q, dg1q, t0.4s - sha256h2 dg1q, dg2q, t0.4s - .else - .ifnb \s0 - add t0.4s, v\s0\().4s, \rc\().4s - .endif - sha256h dg0q, dg1q, t1.4s - sha256h2 dg1q, dg2q, t1.4s - .endif - .endm - - .macro add_update, ev, rc, s0, s1, s2, s3 - sha256su0 v\s0\().4s, v\s1\().4s - add_only \ev, \rc, \s1 - sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s - .endm - - /* - * The SHA-256 round constants - */ - .section ".rodata", "a" - .align 4 -.Lsha2_rcon: - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - - /* - * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sha2_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load round constants */ -0: adr_l x8, .Lsha2_rcon - ld1 { v0.4s- v3.4s}, [x8], #64 - ld1 { v4.4s- v7.4s}, [x8], #64 - ld1 { v8.4s-v11.4s}, [x8], #64 - ld1 {v12.4s-v15.4s}, [x8] - - /* load state */ - ld1 {dgav.4s, dgbv.4s}, [x19] - - /* load sha256_ce_state::finalize */ - ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr 
w4, [x19, x4] - - /* load input */ -1: ld1 {v16.4s-v19.4s}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev32 v16.16b, v16.16b ) -CPU_LE( rev32 v17.16b, v17.16b ) -CPU_LE( rev32 v18.16b, v18.16b ) -CPU_LE( rev32 v19.16b, v19.16b ) - -2: add t0.4s, v16.4s, v0.4s - mov dg0v.16b, dgav.16b - mov dg1v.16b, dgbv.16b - - add_update 0, v1, 16, 17, 18, 19 - add_update 1, v2, 17, 18, 19, 16 - add_update 0, v3, 18, 19, 16, 17 - add_update 1, v4, 19, 16, 17, 18 - - add_update 0, v5, 16, 17, 18, 19 - add_update 1, v6, 17, 18, 19, 16 - add_update 0, v7, 18, 19, 16, 17 - add_update 1, v8, 19, 16, 17, 18 - - add_update 0, v9, 16, 17, 18, 19 - add_update 1, v10, 17, 18, 19, 16 - add_update 0, v11, 18, 19, 16, 17 - add_update 1, v12, 19, 16, 17, 18 - - add_only 0, v13, 17 - add_only 1, v14, 18 - add_only 0, v15, 19 - add_only 1 - - /* update state */ - add dgav.4s, dgav.4s, dg0v.4s - add dgbv.4s, dgbv.4s, dg1v.4s - - /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s, dgbv.4s}, [x19] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -3: cbz x4, 4f - ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x19, x4] - movi v17.2d, #0 - mov x8, #0x80000000 - movi v18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d16, x8 - mov x4, #0 - mov v19.d[0], xzr - mov v19.d[1], x7 - b 2b - - /* store new state */ -4: st1 {dgav.4s, dgbv.4s}, [x19] - frame_pop - ret -ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S deleted file mode 100644 index a7d587fa54f6c64836d6eba75f2a1ec6228b12c3..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha3-ce-core.S +++ /dev/null @@ -1,233 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
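The SHA-3 code below predates assembler mnemonics for the ARMv8.2 instructions, so each one is hand-assembled with .inst: a fixed base opcode OR-ed with the Vd/Vn/Vm (and immediate) fields. In C terms, mirroring the macros in this file:

    #include <stdint.h>

    /* Instruction words built exactly as the rax1/xar macros below do;
     * register arguments are NEON register numbers 0-31. */
    static uint32_t rax1_word(uint32_t vd, uint32_t vn, uint32_t vm)
    {
            return 0xce608c00u | vd | (vn << 5) | (vm << 16);
    }

    static uint32_t xar_word(uint32_t vd, uint32_t vn, uint32_t vm,
                             uint32_t imm6)
    {
            return 0xce800000u | vd | (vn << 5) | (imm6 << 10) | (vm << 16);
    }

xar is an XOR-and-rotate-right, which is why the rho rotations in the rounds below are written as (64 - n): rotating right by 64 - n is the same as rotating left by n.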
- */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - .set .Lv\b\().2d, \b - .set .Lv\b\().16b, \b - .endr - - /* - * ARMv8.2 Crypto Extensions instructions - */ - .macro eor3, rd, rn, rm, ra - .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro rax1, rd, rn, rm - .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro bcax, rd, rn, rm, ra - .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro xar, rd, rn, rm, imm6 - .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16) - .endm - - /* - * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) - */ - .text -ENTRY(sha3_ce_transform) - frame_push 4 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - -0: /* load state */ - add x8, x19, #32 - ld1 { v0.1d- v3.1d}, [x19] - ld1 { v4.1d- v7.1d}, [x8], #32 - ld1 { v8.1d-v11.1d}, [x8], #32 - ld1 {v12.1d-v15.1d}, [x8], #32 - ld1 {v16.1d-v19.1d}, [x8], #32 - ld1 {v20.1d-v23.1d}, [x8], #32 - ld1 {v24.1d}, [x8] - -1: sub w21, w21, #1 - mov w8, #24 - adr_l x9, .Lsha3_rcon - - /* load input */ - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v31.8b}, [x20], #24 - eor v0.8b, v0.8b, v25.8b - eor v1.8b, v1.8b, v26.8b - eor v2.8b, v2.8b, v27.8b - eor v3.8b, v3.8b, v28.8b - eor v4.8b, v4.8b, v29.8b - eor v5.8b, v5.8b, v30.8b - eor v6.8b, v6.8b, v31.8b - - tbnz x22, #6, 3f // SHA3-512 - - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v30.8b}, [x20], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - eor v9.8b, v9.8b, v27.8b - eor v10.8b, v10.8b, v28.8b - eor v11.8b, v11.8b, v29.8b - eor v12.8b, v12.8b, v30.8b - - tbnz x22, #4, 2f // SHA3-384 or SHA3-224 - - // SHA3-256 - ld1 {v25.8b-v28.8b}, [x20], #32 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - b 4f - -2: tbz x22, #2, 4f // bit 2 cleared? 
SHA-384 - - // SHA3-224 - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b}, [x20], #8 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - eor v17.8b, v17.8b, v29.8b - b 4f - - // SHA3-512 -3: ld1 {v25.8b-v26.8b}, [x20], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - -4: sub w8, w8, #1 - - eor3 v29.16b, v4.16b, v9.16b, v14.16b - eor3 v26.16b, v1.16b, v6.16b, v11.16b - eor3 v28.16b, v3.16b, v8.16b, v13.16b - eor3 v25.16b, v0.16b, v5.16b, v10.16b - eor3 v27.16b, v2.16b, v7.16b, v12.16b - eor3 v29.16b, v29.16b, v19.16b, v24.16b - eor3 v26.16b, v26.16b, v16.16b, v21.16b - eor3 v28.16b, v28.16b, v18.16b, v23.16b - eor3 v25.16b, v25.16b, v15.16b, v20.16b - eor3 v27.16b, v27.16b, v17.16b, v22.16b - - rax1 v30.2d, v29.2d, v26.2d // bc[0] - rax1 v26.2d, v26.2d, v28.2d // bc[2] - rax1 v28.2d, v28.2d, v25.2d // bc[4] - rax1 v25.2d, v25.2d, v27.2d // bc[1] - rax1 v27.2d, v27.2d, v29.2d // bc[3] - - eor v0.16b, v0.16b, v30.16b - xar v29.2d, v1.2d, v25.2d, (64 - 1) - xar v1.2d, v6.2d, v25.2d, (64 - 44) - xar v6.2d, v9.2d, v28.2d, (64 - 20) - xar v9.2d, v22.2d, v26.2d, (64 - 61) - xar v22.2d, v14.2d, v28.2d, (64 - 39) - xar v14.2d, v20.2d, v30.2d, (64 - 18) - xar v31.2d, v2.2d, v26.2d, (64 - 62) - xar v2.2d, v12.2d, v26.2d, (64 - 43) - xar v12.2d, v13.2d, v27.2d, (64 - 25) - xar v13.2d, v19.2d, v28.2d, (64 - 8) - xar v19.2d, v23.2d, v27.2d, (64 - 56) - xar v23.2d, v15.2d, v30.2d, (64 - 41) - xar v15.2d, v4.2d, v28.2d, (64 - 27) - xar v28.2d, v24.2d, v28.2d, (64 - 14) - xar v24.2d, v21.2d, v25.2d, (64 - 2) - xar v8.2d, v8.2d, v27.2d, (64 - 55) - xar v4.2d, v16.2d, v25.2d, (64 - 45) - xar v16.2d, v5.2d, v30.2d, (64 - 36) - xar v5.2d, v3.2d, v27.2d, (64 - 28) - xar v27.2d, v18.2d, v27.2d, (64 - 21) - xar v3.2d, v17.2d, v26.2d, (64 - 15) - xar v25.2d, v11.2d, v25.2d, (64 - 10) - xar v26.2d, v7.2d, v26.2d, (64 - 6) - xar v30.2d, v10.2d, v30.2d, (64 - 3) - - bcax v20.16b, v31.16b, v22.16b, v8.16b - bcax v21.16b, v8.16b, v23.16b, v22.16b - bcax v22.16b, v22.16b, v24.16b, v23.16b - bcax v23.16b, v23.16b, v31.16b, v24.16b - bcax v24.16b, v24.16b, v8.16b, v31.16b - - ld1r {v31.2d}, [x9], #8 - - bcax v17.16b, v25.16b, v19.16b, v3.16b - bcax v18.16b, v3.16b, v15.16b, v19.16b - bcax v19.16b, v19.16b, v16.16b, v15.16b - bcax v15.16b, v15.16b, v25.16b, v16.16b - bcax v16.16b, v16.16b, v3.16b, v25.16b - - bcax v10.16b, v29.16b, v12.16b, v26.16b - bcax v11.16b, v26.16b, v13.16b, v12.16b - bcax v12.16b, v12.16b, v14.16b, v13.16b - bcax v13.16b, v13.16b, v29.16b, v14.16b - bcax v14.16b, v14.16b, v26.16b, v29.16b - - bcax v7.16b, v30.16b, v9.16b, v4.16b - bcax v8.16b, v4.16b, v5.16b, v9.16b - bcax v9.16b, v9.16b, v6.16b, v5.16b - bcax v5.16b, v5.16b, v30.16b, v6.16b - bcax v6.16b, v6.16b, v4.16b, v30.16b - - bcax v3.16b, v27.16b, v0.16b, v28.16b - bcax v4.16b, v28.16b, v1.16b, v0.16b - bcax v0.16b, v0.16b, v2.16b, v1.16b - bcax v1.16b, v1.16b, v27.16b, v2.16b - bcax v2.16b, v2.16b, v28.16b, v27.16b - - eor v0.16b, v0.16b, v31.16b - - cbnz w8, 4b - cbz w21, 5f - - if_will_cond_yield_neon - add x8, x19, #32 - st1 { v0.1d- v3.1d}, [x19] - st1 { v4.1d- v7.1d}, [x8], #32 - st1 { v8.1d-v11.1d}, [x8], #32 - st1 {v12.1d-v15.1d}, [x8], #32 - st1 {v16.1d-v19.1d}, [x8], #32 - st1 {v20.1d-v23.1d}, [x8], #32 - st1 {v24.1d}, [x8] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* save state */ -5: st1 { v0.1d- v3.1d}, [x19], #32 - st1 { v4.1d- v7.1d}, [x19], #32 - st1 { v8.1d-v11.1d}, [x19], #32 - st1 {v12.1d-v15.1d}, [x19], #32 - st1 {v16.1d-v19.1d}, 
[x19], #32 - st1 {v20.1d-v23.1d}, [x19], #32 - st1 {v24.1d}, [x19] - frame_pop - ret -ENDPROC(sha3_ce_transform) - - .section ".rodata", "a" - .align 8 -.Lsha3_rcon: - .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a - .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001 - .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a - .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a - .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089 - .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080 - .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081 - .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S deleted file mode 100644 index ce65e3abe4f2e5964cc2ba0537674d2d91f286fb..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha512-ce-core.S +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 - .set .Lq\b, \b - .set .Lv\b\().2d, \b - .endr - - .macro sha512h, rd, rn, rm - .inst 0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sha512h2, rd, rn, rm - .inst 0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sha512su0, rd, rn - .inst 0xcec08000 | .L\rd | (.L\rn << 5) - .endm - - .macro sha512su1, rd, rn, rm - .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - /* - * The SHA-512 round constants - */ - .section ".rodata", "a" - .align 4 -.Lsha512_rcon: - .quad 0x428a2f98d728ae22, 0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538, 0x59f111f1b605d019 - .quad 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242, 0x12835b0145706fbe - .quad 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235, 0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 - .quad 0x983e5152ee66dfab, 0xa831c66d2db43210 - .quad 0xb00327c898fb213f, 0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2, 0xd5a79147930aa725 - .quad 0x06ca6351e003826f, 0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc, 0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df - .quad 0x650a73548baf63de, 0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6, 0x92722c851482353b - .quad 0xa2bfe8a14cf10364, 0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791, 0xc76c51a30654be30 - .quad 0xd192e819d6ef5218, 0xd69906245565a910 - .quad 0xf40e35855771202a, 0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc, 0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72, 0x8cc702081a6439ec - .quad 0x90befffa23631e28, 0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915, 0xc67178f2e372532b - .quad 0xca273eceea26619c, 0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 - .quad 
0x06f067aa72176fba, 0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae, 0x1b710b35131c471b - .quad 0x28db77f523047d84, 0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 - - .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4 - .ifnb \rc1 - ld1 {v\rc1\().2d}, [x4], #16 - .endif - add v5.2d, v\rc0\().2d, v\in0\().2d - ext v6.16b, v\i2\().16b, v\i3\().16b, #8 - ext v5.16b, v5.16b, v5.16b, #8 - ext v7.16b, v\i1\().16b, v\i2\().16b, #8 - add v\i3\().2d, v\i3\().2d, v5.2d - .ifnb \in1 - ext v5.16b, v\in3\().16b, v\in4\().16b, #8 - sha512su0 v\in0\().2d, v\in1\().2d - .endif - sha512h q\i3, q6, v7.2d - .ifnb \in1 - sha512su1 v\in0\().2d, v\in2\().2d, v5.2d - .endif - add v\i4\().2d, v\i1\().2d, v\i3\().2d - sha512h2 q\i3, q\i1, v\i0\().2d - .endm - - /* - * void sha512_ce_transform(struct sha512_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sha512_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load state */ -0: ld1 {v8.2d-v11.2d}, [x19] - - /* load first 4 round constants */ - adr_l x3, .Lsha512_rcon - ld1 {v20.2d-v23.2d}, [x3], #64 - - /* load input */ -1: ld1 {v12.2d-v15.2d}, [x20], #64 - ld1 {v16.2d-v19.2d}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev64 v12.16b, v12.16b ) -CPU_LE( rev64 v13.16b, v13.16b ) -CPU_LE( rev64 v14.16b, v14.16b ) -CPU_LE( rev64 v15.16b, v15.16b ) -CPU_LE( rev64 v16.16b, v16.16b ) -CPU_LE( rev64 v17.16b, v17.16b ) -CPU_LE( rev64 v18.16b, v18.16b ) -CPU_LE( rev64 v19.16b, v19.16b ) - - mov x4, x3 // rc pointer - - mov v0.16b, v8.16b - mov v1.16b, v9.16b - mov v2.16b, v10.16b - mov v3.16b, v11.16b - - // v0 ab cd -- ef gh ab - // v1 cd -- ef gh ab cd - // v2 ef gh ab cd -- ef - // v3 gh ab cd -- ef gh - // v4 -- ef gh ab cd -- - - dround 0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17 - dround 3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18 - dround 2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19 - dround 4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12 - dround 1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13 - - dround 0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14 - dround 3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15 - dround 2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16 - dround 4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17 - dround 1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18 - - dround 0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19 - dround 3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12 - dround 2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13 - dround 4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14 - dround 1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15 - - dround 0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16 - dround 3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17 - dround 2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18 - dround 4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19 - dround 1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12 - - dround 0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13 - dround 3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14 - dround 2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15 - dround 4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16 - dround 1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17 - - dround 0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18 - dround 3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19 - dround 2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12 - dround 4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13 - dround 1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14 - - dround 0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15 - dround 3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16 - dround 2, 3, 1, 4, 0, 28, 24, 12 - dround 4, 2, 0, 1, 3, 
29, 25, 13 - dround 1, 4, 3, 0, 2, 30, 26, 14 - - dround 0, 1, 2, 3, 4, 31, 27, 15 - dround 3, 0, 4, 2, 1, 24, , 16 - dround 2, 3, 1, 4, 0, 25, , 17 - dround 4, 2, 0, 1, 3, 26, , 18 - dround 1, 4, 3, 0, 2, 27, , 19 - - /* update state */ - add v8.2d, v8.2d, v0.2d - add v9.2d, v9.2d, v1.2d - add v10.2d, v10.2d, v2.2d - add v11.2d, v11.2d, v3.2d - - /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {v8.2d-v11.2d}, [x19] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* store new state */ -3: st1 {v8.2d-v11.2d}, [x19] - frame_pop - ret -ENDPROC(sha512_ce_transform) diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S deleted file mode 100644 index d50d187906cbe464cf31f898a855a522af8b1be6..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sm3-ce-core.S +++ /dev/null @@ -1,138 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - */ - -#include -#include - - .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 - .set .Lv\b\().4s, \b - .endr - - .macro sm3partw1, rd, rn, rm - .inst 0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sm3partw2, rd, rn, rm - .inst 0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sm3ss1, rd, rn, rm, ra - .inst 0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro sm3tt1a, rd, rn, rm, imm2 - .inst 0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt1b, rd, rn, rm, imm2 - .inst 0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt2a, rd, rn, rm, imm2 - .inst 0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt2b, rd, rn, rm, imm2 - .inst 0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro round, ab, s0, t0, t1, i - sm3ss1 v5.4s, v8.4s, \t0\().4s, v9.4s - shl \t1\().4s, \t0\().4s, #1 - sri \t1\().4s, \t0\().4s, #31 - sm3tt1\ab v8.4s, v5.4s, v10.4s, \i - sm3tt2\ab v9.4s, v5.4s, \s0\().4s, \i - .endm - - .macro qround, ab, s0, s1, s2, s3, s4 - .ifnb \s4 - ext \s4\().16b, \s1\().16b, \s2\().16b, #12 - ext v6.16b, \s0\().16b, \s1\().16b, #12 - ext v7.16b, \s2\().16b, \s3\().16b, #8 - sm3partw1 \s4\().4s, \s0\().4s, \s3\().4s - .endif - - eor v10.16b, \s0\().16b, \s1\().16b - - round \ab, \s0, v11, v12, 0 - round \ab, \s0, v12, v11, 1 - round \ab, \s0, v11, v12, 2 - round \ab, \s0, v12, v11, 3 - - .ifnb \s4 - sm3partw2 \s4\().4s, v7.4s, v6.4s - .endif - .endm - - /* - * void sm3_ce_transform(struct sm3_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sm3_ce_transform) - /* load state */ - ld1 {v8.4s-v9.4s}, [x0] - rev64 v8.4s, v8.4s - rev64 v9.4s, v9.4s - ext v8.16b, v8.16b, v8.16b, #8 - ext v9.16b, v9.16b, v9.16b, #8 - - adr_l x8, .Lt - ldp s13, s14, [x8] - - /* load input */ -0: ld1 {v0.16b-v3.16b}, [x1], #64 - sub w2, w2, #1 - - mov v15.16b, v8.16b - mov v16.16b, v9.16b - -CPU_LE( rev32 v0.16b, v0.16b ) -CPU_LE( rev32 v1.16b, v1.16b ) -CPU_LE( rev32 v2.16b, v2.16b ) -CPU_LE( rev32 v3.16b, v3.16b ) - - ext v11.16b, v13.16b, v13.16b, #4 - - qround a, v0, v1, v2, v3, v4 - qround a, v1, v2, v3, v4, v0 - qround a, v2, v3, v4, v0, v1 - qround a, v3, v4, v0, v1, v2 - - ext v11.16b, v14.16b, v14.16b, #4 - - qround b, v4, v0, v1, v2, v3 - qround b, v0, v1, v2, v3, v4 - qround b, v1, v2, v3, v4, v0 - qround b, v2, v3, v4, v0, v1 - qround b, v3, v4, v0, v1, v2 - 
qround b, v4, v0, v1, v2, v3 - qround b, v0, v1, v2, v3, v4 - qround b, v1, v2, v3, v4, v0 - qround b, v2, v3, v4, v0, v1 - qround b, v3, v4 - qround b, v4, v0 - qround b, v0, v1 - - eor v8.16b, v8.16b, v15.16b - eor v9.16b, v9.16b, v16.16b - - /* handled all input blocks? */ - cbnz w2, 0b - - /* save state */ - rev64 v8.4s, v8.4s - rev64 v9.4s, v9.4s - ext v8.16b, v8.16b, v8.16b, #8 - ext v9.16b, v9.16b, v9.16b, #8 - st1 {v8.4s-v9.4s}, [x0] - ret -ENDPROC(sm3_ce_transform) - - .section ".rodata", "a" - .align 3 -.Lt: .word 0x79cc4519, 0x9d8a7a87 diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S deleted file mode 100644 index af3bfbc3f4d4d44ec2b5129d2073abd94075af27..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sm4-ce-core.S +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - - .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 - .set .Lv\b\().4s, \b - .endr - - .macro sm4e, rd, rn - .inst 0xcec08400 | .L\rd | (.L\rn << 5) - .endm - - /* - * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); - */ - .text -ENTRY(sm4_ce_do_crypt) - ld1 {v8.4s}, [x2] - ld1 {v0.4s-v3.4s}, [x0], #64 -CPU_LE( rev32 v8.16b, v8.16b ) - ld1 {v4.4s-v7.4s}, [x0] - sm4e v8.4s, v0.4s - sm4e v8.4s, v1.4s - sm4e v8.4s, v2.4s - sm4e v8.4s, v3.4s - sm4e v8.4s, v4.4s - sm4e v8.4s, v5.4s - sm4e v8.4s, v6.4s - sm4e v8.4s, v7.4s - rev64 v8.4s, v8.4s - ext v8.16b, v8.16b, v8.16b, #8 -CPU_LE( rev32 v8.16b, v8.16b ) - st1 {v8.4s}, [x1] - ret -ENDPROC(sm4_ce_do_crypt) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S deleted file mode 100644 index 6ea337d464c414ed67cd139071022500220604d1..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/cpu-reset.S +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#include -#include -#include -#include - -.text -.pushsection .idmap.text, "awx" - -/* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. - * - * @el2_switch: Flag to indicate a switch to EL2 is needed. - * @entry: Location to jump to for soft reset. - * arg0: First argument passed to @entry. (relocation list) - * arg1: Second argument passed to @entry.(physical kernel entry) - * arg2: Third argument passed to @entry. (physical dtb address) - * - * Put the CPU into the same state as it would be if it had been reset, and - * branch to what would be the reset vector. It must be executed with the - * flat identity mapping. - */ -ENTRY(__cpu_soft_restart) - /* Clear sctlr_el1 flags. */ - mrs x12, sctlr_el1 - ldr x13, =SCTLR_ELx_FLAGS - bic x12, x12, x13 - pre_disable_mmu_workaround - msr sctlr_el1, x12 - isb - - cbz x0, 1f // el2_switch? - mov x0, #HVC_SOFT_RESTART - hvc #0 // no return - -1: mov x18, x1 // entry - mov x0, x2 // arg0 - mov x1, x3 // arg1 - mov x2, x4 // arg2 - br x18 -ENDPROC(__cpu_soft_restart) - -.popsection diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S deleted file mode 100644 index 304d5b02ca6712a693f060c26c9567370b8af130..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-entry.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * EFI entry point. - * - * Copyright (C) 2013, 2014 Red Hat, Inc. 
- * Author: Mark Salter - */ -#include -#include - -#include - -#define EFI_LOAD_ERROR 0x8000000000000001 - - __INIT - - /* - * We arrive here from the EFI boot manager with: - * - * * CPU in little-endian mode - * * MMU on with identity-mapped RAM - * * Icache and Dcache on - * - * We will most likely be running from some place other than where - * we want to be. The kernel image wants to be placed at TEXT_OFFSET - * from start of RAM. - */ -ENTRY(entry) - /* - * Create a stack frame to save FP/LR with extra space - * for image_addr variable passed to efi_entry(). - */ - stp x29, x30, [sp, #-32]! - mov x29, sp - - /* - * Call efi_entry to do the real work. - * x0 and x1 are already set up by firmware. Current runtime - * address of image is calculated and passed via *image_addr. - * - * unsigned long efi_entry(void *handle, - * efi_system_table_t *sys_table, - * unsigned long *image_addr) ; - */ - adr_l x8, _text - add x2, sp, 16 - str x8, [x2] - bl efi_entry - cmn x0, #1 - b.eq efi_load_fail - - /* - * efi_entry() will have copied the kernel image if necessary and we - * return here with device tree address in x0 and the kernel entry - * point stored at *image_addr. Save those values in registers which - * are callee preserved. - */ - mov x20, x0 // DTB address - ldr x0, [sp, #16] // relocated _text address - ldr w21, =stext_offset - add x21, x0, x21 - - /* - * Calculate size of the kernel Image (same for original and copy). - */ - adr_l x1, _text - adr_l x2, _edata - sub x1, x2, x1 - - /* - * Flush the copied Image to the PoC, and ensure it is not shadowed by - * stale icache entries from before relocation. - */ - bl __flush_dcache_area - ic ialluis - - /* - * Ensure that the rest of this function (in the original Image) is - * visible when the caches are disabled. The I-cache can't have stale - * entries for the VA range of the current image, so no maintenance is - * necessary. - */ - adr x0, entry - adr x1, entry_end - sub x1, x1, x0 - bl __flush_dcache_area - - /* Turn off Dcache and MMU */ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - bic x0, x0, #1 << 0 // clear SCTLR.M - bic x0, x0, #1 << 2 // clear SCTLR.C - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb - b 2f -1: - mrs x0, sctlr_el1 - bic x0, x0, #1 << 0 // clear SCTLR.M - bic x0, x0, #1 << 2 // clear SCTLR.C - pre_disable_mmu_workaround - msr sctlr_el1, x0 - isb -2: - /* Jump to kernel entry point */ - mov x0, x20 - mov x1, xzr - mov x2, xzr - mov x3, xzr - br x21 - -efi_load_fail: - mov x0, #EFI_LOAD_ERROR - ldp x29, x30, [sp], #32 - ret - -entry_end: -ENDPROC(entry) diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S deleted file mode 100644 index a7cfacce3e15775bfa662d5458dfe6d90c99a10f..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-header.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 - 2017 Linaro, Ltd. - * Copyright (C) 2013, 2014 Red Hat, Inc. 
- */ - -#include -#include - - .macro __EFI_PE_HEADER - .long PE_MAGIC -coff_header: - .short IMAGE_FILE_MACHINE_ARM64 // Machine - .short section_count // NumberOfSections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 0 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short IMAGE_FILE_DEBUG_STRIPPED | \ - IMAGE_FILE_EXECUTABLE_IMAGE | \ - IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics - -optional_header: - .short PE_OPT_MAGIC_PE32PLUS // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long __initdata_begin - efi_header_end // SizeOfCode - .long __pecoff_data_size // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long SZ_4K // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) / 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - .ascii ".text\0\0\0" - .long __initdata_begin - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long __initdata_begin - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE // Characteristics - - .ascii ".data\0\0\0" - .long __pecoff_data_size // VirtualSize - .long __initdata_begin - _head // VirtualAddress - .long __pecoff_data_rawsize // SizeOfRawData - .long __initdata_begin - _head // PointerToRawData - - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_WRITE // Characteristics - - .set section_count, (. - section_table) / 40 - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. 
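The debug directory entry emitted below is easier to parse as a C struct. This is an illustrative view only (field names follow the PE/COFF specification, not a kernel header); the 28-byte layout matches the .long/.short sequence at efi_debug_table:

	#include <stdint.h>

	/* PE/COFF IMAGE_DEBUG_DIRECTORY entry, as hand-assembled below. */
	struct image_debug_directory {
		uint32_t characteristics;	/* 0 */
		uint32_t time_date_stamp;	/* 0 */
		uint16_t major_version;		/* 0 */
		uint16_t minor_version;		/* 0 */
		uint32_t type;			/* IMAGE_DEBUG_TYPE_CODEVIEW */
		uint32_t size_of_data;		/* efi_debug_entry_size */
		uint32_t rva;			/* 0 - legal, per the note above */
		uint32_t file_offset;		/* efi_debug_entry - _head */
	};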
- */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long IMAGE_DEBUG_TYPE_CODEVIEW // Type - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . - efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ - .align 12 -efi_header_end: - .endm diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S deleted file mode 100644 index 3fc71106cb2b45eb0dd7091c7da824856bae4ea9..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2018 Linaro Ltd - */ - -#include - -ENTRY(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-32]! - mov x29, sp - - /* - * Register x18 is designated as the 'platform' register by the AAPCS, - * which means firmware running at the same exception level as the OS - * (such as UEFI) should never touch it. - */ - stp x1, x18, [sp, #16] - - /* - * We are lucky enough that no EFI runtime services take more than - * 5 arguments, so all are passed in registers rather than via the - * stack. - */ - mov x8, x0 - mov x0, x2 - mov x1, x3 - mov x2, x4 - mov x3, x5 - mov x4, x6 - blr x8 - - ldp x1, x2, [sp, #16] - cmp x2, x18 - ldp x29, x30, [sp], #32 - b.ne 0f - ret -0: b efi_handle_corrupted_x18 // tail call -ENDPROC(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S deleted file mode 100644 index 0f24eae8f3cceccee6bf4f20b19eb31d4e67ce83..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry-fpsimd.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * FP/SIMD state saving and restoring - * - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas - */ - -#include - -#include -#include - -/* - * Save the FP registers. - * - * x0 - pointer to struct fpsimd_state - */ -ENTRY(fpsimd_save_state) - fpsimd_save x0, 8 - ret -ENDPROC(fpsimd_save_state) - -/* - * Load the FP registers. 
- * - * x0 - pointer to struct fpsimd_state - */ -ENTRY(fpsimd_load_state) - fpsimd_restore x0, 8 - ret -ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_ARM64_SVE -ENTRY(sve_save_state) - sve_save 0, x1, 2 - ret -ENDPROC(sve_save_state) - -ENTRY(sve_load_state) - sve_load 0, x1, x2, 3, x4 - ret -ENDPROC(sve_load_state) - -ENTRY(sve_get_vl) - _sve_rdvl 0, 1 - ret -ENDPROC(sve_get_vl) -#endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S deleted file mode 100644 index 7d02f9966d3452233329959ce0b91dce4178c042..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry-ftrace.S +++ /dev/null @@ -1,344 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/kernel/entry-ftrace.S - * - * Copyright (C) 2013 Linaro Limited - * Author: AKASHI Takahiro - */ - -#include -#include -#include -#include -#include - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -/* - * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before - * the regular function prologue. For an enabled callsite, ftrace_init_nop() and - * ftrace_make_call() have patched those NOPs to: - * - * MOV X9, LR - * BL - * - * ... where is either ftrace_caller or ftrace_regs_caller. - * - * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are - * live, and x9-x18 are safe to clobber. - * - * We save the callsite's context into a pt_regs before invoking any ftrace - * callbacks. So that we can get a sensible backtrace, we create a stack record - * for the callsite and the ftrace entry assembly. This is not sufficient for - * reliable stacktrace: until we create the callsite stack record, its caller - * is missing from the LR and existing chain of frame records. - */ - .macro ftrace_regs_entry, allregs=0 - /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) - - /* Save function arguments (and x9 for simplicity) */ - stp x0, x1, [sp, #S_X0] - stp x2, x3, [sp, #S_X2] - stp x4, x5, [sp, #S_X4] - stp x6, x7, [sp, #S_X6] - stp x8, x9, [sp, #S_X8] - - /* Optionally save the callee-saved registers, always save the FP */ - .if \allregs == 1 - stp x10, x11, [sp, #S_X10] - stp x12, x13, [sp, #S_X12] - stp x14, x15, [sp, #S_X14] - stp x16, x17, [sp, #S_X16] - stp x18, x19, [sp, #S_X18] - stp x20, x21, [sp, #S_X20] - stp x22, x23, [sp, #S_X22] - stp x24, x25, [sp, #S_X24] - stp x26, x27, [sp, #S_X26] - stp x28, x29, [sp, #S_X28] - .else - str x29, [sp, #S_FP] - .endif - - /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) - stp x9, x10, [sp, #S_LR] - - /* Save the PC after the ftrace callsite */ - str x30, [sp, #S_PC] - - /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE - - /* Create our frame record within pt_regs. */ - stp x29, x30, [sp, #S_STACKFRAME] - add x29, sp, #S_STACKFRAME - .endm - -ENTRY(ftrace_regs_caller) - ftrace_regs_entry 1 - b ftrace_common -ENDPROC(ftrace_regs_caller) - -ENTRY(ftrace_caller) - ftrace_regs_entry 0 - b ftrace_common -ENDPROC(ftrace_caller) - -ENTRY(ftrace_common) - sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) - mov x1, x9 // parent_ip (callsite's LR) - ldr_l x2, function_trace_op // op - mov x3, sp // regs - -GLOBAL(ftrace_call) - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); - nop // If enabled, this will be replaced - // "b ftrace_graph_caller" -#endif - -/* - * At the callsite x0-x8 and x19-x30 were live. 
Any C code will have preserved - * x19-x29 per the AAPCS, and we created frame records upon entry, so we need - * to restore x0-x8, x29, and x30. - */ -ftrace_common_return: - /* Restore function arguments */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x8, [sp, #S_X8] - - /* Restore the callsite's FP, LR, PC */ - ldr x29, [sp, #S_FP] - ldr x30, [sp, #S_LR] - ldr x9, [sp, #S_PC] - - /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 - - ret x9 -ENDPROC(ftrace_common) - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - ldr x0, [sp, #S_PC] - sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) - add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) - bl prepare_ftrace_return - b ftrace_common_return -ENDPROC(ftrace_graph_caller) -#endif - -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ - -/* - * Gcc with -pg will put the following code in the beginning of each function: - * mov x0, x30 - * bl _mcount - * [function's body ...] - * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic - * ftrace is enabled. - * - * Please note that x0 as an argument will not be used here because we can - * get lr(x30) of instrumented function at any time by winding up call stack - * as long as the kernel is compiled without -fomit-frame-pointer. - * (or CONFIG_FRAME_POINTER, this is forced on arm64) - * - * stack layout after mcount_enter in _mcount(): - * - * current sp/fp => 0:+-----+ - * in _mcount() | x29 | -> instrumented function's fp - * +-----+ - * | x30 | -> _mcount()'s lr (= instrumented function's pc) - * old sp => +16:+-----+ - * when instrumented | | - * function calls | ... | - * _mcount() | | - * | | - * instrumented => +xx:+-----+ - * function's fp | x29 | -> parent's fp - * +-----+ - * | x30 | -> instrumented function's lr (= parent's pc) - * +-----+ - * | ... | - */ - - .macro mcount_enter - stp x29, x30, [sp, #-16]! 
- mov x29, sp - .endm - - .macro mcount_exit - ldp x29, x30, [sp], #16 - ret - .endm - - .macro mcount_adjust_addr rd, rn - sub \rd, \rn, #AARCH64_INSN_SIZE - .endm - - /* for instrumented function's parent */ - .macro mcount_get_parent_fp reg - ldr \reg, [x29] - ldr \reg, [\reg] - .endm - - /* for instrumented function */ - .macro mcount_get_pc0 reg - mcount_adjust_addr \reg, x30 - .endm - - .macro mcount_get_pc reg - ldr \reg, [x29, #8] - mcount_adjust_addr \reg, \reg - .endm - - .macro mcount_get_lr reg - ldr \reg, [x29] - ldr \reg, [\reg, #8] - .endm - - .macro mcount_get_lr_addr reg - ldr \reg, [x29] - add \reg, \reg, #8 - .endm - -#ifndef CONFIG_DYNAMIC_FTRACE -/* - * void _mcount(unsigned long return_address) - * @return_address: return address to instrumented function - * - * This function makes calls, if enabled, to: - * - tracer function to probe instrumented function's entry, - * - ftrace_graph_caller to set up an exit hook - */ -ENTRY(_mcount) - mcount_enter - - ldr_l x2, ftrace_trace_function - adr x0, ftrace_stub - cmp x0, x2 // if (ftrace_trace_function - b.eq skip_ftrace_call // != ftrace_stub) { - - mcount_get_pc x0 // function's pc - mcount_get_lr x1 // function's lr (= parent's pc) - blr x2 // (*ftrace_trace_function)(pc, lr); - -skip_ftrace_call: // } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr_l x2, ftrace_graph_return - cmp x0, x2 // if ((ftrace_graph_return - b.ne ftrace_graph_caller // != ftrace_stub) - - ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry - adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) - cmp x0, x2 - b.ne ftrace_graph_caller // ftrace_graph_caller(); -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - mcount_exit -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) -NOKPROBE(_mcount) - -#else /* CONFIG_DYNAMIC_FTRACE */ -/* - * _mcount() is used to build the kernel with -pg option, but all the branch - * instructions to _mcount() are replaced to NOP initially at kernel start up, - * and later on, NOP to branch to ftrace_caller() when enabled or branch to - * NOP when disabled per-function base. - */ -ENTRY(_mcount) - ret -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) -NOKPROBE(_mcount) - -/* - * void ftrace_caller(unsigned long return_address) - * @return_address: return address to instrumented function - * - * This function is a counterpart of _mcount() in 'static' ftrace, and - * makes calls to: - * - tracer function to probe instrumented function's entry, - * - ftrace_graph_caller to set up an exit hook - */ -ENTRY(ftrace_caller) - mcount_enter - - mcount_get_pc0 x0 // function's pc - mcount_get_lr x1 // function's lr - -GLOBAL(ftrace_call) // tracer(pc, lr); - nop // This will be replaced with "bl xxx" - // where xxx can be any kind of tracer. - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); - nop // If enabled, this will be replaced - // "b ftrace_graph_caller" -#endif - - mcount_exit -ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * void ftrace_graph_caller(void) - * - * Called from _mcount() or ftrace_caller() when function_graph tracer is - * selected. - * This function w/ prepare_ftrace_return() fakes link register's value on - * the call stack in order to intercept instrumented function's return path - * and run return_to_handler() later on its exit. 
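The return-path interception described above is clearer in C. A sketch of prepare_ftrace_return(), modelled on arch/arm64/kernel/ftrace.c (treat it as an outline, not the exact kernel code):

	/* Divert the saved LR so the traced function "returns" into
	 * return_to_handler(); the genuine return address is recorded on
	 * the per-task ret_stack by function_graph_enter(). */
	void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
				   unsigned long frame_pointer)
	{
		unsigned long return_hooker = (unsigned long)&return_to_handler;
		unsigned long old = *parent;	/* the real return address */

		if (unlikely(atomic_read(&current->tracing_graph_pause)))
			return;

		if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
			*parent = return_hooker;
	}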
- */ -ENTRY(ftrace_graph_caller) - mcount_get_pc x0 // function's pc - mcount_get_lr_addr x1 // pointer to function's saved lr - mcount_get_parent_fp x2 // parent's fp - bl prepare_ftrace_return // prepare_ftrace_return(pc, &lr, fp) - - mcount_exit -ENDPROC(ftrace_graph_caller) -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ - -ENTRY(ftrace_stub) - ret -ENDPROC(ftrace_stub) - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * void return_to_handler(void) - * - * Run ftrace_return_to_handler() before going back to parent. - * @fp is checked against the value passed by ftrace_graph_caller(). - */ -ENTRY(return_to_handler) - /* save return value regs */ - sub sp, sp, #64 - stp x0, x1, [sp] - stp x2, x3, [sp, #16] - stp x4, x5, [sp, #32] - stp x6, x7, [sp, #48] - - mov x0, x29 // parent's fp - bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); - mov x30, x0 // restore the original return address - - /* restore return value regs */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #16] - ldp x4, x5, [sp, #32] - ldp x6, x7, [sp, #48] - add sp, sp, #64 - - ret -END(return_to_handler) -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S deleted file mode 100644 index cf3bd2976e5747ff5c5b22ee0665700476133816..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry.S +++ /dev/null @@ -1,1350 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low-level exception handling code - * - * Copyright (C) 2012 ARM Ltd. - * Authors: Catalin Marinas - * Will Deacon - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. - */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING - bl enter_from_user_mode -#endif - .endm - - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter -#endif - .endm - - .macro clear_gp_regs - .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 - mov x\n, xzr - .endr - .endm - -/* - * Bad Abort numbers - *----------------- - */ -#define BAD_SYNC 0 -#define BAD_IRQ 1 -#define BAD_FIQ 2 -#define BAD_ERROR 3 - - .macro kernel_ventry, el, label, regsize = 64 - .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 - .if \el == 0 - .if \regsize == 64 - mrs x30, tpidrro_el0 - msr tpidrro_el0, xzr - .else - mov x30, xzr - .endif - .endif -alternative_else_nop_endif -#endif - - sub sp, sp, #S_FRAME_SIZE -#ifdef CONFIG_VMAP_STACK - /* - * Test whether the SP has overflowed, without corrupting a GPR. - * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT). - */ - add sp, sp, x0 // sp' = sp + x0 - sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp - tbnz x0, #THREAD_SHIFT, 0f - sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 - sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - b el\()\el\()_\label - -0: - /* - * Either we've just detected an overflow, or we've taken an exception - * while on the overflow stack. Either way, we won't return to - * userspace, and can clobber EL0 registers to free up GPRs. - */ - - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. 
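A note on the overflow test a few lines up: the add/sub shuffle exists only to compute the decremented SP without spilling a general-purpose register. Logically the check is a single bit, on the assumption (arranged by the VMAP_STACK configuration) that each THREAD_SIZE stack lives in the low half of a region aligned to twice its size:

	/* Sketch: any SP that has run off the bottom of its stack has bit
	 * THREAD_SHIFT set; any in-range SP (after the S_FRAME_SIZE
	 * decrement) has it clear. */
	static inline int sp_overflowed(unsigned long sp)
	{
		return (sp & (1UL << THREAD_SHIFT)) != 0;
	}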
*/ - msr tpidr_el0, x0 - - /* Recover the original x0 value and stash it in tpidrro_el0 */ - sub x0, sp, x0 - msr tpidrro_el0, x0 - - /* Switch to the overflow stack */ - adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 - - /* - * Check whether we were already on the overflow stack. This may happen - * after panic() re-enables interrupts. - */ - mrs x0, tpidr_el0 // sp of interrupted context - sub x0, sp, x0 // delta with top of overflow stack - tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range? - b.ne __bad_stack // no? -> bad stack pointer - - /* We were already on the overflow stack. Restore sp/x0 and carry on. */ - sub sp, sp, x0 - mrs x0, tpidrro_el0 -#endif - b el\()\el\()_\label - .endm - - .macro tramp_alias, dst, sym - mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) - .endm - - // This macro corrupts x0-x3. It is the caller's duty - // to save/restore them if required. - .macro apply_ssbd, state, tmp1, tmp2 -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b .L__asm_ssbd_skip\@ -alternative_cb_end - ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 - cbz \tmp2, .L__asm_ssbd_skip\@ - ldr \tmp2, [tsk, #TSK_TI_FLAGS] - tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - mov w1, #\state -alternative_cb arm64_update_smccc_conduit - nop // Patched to SMC/HVC #0 -alternative_cb_end -.L__asm_ssbd_skip\@: -#endif - .endm - - .macro kernel_entry, el, regsize = 64 - .if \regsize == 32 - mov w0, w0 // zero upper 32 bits of x0 - .endif - stp x0, x1, [sp, #16 * 0] - stp x2, x3, [sp, #16 * 1] - stp x4, x5, [sp, #16 * 2] - stp x6, x7, [sp, #16 * 3] - stp x8, x9, [sp, #16 * 4] - stp x10, x11, [sp, #16 * 5] - stp x12, x13, [sp, #16 * 6] - stp x14, x15, [sp, #16 * 7] - stp x16, x17, [sp, #16 * 8] - stp x18, x19, [sp, #16 * 9] - stp x20, x21, [sp, #16 * 10] - stp x22, x23, [sp, #16 * 11] - stp x24, x25, [sp, #16 * 12] - stp x26, x27, [sp, #16 * 13] - stp x28, x29, [sp, #16 * 14] - - .if \el == 0 - clear_gp_regs - mrs x21, sp_el0 - ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug - disable_step_tsk x19, x20 // exceptions when scheduling. - - apply_ssbd 1, x22, x23 - - .else - add x21, sp, #S_FRAME_SIZE - get_current_task tsk - /* Save the task's original addr_limit and set USER_DS */ - ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] - str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #USER_DS - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ - .endif /* \el == 0 */ - mrs x22, elr_el1 - mrs x23, spsr_el1 - stp lr, x21, [sp, #S_LR] - - /* - * In order to be able to dump the contents of struct pt_regs at the - * time the exception was taken (in case we attempt to walk the call - * stack later), chain it together with the stack frames. - */ - .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] - .else - stp x29, x22, [sp, #S_STACKFRAME] - .endif - add x29, sp, #S_STACKFRAME - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Set the TTBR0 PAN bit in SPSR. When the exception is taken from - * EL0, there is no need to check the state of TTBR0_EL1 since - * accesses are always enabled. - * Note that the meaning of this bit differs from the ARMv8.1 PAN - * feature as all TTBR0_EL1 accesses are disabled, not just those to - * user mappings. 
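In pseudo-C, the entry-side bookkeeping below amounts to the following (helper names are loose; the mechanism is the reserved-ASID table swap described above):

	/* On entry from EL1, note whether a uaccess window was open, then
	 * point TTBR0_EL1 at the reserved zero page (ASID 0). */
	if (taken_from_el1) {
		if (read_sysreg(ttbr0_el1) & TTBR_ASID_MASK)
			saved_spsr &= ~PSR_PAN_BIT;	/* uaccess was live */
		else
			saved_spsr |= PSR_PAN_BIT;	/* already disabled */
	}
	__uaccess_ttbr0_disable();			/* always, on the way in */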
- */ -alternative_if ARM64_HAS_PAN - b 1f // skip TTBR0 PAN -alternative_else_nop_endif - - .if \el != 0 - mrs x21, ttbr0_el1 - tst x21, #TTBR_ASID_MASK // Check for the reserved ASID - orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR - b.eq 1f // TTBR0 access already disabled - and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR - .endif - - __uaccess_ttbr0_disable x21 -1: -#endif - - stp x22, x23, [sp, #S_PC] - - /* Not in a syscall by default (el0_svc overwrites for real syscall) */ - .if \el == 0 - mov w21, #NO_SYSCALL - str w21, [sp, #S_SYSCALLNO] - .endif - - /* - * Set sp_el0 to current thread_info. - */ - .if \el == 0 - msr sp_el0, tsk - .endif - - /* Save pmr */ -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - mrs_s x20, SYS_ICC_PMR_EL1 - str x20, [sp, #S_PMR_SAVE] -alternative_else_nop_endif - - /* - * Registers that may be useful after this macro is invoked: - * - * x20 - ICC_PMR_EL1 - * x21 - aborted SP - * x22 - aborted PC - * x23 - aborted PSTATE - */ - .endm - - .macro kernel_exit, el - .if \el != 0 - disable_daif - - /* Restore the task's original addr_limit. */ - ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - - /* No need to restore UAO, it will be restored from SPSR_EL1 */ - .endif - - /* Restore pmr */ -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] - msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy -alternative_else_nop_endif - - ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR - * PAN bit checking. - */ -alternative_if ARM64_HAS_PAN - b 2f // skip TTBR0 PAN -alternative_else_nop_endif - - .if \el != 0 - tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set - .endif - - __uaccess_ttbr0_enable x0, x1 - - .if \el == 0 - /* - * Enable errata workarounds only if returning to user. The only - * workaround currently required for TTBR0_EL1 changes are for the - * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache - * corruption). - */ - bl post_ttbr_update_workaround - .endif -1: - .if \el != 0 - and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit - .endif -2: -#endif - - .if \el == 0 - ldr x23, [sp, #S_SP] // load return stack pointer - msr sp_el0, x23 - tst x22, #PSR_MODE32_BIT // native task? 
- b.eq 3f - -#ifdef CONFIG_ARM64_ERRATUM_845719 -alternative_if ARM64_WORKAROUND_845719 -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrs x29, contextidr_el1 - msr contextidr_el1, x29 -#else - msr contextidr_el1, xzr -#endif -alternative_else_nop_endif -#endif -3: -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if_not ARM64_WORKAROUND_1418040 - b 4f -alternative_else_nop_endif - /* - * if (x22.mode32 == cntkctl_el1.el0vcten) - * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten - */ - mrs x1, cntkctl_el1 - eon x0, x1, x22, lsr #3 - tbz x0, #1, 4f - eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x1 -4: -#endif - apply_ssbd 0, x0, x1 - .endif - - msr elr_el1, x21 // set up the return data - msr spsr_el1, x22 - ldp x0, x1, [sp, #16 * 0] - ldp x2, x3, [sp, #16 * 1] - ldp x4, x5, [sp, #16 * 2] - ldp x6, x7, [sp, #16 * 3] - ldp x8, x9, [sp, #16 * 4] - ldp x10, x11, [sp, #16 * 5] - ldp x12, x13, [sp, #16 * 6] - ldp x14, x15, [sp, #16 * 7] - ldp x16, x17, [sp, #16 * 8] - ldp x18, x19, [sp, #16 * 9] - ldp x20, x21, [sp, #16 * 10] - ldp x22, x23, [sp, #16 * 11] - ldp x24, x25, [sp, #16 * 12] - ldp x26, x27, [sp, #16 * 13] - ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp - - .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 5f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native - br x30 -5: - tramp_alias x30, tramp_exit_compat - br x30 -#endif - .else - eret - .endif - sb - .endm - - .macro irq_stack_entry - mov x19, sp // preserve the original sp - - /* - * Compare sp with the base of the task stack. - * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, - * and should switch to the irq stack. - */ - ldr x25, [tsk, TSK_STACK] - eor x25, x25, x19 - and x25, x25, #~(THREAD_SIZE - 1) - cbnz x25, 9998f - - ldr_this_cpu x25, irq_stack_ptr, x26 - mov x26, #IRQ_STACK_SIZE - add x26, x25, x26 - - /* switch to the irq stack */ - mov sp, x26 -9998: - .endm - - /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. - */ - .macro irq_stack_exit - mov sp, x19 - .endm - -/* GPRs used by entry code */ -tsk .req x28 // current thread_info - -/* - * Interrupt handling. - */ - .macro irq_handler - ldr_l x1, handle_arch_irq - mov x0, sp - irq_stack_entry - blr x1 - irq_stack_exit - .endm - -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * Set res to 0 if irqs were unmasked in interrupted context. - * Otherwise set res to non-0 value. - */ - .macro test_irqs_unmasked res:req, pmr:req -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - sub \res, \pmr, #GIC_PRIO_IRQON -alternative_else - mov \res, xzr -alternative_endif - .endm -#endif - - .macro gic_prio_kentry_setup, tmp:req -#ifdef CONFIG_ARM64_PSEUDO_NMI - alternative_if ARM64_HAS_IRQ_PRIO_MASKING - mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) - msr_s SYS_ICC_PMR_EL1, \tmp - alternative_else_nop_endif -#endif - .endm - - .macro gic_prio_irq_setup, pmr:req, tmp:req -#ifdef CONFIG_ARM64_PSEUDO_NMI - alternative_if ARM64_HAS_IRQ_PRIO_MASKING - orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET - msr_s SYS_ICC_PMR_EL1, \tmp - alternative_else_nop_endif -#endif - .endm - - .text - -/* - * Exception vectors. 
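For orientation, the table defined below follows the fixed VBAR_EL1 format: sixteen slots of 128 bytes each (hence .align 7 on kernel_ventry), in four groups of four. Sketched as a C comment:

	/*
	 * VBAR_EL1 + 0x000/080/100/180  Sync/IRQ/FIQ/SError, EL1 with SP_EL0 (EL1t)
	 * VBAR_EL1 + 0x200/280/300/380  Sync/IRQ/FIQ/SError, EL1 with SP_EL1 (EL1h)
	 * VBAR_EL1 + 0x400/480/500/580  Sync/IRQ/FIQ/SError, 64-bit EL0
	 * VBAR_EL1 + 0x600/680/700/780  Sync/IRQ/FIQ/SError, 32-bit EL0
	 */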
- */ - .pushsection ".entry.text", "ax" - - .align 11 -ENTRY(vectors) - kernel_ventry 1, sync_invalid // Synchronous EL1t - kernel_ventry 1, irq_invalid // IRQ EL1t - kernel_ventry 1, fiq_invalid // FIQ EL1t - kernel_ventry 1, error_invalid // Error EL1t - - kernel_ventry 1, sync // Synchronous EL1h - kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h - kernel_ventry 1, error // Error EL1h - - kernel_ventry 0, sync // Synchronous 64-bit EL0 - kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 - kernel_ventry 0, error // Error 64-bit EL0 - -#ifdef CONFIG_COMPAT - kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 -#else - kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 -#endif -END(vectors) - -#ifdef CONFIG_VMAP_STACK - /* - * We detected an overflow in kernel_ventry, which switched to the - * overflow stack. Stash the exception regs, and head to our overflow - * handler. - */ -__bad_stack: - /* Restore the original x0 value */ - mrs x0, tpidrro_el0 - - /* - * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. - */ - sub sp, sp, #S_FRAME_SIZE - kernel_entry 1 - mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE - str x0, [sp, #S_SP] - - /* Stash the regs for handle_bad_stack */ - mov x0, sp - - /* Time to die */ - bl handle_bad_stack - ASM_BUG() -#endif /* CONFIG_VMAP_STACK */ - -/* - * Invalid mode handlers - */ - .macro inv_entry, el, reason, regsize = 64 - kernel_entry \el, \regsize - mov x0, sp - mov x1, #\reason - mrs x2, esr_el1 - bl bad_mode - ASM_BUG() - .endm - -el0_sync_invalid: - inv_entry 0, BAD_SYNC -ENDPROC(el0_sync_invalid) - -el0_irq_invalid: - inv_entry 0, BAD_IRQ -ENDPROC(el0_irq_invalid) - -el0_fiq_invalid: - inv_entry 0, BAD_FIQ -ENDPROC(el0_fiq_invalid) - -el0_error_invalid: - inv_entry 0, BAD_ERROR -ENDPROC(el0_error_invalid) - -#ifdef CONFIG_COMPAT -el0_fiq_invalid_compat: - inv_entry 0, BAD_FIQ, 32 -ENDPROC(el0_fiq_invalid_compat) -#endif - -el1_sync_invalid: - inv_entry 1, BAD_SYNC -ENDPROC(el1_sync_invalid) - -el1_irq_invalid: - inv_entry 1, BAD_IRQ -ENDPROC(el1_irq_invalid) - -el1_fiq_invalid: - inv_entry 1, BAD_FIQ -ENDPROC(el1_fiq_invalid) - -el1_error_invalid: - inv_entry 1, BAD_ERROR -ENDPROC(el1_error_invalid) - -/* - * EL1 mode handlers. 
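The cmp/b.eq ladder in el1_sync below is, morally, a switch on the exception-class (EC) field of ESR_EL1. A C sketch, with handler names mirroring the local labels (illustrative only; the constants are the <asm/esr.h> ones):

	void el1_sync_sketch(unsigned long esr)
	{
		switch (esr >> ESR_ELx_EC_SHIFT) {	/* EC = ESR_EL1[31:26] */
		case ESR_ELx_EC_DABT_CUR:	el1_da(esr);	break;
		case ESR_ELx_EC_IABT_CUR:	el1_ia(esr);	break;	/* falls into el1_da */
		case ESR_ELx_EC_SYS64:					/* configurable trap */
		case ESR_ELx_EC_UNKNOWN:	el1_undef(esr);	break;
		case ESR_ELx_EC_PC_ALIGN:	el1_pc(esr);	break;
		default:
			if ((esr >> ESR_ELx_EC_SHIFT) >= ESR_ELx_EC_BREAKPT_CUR)
				el1_dbg(esr);	/* breakpoint, step, watchpoint, BRK */
			else
				el1_inv(esr);
		}
	}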
- */ - .align 6 -el1_sync: - kernel_entry 1 - mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 - b.eq el1_da - cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 - b.eq el1_ia - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el1_undef - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 - b.eq el1_undef - cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 - b.ge el1_dbg - b el1_inv - -el1_ia: - /* - * Fall through to the Data abort case - */ -el1_da: - /* - * Data abort handling - */ - mrs x3, far_el1 - inherit_daif pstate=x23, tmp=x2 - untagged_addr x0, x3 - mov x2, sp // struct pt_regs - bl do_mem_abort - - kernel_exit 1 -el1_pc: - /* - * PC alignment exception handling. We don't handle SP alignment faults, - * since we will have hit a recursive exception when trying to push the - * initial pt_regs. - */ - mrs x0, far_el1 - inherit_daif pstate=x23, tmp=x2 - mov x2, sp - bl do_sp_pc_abort - ASM_BUG() -el1_undef: - /* - * Undefined instruction - */ - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - bl do_undefinstr - kernel_exit 1 -el1_dbg: - /* - * Debug exception handling - */ - cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 - cinc x24, x24, eq // set bit '0' - tbz x24, #0, el1_inv // EL1 only - gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 - mov x2, sp // struct pt_regs - bl do_debug_exception - kernel_exit 1 -el1_inv: - // TODO: add support for undefined instructions in kernel mode - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - mov x2, x1 - mov x1, #BAD_SYNC - bl bad_mode - ASM_BUG() -ENDPROC(el1_sync) - - .align 6 -el1_irq: - kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - - irq_handler - -#ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif - - kernel_exit 1 -ENDPROC(el1_irq) - -/* - * EL0 mode handlers. 
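One detail worth flagging before the EL0 dispatch: el0_da below passes FAR_EL1 through untagged_addr before handing it to do_mem_abort, so a top-byte (TBI) tag cannot perturb fault handling. The operation is a sign-extension from bit 55; a self-contained equivalent:

	#include <stdint.h>

	/* Strip a TBI tag the way untagged_addr() does: sign-extend from
	 * bit 55, restoring 0xff on kernel addresses and 0x00 on user ones. */
	static inline uint64_t untag(uint64_t addr)
	{
		return (uint64_t)(((int64_t)addr << 8) >> 8);
	}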
- */ - .align 6 -el0_sync: - kernel_entry 0 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state - b.eq el0_svc - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_SVE // SVE access - b.eq el0_sve_acc - cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - ccmp x24, #ESR_ELx_EC_WFx, #4, ne - b.eq el0_sys - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv - -#ifdef CONFIG_COMPAT - .align 6 -el0_sync_compat: - kernel_entry 0, 32 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state - b.eq el0_svc_compat - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv -el0_svc_compat: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_compat_handler - b ret_to_user - - .align 6 -el0_irq_compat: - kernel_entry 0, 32 - b el0_irq_naked - -el0_error_compat: - kernel_entry 0, 32 - b el0_error_naked - -el0_cp15: - /* - * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_cp15instr - b ret_to_user -#endif - -el0_da: - /* - * Data abort handling - */ - mrs x26, far_el1 - ct_user_exit_irqoff - enable_daif - untagged_addr x0, x26 - mov x1, x25 - mov x2, sp - bl do_mem_abort - b ret_to_user -el0_ia: - /* - * Instruction abort handling - */ - mrs x26, far_el1 - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_el0_ia_bp_hardening - b ret_to_user -el0_fpsimd_acc: - /* - * Floating Point or Advanced SIMD access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_acc - b ret_to_user -el0_sve_acc: - /* - * Scalable Vector Extension access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sve_acc - b ret_to_user -el0_fpsimd_exc: - /* - * Floating Point, Advanced SIMD or SVE exception - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_exc - b ret_to_user -el0_sp: - ldr x26, [sp, #S_SP] - b el0_sp_pc -el0_pc: - mrs x26, 
far_el1 -el0_sp_pc: - /* - * Stack or PC alignment exception handling - */ - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_sp_pc_abort - b ret_to_user -el0_undef: - /* - * Undefined instruction - */ - ct_user_exit_irqoff - enable_daif - mov x0, sp - bl do_undefinstr - b ret_to_user -el0_sys: - /* - * System instructions, for trapped cache maintenance instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sysinstr - b ret_to_user -el0_dbg: - /* - * Debug exception handling - */ - tbnz x24, #0, el0_inv // EL0 only - mrs x24, far_el1 - gic_prio_kentry_setup tmp=x3 - ct_user_exit_irqoff - mov x0, x24 - mov x1, x25 - mov x2, sp - bl do_debug_exception - enable_da_f - b ret_to_user -el0_inv: - ct_user_exit_irqoff - enable_daif - mov x0, sp - mov x1, #BAD_SYNC - mov x2, x25 - bl bad_el0_sync - b ret_to_user -ENDPROC(el0_sync) - - .align 6 -el0_irq: - kernel_entry 0 -el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff - enable_da_f - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: -#endif - irq_handler - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif - b ret_to_user -ENDPROC(el0_irq) - -el1_error: - kernel_entry 1 - mrs x1, esr_el1 - gic_prio_kentry_setup tmp=x2 - enable_dbg - mov x0, sp - bl do_serror - kernel_exit 1 -ENDPROC(el1_error) - -el0_error: - kernel_entry 0 -el0_error_naked: - mrs x25, esr_el1 - gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff - enable_dbg - mov x0, sp - mov x1, x25 - bl do_serror - enable_da_f - b ret_to_user -ENDPROC(el0_error) - -/* - * Ok, we need to do extra processing, enter the slow path. - */ -work_pending: - mov x0, sp // 'regs' - bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user -/* - * "slow" syscall return path. - */ -ret_to_user: - disable_daif - gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK - cbnz x2, work_pending -finish_ret_to_user: - enable_step_tsk x1, x2 -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK - bl stackleak_erase -#endif - kernel_exit 0 -ENDPROC(ret_to_user) - -/* - * SVC handler. - */ - .align 6 -el0_svc: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_handler - b ret_to_user -ENDPROC(el0_svc) - - .popsection // .entry.text - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. 
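The core of the trampoline is the tramp_map_kernel/tramp_unmap_kernel pair defined below. Because the user and kernel TTBR1 tables sit a fixed distance apart and differ only in the ASID flag, the switch is pure register arithmetic, with no memory access before the kernel is mapped. In pseudo-C (sysreg helpers as in the kernel sources):

	/* map: user tables -> kernel tables */
	u64 ttbr = read_sysreg(ttbr1_el1);
	ttbr += PAGE_SIZE + RESERVED_TTBR0_SIZE;	/* kernel tables follow the user copy */
	ttbr &= ~USER_ASID_FLAG;			/* select the kernel ASID */
	write_sysreg(ttbr, ttbr1_el1);
	/* unmap is the exact inverse: subtract the offset, set USER_ASID_FLAG */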
- */ - .pushsection ".entry.tramp.text", "ax" - - .macro tramp_map_kernel, tmp - mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) - bic \tmp, \tmp, #USER_ASID_FLAG - msr ttbr1_el1, \tmp -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 - /* ASID already in \tmp[63:48] */ - movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) - movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) - /* 2MB boundary containing the vectors, so we nobble the walk cache */ - movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) - isb - tlbi vae1, \tmp - dsb nsh -alternative_else_nop_endif -#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ - .endm - - .macro tramp_unmap_kernel, tmp - mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) - orr \tmp, \tmp, #USER_ASID_FLAG - msr ttbr1_el1, \tmp - /* - * We avoid running the post_ttbr_update_workaround here because - * it's only needed by Cavium ThunderX, which requires KPTI to be - * disabled. - */ - .endm - - .macro tramp_ventry, regsize = 64 - .align 7 -1: - .if \regsize == 64 - msr tpidrro_el0, x30 // Restored in kernel_ventry - .endif - /* - * Defend against branch aliasing attacks by pushing a dummy - * entry onto the return stack and using a RET instruction to - * enter the full-fat kernel vectors. - */ - bl 2f - b . -2: - tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE -alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif -alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] -alternative_else_nop_endif - msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) - isb - ret - .endm - - .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors - msr vbar_el1, x30 - tramp_unmap_kernel x30 - .if \regsize == 64 - mrs x30, far_el1 - .endif - eret - sb - .endm - - .align 11 -ENTRY(tramp_vectors) - .space 0x400 - - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry - - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 -END(tramp_vectors) - -ENTRY(tramp_exit_native) - tramp_exit -END(tramp_exit_native) - -ENTRY(tramp_exit_compat) - tramp_exit 32 -END(tramp_exit_compat) - - .ltorg - .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT - .globl __entry_tramp_data_start -__entry_tramp_data_start: - .quad vectors - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. - * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) -NOKPROBE(cpu_switch_to) - -/* - * This is how we return from a fork. 
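ret_from_fork below is the textbook fork-return split. copy_thread() seeds the new task's saved x19/x20: x19 holds a kernel-thread entry point (zero for a user fork) and x20 its argument. A sketch of the control flow (the function shape is illustrative):

	void ret_from_fork_sketch(struct task_struct *prev)
	{
		void (*fn)(void *) = saved_x19;	/* NULL for a user-space fork */

		schedule_tail(prev);		/* finish the context switch */
		if (fn)
			fn(saved_x20);		/* kernel thread: usually never returns */
		ret_to_user();			/* head back to EL0 */
	}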
- */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_current_task tsk - b ret_to_user -ENDPROC(ret_from_fork) -NOKPROBE(ret_from_fork) - -#ifdef CONFIG_ARM_SDE_INTERFACE - -#include -#include - -.macro sdei_handler_exit exit_mode - /* On success, this call never returns... */ - cmp \exit_mode, #SDEI_EXIT_SMC - b.ne 99f - smc #0 - b . -99: hvc #0 - b . -.endm - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * The regular SDEI entry point may have been unmapped along with the rest of - * the kernel. This trampoline restores the kernel mapping to make the x1 memory - * argument accessible. - * - * This clobbers x4, __sdei_handler() will restore this from firmware's - * copy. - */ -.ltorg -.pushsection ".entry.tramp.text", "ax" -ENTRY(__sdei_asm_entry_trampoline) - mrs x4, ttbr1_el1 - tbz x4, #USER_ASID_BIT, 1f - - tramp_map_kernel tmp=x4 - isb - mov x4, xzr - - /* - * Use reg->interrupted_regs.addr_limit to remember whether to unmap - * the kernel on exit. - */ -1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] - -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif - br x4 -ENDPROC(__sdei_asm_entry_trampoline) -NOKPROBE(__sdei_asm_entry_trampoline) - -/* - * Make the exit call and restore the original ttbr1_el1 - * - * x0 & x1: setup for the exit API call - * x2: exit_mode - * x4: struct sdei_registered_event argument from registration time. - */ -ENTRY(__sdei_asm_exit_trampoline) - ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] - cbnz x4, 1f - - tramp_unmap_kernel tmp=x4 - -1: sdei_handler_exit exit_mode=x2 -ENDPROC(__sdei_asm_exit_trampoline) -NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg -.popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -__sdei_asm_trampoline_next_handler: - .quad __sdei_asm_handler -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - -/* - * Software Delegated Exception entry point. - * - * x0: Event number - * x1: struct sdei_registered_event argument from registration time. - * x2: interrupted PC - * x3: interrupted PSTATE - * x4: maybe clobbered by the trampoline - * - * Firmware has preserved x0->x17 for us, we must save/restore the rest to - * follow SMC-CC. We save (or retrieve) all the registers as the handler may - * want them. - */ -ENTRY(__sdei_asm_handler) - stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC] - stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2] - stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3] - stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4] - stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5] - stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6] - stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7] - stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8] - stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9] - stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10] - stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11] - stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12] - stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13] - stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14] - mov x4, sp - stp lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR] - - mov x19, x1 - -#ifdef CONFIG_VMAP_STACK - /* - * entry.S may have been using sp as a scratch register, find whether - * this is a normal or critical event and switch to the appropriate - * stack for this CPU. 
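The stack pick in the CONFIG_VMAP_STACK block below reduces to a priority test: critical events get their own per-CPU stack so they can safely preempt a normal SDEI handler. Pseudo-C, with per-CPU accessors named loosely:

	unsigned long base = event->priority
			   ? this_cpu_read(sdei_stack_critical_ptr)
			   : this_cpu_read(sdei_stack_normal_ptr);
	set_sp(base + SDEI_STACK_SIZE);		/* stacks grow downwards */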
- */ - ldrb w4, [x19, #SDEI_EVENT_PRIORITY] - cbnz w4, 1f - ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 - b 2f -1: ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6 -2: mov x6, #SDEI_STACK_SIZE - add x5, x5, x6 - mov sp, x5 -#endif - - /* - * We may have interrupted userspace, or a guest, or exit-from or - * return-to either of these. We can't trust sp_el0, restore it. - */ - mrs x28, sp_el0 - ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1 - msr sp_el0, x0 - - /* If we interrupted the kernel point to the previous stack/frame. */ - and x0, x3, #0xc - mrs x1, CurrentEL - cmp x0, x1 - csel x29, x29, xzr, eq // fp, or zero - csel x4, x2, xzr, eq // elr, or zero - - stp x29, x4, [sp, #-16]! - mov x29, sp - - add x0, x19, #SDEI_EVENT_INTREGS - mov x1, x19 - bl __sdei_handler - - msr sp_el0, x28 - /* restore regs >x17 that we clobbered */ - mov x4, x19 // keep x4 for __sdei_asm_exit_trampoline - ldp x28, x29, [x4, #SDEI_EVENT_INTREGS + 16 * 14] - ldp x18, x19, [x4, #SDEI_EVENT_INTREGS + 16 * 9] - ldp lr, x1, [x4, #SDEI_EVENT_INTREGS + S_LR] - mov sp, x1 - - mov x1, x0 // address to complete_and_resume - /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */ - cmp x0, #1 - mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE - mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME - csel x0, x2, x3, ls - - ldr_l x2, sdei_exit_mode - -alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 - sdei_handler_exit exit_mode=x2 -alternative_else_nop_endif - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline - br x5 -#endif -ENDPROC(__sdei_asm_handler) -NOKPROBE(__sdei_asm_handler) -#endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S deleted file mode 100644 index a2e0b37549433b8629183efb810c8513f1a21669..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/head.S +++ /dev/null @@ -1,987 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low-level CPU initialisation - * Based on arch/arm/kernel/head.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (C) 2003-2012 ARM Ltd. - * Authors: Catalin Marinas - * Will Deacon - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "efi-header.S" - -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) - -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 -#error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB -#endif - -/* - * Kernel startup entry point. - * --------------------------- - * - * The requirements are: - * MMU = off, D-cache = off, I-cache = on or off, - * x0 = physical address to the FDT blob. - * - * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). - * - * Note that the callee-saved registers are used for storing variables - * that are useful before the MMU is enabled. The allocations are described - * in the entry routines. - */ - __HEAD -_head: - /* - * DO NOT MODIFY. Image header expected by Linux boot-loaders. - */ -#ifdef CONFIG_EFI - /* - * This add instruction has no meaningful effect except that - * its opcode forms the magic "MZ" signature required by UEFI. 
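- * Concretely, "add x13, x18, #0x16" assembles to 0x91005a4d, which is
- * stored little-endian as the bytes 4d 5a 00 91, and 0x4d 0x5a is ASCII
- * "MZ", the DOS/PE signature UEFI firmware expects at offset 0 of the
- * image. The add itself is harmless: x13 is never read afterwards.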
- */ - add x13, x18, #0x16 - b stext -#else - b stext // branch to kernel start, magic - .long 0 // reserved -#endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian - le64sym _kernel_size_le // Effective size of kernel image, little-endian - le64sym _kernel_flags_le // Informative flags, little-endian - .quad 0 // reserved - .quad 0 // reserved - .quad 0 // reserved - .ascii ARM64_IMAGE_MAGIC // Magic number -#ifdef CONFIG_EFI - .long pe_header - _head // Offset to the PE header. - -pe_header: - __EFI_PE_HEADER -#else - .long 0 // reserved -#endif - - __INIT - - /* - * The following callee saved general purpose registers are used on the - * primary lowlevel boot path: - * - * Register Scope Purpose - * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 - * x23 stext() .. start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() - * current RELR displacement - */ -ENTRY(stext) - bl preserve_boot_args - bl el2_setup // Drop to EL1, w0=cpu_boot_mode - adrp x23, __PHYS_OFFSET - and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 - bl set_cpu_boot_mode_flag - bl __create_page_tables - /* - * The following calls CPU setup code, see arch/arm64/mm/proc.S for - * details. - * On return, the CPU will be ready for the MMU to be turned on and - * the TCR will have been set. - */ - bl __cpu_setup // initialise processor - b __primary_switch -ENDPROC(stext) - -/* - * Preserve the arguments passed by the bootloader in x0 .. x3 - */ -preserve_boot_args: - mov x21, x0 // x21=FDT - - adr_l x0, boot_args // record the contents of - stp x21, x1, [x0] // x0 .. x3 at kernel entry - stp x2, x3, [x0, #16] - - dmb sy // needed before dc ivac with - // MMU off - - mov x1, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call -ENDPROC(preserve_boot_args) - -/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: ptrs, tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - add \tmp1, \tbl, #PAGE_SIZE - phys_to_pte \tmp2, \tmp1 - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - lsr \tmp1, \virt, #\shift - sub \ptrs, \ptrs, #1 - and \tmp1, \tmp1, \ptrs // table index - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate page table entries, these entries can be pointers to the next level - * or last level entries pointing to physical memory. 
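- * In C terms the loop amounts to (illustrative sketch; parameters are
- * described below):
- *
- *	for (; index <= eindex; index++, rtbl += inc)
- *		tbl[index] = phys_to_pte(rtbl) | flags;
- *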
- * - * tbl: page table address - * rtbl: pointer to page table or physical memory - * index: start index to write - * eindex: end index to write - [index, eindex] written to - * flags: flags for pagetable entry to or in - * inc: increment to rtbl between each entry - * tmp1: temporary variable - * - * Preserves: tbl, eindex, flags, inc - * Corrupts: index, tmp1 - * Returns: rtbl - */ - .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 -.Lpe\@: phys_to_pte \tmp1, \rtbl - orr \tmp1, \tmp1, \flags // tmp1 = table entry - str \tmp1, [\tbl, \index, lsl #3] - add \rtbl, \rtbl, \inc // rtbl = pa next level - add \index, \index, #1 - cmp \index, \eindex - b.ls .Lpe\@ - .endm - -/* - * Compute indices of table entries from virtual address range. If multiple entries - * were needed in the previous page table level then the next page table level is assumed - * to be composed of multiple pages. (This effectively scales the end index). - * - * vstart: virtual address of start of range - * vend: virtual address of end of range - * shift: shift used to transform virtual address into index - * ptrs: number of entries in page table - * istart: index in table corresponding to vstart - * iend: index in table corresponding to vend - * count: On entry: how many extra entries were required in previous level, scales - * our end index. - * On exit: returns how many extra entries required for next page table level - * - * Preserves: vstart, vend, shift, ptrs - * Returns: istart, iend, count - */ - .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count - lsr \iend, \vend, \shift - mov \istart, \ptrs - sub \istart, \istart, #1 - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) - mov \istart, \ptrs - mul \istart, \istart, \count - add \iend, \iend, \istart // iend += (count - 1) * ptrs - // our entries span multiple tables - - lsr \istart, \vstart, \shift - mov \count, \ptrs - sub \count, \count, #1 - and \istart, \istart, \count - - sub \count, \iend, \istart - .endm - -/* - * Map memory for specified virtual address range. Each level of page table needed supports - * multiple entries. If a level requires n entries the next page table level is assumed to be - * formed from n pages. 
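- * Conceptually (pseudo-C, one pass per level, top level first; the
- * last level writes block/page entries with the caller's flags):
- *
- *	count = 0;
- *	for_each_level(pgd ... last) {
- *		compute_indices(vstart, vend, shift, ptrs, &istart, &iend, &count);
- *		populate_entries(tbl, next_tbl, istart, iend, PMD_TYPE_TABLE);
- *		tbl = next_tbl;
- *	}
- *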
- * - * tbl: location of page table - * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) - * vstart: start address to map - * vend: end address to map - we map [vstart, vend] - * flags: flags to use to map last level entries - * phys: physical address corresponding to vstart - physical memory is contiguous - * pgds: the number of pgd entries - * - * Temporaries: istart, iend, tmp, count, sv - these need to be different registers - * Preserves: vstart, vend, flags - * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv - */ - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv - add \rtbl, \tbl, #PAGE_SIZE - mov \sv, \rtbl - mov \count, #0 - compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv - mov \sv, \rtbl - -#if SWAPPER_PGTABLE_LEVELS > 3 - compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv - mov \sv, \rtbl -#endif - -#if SWAPPER_PGTABLE_LEVELS > 2 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv -#endif - - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count - bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled - */ -__create_page_tables: - mov x28, lr - - /* - * Invalidate the init page tables to avoid potential dirty cache lines - * being evicted. Other page tables are allocated in rodata as part of - * the kernel image, and thus are clean to the PoC per the boot - * protocol. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - /* - * Clear the init page tables. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - subs x1, x1, #64 - b.ne 1b - - mov x7, SWAPPER_MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - adrp x0, idmap_pg_dir - adrp x3, __idmap_text_start // __pa(__idmap_text_start) - -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s x6, SYS_ID_AA64MMFR2_EL1 - and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - mov x5, #52 - cbnz x6, 1f -#endif - mov x5, #VA_BITS_MIN -1: - adr_l x6, vabits_actual - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - - /* - * VA_BITS may be too small to allow for an ID mapping to be created - * that covers system RAM if that is located sufficiently high in the - * physical address space. So for the ID map, use an extended virtual - * range in that case, and configure an additional translation level - * if needed. - * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of __idmap_text_end. 
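- *
- * For example, if __idmap_text_end sits at physical 0x80_0000_0000
- * (bit 39 set), clz gives 24, so T0SZ = 24 and TTBR0 spans 2^40 bytes,
- * exactly enough to reach that address.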
- */ - adrp x5, __idmap_text_end - clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? - b.ge 1f // .. then skip VA range extension - - adr_l x6, idmap_t0sz - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - -#if (VA_BITS < 48) -#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) - - /* - * If VA_BITS < 48, we have to configure an additional table level. - * First, we have to verify our assumption that the current value of - * VA_BITS was chosen such that all translation levels are fully - * utilised, and that lowering T0SZ will always result in an additional - * translation level to be configured. - */ -#if VA_BITS != EXTRA_SHIFT -#error "Mismatch between VA_BITS and page size/number of translation levels" -#endif - - mov x4, EXTRA_PTRS - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 -#else - /* - * If VA_BITS == 48, we don't have to configure an additional - * translation level, but the top-level table has more entries. - */ - mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) - str_l x4, idmap_ptrs_per_pgd, x5 -#endif -1: - ldr_l x4, idmap_ptrs_per_pgd - mov x5, x3 // __pa(__idmap_text_start) - adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) - add x5, x5, x23 // add KASLR displacement - mov x4, PTRS_PER_PGD - adrp x6, _end // runtime __pa(_end) - adrp x3, _text // runtime __pa(_text) - sub x6, x6, x3 // _end - _text - add x6, x6, x5 // runtime __va(_end) - - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate those tables again to - * remove any speculatively loaded cache lines. - */ - dmb sy - - adrp x0, idmap_pg_dir - adrp x1, idmap_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - ret x28 -ENDPROC(__create_page_tables) - .ltorg - -/* - * The following fragment of code is executed with the MMU enabled. - * - * x0 = __PHYS_OFFSET - */ -__primary_switched: - adrp x4, init_thread_union - add sp, x4, #THREAD_SIZE - adr_l x5, init_task - msr sp_el0, x5 // Save thread_info - - adr_l x8, vectors // load VBAR_EL1 with virtual - msr vbar_el1, x8 // vector table address - isb - - stp xzr, x30, [sp, #-16]! - mov x29, sp - - str_l x21, __fdt_pointer, x5 // Save FDT pointer - - ldr_l x4, kimage_vaddr // Save the offset between - sub x4, x4, x0 // the kernel virtual and - str_l x4, kimage_voffset, x5 // physical mappings - - // Clear BSS - adr_l x0, __bss_start - mov x1, xzr - adr_l x2, __bss_stop - sub x2, x2, x0 - bl __pi_memset - dsb ishst // Make zero page visible to PTW - -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif -#ifdef CONFIG_RANDOMIZE_BASE - tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? - b.ne 0f - mov x0, x21 // pass FDT address in x0 - bl kaslr_early_init // parse FDT for KASLR options - cbz x0, 0f // KASLR disabled? 
just proceed - orr x23, x23, x0 // record KASLR offset - ldp x29, x30, [sp], #16 // we must enable KASLR, return - ret // to __primary_switch() -0: -#endif - add sp, sp, #16 - mov x29, #0 - mov x30, #0 - b start_kernel -ENDPROC(__primary_switched) - -/* - * end early head section, begin head code that is also used for - * hotplug and needs to have the same protections as the text region - */ - .section ".idmap.text","awx" - -ENTRY(kimage_vaddr) - .quad _text - TEXT_OFFSET -EXPORT_SYMBOL(kimage_vaddr) - -/* - * If we're fortunate enough to boot at EL2, ensure that the world is - * sane before dropping to EL1. - * - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if - * booted in EL1 or EL2 respectively. - */ -ENTRY(el2_setup) - msr SPsel, #1 // We want to use SP_EL{1,2} - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.eq 1f - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) - msr sctlr_el1, x0 - mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 - isb - ret - -1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) - msr sctlr_el2, x0 - -#ifdef CONFIG_ARM64_VHE - /* - * Check for VHE being present. For the rest of the EL2 setup, - * x2 being non-zero indicates that we do have VHE, and that the - * kernel is intended to run at EL2. - */ - mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr -#endif - - /* Hyp configuration. */ - mov_q x0, HCR_HOST_NVHE_FLAGS - cbz x2, set_hcr - mov_q x0, HCR_HOST_VHE_FLAGS -set_hcr: - msr hcr_el2, x0 - isb - - /* - * Allow Non-secure EL1 and EL0 to access physical timer and counter. - * This is not necessary for VHE, since the host kernel runs in EL2, - * and EL0 accesses are configured in the later stage of boot process. - * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout - * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined - * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 - * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in - * EL2. - */ - cbnz x2, 1f - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers - msr cnthctl_el2, x0 -1: - msr cntvoff_el2, xzr // Clear virtual offset - -#ifdef CONFIG_ARM_GIC_V3 - /* GICv3 system register access */ - mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 3f - - mrs_s x0, SYS_ICC_SRE_EL2 - orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 - orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s SYS_ICC_SRE_EL2, x0 - isb // Make sure SRE is now set - mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 3f // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults - -3: -#endif - - /* Populate ID registers. */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - -#ifdef CONFIG_COMPAT - msr hstr_el2, xzr // Disable CP15 traps to EL2 -#endif - - /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present - mrs x0, pmcr_el0 // Disable debug access traps - ubfx x0, x0, #11, #5 // to EL2 and allow access to -4: - csel x3, xzr, x0, lt // all PMU counters from EL1 - - /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 7f // Skip if SPE not present - cbnz x2, 6f // VHE? 
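- /*
- * In C-like terms the non-VHE path below does roughly:
- *
- *	if (!(read_sysreg(PMBIDR_EL1) & PMBIDR_EL1_P))
- *		write_sysreg(PMSCR_EL2_PCT | PMSCR_EL2_PA, PMSCR_EL2);
- *	mdcr |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
- *
- * i.e. let EL1 own the profiling buffer and, where permitted, allow
- * sampling of physical addresses and the physical counter.
- */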
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x4, 5f // then permit sampling of physical - mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) - msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter -5: - mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - orr x3, x3, x1 // If we don't have VHE, then - b 7f // use EL1&0 translation. -6: // For VHE, use EL2 translation - orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 -7: - msr mdcr_el2, x3 // Configure debug traps - - /* LORegions */ - mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f - msr_s SYS_LORC_EL1, xzr -1: - - /* Stage-2 translation */ - msr vttbr_el2, xzr - - cbz x2, install_el2_stub - - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 - isb - ret - -install_el2_stub: - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 - - /* SVE register access */ - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 7f - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. - - /* Hypervisor stub */ -7: adr_l x0, __hyp_stub_vectors - msr vbar_el2, x0 - - /* spsr */ - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, x0 - msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 - eret -ENDPROC(el2_setup) - -/* - * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed - * in w0. See arch/arm64/include/asm/virt.h for more info. - */ -set_cpu_boot_mode_flag: - adr_l x1, __boot_cpu_mode - cmp w0, #BOOT_CPU_MODE_EL2 - b.ne 1f - add x1, x1, #4 -1: str w0, [x1] // This CPU has booted in EL1 - dmb sy - dc ivac, x1 // Invalidate potentially stale cache line - ret -ENDPROC(set_cpu_boot_mode_flag) - -/* - * These values are written with the MMU off, but read with the MMU on. - * Writers will invalidate the corresponding address, discarding up to a - * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures - * sufficient alignment that the CWG doesn't overlap another section. - */ - .pushsection ".mmuoff.data.write", "aw" -/* - * We need to find out the CPU boot mode long after boot, so we need to - * store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -ENTRY(__boot_cpu_mode) - .long BOOT_CPU_MODE_EL2 - .long BOOT_CPU_MODE_EL1 -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - */ -ENTRY(__early_cpu_boot_status) - .quad 0 - - .popsection - - /* - * This provides a "holding pen" for platforms to hold all secondary - * cores are held until we're ready for them to initialise. 
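- * That is, each secondary core parks here, spinning on
- * secondary_holding_pen_release until the boot CPU writes this core's
- * MPIDR there; in C-like terms:
- *
- *	while (READ_ONCE(secondary_holding_pen_release) != my_mpidr)
- *		wfe();
- *	secondary_startup();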
- */ -ENTRY(secondary_holding_pen) - bl el2_setup // Drop to EL1, w0=cpu_boot_mode - bl set_cpu_boot_mode_flag - mrs x0, mpidr_el1 - mov_q x1, MPIDR_HWID_BITMASK - and x0, x0, x1 - adr_l x3, secondary_holding_pen_release -pen: ldr x4, [x3] - cmp x4, x0 - b.eq secondary_startup - wfe - b pen -ENDPROC(secondary_holding_pen) - - /* - * Secondary entry point that jumps straight into the kernel. Only to - * be used where CPUs are brought online dynamically by the kernel. - */ -ENTRY(secondary_entry) - bl el2_setup // Drop to EL1 - bl set_cpu_boot_mode_flag - b secondary_startup -ENDPROC(secondary_entry) - -secondary_startup: - /* - * Common entry point for secondary CPUs. - */ - bl __cpu_secondary_check52bitva - bl __cpu_setup // initialise processor - adrp x1, swapper_pg_dir - bl __enable_mmu - ldr x8, =__secondary_switched - br x8 -ENDPROC(secondary_startup) - -__secondary_switched: - adr_l x5, vectors - msr vbar_el1, x5 - isb - - adr_l x0, secondary_data - ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack - cbz x1, __secondary_too_slow - mov sp, x1 - ldr x2, [x0, #CPU_BOOT_TASK] - cbz x2, __secondary_too_slow - msr sp_el0, x2 - mov x29, #0 - mov x30, #0 - b secondary_start_kernel -ENDPROC(__secondary_switched) - -__secondary_too_slow: - wfe - wfi - b __secondary_too_slow -ENDPROC(__secondary_too_slow) - -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - * - * update_early_cpu_boot_status tmp, status - * - Corrupts tmp1, tmp2 - * - Writes 'status' to __early_cpu_boot_status and makes sure - * it is committed to memory. - */ - - .macro update_early_cpu_boot_status status, tmp1, tmp2 - mov \tmp2, #\status - adr_l \tmp1, __early_cpu_boot_status - str \tmp2, [\tmp1] - dmb sy - dc ivac, \tmp1 // Invalidate potentially stale cache line - .endm - -/* - * Enable the MMU. - * - * x0 = SCTLR_EL1 value for turning on the MMU. - * x1 = TTBR1_EL1 value - * - * Returns to the caller via x30/lr. This requires the caller to be covered - * by the .idmap.text section. - * - * Checks if the selected granule size is supported by the CPU. - * If it isn't, park the CPU - */ -ENTRY(__enable_mmu) - mrs x2, ID_AA64MMFR0_EL1 - ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support - update_early_cpu_boot_status 0, x2, x3 - adrp x2, idmap_pg_dir - phys_to_ttbr x1, x1 - phys_to_ttbr x2, x2 - msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1, x3 - msr ttbr1_el1, x1 // load TTBR1 - isb - msr sctlr_el1, x0 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - ret -ENDPROC(__enable_mmu) - -ENTRY(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x0, vabits_actual - cmp x0, #52 - b.ne 2f - - mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - cbnz x0, 2f - - update_early_cpu_boot_status \ - CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1 -1: wfe - wfi - b 1b - -#endif -2: ret -ENDPROC(__cpu_secondary_check52bitva) - -__no_granule_support: - /* Indicate that this CPU can't boot and is stuck in the kernel */ - update_early_cpu_boot_status \ - CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2 -1: - wfe - wfi - b 1b -ENDPROC(__no_granule_support) - -#ifdef CONFIG_RELOCATABLE -__relocate_kernel: - /* - * Iterate over each entry in the relocation table, and apply the - * relocations in place. 
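- * Each Elf64_Rela entry is 24 bytes { r_offset, r_info, r_addend }; for
- * R_AARCH64_RELATIVE entries the fixup below boils down to (C sketch,
- * with x23 holding the virtual/KASLR displacement):
- *
- *	*(u64 *)(rela->r_offset + x23) = rela->r_addend + x23;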
- */ - ldr w9, =__rela_offset // offset to reloc table - ldr w10, =__rela_size // size of reloc table - - mov_q x11, KIMAGE_VADDR // default virtual offset - add x11, x11, x23 // actual virtual offset - add x9, x9, x11 // __va(.rela) - add x10, x9, x10 // __va(.rela) + sizeof(.rela) - -0: cmp x9, x10 - b.hs 1f - ldp x12, x13, [x9], #24 - ldr x14, [x9, #-8] - cmp w13, #R_AARCH64_RELATIVE - b.ne 0b - add x14, x14, x23 // relocate - str x14, [x12, x23] - b 0b - -1: -#ifdef CONFIG_RELR - /* - * Apply RELR relocations. - * - * RELR is a compressed format for storing relative relocations. The - * encoded sequence of entries looks like: - * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] - * - * i.e. start with an address, followed by any number of bitmaps. The - * address entry encodes 1 relocation. The subsequent bitmap entries - * encode up to 63 relocations each, at subsequent offsets following - * the last address entry. - * - * The bitmap entries must have 1 in the least significant bit. The - * assumption here is that an address cannot have 1 in lsb. Odd - * addresses are not supported. Any odd addresses are stored in the RELA - * section, which is handled above. - * - * Excluding the least significant bit in the bitmap, each non-zero - * bit in the bitmap represents a relocation to be applied to - * a corresponding machine word that follows the base address - * word. The second least significant bit represents the machine - * word immediately following the initial address, and each bit - * that follows represents the next word, in linear order. As such, - * a single bitmap can encode up to 63 relocations in a 64-bit object. - * - * In this implementation we store the address of the next RELR table - * entry in x9, the address being relocated by the current address or - * bitmap entry in x13 and the address being relocated by the current - * bit in x14. - * - * Because addends are stored in place in the binary, RELR relocations - * cannot be applied idempotently. We use x24 to keep track of the - * currently applied displacement so that we can correctly relocate if - * __relocate_kernel is called twice with non-zero displacements (i.e. - * if there is both a physical misalignment and a KASLR displacement). - */ - ldr w9, =__relr_offset // offset to reloc table - ldr w10, =__relr_size // size of reloc table - add x9, x9, x11 // __va(.relr) - add x10, x9, x10 // __va(.relr) + sizeof(.relr) - - sub x15, x23, x24 // delta from previous offset - cbz x15, 7f // nothing to do if unchanged - mov x24, x23 // save new offset - -2: cmp x9, x10 - b.hs 7f - ldr x11, [x9], #8 - tbnz x11, #0, 3f // branch to handle bitmaps - add x13, x11, x23 - ldr x12, [x13] // relocate address entry - add x12, x12, x15 - str x12, [x13], #8 // adjust to start of bitmap - b 2b - -3: mov x14, x13 -4: lsr x11, x11, #1 - cbz x11, 6f - tbz x11, #0, 5f // skip bit if not set - ldr x12, [x14] // relocate bit - add x12, x12, x15 - str x12, [x14] - -5: add x14, x14, #8 // move to next bit's address - b 4b - -6: /* - * Move to the next bitmap's address. 8 is the word size, and 63 is the - * number of significant bits in a bitmap entry. 
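- * (That is, x13 advances by 8 * 63 = 504 bytes, so a following bitmap
- * entry describes the next 63 machine words.) The whole decoder, in
- * C-like form with "delta" the change in displacement since the last
- * pass:
- *
- *	if (!(entry & 1)) {
- *		where = (u64 *)(entry + x23);	// address entry
- *		*where++ += delta;
- *	} else {
- *		for (i = 0; (entry >>= 1); i++)	// bitmap entry
- *			if (entry & 1)
- *				where[i] += delta;
- *		where += 63;
- *	}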
- */ - add x13, x13, #(8 * 63) - b 2b - -7: -#endif - ret - -ENDPROC(__relocate_kernel) -#endif - -__primary_switch: -#ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value -#endif - - adrp x1, init_pg_dir - bl __enable_mmu -#ifdef CONFIG_RELOCATABLE -#ifdef CONFIG_RELR - mov x24, #0 // no RELR displacement yet -#endif - bl __relocate_kernel -#ifdef CONFIG_RANDOMIZE_BASE - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - blr x8 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - pre_disable_mmu_workaround - msr sctlr_el1, x20 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - isb - - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb - - bl __relocate_kernel -#endif -#endif - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - br x8 -ENDPROC(__primary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S deleted file mode 100644 index 38bcd4d4e43bb1314f103f5ac477e1143c019240..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/hibernate-asm.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hibernate low-level support - * - * Copyright (C) 2016 ARM Ltd. - * Author: James Morse - */ -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - -/* - * Resume from hibernate - * - * Loads temporary page tables then restores the memory image. - * Finally branches to cpu_resume() to restore the state saved by - * swsusp_arch_suspend(). - * - * Because this code has to be copied to a 'safe' page, it can't call out to - * other functions by PC-relative address. Also remember that it may be - * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. - * - * This 'safe' page is mapped via ttbr0, and executed from there. This function - * switches to a copy of the linear map in ttbr1, performs the restore, then - * switches ttbr1 to the original kernel's swapper_pg_dir. - * - * All of memory gets written to, including code. We need to clean the kernel - * text to the Point of Coherence (PoC) before secondary cores can be booted. - * Because the kernel modules and executable pages mapped to user space are - * also written as data, we clean all pages we touch to the Point of - * Unification (PoU). 
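- * In outline, the restore below is:
- *
- *	for (pbe = restore_pblist; pbe; pbe = pbe->next) {
- *		copy_page(pbe->orig_address, pbe->address);
- *		clean_dcache_to_pou(pbe->orig_address, PAGE_SIZE);
- *	}
- *
- * (clean_dcache_to_pou() here is shorthand for the dc cvau loop, not a
- * real kernel helper.)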
- * - * x0: physical address of temporary page tables - * x1: physical address of swapper page tables - * x2: address of cpu_resume - * x3: linear map address of restore_pblist in the current kernel - * x4: physical address of __hyp_stub_vectors, or 0 - * x5: physical address of a zero page that remains zero after resume - */ -.pushsection ".hibernate_exit.text", "ax" -ENTRY(swsusp_arch_suspend_exit) - /* - * We execute from ttbr0, change ttbr1 to our copied linear map tables - * with a break-before-make via the zero page - */ - break_before_make_ttbr_switch x5, x0, x6, x8 - - mov x21, x1 - mov x30, x2 - mov x24, x4 - mov x25, x5 - - /* walk the restore_pblist and use copy_page() to over-write memory */ - mov x19, x3 - -1: ldr x10, [x19, #HIBERN_PBE_ORIG] - mov x0, x10 - ldr x1, [x19, #HIBERN_PBE_ADDR] - - copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 - - add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ - raw_dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ - add x4, x4, x2 - cmp x4, x1 - b.lo 2b - - ldr x19, [x19, #HIBERN_PBE_NEXT] - cbnz x19, 1b - dsb ish /* wait for PoU cleaning to finish */ - - /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21, x6, x8 - - ic ialluis - dsb ish - isb - - cbz x24, 3f /* Do we need to re-initialise EL2? */ - hvc #0 -3: ret - - .ltorg -ENDPROC(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -el1_sync: - msr vbar_el2, x24 - eret -ENDPROC(el1_sync) - -.macro invalid_vector label -\label: - b \label -ENDPROC(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -ENTRY(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -END(hibernate_el2_vectors) - -.popsection diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S deleted file mode 100644 index 73d46070b31500117ade72ea1d2a6c2ed31feb2b..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/hyp-stub.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hypervisor stub - * - * Copyright (C) 2012 ARM Ltd. 
- * Author: Marc Zyngier - */ - -#include -#include -#include - -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - - .align 11 - -ENTRY(__hyp_stub_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_stub_vectors) - - .align 11 - -el1_sync: - cmp x0, #HVC_SET_VECTORS - b.ne 2f - msr vbar_el2, x1 - b 9f - -2: cmp x0, #HVC_SOFT_RESTART - b.ne 3f - mov x0, x2 - mov x2, x4 - mov x4, x1 - mov x1, x3 - br x4 // no return - -3: cmp x0, #HVC_RESET_VECTORS - beq 9f // Nothing to reset! - - /* Someone called kvm_call_hyp() against the hyp-stub... */ - ldr x0, =HVC_STUB_ERR - eret - -9: mov x0, xzr - eret -ENDPROC(el1_sync) - -.macro invalid_vector label -\label: - b \label -ENDPROC(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* - * __hyp_set_vectors: Call this after boot to set the initial hypervisor - * vectors as part of hypervisor installation. On an SMP system, this should - * be called on each CPU. - * - * x0 must be the physical address of the new vector table, and must be - * 2KB aligned. - * - * Before calling this, you must check that the stub hypervisor is installed - * everywhere, by waiting for any secondary CPUs to be brought up and then - * checking that is_hyp_mode_available() is true. - * - * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or - * something else went wrong... in such cases, trying to install a new - * hypervisor is unlikely to work as desired. - * - * When you call into your shiny new hypervisor, sp_el2 will contain junk, - * so you will need to set that to something sensible at the new hypervisor's - * initialisation entry point. - */ - -ENTRY(__hyp_set_vectors) - mov x1, x0 - mov x0, #HVC_SET_VECTORS - hvc #0 - ret -ENDPROC(__hyp_set_vectors) - -ENTRY(__hyp_reset_vectors) - mov x0, #HVC_RESET_VECTORS - hvc #0 - ret -ENDPROC(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S deleted file mode 100644 index 42bd8c0c60e09d66fbd53aa73db820d38d18d1e0..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/kuser32.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AArch32 user helpers. - * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. - * - * Copyright (C) 2005-2011 Nicolas Pitre - * Copyright (C) 2012-2018 ARM Ltd. - * - * The kuser helpers below are mapped at a fixed address by - * aarch32_setup_additional_pages() and are provided for compatibility - * reasons with 32 bit (aarch32) applications that need them. - * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. 
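- *
- * The helpers sit at fixed, ABI-stable slots below 0xffff1000:
- * __kuser_cmpxchg64 at 0xffff0f60, __kuser_memory_barrier at 0xffff0fa0,
- * __kuser_cmpxchg at 0xffff0fc0, __kuser_get_tls at 0xffff0fe0, and the
- * version word at 0xffff0ffc. A 32-bit process calls them by casting the
- * address, e.g. (illustrative only):
- *
- *	typedef int (*kuser_cmpxchg_t)(u32 old, u32 new, volatile u32 *ptr);
- *	int ok = ((kuser_cmpxchg_t)0xffff0fc0)(o, n, p) == 0;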
- */ - -#include - - .align 5 - .globl __kuser_helper_start -__kuser_helper_start: - -__kuser_cmpxchg64: // 0xffff0f60 - .inst 0xe92d00f0 // push {r4, r5, r6, r7} - .inst 0xe1c040d0 // ldrd r4, r5, [r0] - .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] - .inst 0xe0303004 // eors r3, r0, r4 - .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23e96 // stlexdeq r3, r6, [r2] - .inst 0x03330001 // teqeq r3, #1 - .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05b // dmb ish - .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05b // dmb ish - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923f9f // 1: ldrex r3, [r2] - .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823e91 // stlexeq r3, r1, [r2] - .inst 0x03330001 // teqeq r3, #1 - .inst 0x0afffffa // beq 1b - .inst 0xf57ff05b // dmb ish - .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_get_tls: // 0xffff0fe0 - .inst 0xee1d0f70 // mrc p15, 0, r0, c13, c0, 3 - .inst 0xe12fff1e // bx lr - .rep 5 - .word 0 - .endr - -__kuser_helper_version: // 0xffff0ffc - .word ((__kuser_helper_end - __kuser_helper_start) >> 5) - .globl __kuser_helper_end -__kuser_helper_end: diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S deleted file mode 100644 index 45dce03aaeafc12ab196d6b444e9331654b7fad2..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * trampoline entry and return code for kretprobes. - */ - -#include -#include -#include - - .text - - .macro save_all_base_regs - stp x0, x1, [sp, #S_X0] - stp x2, x3, [sp, #S_X2] - stp x4, x5, [sp, #S_X4] - stp x6, x7, [sp, #S_X6] - stp x8, x9, [sp, #S_X8] - stp x10, x11, [sp, #S_X10] - stp x12, x13, [sp, #S_X12] - stp x14, x15, [sp, #S_X14] - stp x16, x17, [sp, #S_X16] - stp x18, x19, [sp, #S_X18] - stp x20, x21, [sp, #S_X20] - stp x22, x23, [sp, #S_X22] - stp x24, x25, [sp, #S_X24] - stp x26, x27, [sp, #S_X26] - stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE - stp lr, x0, [sp, #S_LR] - /* - * Construct a useful saved PSTATE - */ - mrs x0, nzcv - mrs x1, daif - orr x0, x0, x1 - mrs x1, CurrentEL - orr x0, x0, x1 - mrs x1, SPSel - orr x0, x0, x1 - stp xzr, x0, [sp, #S_PC] - .endm - - .macro restore_all_base_regs - ldr x0, [sp, #S_PSTATE] - and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) - msr nzcv, x0 - ldp x0, x1, [sp, #S_X0] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldp x8, x9, [sp, #S_X8] - ldp x10, x11, [sp, #S_X10] - ldp x12, x13, [sp, #S_X12] - ldp x14, x15, [sp, #S_X14] - ldp x16, x17, [sp, #S_X16] - ldp x18, x19, [sp, #S_X18] - ldp x20, x21, [sp, #S_X20] - ldp x22, x23, [sp, #S_X22] - ldp x24, x25, [sp, #S_X24] - ldp x26, x27, [sp, #S_X26] - ldp x28, x29, [sp, #S_X28] - .endm - -ENTRY(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE - - save_all_base_regs - - mov x0, sp - bl trampoline_probe_handler - /* - * Replace trampoline address in lr with actual orig_ret_addr return - * address. 
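- * trampoline_probe_handler() hands back the return address that the
- * kretprobe saved at function entry, so the final ret resumes the real
- * caller as though no probe had fired.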
- */ - mov lr, x0 - - restore_all_base_regs - - add sp, sp, #S_FRAME_SIZE - ret - -ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S deleted file mode 100644 index 16a34f188f2672d01298b42cc7385da6c78bc14b..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/reloc_test_syms.S +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 Linaro, Ltd. - */ - -#include - -ENTRY(absolute_data64) - ldr x0, 0f - ret -0: .quad sym64_abs -ENDPROC(absolute_data64) - -ENTRY(absolute_data32) - ldr w0, 0f - ret -0: .long sym32_abs -ENDPROC(absolute_data32) - -ENTRY(absolute_data16) - adr x0, 0f - ldrh w0, [x0] - ret -0: .short sym16_abs, 0 -ENDPROC(absolute_data16) - -ENTRY(signed_movw) - movz x0, #:abs_g2_s:sym64_abs - movk x0, #:abs_g1_nc:sym64_abs - movk x0, #:abs_g0_nc:sym64_abs - ret -ENDPROC(signed_movw) - -ENTRY(unsigned_movw) - movz x0, #:abs_g3:sym64_abs - movk x0, #:abs_g2_nc:sym64_abs - movk x0, #:abs_g1_nc:sym64_abs - movk x0, #:abs_g0_nc:sym64_abs - ret -ENDPROC(unsigned_movw) - - .align 12 - .space 0xff8 -ENTRY(relative_adrp) - adrp x0, sym64_rel - add x0, x0, #:lo12:sym64_rel - ret -ENDPROC(relative_adrp) - - .align 12 - .space 0xffc -ENTRY(relative_adrp_far) - adrp x0, memstart_addr - add x0, x0, #:lo12:memstart_addr - ret -ENDPROC(relative_adrp_far) - -ENTRY(relative_adr) - adr x0, sym64_rel - ret -ENDPROC(relative_adr) - -ENTRY(relative_data64) - adr x1, 0f - ldr x0, [x1] - add x0, x0, x1 - ret -0: .quad sym64_rel - . -ENDPROC(relative_data64) - -ENTRY(relative_data32) - adr x1, 0f - ldr w0, [x1] - add x0, x0, x1 - ret -0: .long sym64_rel - . -ENDPROC(relative_data32) - -ENTRY(relative_data16) - adr x1, 0f - ldrsh w0, [x1] - add x0, x0, x1 - ret -0: .short sym64_rel - ., 0 -ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S deleted file mode 100644 index c1d7db71a7269c622c11f181ec5929a5fa351a7f..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/relocate_kernel.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * kexec for arm64 - * - * Copyright (C) Linaro. - * Copyright (C) Huawei Futurewei Technologies. - */ - -#include -#include - -#include -#include -#include -#include - -/* - * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. - * - * The memory that the old kernel occupies may be overwritten when coping the - * new image to its final location. To assure that the - * arm64_relocate_new_kernel routine which does that copy is not overwritten, - * all code and data needed by arm64_relocate_new_kernel must be between the - * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The - * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec - * control_code_page, a special page which has been set up to be preserved - * during the copy operation. - */ -ENTRY(arm64_relocate_new_kernel) - - /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ - raw_dcache_line_size x15, x0 /* x15 = dcache line size */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ - - /* Clear the sctlr_el2 flags. 
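- * If we entered at EL2, the boot protocol requires the MMU and caches
- * to be off before branching to the new image, so the SCTLR_ELx_FLAGS
- * bits (MMU, alignment checks, D/I-cache) are masked out first.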
*/ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - ldr x1, =SCTLR_ELx_FLAGS - bic x0, x0, x1 - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb -1: - - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone - -.Lloop: - and x12, x16, PAGE_MASK /* x12 = addr */ - - /* Test the entry flags. */ -.Ltest_source: - tbz x16, IND_SOURCE_BIT, .Ltest_indirection - - /* Invalidate dest page to PoC. */ - mov x0, x13 - add x20, x0, #PAGE_SIZE - sub x1, x15, #1 - bic x0, x0, x1 -2: dc ivac, x0 - add x0, x0, x15 - cmp x0, x20 - b.lo 2b - dsb sy - - mov x20, x13 - mov x21, x12 - copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 - - /* dest += PAGE_SIZE */ - add x13, x13, PAGE_SIZE - b .Lnext - -.Ltest_indirection: - tbz x16, IND_INDIRECTION_BIT, .Ltest_destination - - /* ptr = addr */ - mov x14, x12 - b .Lnext - -.Ltest_destination: - tbz x16, IND_DESTINATION_BIT, .Lnext - - /* dest = addr */ - mov x13, x12 - -.Lnext: - /* entry = *ptr++ */ - ldr x16, [x14], #8 - - /* while (!(entry & DONE)) */ - tbz x16, IND_DONE_BIT, .Lloop - -.Ldone: - /* wait for writes from copy_page to finish */ - dsb nsh - ic iallu - dsb nsh - isb - - /* Start new image. */ - mov x0, x18 - mov x1, xzr - mov x2, xzr - mov x3, xzr - br x17 - -ENDPROC(arm64_relocate_new_kernel) - -.ltorg - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S deleted file mode 100644 index 475d30d471ac1634364bab74e7f3d58c0dfc1fb6..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/sigreturn32.S +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AArch32 sigreturn code. - * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. - * - * Copyright (C) 2005-2011 Nicolas Pitre - * Copyright (C) 2012-2018 ARM Ltd. - * - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - * - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. 
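- *
- * The sequences below are emitted with .byte so that the syscall number
- * lands in each instruction's immediate field: in A32, "mov r7, #imm"
- * (e3a070xx) and "svc #imm" (efxxxxxx) keep it in the low byte; in
- * Thumb, "mov r7, #imm8" encodes as (imm8, 0x27) and "svc #imm8" as
- * (imm8, 0xdf).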
- */ - -#include - - .globl __aarch32_sigret_code_start -__aarch32_sigret_code_start: - - /* - * ARM Code - */ - .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn - - /* - * ARM code - */ - .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn - - .globl __aarch32_sigret_code_end -__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S deleted file mode 100644 index f5b04dd8a7107275b7cd145a9c29c38a8dd98cf4..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/sleep.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include - - .text -/* - * Implementation of MPIDR_EL1 hash algorithm through shifting - * and OR'ing. - * - * @dst: register containing hash result - * @rs0: register containing affinity level 0 bit shift - * @rs1: register containing affinity level 1 bit shift - * @rs2: register containing affinity level 2 bit shift - * @rs3: register containing affinity level 3 bit shift - * @mpidr: register containing MPIDR_EL1 value - * @mask: register containing MPIDR mask - * - * Pseudo C-code: - * - *u32 dst; - * - *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { - * u32 aff0, aff1, aff2, aff3; - * u64 mpidr_masked = mpidr & mask; - * aff0 = mpidr_masked & 0xff; - * aff1 = mpidr_masked & 0xff00; - * aff2 = mpidr_masked & 0xff0000; - * aff3 = mpidr_masked & 0xff00000000; - * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); - *} - * Input registers: rs0, rs1, rs2, rs3, mpidr, mask - * Output register: dst - * Note: input and output registers must be disjoint register sets - (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) - */ - .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask - and \mpidr, \mpidr, \mask // mask out MPIDR bits - and \dst, \mpidr, #0xff // mask=aff0 - lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 - and \mask, \mpidr, #0xff00 // mask = aff1 - lsr \mask ,\mask, \rs1 - orr \dst, \dst, \mask // dst|=(aff1>>rs1) - and \mask, \mpidr, #0xff0000 // mask = aff2 - lsr \mask ,\mask, \rs2 - orr \dst, \dst, \mask // dst|=(aff2>>rs2) - and \mask, \mpidr, #0xff00000000 // mask = aff3 - lsr \mask ,\mask, \rs3 - orr \dst, \dst, \mask // dst|=(aff3>>rs3) - .endm -/* - * Save CPU state in the provided sleep_stack_data area, and publish its - * location for cpu_resume()'s use in sleep_save_stash. - * - * cpu_resume() will restore this saved state, and return. Because the - * link-register is saved and restored, it will appear to return from this - * function. So that the caller can tell the suspend/resume paths apart, - * __cpu_suspend_enter() will always return a non-zero value, whereas the - * path through cpu_resume() will return 0. 
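- * A caller can therefore distinguish the two paths (sketch; cpu_off()
- * stands in for the actual firmware call):
- *
- *	if (__cpu_suspend_enter(&state))
- *		cpu_off();	// suspend: never returns on success
- *	else
- *		return 0;	// woke up again through cpu_resume()
- *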
- * - * x0 = struct sleep_stack_data area - */ -ENTRY(__cpu_suspend_enter) - stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] - stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] - stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] - stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] - stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] - stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] - - /* save the sp in cpu_suspend_ctx */ - mov x2, sp - str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] - - /* find the mpidr_hash */ - ldr_l x1, sleep_save_stash - mrs x7, mpidr_el1 - adr_l x9, mpidr_hash - ldr x10, [x9, #MPIDR_HASH_MASK] - /* - * Following code relies on the struct mpidr_hash - * members size. - */ - ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] - ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] - compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 - add x1, x1, x8, lsl #3 - - str x0, [x1] - add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS - stp x29, lr, [sp, #-16]! - bl cpu_do_suspend - ldp x29, lr, [sp], #16 - mov x0, #1 - ret -ENDPROC(__cpu_suspend_enter) - - .pushsection ".idmap.text", "awx" -ENTRY(cpu_resume) - bl el2_setup // if in EL2 drop to EL1 cleanly - bl __cpu_setup - /* enable the MMU early - so we can access sleep_save_stash by va */ - adrp x1, swapper_pg_dir - bl __enable_mmu - ldr x8, =_cpu_resume - br x8 -ENDPROC(cpu_resume) - .ltorg - .popsection - -ENTRY(_cpu_resume) - mrs x1, mpidr_el1 - adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address - - /* retrieve mpidr_hash members to compute the hash */ - ldr x2, [x8, #MPIDR_HASH_MASK] - ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] - ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] - compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 - - /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_stash - ldr x0, [x0, x7, lsl #3] - add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS - add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS - /* load sp from context */ - ldr x2, [x0, #CPU_CTX_SP] - mov sp, x2 - /* - * cpu_do_resume expects x0 to contain context address pointer - */ - bl cpu_do_resume - -#ifdef CONFIG_KASAN - mov x0, sp - bl kasan_unpoison_task_stack_below -#endif - - ldp x19, x20, [x29, #16] - ldp x21, x22, [x29, #32] - ldp x23, x24, [x29, #48] - ldp x25, x26, [x29, #64] - ldp x27, x28, [x29, #80] - ldp x29, lr, [x29] - mov x0, #0 - ret -ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S deleted file mode 100644 index 54655273d1e0ba9e619953e9ce4d3bbfd73426da..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/smccc-call.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2015, Linaro Limited - */ -#include -#include - -#include -#include - - .macro SMCCC instr - .cfi_startproc - \instr #0 - ldr x4, [sp] - stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] - stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #8] - cbz x4, 1f /* no quirk structure */ - ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] - cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 - b.ne 1f - str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ret - .cfi_endproc - .endm - -/* - * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_smc) - SMCCC smc -ENDPROC(__arm_smccc_smc) -EXPORT_SYMBOL(__arm_smccc_smc) - -/* - * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, - * 
unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_hvc) - SMCCC hvc -ENDPROC(__arm_smccc_hvc) -EXPORT_SYMBOL(__arm_smccc_hvc) diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh old mode 100755 new mode 100644 diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S deleted file mode 100644 index 0ce6ec75a525298b44de658c265f4762ecf4dd00..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/note.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include -#include -#include -#include - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END - -BUILD_SALT diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S deleted file mode 100644 index 0723aa398d6eeea4a5baa58c17c6027fc8faea86..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Sigreturn trampoline for returning from a signal when the SA_RESTORER - * flag is not set. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include - - .text - - nop -ENTRY(__kernel_rt_sigreturn) - .cfi_startproc - .cfi_signal_frame - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 - mov x8, #__NR_rt_sigreturn - svc #0 - .cfi_endproc -ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S deleted file mode 100644 index d1414fee5274b7fbe76933672aeb99eb3884df0d..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/vdso.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include -#include -#include - - .globl vdso_start, vdso_end - .section .rodata - .balign PAGE_SIZE -vdso_start: - .incbin "arch/arm64/kernel/vdso/vdso.so" - .balign PAGE_SIZE -vdso_end: - - .previous diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S deleted file mode 100644 index 815df253f96e055a3ee584a7364503d02d698ed2..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * GNU linker script for the VDSO library. -* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * Heavily based on the vDSO linker scripts for other archs. - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") -OUTPUT_ARCH(aarch64) - -SECTIONS -{ - PROVIDE(_vdso_data = . - PAGE_SIZE); - . = VDSO_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - /* - * Discard .note.gnu.property sections which are unused and have - * different alignment requirement from vDSO note sections. 
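- * (GNU property notes are 8-byte aligned while the vDSO's other notes
- * are 4-byte aligned; merging both into one PT_NOTE segment would
- * confuse note parsers.)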
- */ - /DISCARD/ : { - *(.note.GNU-stack .note.gnu.property) - } - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - - .text : { *(.text*) } :text =0xd503201f - PROVIDE (__etext = .); - PROVIDE (_etext = .); - PROVIDE (etext = .); - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - _end = .; - PROVIDE(end = .); - - /DISCARD/ : { - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.6.39 { - global: - __kernel_rt_sigreturn; - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - local: *; - }; -} - -/* - * Make the sigreturn code visible to the kernel. - */ -VDSO_sigtramp = __kernel_rt_sigreturn; diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S deleted file mode 100644 index 1a81277c2d09a7798397d40185693e7df13e91fd..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file provides both A32 and T32 versions, in accordance with the - * arm sigreturn code. - * - * Copyright (C) 2018 ARM Limited - */ - -#include -#include -#include - -#define ARM_ENTRY(name) \ - ENTRY(name) - -#define ARM_ENDPROC(name) \ - .type name, %function; \ - END(name) - - .text - - .arm - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_sigreturn_arm) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_sigreturn_arm) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_rt_sigreturn_arm) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_rt_sigreturn_arm) - - .thumb - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_sigreturn_thumb) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_sigreturn_thumb) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_rt_sigreturn_thumb) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S deleted file mode 100644 index e72ac7bc4c04f483f38e588c3098c53c96531a8d..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/vdso.S +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 ARM Limited - */ - -#include -#include -#include -#include - - .globl vdso32_start, vdso32_end - .section .rodata - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/arm64/kernel/vdso32/vdso.so" - .balign PAGE_SIZE -vdso32_end: - - .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S deleted file mode 100644 index a3944927eaeb49cc29f07e327fdf5f0e40de1cfe..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
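The 64-bit version script above is what makes this vDSO usable: the four __kernel_* symbols exported under LINUX_2.6.39 are the only ones a C library may bind to. Nothing special is needed in userspace; a libc that locates the vDSO (via the AT_SYSINFO_EHDR auxv entry) routes the plain POSIX call through __kernel_clock_gettime without entering the kernel:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            /* fast path: no syscall when the vDSO services this clock */
            if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
                    return 1;

            printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }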
-/* - * Adapted from arm64 version. - * - * GNU linker script for the VDSO library. - * Heavily based on the vDSO linker scripts for other archs. - * - * Copyright (C) 2012-2018 ARM Limited - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") -OUTPUT_ARCH(arm) - -SECTIONS -{ - PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); - . = VDSO_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - .text : { *(.text*) } :text =0xe7f001f2 - - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ -} - -VERSION -{ - LINUX_2.6 { - global: - __vdso_clock_gettime; - __vdso_gettimeofday; - __vdso_clock_getres; - __kernel_sigreturn_arm; - __kernel_sigreturn_thumb; - __kernel_rt_sigreturn_arm; - __kernel_rt_sigreturn_thumb; - __vdso_clock_gettime64; - local: *; - }; -} - -/* - * Make the sigreturn code visible to the kernel. - */ -VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; -VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; -VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; -VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S deleted file mode 100644 index 0bab37b1acbe98b857d56166ba497c0507c7c537..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "image.h" - -/* .exit.text needed in case of alternative patching */ -#define ARM_EXIT_KEEP(x) x -#define ARM_EXIT_DISCARD(x) - -OUTPUT_ARCH(aarch64) -ENTRY(_text) - -jiffies = jiffies_64; - - -#define HYPERVISOR_EXTABLE \ - . = ALIGN(SZ_8); \ - __start___kvm_ex_table = .; \ - *(__kvm_ex_table) \ - __stop___kvm_ex_table = .; - -#define HYPERVISOR_TEXT \ - /* \ - * Align to 4 KB so that \ - * a) the HYP vector table is at its minimum \ - * alignment of 2048 bytes \ - * b) the HYP init code will not cross a page \ - * boundary if its size does not exceed \ - * 4 KB (see related ASSERT() below) \ - */ \ - . = ALIGN(SZ_4K); \ - __hyp_idmap_text_start = .; \ - *(.hyp.idmap.text) \ - __hyp_idmap_text_end = .; \ - __hyp_text_start = .; \ - *(.hyp.text) \ - HYPERVISOR_EXTABLE \ - __hyp_text_end = .; - -#define IDMAP_TEXT \ - . = ALIGN(SZ_4K); \ - __idmap_text_start = .; \ - *(.idmap.text) \ - __idmap_text_end = .; - -#ifdef CONFIG_HIBERNATION -#define HIBERNATE_TEXT \ - . 
= ALIGN(SZ_4K); \ - __hibernate_exit_text_start = .; \ - *(.hibernate_exit.text) \ - __hibernate_exit_text_end = .; -#else -#define HIBERNATE_TEXT -#endif - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define TRAMP_TEXT \ - . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_start = .; \ - *(.entry.tramp.text) \ - . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; -#else -#define TRAMP_TEXT -#endif - -/* - * The size of the PE/COFF section that covers the kernel image, which - * runs from stext to _edata, must be a round multiple of the PE/COFF - * FileAlignment, which we set to its minimum value of 0x200. 'stext' - * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned - * boundary should be sufficient. - */ -PECOFF_FILE_ALIGNMENT = 0x200; - -#ifdef CONFIG_EFI -#define PECOFF_EDATA_PADDING \ - .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } -#else -#define PECOFF_EDATA_PADDING -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - */ - /DISCARD/ : { - ARM_EXIT_DISCARD(EXIT_TEXT) - ARM_EXIT_DISCARD(EXIT_DATA) - EXIT_CALL - *(.discard) - *(.discard.*) - *(.interp .dynamic) - *(.dynsym .dynstr .hash .gnu.hash) - *(.eh_frame) - } - - . = KIMAGE_VADDR + TEXT_OFFSET; - - .head.text : { - _text = .; - HEAD_TEXT - } - .text : { /* Real text segment */ - _stext = .; /* Text and read-only data */ - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - ENTRY_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - HYPERVISOR_TEXT - IDMAP_TEXT - HIBERNATE_TEXT - TRAMP_TEXT - *(.fixup) - *(.gnu.warning) - . = ALIGN(16); - *(.got) /* Global offset table */ - } - - . = ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text section */ - - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ - NOTES - - . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - idmap_pg_end = .; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += PAGE_SIZE; - swapper_pg_end = .; - - . = ALIGN(SEGMENT_ALIGN); - __init_begin = .; - __inittext_begin = .; - - INIT_TEXT_SECTION(8) - - __exittext_begin = .; - .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) - } - __exittext_end = .; - - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - - . = ALIGN(PAGE_SIZE); - __inittext_end = .; - __initdata_begin = .; - - .init.data : { - INIT_DATA - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - INIT_RAM_FS - *(.init.rodata.* .init.bss) /* from the EFI stub */ - } - .exit.data : { - ARM_EXIT_KEEP(EXIT_DATA) - } - - PERCPU_SECTION(L1_CACHE_BYTES) - - .rela.dyn : ALIGN(8) { - *(.rela .rela*) - } - - __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela.dyn); - -#ifdef CONFIG_RELR - .relr.dyn : ALIGN(8) { - *(.relr.dyn) - } - - __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); - __relr_size = SIZEOF(.relr.dyn); -#endif - - . 
= ALIGN(SEGMENT_ALIGN); - __initdata_end = .; - __init_end = .; - - _data = .; - _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) - - /* - * Data written with the MMU off but read with the MMU on requires - * cache lines to be invalidated, discarding up to a Cache Writeback - * Granule (CWG) of data from the cache. Keep the section that - * requires this type of maintenance to be in its own Cache Writeback - * Granule (CWG) area so the cache maintenance operations don't - * interfere with adjacent data. - */ - .mmuoff.data.write : ALIGN(SZ_2K) { - __mmuoff_data_start = .; - *(.mmuoff.data.write) - } - . = ALIGN(SZ_2K); - .mmuoff.data.read : { - *(.mmuoff.data.read) - __mmuoff_data_end = .; - } - - PECOFF_EDATA_PADDING - __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); - _edata = .; - - BSS_SECTION(0, 0, 0) - - . = ALIGN(PAGE_SIZE); - init_pg_dir = .; - . += INIT_DIR_SIZE; - init_pg_end = .; - - __pecoff_data_size = ABSOLUTE(. - __initdata_begin); - _end = .; - - STABS_DEBUG - - HEAD_SYMBOLS -} - -#include "image-vars.h" - -/* - * The HYP init code and ID map text can't be longer than a page each, - * and should not cross a page boundary. - */ -ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, - "HYP init code too big or misaligned") -ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, - "ID map text too big or misaligned") -#ifdef CONFIG_HIBERNATION -ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) - <= SZ_4K, "Hibernate exit text too big or misaligned") -#endif -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, - "Entry trampoline text too big") -#endif -/* - * If padding is applied before .head.text, virt<->phys conversions will fail. - */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S deleted file mode 100644 index dc41b505507d7afb7aca2b2235361dd54ddcef3f..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp-init.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.idmap.text, "ax" - - .align 11 - -ENTRY(__kvm_hyp_init) - ventry __invalid // Synchronous EL2t - ventry __invalid // IRQ EL2t - ventry __invalid // FIQ EL2t - ventry __invalid // Error EL2t - - ventry __invalid // Synchronous EL2h - ventry __invalid // IRQ EL2h - ventry __invalid // FIQ EL2h - ventry __invalid // Error EL2h - - ventry __do_hyp_init // Synchronous 64-bit EL1 - ventry __invalid // IRQ 64-bit EL1 - ventry __invalid // FIQ 64-bit EL1 - ventry __invalid // Error 64-bit EL1 - - ventry __invalid // Synchronous 32-bit EL1 - ventry __invalid // IRQ 32-bit EL1 - ventry __invalid // FIQ 32-bit EL1 - ventry __invalid // Error 32-bit EL1 - -__invalid: - b . 
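The __kvm_hyp_init vector table just laid out follows the fixed architectural shape: sixteen slots of 0x80 bytes each, in four groups (current EL with SP_EL0, current EL with SP_ELx, lower EL AArch64, lower EL AArch32), with the table 2 KB aligned, which is what the ".align 11" above provides. A small C sketch of the slot arithmetic, with names invented for illustration:

    /* fixed by the AArch64 exception model */
    enum vec_kind  { V_SYNC, V_IRQ, V_FIQ, V_SERROR };
    enum vec_group { G_EL2T, G_EL2H, G_LOWER_64, G_LOWER_32 };

    #define VENTRY_OFFSET(group, kind)  (((group) * 4 + (kind)) * 0x80)

    /* e.g. the sync-from-64-bit-EL1 slot, the one holding __do_hyp_init:
     * VENTRY_OFFSET(G_LOWER_64, V_SYNC) == 0x400 */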
- - /* - * x0: HYP pgd - * x1: HYP stack - * x2: HYP vectors - * x3: per-CPU offset - */ -__do_hyp_init: - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.lo __kvm_handle_stub_hvc - - phys_to_ttbr x4, x0 -alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT -alternative_else_nop_endif - msr ttbr0_el2, x4 - - mrs x4, tcr_el1 - ldr x5, =TCR_EL2_MASK - and x4, x4, x5 - mov x5, #TCR_EL2_RES1 - orr x4, x4, x5 - - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x5, idmap_t0sz - bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH - - /* - * Set the PS bits in TCR_EL2. - */ - tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 - - msr tcr_el2, x4 - - mrs x4, mair_el1 - msr mair_el2, x4 - isb - - /* Invalidate the stale TLBs from Bootloader */ - tlbi alle2 - dsb sy - - /* - * Preserve all the RES1 bits while setting the default flags, - * as well as the EE bit on BE. Drop the A flag since the compiler - * is allowed to generate unaligned accesses. - */ - ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) -CPU_BE( orr x4, x4, #SCTLR_ELx_EE) - msr sctlr_el2, x4 - isb - - /* Set the stack and new vectors */ - kern_hyp_va x1 - mov sp, x1 - msr vbar_el2, x2 - - /* Set tpidr_el2 for use by HYP */ - msr tpidr_el2, x3 - - /* Hello, World! */ - eret -ENDPROC(__kvm_hyp_init) - -ENTRY(__kvm_handle_stub_hvc) - cmp x0, #HVC_SOFT_RESTART - b.ne 1f - - /* This is where we're about to jump, staying at EL2 */ - msr elr_el2, x1 - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) - msr spsr_el2, x0 - - /* Shuffle the arguments, and don't come back */ - mov x0, x2 - mov x1, x3 - mov x2, x4 - b reset - -1: cmp x0, #HVC_RESET_VECTORS - b.ne 1f - - /* - * Set the HVC_RESET_VECTORS return code before entering the common - * path so that we do not clobber x0-x2 in case we are coming via - * HVC_SOFT_RESTART. - */ - mov x0, xzr -reset: - /* Reset kvm back to the hyp stub. */ - mrs x5, sctlr_el2 - ldr x6, =SCTLR_ELx_FLAGS - bic x5, x5, x6 // Clear SCTL_M and etc - pre_disable_mmu_workaround - msr sctlr_el2, x5 - isb - - /* Install stub vectors */ - adr_l x5, __hyp_stub_vectors - msr vbar_el2, x5 - eret - -1: /* Bad stub call */ - ldr x0, =HVC_STUB_ERR - eret - -ENDPROC(__kvm_handle_stub_hvc) - - .ltorg - - .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S deleted file mode 100644 index c0094d520dffedf74cd8df54aeea713bcdaf6d1d..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include - -/* - * u64 __kvm_call_hyp(void *hypfn, ...); - * - * This is not really a variadic function in the classic C-way and care must - * be taken when calling this to ensure parameters are passed in registers - * only, since the stack will change between the caller and the callee. 
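In other words, __kvm_call_hyp is a register-passthrough trampoline: the HVC traps to EL2, where the hyp vectors recover the function pointer from x0 and branch to it with the remaining argument registers intact. A hedged caller sketch; kvm_ksym_ref() and __kvm_vcpu_run are real names from this kernel generation, but the wrapper below is invented purely for illustration:

    struct kvm_vcpu;

    /* declaration, as in asm/kvm_asm.h */
    u64 __kvm_call_hyp(void *hypfn, ...);

    static u64 run_vcpu_at_el2(struct kvm_vcpu *vcpu)
    {
            /* hypfn travels in x0; do_el2_call shuffles 'vcpu' into x0 */
            return __kvm_call_hyp(kvm_ksym_ref(__kvm_vcpu_run), vcpu);
    }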
- * - * Call the function with the first argument containing a pointer to the - * function you wish to call in Hyp mode, and subsequent arguments will be - * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the - * function pointer can be passed). The function being called must be mapped - * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in x0. - * - * A function pointer with a value less than 0xfff has a special meaning, - * and is used to implement hyp stubs in the same way as in - * arch/arm64/kernel/hyp_stub.S. - */ -ENTRY(__kvm_call_hyp) - hvc #0 - ret -ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S deleted file mode 100644 index dc3d7bc2292fd08dab40737454b89eba7fcd78da..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp/entry.S +++ /dev/null @@ -1,198 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - -.macro save_callee_saved_regs ctxt - stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -.macro restore_callee_saved_regs ctxt - ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -/* - * u64 __guest_enter(struct kvm_vcpu *vcpu, - * struct kvm_cpu_context *host_ctxt); - */ -ENTRY(__guest_enter) - // x0: vcpu - // x1: host context - // x2-x17: clobbered by macros - // x18: guest context - - // Store the host regs - save_callee_saved_regs x1 - - // Now the host state is stored if we have a pending RAS SError it must - // affect the host. If any asynchronous exception is pending we defer - // the guest entry. The DSB isn't necessary before v8.2 as any SError - // would be fatal. -alternative_if ARM64_HAS_RAS_EXTN - dsb nshst - isb -alternative_else_nop_endif - mrs x1, isr_el1 - cbz x1, 1f - mov x0, #ARM_EXCEPTION_IRQ - ret - -1: - add x18, x0, #VCPU_CONTEXT - - // Macro ptrauth_switch_to_guest format: - // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) - // The below macro to restore guest keys is not implemented in C code - // as it may cause Pointer Authentication key signing mismatch errors - // when this feature is enabled for kernel code. - ptrauth_switch_to_guest x18, x0, x1, x2 - - // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] - - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] - - // Do not touch any register after this! 
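CPU_XREG_OFFSET(n) in the entry code below resolves to the byte offset of general-purpose register n inside struct kvm_cpu_context, which is why plain stp/ldp pairs can walk the save area directly. A sketch of the equivalent offsetof(), assuming the v5.4-era layout in which the context embeds struct kvm_regs and its struct user_pt_regs regs[31] array:

    #include <linux/stddef.h>

    /* mirrors CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8 * n) */
    #define XREG_OFFSET(n) \
            offsetof(struct kvm_cpu_context, gp_regs.regs.regs[n])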
- eret - sb -ENDPROC(__guest_enter) - -ENTRY(__guest_exit) - // x0: return code - // x1: vcpu - // x2-x29,lr: vcpu regs - // vcpu x0-x1 on the stack - - add x1, x1, #VCPU_CONTEXT - - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - - // Store the guest regs x2 and x3 - stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] - - // Retrieve the guest regs x0-x1 from the stack - ldp x2, x3, [sp], #16 // x0, x1 - - // Store the guest regs x0-x1 and x4-x18 - stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] - stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] - stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] - stp x8, x9, [x1, #CPU_XREG_OFFSET(8)] - stp x10, x11, [x1, #CPU_XREG_OFFSET(10)] - stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] - stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] - stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - - // Store the guest regs x19-x29, lr - save_callee_saved_regs x1 - - get_host_ctxt x2, x3 - - // Macro ptrauth_switch_to_guest format: - // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) - // The below macro to save/restore keys is not implemented in C code - // as it may cause Pointer Authentication key signing mismatch errors - // when this feature is enabled for kernel code. - ptrauth_switch_to_host x1, x2, x3, x4, x5 - - // Now restore the host regs - restore_callee_saved_regs x2 - -alternative_if ARM64_HAS_RAS_EXTN - // If we have the RAS extensions we can consume a pending error - // without an unmask-SError and isb. The ESB-instruction consumed any - // pending guest error when we took the exception from the guest. - mrs_s x2, SYS_DISR_EL1 - str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] - cbz x2, 1f - msr_s SYS_DISR_EL1, xzr - orr x0, x0, #(1< - */ - -#include - -#include - - .text - .pushsection .hyp.text, "ax" - -ENTRY(__fpsimd_save_state) - fpsimd_save x0, 1 - ret -ENDPROC(__fpsimd_save_state) - -ENTRY(__fpsimd_restore_state) - fpsimd_restore x0, 1 - ret -ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S deleted file mode 100644 index f36aad0f207bb582206e89cbe75c914df8c80032..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ /dev/null @@ -1,350 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -.macro save_caller_saved_regs_vect - /* x0 and x1 were saved in the vector entry */ - stp x2, x3, [sp, #-16]! - stp x4, x5, [sp, #-16]! - stp x6, x7, [sp, #-16]! - stp x8, x9, [sp, #-16]! - stp x10, x11, [sp, #-16]! - stp x12, x13, [sp, #-16]! - stp x14, x15, [sp, #-16]! - stp x16, x17, [sp, #-16]! -.endm - -.macro restore_caller_saved_regs_vect - ldp x16, x17, [sp], #16 - ldp x14, x15, [sp], #16 - ldp x12, x13, [sp], #16 - ldp x10, x11, [sp], #16 - ldp x8, x9, [sp], #16 - ldp x6, x7, [sp], #16 - ldp x4, x5, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x0, x1, [sp], #16 -.endm - - .text - .pushsection .hyp.text, "ax" - -.macro do_el2_call - /* - * Shuffle the parameters before calling the function - * pointed to in x0. Assumes parameters in x[1,2,3]. - */ - str lr, [sp, #-16]! 
- mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - ldr lr, [sp], #16 -.endm - -el1_sync: // Guest trapped into EL2 - - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - cmp x0, #ESR_ELx_EC_HVC64 - ccmp x0, #ESR_ELx_EC_HVC32, #4, ne - b.ne el1_trap - - mrs x1, vttbr_el2 // If vttbr is valid, the guest - cbnz x1, el1_hvc_guest // called HVC - - /* Here, we're pretty sure the host called HVC. */ - ldp x0, x1, [sp], #16 - - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.hs 1f - - /* - * Compute the idmap address of __kvm_handle_stub_hvc and - * jump there. Since we use kimage_voffset, do not use the - * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead - * (by loading it from the constant pool). - * - * Preserve x0-x4, which may contain stub parameters. - */ - ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 - br x5 - -1: - /* - * Perform the EL2 call - */ - kern_hyp_va x0 - do_el2_call - - eret - sb - -el1_hvc_guest: - /* - * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. - * The workaround has already been applied on the host, - * so let's quickly get back to the guest. We don't bother - * restoring x1, as it can be clobbered anyway. - */ - ldr x1, [sp] // Guest's x0 - eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 - cbz w1, wa_epilogue - - /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ - eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ - ARM_SMCCC_ARCH_WORKAROUND_2) - cbnz w1, el1_trap - -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b wa2_end -alternative_cb_end - get_vcpu_ptr x2, x0 - ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] - - // Sanitize the argument and update the guest flags - ldr x1, [sp, #8] // Guest's x1 - clz w1, w1 // Murphy's device: - lsr w1, w1, #5 // w1 = !!w1 without using - eor w1, w1, #1 // the flags... - bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 - str x0, [x2, #VCPU_WORKAROUND_FLAGS] - - /* Check that we actually need to perform the call */ - hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 - cbz x0, wa2_end - - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - smc #0 - - /* Don't leak data from the SMC call */ - mov x3, xzr -wa2_end: - mov x2, xzr - mov x1, xzr -#endif - -wa_epilogue: - mov x0, xzr - add sp, sp, #16 - eret - sb - -el1_trap: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_TRAP - b __guest_exit - -el1_irq: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_IRQ - b __guest_exit - -el1_error: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_EL1_SERROR - b __guest_exit - -el2_sync: - /* Check for illegal exception return */ - mrs x0, spsr_el2 - tbnz x0, #20, 1f - - save_caller_saved_regs_vect - stp x29, x30, [sp, #-16]! - bl kvm_unexpected_el2_exception - ldp x29, x30, [sp], #16 - restore_caller_saved_regs_vect - - eret - -1: - /* Let's attempt a recovery from the illegal exception return */ - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_IL - b __guest_exit - - -el2_error: - save_caller_saved_regs_vect - stp x29, x30, [sp, #-16]! 
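The three instructions commented "Murphy's device" above normalise w1 to exactly 0 or 1 without touching the flags, which the surrounding workaround code relies on: CLZ returns 32 only for a zero 32-bit input, so shifting the leading-zero count right by 5 yields 1 for zero and 0 otherwise, and the final EOR inverts that into !!w1. The same computation in C:

    static inline unsigned int normalise_bool(unsigned int x)
    {
            /* clz w1, w1 ; lsr w1, w1, #5 ; eor w1, w1, #1 */
            unsigned int lz = x ? __builtin_clz(x) : 32; /* CLZ(0) == 32 */

            return (lz >> 5) ^ 1;   /* == !!x */
    }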
- - bl kvm_unexpected_el2_exception - - ldp x29, x30, [sp], #16 - restore_caller_saved_regs_vect - - eret - sb - -ENTRY(__hyp_do_panic) - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - sb -ENDPROC(__hyp_do_panic) - -ENTRY(__hyp_panic) - get_host_ctxt x0, x1 - b hyp_panic -ENDPROC(__hyp_panic) - -.macro invalid_vector label, target = __hyp_panic - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid - invalid_vector el2t_irq_invalid - invalid_vector el2t_fiq_invalid - invalid_vector el2t_error_invalid - invalid_vector el2h_sync_invalid - invalid_vector el2h_irq_invalid - invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid - - .ltorg - - .align 11 - -.macro check_preamble_length start, end -/* kvm_patch_vector_branch() generates code that jumps over the preamble. */ -.if ((\end-\start) != KVM_VECTOR_PREAMBLE) - .error "KVM vector preamble length mismatch" -.endif -.endm - -.macro valid_vect target - .align 7 -661: - esb - stp x0, x1, [sp, #-16]! -662: - b \target - -check_preamble_length 661b, 662b -.endm - -.macro invalid_vect target - .align 7 -661: - b \target - nop -662: - ldp x0, x1, [sp], #16 - b \target - -check_preamble_length 661b, 662b -.endm - -ENTRY(__kvm_hyp_vector) - invalid_vect el2t_sync_invalid // Synchronous EL2t - invalid_vect el2t_irq_invalid // IRQ EL2t - invalid_vect el2t_fiq_invalid // FIQ EL2t - invalid_vect el2t_error_invalid // Error EL2t - - valid_vect el2_sync // Synchronous EL2h - invalid_vect el2h_irq_invalid // IRQ EL2h - invalid_vect el2h_fiq_invalid // FIQ EL2h - valid_vect el2_error // Error EL2h - - valid_vect el1_sync // Synchronous 64-bit EL1 - valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 - valid_vect el1_error // Error 64-bit EL1 - - valid_vect el1_sync // Synchronous 32-bit EL1 - valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 - valid_vect el1_error // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - -#ifdef CONFIG_KVM_INDIRECT_VECTORS -.macro hyp_ventry - .align 7 -1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch - stp x0, x1, [sp, #-16]! 
- b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) - nop - nop - nop -alternative_cb_end -.endm - -.macro generate_vectors -0: - .rept 16 - hyp_ventry - .endr - .org 0b + SZ_2K // Safety measure -.endm - - .align 11 -ENTRY(__bp_harden_hyp_vecs_start) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr -ENTRY(__bp_harden_hyp_vecs_end) - - .popsection - -ENTRY(__smccc_workaround_1_smc_start) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -ENTRY(__smccc_workaround_1_smc_end) -#endif diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S deleted file mode 100644 index 073acbf02a7c842520eeb42df0df4f2dd3a15480..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/clear_page.S +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include - -/* - * Clear page @dest - * - * Parameters: - * x0 - dest - */ -SYM_FUNC_START(clear_page) - mrs x1, dczid_el0 - and w1, w1, #0xf - mov x2, #4 - lsl x1, x2, x1 - -1: dc zva, x0 - add x0, x0, x1 - tst x0, #(PAGE_SIZE - 1) - b.ne 1b - ret -SYM_FUNC_END(clear_page) -EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S deleted file mode 100644 index 48a3a26eff663589a18c3944eff17d1240f66822..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/clear_user.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/clear_user.S - * - * Copyright (C) 2012 ARM Ltd. - */ -#include - -#include -#include - - .text - -/* Prototype: int __arch_clear_user(void *addr, size_t sz) - * Purpose : clear some user memory - * Params : addr - user memory address to clear - * : sz - number of bytes to clear - * Returns : number of bytes NOT cleared - * - * Alignment fixed up by hardware. - */ -SYM_FUNC_START(__arch_clear_user) - mov x2, x1 // save the size for fixup return - subs x1, x1, #8 - b.mi 2f -1: -uao_user_alternative 9f, str, sttr, xzr, x0, 8 - subs x1, x1, #8 - b.pl 1b -2: adds x1, x1, #4 - b.mi 3f -uao_user_alternative 9f, str, sttr, wzr, x0, 4 - sub x1, x1, #4 -3: adds x1, x1, #2 - b.mi 4f -uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 - sub x1, x1, #2 -4: adds x1, x1, #1 - b.mi 5f -uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 -5: mov x0, #0 - ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - - .section .fixup,"ax" - .align 2 -9: mov x0, x2 // return the original size - ret - .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S deleted file mode 100644 index 8e25e89ad01fd7daa41065bae7e1c9745dce986b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_from_user.S +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. 
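clear_page() earlier in this hunk sizes its DC ZVA loop from DCZID_EL0, whose low four bits encode log2 of the zeroing block size in 4-byte words; the byte size is therefore 4 << DCZID_EL0[3:0], 64 bytes on most implementations. DCZID_EL0 is one of the few system registers readable at EL0, so the computation can be shown directly:

    static inline unsigned long dc_zva_block_size(void)
    {
            unsigned long dczid;

            asm volatile("mrs %0, dczid_el0" : "=r" (dczid));

            /* mirrors: and w1, w1, #0xf ; mov x2, #4 ; lsl x1, x2, x1 */
            return 4UL << (dczid & 0xf);
    }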
- */ - -#include - -#include -#include -#include - -/* - * Copy from user space to a kernel buffer (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val - .endm - - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val - .endm - - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val - .endm - - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val - .endm - - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val - .endm - - .macro str1 ptr, regB, val - str \ptr, [\regB], \val - .endm - - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val - .endm - - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val - .endm - -end .req x5 -SYM_FUNC_START(__arch_copy_from_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 // Nothing to copy - ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S deleted file mode 100644 index 667139013ed171ef4b5de1ba916941060858475c..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_in_user.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copy from user space to user space - * - * Copyright (C) 2012 ARM Ltd. - */ - -#include - -#include -#include -#include - -/* - * Copy from user space to user space (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val - .endm - - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val - .endm - - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val - .endm - - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val - .endm - - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val - .endm - - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val - .endm - - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val - .endm - - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val - .endm - -end .req x5 - -SYM_FUNC_START(__arch_copy_in_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_in_user) -EXPORT_SYMBOL(__arch_copy_in_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S deleted file mode 100644 index e125a84eb40009c371275b9be7320e690b92d7a8..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_page.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include -#include -#include - -/* - * Copy a page from src to dest (both are page aligned) - * - * Parameters: - * x0 - dest - * x1 - src - */ -SYM_FUNC_START(copy_page) -alternative_if ARM64_HAS_NO_HW_PREFETCH - // Prefetch three cache lines ahead. 
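Note the error convention shared by __arch_clear_user above and the copy routines here: on a fault, the .fixup handler returns the number of bytes *not* processed (end - dst), never a negative errno. Callers therefore treat any non-zero return as a fault, as in this standard pattern:

    static long fetch_from_user(void *kbuf, const void __user *ubuf,
                                size_t len)
    {
            /* non-zero return == bytes left uncopied == fault */
            if (copy_from_user(kbuf, ubuf, len))
                    return -EFAULT;

            return 0;
    }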
- prfm pldl1strm, [x1, #128] - prfm pldl1strm, [x1, #256] - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - - ldp x2, x3, [x1] - ldp x4, x5, [x1, #16] - ldp x6, x7, [x1, #32] - ldp x8, x9, [x1, #48] - ldp x10, x11, [x1, #64] - ldp x12, x13, [x1, #80] - ldp x14, x15, [x1, #96] - ldp x16, x17, [x1, #112] - - mov x18, #(PAGE_SIZE - 128) - add x1, x1, #128 -1: - subs x18, x18, #128 - -alternative_if ARM64_HAS_NO_HW_PREFETCH - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - - stnp x2, x3, [x0] - ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] - ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] - ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] - ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] - ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] - ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] - ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] - ldp x16, x17, [x1, #112] - - add x0, x0, #128 - add x1, x1, #128 - - b.gt 1b - - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] - - ret -SYM_FUNC_END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S deleted file mode 100644 index 488df234c49a2483d0151a463d745af8dfe70e2a..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_template.S +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - - -/* - * Copy a buffer from src to dest (alignment handled by the hardware) - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -dst .req x6 - -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 - - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 - - neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwriting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb1 tmp1w, src, #1 - strb1 tmp1w, dst, #1 -1: - tbz tmp2, #1, 2f - ldrh1 tmp1w, src, #2 - strh1 tmp1w, dst, #2 -2: - tbz tmp2, #2, 3f - ldr1 tmp1w, src, #4 - str1 tmp1w, dst, #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr1 tmp1, src, #8 - str1 tmp1, dst, #8 - -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. 
- */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -1: - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -2: - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr1 tmp1, src, #8 - str1 tmp1, dst, #8 -1: - tbz count, #2, 2f - ldr1 tmp1w, src, #4 - str1 tmp1w, dst, #4 -2: - tbz count, #1, 3f - ldrh1 tmp1w, src, #2 - strh1 tmp1w, dst, #2 -3: - tbz count, #0, .Lexitfunc - ldrb1 tmp1w, src, #1 - strb1 tmp1w, dst, #1 - - b .Lexitfunc - -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 - ldp1 B_l, B_h, src, #16 - ldp1 C_l, C_h, src, #16 - stp1 B_l, B_h, dst, #16 - stp1 C_l, C_h, dst, #16 - ldp1 D_l, D_h, src, #16 - stp1 D_l, D_h, dst, #16 - - tst count, #0x3f - b.ne .Ltail63 - b .Lexitfunc - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp1 A_l, A_h, src, #16 - ldp1 B_l, B_h, src, #16 - ldp1 C_l, C_h, src, #16 - ldp1 D_l, D_h, src, #16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp1 A_l, A_h, dst, #16 - ldp1 A_l, A_h, src, #16 - stp1 B_l, B_h, dst, #16 - ldp1 B_l, B_h, src, #16 - stp1 C_l, C_h, dst, #16 - ldp1 C_l, C_h, src, #16 - stp1 D_l, D_h, dst, #16 - ldp1 D_l, D_h, src, #16 - subs count, count, #64 - b.ge 1b - stp1 A_l, A_h, dst, #16 - stp1 B_l, B_h, dst, #16 - stp1 C_l, C_h, dst, #16 - stp1 D_l, D_h, dst, #16 - - tst count, #0x3f - b.ne .Ltail63 -.Lexitfunc: diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S deleted file mode 100644 index 1a104d0089f3a4036574bb6d1c5ac9796740dc5b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_to_user.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. 
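The .Ltail63/.Ltiny15 sequence just shown decodes the residual length straight from its bits: count & 0x30 selects zero to three 16-byte moves (the fall-through between labels 1: and 2: is deliberate), and bits 3..0 drive the final 8/4/2/1-byte moves. The same shape in C, as a sketch for n < 64:

    #include <linux/string.h>
    #include <linux/types.h>

    static void copy_tail(u8 *dst, const u8 *src, size_t n)
    {
            size_t chunk = n & 0x30;        /* 0, 16, 32 or 48 bytes */

            for (; chunk; chunk -= 16, dst += 16, src += 16)
                    memcpy(dst, src, 16);

            if (n & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
            if (n & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
            if (n & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
            if (n & 1)
                    *dst = *src;
    }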
- */ - -#include - -#include -#include -#include - -/* - * Copy to user space from a kernel buffer (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val - .endm - - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val - .endm - - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val - .endm - - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val - .endm - - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val - .endm - - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val - .endm - - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val - .endm - - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val - .endm - -end .req x5 -SYM_FUNC_START(__arch_copy_to_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S deleted file mode 100644 index 243e107e98963b21552ea2cc2ee52e9fc24026e3..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/crc32.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated CRC32(C) using AArch64 CRC instructions - * - * Copyright (C) 2016 - 2018 Linaro Ltd - */ - -#include -#include -#include - - .cpu generic+crc - - .macro __crc32, c - cmp x2, #16 - b.lt 8f // less than 16 bytes - - and x7, x2, #0x1f - and x2, x2, #~0x1f - cbz x7, 32f // multiple of 32 bytes - - and x8, x7, #0xf - ldp x3, x4, [x1] - add x8, x8, x1 - add x1, x1, x7 - ldp x5, x6, [x8] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) - - tst x7, #8 - crc32\c\()x w8, w0, x3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #4 - lsr x4, x3, #32 - crc32\c\()w w8, w0, w3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #2 - lsr w4, w3, #16 - crc32\c\()h w8, w0, w3 - csel w3, w3, w4, eq - csel w0, w0, w8, eq - tst x7, #1 - crc32\c\()b w8, w0, w3 - csel w0, w0, w8, eq - tst x7, #16 - crc32\c\()x w8, w0, x5 - crc32\c\()x w8, w8, x6 - csel w0, w0, w8, eq - cbz x2, 0f - -32: ldp x3, x4, [x1], #32 - sub x2, x2, #32 - ldp x5, x6, [x1, #-16] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) - crc32\c\()x w0, w0, x3 - crc32\c\()x w0, w0, x4 - crc32\c\()x w0, w0, x5 - crc32\c\()x w0, w0, x6 - cbnz x2, 32b -0: ret - -8: tbz x2, #3, 4f - ldr x3, [x1], #8 -CPU_BE( rev x3, x3 ) - crc32\c\()x w0, w0, x3 -4: tbz x2, #2, 2f - ldr w3, [x1], #4 -CPU_BE( rev w3, w3 ) - crc32\c\()w w0, w0, w3 -2: tbz x2, #1, 1f - ldrh w3, [x1], #2 -CPU_BE( rev16 w3, w3 ) - crc32\c\()h w0, w0, w3 -1: tbz x2, #0, 0f - ldrb w3, [x1] - crc32\c\()b w0, w0, w3 -0: ret - .endm - - .align 5 -SYM_FUNC_START(crc32_le) -alternative_if_not ARM64_HAS_CRC32 - b crc32_le_base -alternative_else_nop_endif - __crc32 -SYM_FUNC_END(crc32_le) - - .align 5 -SYM_FUNC_START(__crc32c_le) -alternative_if_not ARM64_HAS_CRC32 - b __crc32c_le_base -alternative_else_nop_endif - __crc32 c -SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S deleted file mode 100644 index edf6b970a2774374ad79a828eaefd645cf7d2f1e..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memchr.S +++ 
/dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/memchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find a character in an area of memory. - * - * Parameters: - * x0 - buf - * x1 - c - * x2 - n - * Returns: - * x0 - address of first occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK_PI(memchr) - and w1, w1, #0xff -1: subs x2, x2, #1 - b.mi 2f - ldrb w3, [x0], #1 - cmp w3, w1 - b.ne 1b - sub x0, x0, #1 - ret -2: mov x0, #0 - ret -SYM_FUNC_END_PI(memchr) -EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S deleted file mode 100644 index c0671e793ea9183e5ddc63696d79c8fa2466c2a6..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memcmp.S +++ /dev/null @@ -1,247 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* -* compare memory areas(when two memory areas' offset are different, -* alignment handled by the hardware) -* -* Parameters: -* x0 - const memory area 1 pointer -* x1 - const memory area 2 pointer -* x2 - the maximal compare byte length -* Returns: -* x0 - a compare result, maybe less than, equal to, or greater than ZERO -*/ - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -limit .req x2 -result .req x0 - -/* Internal variables. */ -data1 .req x3 -data1w .req w3 -data2 .req x4 -data2w .req w4 -has_nul .req x5 -diff .req x6 -endloop .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -pos .req x11 -limit_wd .req x12 -mask .req x13 - -SYM_FUNC_START_WEAK_PI(memcmp) - cbz limit, .Lret0 - eor tmp1, src1, src2 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ - /* - * The input source addresses are at alignment boundary. - * Directly compare eight bytes each time. - */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - subs limit_wd, limit_wd, #1 - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, cs /* Last Dword or differences. */ - cbz endloop, .Lloop_aligned - - /* Not reached the limit, must have found a diff. */ - tbz limit_wd, #63, .Lnot_limit - - /* Limit % 8 == 0 => the diff is in the last 8 bytes. */ - ands limit, limit, #7 - b.eq .Lnot_limit - /* - * The remained bytes less than 8. It is needed to extract valid data - * from last eight bytes of the intended memory range. - */ - lsl limit, limit, #3 /* bytes-> bits. */ - mov mask, #~0 -CPU_BE( lsr mask, mask, limit ) -CPU_LE( lsl mask, mask, limit ) - bic data1, data1, mask - bic data2, data2, mask - - orr diff, diff, mask - b .Lnot_limit - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that precede the start point. - */ - bic src1, src1, #7 - bic src2, src2, #7 - ldr data1, [src1], #8 - ldr data2, [src2], #8 - /* - * We can not add limit with alignment offset(tmp1) here. Since the - * addition probably make the limit overflown. 
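The memchr earlier in this hunk is a deliberately simple byte loop: decrement the count, load and compare a byte, and step the pointer back by one on a match, since the post-indexed load has already advanced it. Its direct C equivalent:

    #include <stddef.h>

    void *memchr_sketch(const void *s, int c, size_t n)
    {
            const unsigned char *p = s;

            while (n--) {
                    if (*p == (unsigned char)c)
                            return (void *)p;
                    p++;
            }

            return NULL;    /* the asm returns 0 */
    }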
- */ - sub limit_wd, limit, #1/*limit != 0, so no underflow.*/ - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - add tmp3, tmp3, tmp1 - add limit_wd, limit_wd, tmp3, lsr #3 - add limit, limit, tmp1/* Adjust the limit for the extra. */ - - lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/ - neg tmp1, tmp1/* Bits to alignment -64. */ - mov tmp2, #~0 - /*mask off the non-intended bytes before the start address.*/ -CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) - - orr data1, data1, tmp2 - orr data2, data2, tmp2 - b .Lstart_realigned - - /*src1 and src2 have different alignment offset.*/ -.Lmisaligned8: - cmp limit, #8 - b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/ - - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/ - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/ - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum.*/ - - sub limit, limit, pos - /*compare the proceeding bytes in the first 8 byte segment.*/ -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ - b.eq .Ltinycmp - cbnz pos, 1f /*diff occurred before the last byte.*/ - cmp data1w, data2w - b.eq .Lstart_align -1: - sub result, data1, data2 - ret - -.Lstart_align: - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - - ands xzr, src1, #7 - b.eq .Lrecal_offset - /*process more leading bytes to make src1 aligned...*/ - add src1, src1, tmp3 /*backwards src1 to alignment boundary*/ - add src2, src2, tmp3 - sub limit, limit, tmp3 - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - /*load 8 bytes from aligned SRC1..*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - subs limit_wd, limit_wd, #1 - eor diff, data1, data2 /*Non-zero if differences found.*/ - csinv endloop, diff, xzr, ne - cbnz endloop, .Lunequal_proc - /*How far is the current SRC2 from the alignment boundary...*/ - and tmp3, tmp3, #7 - -.Lrecal_offset:/*src1 is aligned now..*/ - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes and compare from - * the SRC2 alignment boundary. If all 8 bytes are equal,then start - * the second part's comparison. Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - eor diff, data1, data2 /* Non-zero if differences found. */ - cbnz diff, .Lnot_limit - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - eor diff, data1, data2 /* Non-zero if differences found. */ - subs limit_wd, limit_wd, #1 - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - cbz endloop, .Lloopcmp_proc -.Lunequal_proc: - cbz diff, .Lremain8 - -/* There is difference occurred in the latest comparison. */ -.Lnot_limit: -/* -* For little endian,reverse the low significant equal bits into MSB,then -* following CLZ can find how many equal bits exist. -*/ -CPU_LE( rev diff, diff ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - - /* - * The MS-non-zero bit of DIFF marks either the first bit - * that is different, or the end of the significant data. - * Shifting left now will bring the critical information into the - * top bits. 
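The .Lnot_limit epilogue described above reduces the XOR'd difference word to a byte comparison: on little-endian, REV moves the first differing byte to the most significant end, CLZ locates it, and shifting both data words left by that amount and then right by 56 isolates the byte pair for a subtract. Mirrored in C (diff is guaranteed non-zero on this path, so CLZ is well defined):

    #include <linux/types.h>

    static int cmp_le_words(u64 data1, u64 data2)
    {
            u64 d1 = __builtin_bswap64(data1);      /* rev data1 */
            u64 d2 = __builtin_bswap64(data2);      /* rev data2 */
            int pos = __builtin_clzll(d1 ^ d2);     /* clz pos, diff */

            d1 <<= pos;                             /* lsl data1, pos */
            d2 <<= pos;

            /* zero-extend the top bytes, then subtract */
            return (int)(d1 >> 56) - (int)(d2 >> 56);
    }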
- */ - clz pos, diff - lsl data1, data1, pos - lsl data2, data2, pos - /* - * We need to zero-extend (char is unsigned) the value and then - * perform a signed subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret - -.Lremain8: - /* Limit % 8 == 0 =>. all data are equal.*/ - ands limit, limit, #7 - b.eq .Lret0 - -.Ltiny8proc: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ - b.eq .Ltiny8proc - sub result, data1, data2 - ret -.Lret0: - mov result, #0 - ret -SYM_FUNC_END_PI(memcmp) -EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S deleted file mode 100644 index b03cbb3455d4da23413dc91b468efa79396261b7..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memcpy.S +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include -#include - -/* - * Copy a buffer from src to dest (alignment handled by the hardware) - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val - .endm - - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val - .endm - - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val - .endm - - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val - .endm - - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val - .endm - - .macro str1 ptr, regB, val - str \ptr, [\regB], \val - .endm - - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val - .endm - - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val - .endm - -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) -#include "copy_template.S" - ret -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S deleted file mode 100644 index 1035dce4bdaf42572708d94412420ee4c37253fc..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memmove.S +++ /dev/null @@ -1,189 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include -#include - -/* - * Move a buffer from src to test (alignment handled by the hardware). - * If dest <= src, call memcpy, otherwise copy in reverse order. - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 - -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 - -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_PI(memmove) - cmp dstin, src - b.lo __memcpy - add tmp1, src, count - cmp dstin, tmp1 - b.hs __memcpy /* No overlap. 
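The memmove entry above only pays for a reverse copy when it must: a destination below the source, or at or beyond source + count, cannot be clobbered by a forward copy, so both cases branch straight to __memcpy. The dispatch plus the backwards fallback, sketched in C:

    #include <linux/string.h>
    #include <linux/types.h>

    static void *memmove_sketch(void *dest, const void *src, size_t n)
    {
            u8 *d;
            const u8 *s;

            /* cmp dstin, src ; b.lo __memcpy        (dest below src)
             * cmp dstin, src+count ; b.hs __memcpy  (no overlap)    */
            if ((unsigned long)dest < (unsigned long)src ||
                (unsigned long)dest >= (unsigned long)src + n)
                    return memcpy(dest, src, n);

            /* overlapping with dest above src: copy from the top down */
            d = (u8 *)dest + n;
            s = (const u8 *)src + n;
            while (n--)
                    *--d = *--s;

            return dest;
    }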
*/ - - add dst, dstin, count - add src, src, count - cmp count, #16 - b.lo .Ltail15 /*probably non-alignment accesses.*/ - - ands tmp2, src, #15 /* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * process the aligned offset length to make the src aligned firstly. - * those extra instructions' cost is acceptable. It also make the - * coming accesses are based on aligned address. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src, #-1]! - strb tmp1w, [dst, #-1]! -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! - -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltail15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! -1: - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! -2: - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! - -.Ltail15: - tbz count, #3, 1f - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! -1: - tbz count, #2, 2f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -2: - tbz count, #1, 3f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src, #-1] - strb tmp1w, [dst, #-1] - -.Lexitfunc: - ret - -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 bytes here and then jump - * to the tail. - */ - ldp A_l, A_h, [src, #-16] - stp A_l, A_h, [dst, #-16] - ldp B_l, B_h, [src, #-32] - ldp C_l, C_h, [src, #-48] - stp B_l, B_h, [dst, #-32] - stp C_l, C_h, [dst, #-48] - ldp D_l, D_h, [src, #-64]! - stp D_l, D_h, [dst, #-64]! - - tst count, #0x3f - b.ne .Ltail63 - ret - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-load 64 bytes data. */ - ldp A_l, A_h, [src, #-16] - ldp B_l, B_h, [src, #-32] - ldp C_l, C_h, [src, #-48] - ldp D_l, D_h, [src, #-64]! -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst, #-16] - ldp A_l, A_h, [src, #-16] - stp B_l, B_h, [dst, #-32] - ldp B_l, B_h, [src, #-32] - stp C_l, C_h, [dst, #-48] - ldp C_l, C_h, [src, #-48] - stp D_l, D_h, [dst, #-64]! - ldp D_l, D_h, [src, #-64]! - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst, #-16] - stp B_l, B_h, [dst, #-32] - stp C_l, C_h, [dst, #-48] - stp D_l, D_h, [dst, #-64]! - - tst count, #0x3f - b.ne .Ltail63 - ret -SYM_FUNC_END_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) -EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S deleted file mode 100644 index a9c1c9a01ea906954953c6dce74d4c3e482328da..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memset.S +++ /dev/null @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. 
- * - * This code is based on glibc cortex strings work originally authored by Linaro - * and can be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/cache.h> - -/* - * Fill in the buffer with character c (alignment handled by the hardware) - * - * Parameters: - * x0 - buf - * x1 - c - * x2 - n - * Returns: - * x0 - buf - */ - -dstin .req x0 -val .req w1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -zva_len_x .req x5 -zva_len .req w5 -zva_bits_x .req x6 - -A_l .req x7 -A_lw .req w7 -dst .req x8 -tmp3w .req w9 -tmp3 .req x9 - -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) - mov dst, dstin /* Preserve return value. */ - and A_lw, val, #255 - orr A_lw, A_lw, A_lw, lsl #8 - orr A_lw, A_lw, A_lw, lsl #16 - orr A_l, A_l, A_l, lsl #32 - - cmp count, #15 - b.hi .Lover16_proc - /* All stores may be unaligned. */ - tbz count, #3, 1f - str A_l, [dst], #8 -1: - tbz count, #2, 2f - str A_lw, [dst], #4 -2: - tbz count, #1, 3f - strh A_lw, [dst], #2 -3: - tbz count, #0, 4f - strb A_lw, [dst] -4: - ret - -.Lover16_proc: - /* Check whether the start address is 16-byte aligned. */ - neg tmp2, dst - ands tmp2, tmp2, #15 - b.eq .Laligned -/* -* The count is at least 16, so we can use stp to store the first 16 bytes, -* then advance dst to a 16-byte boundary. Subsequent accesses are then -* based on an aligned address. -*/ - stp A_l, A_l, [dst] /* unaligned store */ - /* make dst aligned */ - sub count, count, tmp2 - add dst, dst, tmp2 - -.Laligned: - cbz A_l, .Lzero_mem - -.Ltail_maybe_long: - cmp count, #64 - b.ge .Lnot_short -.Ltail63: - ands tmp1, count, #0x30 - b.eq 3f - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - stp A_l, A_l, [dst], #16 -1: - stp A_l, A_l, [dst], #16 -2: - stp A_l, A_l, [dst], #16 -/* -* Fewer than 16 bytes remain, so use stp to write the last 16 bytes. -* This writes some bytes twice and the access may be unaligned. -*/ -3: - ands count, count, #15 - cbz count, 4f - add dst, dst, count - stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ -4: - ret - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line, this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lnot_short: - sub dst, dst, #16 /* Pre-bias. */ - sub count, count, #64 -1: - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - stp A_l, A_l, [dst, #48] - stp A_l, A_l, [dst, #64]! - subs count, count, #64 - b.ge 1b - tst count, #0x3f - add dst, dst, #16 - b.ne .Ltail63 -.Lexitfunc: - ret - - /* - * For zeroing memory, check to see if we can use the ZVA feature to - * zero entire 'cache' lines. - */ -.Lzero_mem: - cmp count, #63 - b.le .Ltail63 - /* - * For zeroing small amounts of memory, it's not worth setting up - * the line-clear code. - */ - cmp count, #128 - b.lt .Lnot_short /* count is at least 128 bytes */ - - mrs tmp1, dczid_el0 - tbnz tmp1, #4, .Lnot_short - mov tmp3w, #4 - and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ - lsl zva_len, tmp3w, zva_len - - ands tmp3w, zva_len, #63 - /* - * Ensure zva_len is at least 64: it is not worthwhile to use ZVA - * if the block size is smaller than 64 bytes. - */ - b.ne .Lnot_short -.Lzero_by_line: - /* - * Compute how far we need to go to become suitably aligned. We're - * already at quad-word alignment. - */ - cmp count, zva_len_x - b.lt .Lnot_short /* Not enough to reach alignment. 
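Two details of the memset setup above are easy to miss: the fill byte is replicated across a 64-bit register with a shift/OR chain, and the DC ZVA block size is decoded from DCZID_EL0. A C sketch of both (splat8 and zva_block_bytes are hypothetical helper names):

#include <stdint.h>

/* Replicate the low byte of c across all eight bytes, mirroring the
 * and/orr/orr/orr sequence at the top of memset. */
static inline uint64_t splat8(uint64_t c)
{
	uint64_t v = c & 0xff;

	v |= v << 8;
	v |= v << 16;
	v |= v << 32;
	return v;
}

/* Decode the DC ZVA block size from DCZID_EL0: 4 << DCZID_EL0[3:0]
 * bytes, usable only when bit 4 (DZP, "prohibited") is clear, which is
 * what the tbnz on bit 4 above checks. */
static inline unsigned int zva_block_bytes(uint64_t dczid_el0)
{
	return 4u << (dczid_el0 & 0xf);
}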
*/ - sub zva_bits_x, zva_len_x, #1 - neg tmp2, dst - ands tmp2, tmp2, zva_bits_x - b.eq 2f /* Already aligned. */ - /* Not aligned, check that there's enough to copy after alignment.*/ - sub tmp1, count, tmp2 - /* - * grantee the remain length to be ZVA is bigger than 64, - * avoid to make the 2f's process over mem range.*/ - cmp tmp1, #64 - ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ - b.lt .Lnot_short - /* - * We know that there's at least 64 bytes to zero and that it's safe - * to overrun by 64 bytes. - */ - mov count, tmp1 -1: - stp A_l, A_l, [dst] - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - subs tmp2, tmp2, #64 - stp A_l, A_l, [dst, #48] - add dst, dst, #64 - b.ge 1b - /* We've overrun a bit, so adjust dst downwards.*/ - add dst, dst, tmp2 -2: - sub count, count, zva_len_x -3: - dc zva, dst - add dst, dst, zva_len_x - subs count, count, zva_len_x - b.ge 3b - ands count, count, zva_bits_x - b.ne .Ltail_maybe_long - ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) -EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S deleted file mode 100644 index 1f47eae3b0d6d618d24c347db7c2da9ffce98068..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strchr.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/strchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find the first occurrence of a character in a string. - * - * Parameters: - * x0 - str - * x1 - c - * Returns: - * x0 - address of first occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK(strchr) - and w1, w1, #0xff -1: ldrb w2, [x0], #1 - cmp w2, w1 - ccmp w2, wzr, #4, ne - b.ne 1b - sub x0, x0, #1 - cmp w2, w1 - csel x0, x0, xzr, eq - ret -SYM_FUNC_END(strchr) -EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S deleted file mode 100644 index 4767540d1b94ed4bacb2903cbf3904d376c4f5d9..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strcmp.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * compare two strings - * - * Parameters: - * x0 - const string 1 pointer - * x1 - const string 2 pointer - * Returns: - * x0 - an integer less than, equal to, or greater than zero - * if s1 is found, respectively, to be less than, to match, - * or be greater than s2. - */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -result .req x0 - -/* Internal variables. */ -data1 .req x2 -data1w .req w2 -data2 .req x3 -data2w .req w3 -has_nul .req x4 -diff .req x5 -syndrome .req x6 -tmp1 .req x7 -tmp2 .req x8 -tmp3 .req x9 -zeroones .req x10 -pos .req x11 - -SYM_FUNC_START_WEAK_PI(strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. 
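The NUL-detection comment above is the heart of every routine in this family. In C the test it describes looks like the sketch below; the bic form the assembly computes, (x - REP8_01) & ~(x | REP8_7f), is equivalent to the classic & 0x80..80 form:

#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7F 0x7f7f7f7f7f7f7f7fULL

/* Non-zero iff at least one byte of x is zero.  Borrows from the
 * subtraction can spill into bytes above the first zero byte, which is
 * why the big-endian paths below recompute the syndrome on
 * byte-reversed data before trusting individual byte positions. */
static inline uint64_t has_zero_byte(uint64_t x)
{
	return (x - REP8_01) & ~(x | REP8_7F);
}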
- */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - orr syndrome, diff, has_nul - cbz syndrome, .Lloop_aligned - b .Lcal_cmpresult - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that preceed the start point. - */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - - orr data1, data1, tmp2 - orr data2, data2, tmp2 - b .Lstart_realigned - -.Lmisaligned8: - /* - * Get the align offset length to compare per byte first. - * After this process, one string's address will be aligned. - */ - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8 - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8 - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq .Ltinycmp - cbnz pos, 1f /*find the null or unequal...*/ - cmp data1w, #1 - ccmp data1w, data2w, #0, cs - b.eq .Lstart_align /*the last bytes are equal....*/ -1: - sub result, data1, data2 - ret - -.Lstart_align: - ands xzr, src1, #7 - b.eq .Lrecal_offset - /*process more leading bytes to make str1 aligned...*/ - add src1, src1, tmp3 - add src2, src2, tmp3 - /*load 8 bytes from aligned str1 and non-aligned str2..*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbnz syndrome, .Lcal_cmpresult - /*How far is the current str2 from the alignment boundary...*/ - and tmp3, tmp3, #7 -.Lrecal_offset: - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes from the SRC2 alignment - * boundary,then compare with the relative bytes from SRC1. - * If all 8 bytes are equal,then start the second part's comparison. - * Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbnz syndrome, .Lcal_cmpresult - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbz syndrome, .Lloopcmp_proc - -.Lcal_cmpresult: - /* - * reversed the byte-order as big-endian,then CLZ can find the most - * significant zero bits. 
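The .Ltinycmp loop earlier in this function packs three exit conditions (counter exhausted, NUL reached, bytes differ) into a ccmp chain. A rough C rendering of the same loop (tiny_cmp is a hypothetical name, and the early-exit bookkeeping is simplified relative to the assembly):

/* Compare up to pos bytes, advancing both pointers; stop early on a NUL
 * or a mismatch, as the ccmp chain in .Ltinycmp does.  Returns zero if
 * all pos bytes matched and were non-NUL. */
static int tiny_cmp(const unsigned char **s1, const unsigned char **s2,
		    unsigned long pos)
{
	while (pos--) {
		unsigned char c1 = *(*s1)++;
		unsigned char c2 = *(*s2)++;

		if (c1 == '\0' || c1 != c2)
			return (int)c1 - (int)c2;
	}
	return 0;
}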
- */ -CPU_LE( rev syndrome, syndrome ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - - /* - * For big-endian we cannot use the trick with the syndrome value - * as carry-propagation can corrupt the upper bits if the trailing - * bytes in the string contain 0x01. - * However, if there is no NUL byte in the dword, we can generate - * the result directly. We ca not just subtract the bytes as the - * MSB might be significant. - */ -CPU_BE( cbnz has_nul, 1f ) -CPU_BE( cmp data1, data2 ) -CPU_BE( cset result, ne ) -CPU_BE( cneg result, result, lo ) -CPU_BE( ret ) -CPU_BE( 1: ) - /*Re-compute the NUL-byte detection, using a byte-reversed value. */ -CPU_BE( rev tmp3, data1 ) -CPU_BE( sub tmp1, tmp3, zeroones ) -CPU_BE( orr tmp2, tmp3, #REP8_7f ) -CPU_BE( bic has_nul, tmp1, tmp2 ) -CPU_BE( rev has_nul, has_nul ) -CPU_BE( orr syndrome, diff, has_nul ) - - clz pos, syndrome - /* - * The MS-non-zero bit of the syndrome marks either the first bit - * that is different, or the top bit of the first zero byte. - * Shifting left now will bring the critical information into the - * top bits. - */ - lsl data1, data1, pos - lsl data2, data2, pos - /* - * But we need to zero-extend (char is unsigned) the value and then - * perform a signed 32-bit subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S deleted file mode 100644 index ee3ed882dd79fbfd4aecdbd6c4b5e1948603011b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strlen.S +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * calculate the length of a string - * - * Parameters: - * x0 - const string pointer - * Returns: - * x0 - the return length of specific string - */ - -/* Arguments and results. */ -srcin .req x0 -len .req x0 - -/* Locals and temporaries. */ -src .req x1 -data1 .req x2 -data2 .req x3 -data2a .req x4 -has_nul1 .req x5 -has_nul2 .req x6 -tmp1 .req x7 -tmp2 .req x8 -tmp3 .req x9 -tmp4 .req x10 -zeroones .req x11 -pos .req x12 - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -SYM_FUNC_START_WEAK_PI(strlen) - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ - /* - * The inner loop deals with two Dwords at a time. This has a - * slightly higher start-up cost, but we should win quite quickly, - * especially on cores with a high number of issue slots per - * cycle, as we get much better parallelism out of the operations. 
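On little-endian, the .Lcal_cmpresult sequence in strcmp above byte-reverses the syndrome and both data words so that CLZ locates the first differing or NUL byte, shifts that byte to the top, and compares the two zero-extended bytes. The same computation in C, using GCC/Clang builtins in place of rev and clz (syndrome must be non-zero here, as it is at this point in the assembly):

#include <stdint.h>

static int cal_cmpresult_le(uint64_t syndrome, uint64_t data1, uint64_t data2)
{
	/* rev + clz: bit offset of the first difference or NUL byte. */
	int pos = __builtin_clzll(__builtin_bswap64(syndrome));

	/* Shift the interesting byte to the top of each word... */
	data1 = __builtin_bswap64(data1) << pos;
	data2 = __builtin_bswap64(data2) << pos;

	/* ...then zero-extend (char is unsigned) and subtract. */
	return (int)(data1 >> 56) - (int)(data2 >> 56);
}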
- */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bics has_nul2, tmp3, tmp4 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ - b.eq .Lloop - - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/ - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: - /* - * For big-endian, carry propagation (if the final byte in the - * string is 0x01) means we cannot use has_nul directly. The - * easiest way to get the correct byte is to byte-swap the data - * and calculate the syndrome a second time. - */ -CPU_BE( rev data2, data2 ) -CPU_BE( sub tmp1, data2, zeroones ) -CPU_BE( orr tmp2, data2, #REP8_7f ) -CPU_BE( bic has_nul2, tmp1, tmp2 ) - - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ - ret - -.Lmisaligned: - cmp tmp1, #8 - neg tmp1, tmp1 - ldp data1, data2, [src], #16 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned -SYM_FUNC_END_PI(strlen) -EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S deleted file mode 100644 index 2a7ee949ed4714fd376a0913e6cc66bced6391e5..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strncmp.S +++ /dev/null @@ -1,299 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * compare two strings - * - * Parameters: - * x0 - const string 1 pointer - * x1 - const string 2 pointer - * x2 - the maximal length to be compared - * Returns: - * x0 - an integer less than, equal to, or greater than zero if s1 is found, - * respectively, to be less than, to match, or be greater than s2. - */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -limit .req x2 -result .req x0 - -/* Internal variables. */ -data1 .req x3 -data1w .req w3 -data2 .req x4 -data2w .req w4 -has_nul .req x5 -diff .req x6 -syndrome .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -zeroones .req x11 -pos .req x12 -limit_wd .req x13 -mask .req x14 -endloop .req x15 - -SYM_FUNC_START_WEAK_PI(strncmp) - cbz limit, .Lret0 - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - /* Calculate the number of full and partial words -1. */ - /* - * when limit is mulitply of 8, if not sub 1, - * the judgement of last dword will wrong. - */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. 
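As the comment above notes, limit_wd is computed as (limit - 1) >> 3 rather than limit >> 3: it must hold the number of whole words minus one, so that the subs in the main loop goes negative exactly on the final word even when limit is a multiple of 8. A one-line C illustration:

#include <stddef.h>

/* limit = 16 bytes is two full words.  (16 - 1) >> 3 == 1, so
 * "subs limit_wd, limit_wd, #1" goes negative on the second word.
 * Using 16 >> 3 == 2 would treat the last word as non-final. */
static size_t limit_words_minus_one(size_t limit)
{
	return (limit - 1) >> 3;	/* caller ensures limit != 0 */
}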
*/ - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - subs limit_wd, limit_wd, #1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences.*/ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp endloop, #0, #0, eq - b.eq .Lloop_aligned - - /*Not reached the limit, must have found the end or a diff. */ - tbz limit_wd, #63, .Lnot_limit - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq .Lnot_limit - - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -CPU_BE( lsr mask, mask, limit ) -CPU_LE( lsl mask, mask, limit ) - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -.Lnot_limit: - orr syndrome, diff, has_nul - b .Lcal_cmpresult - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that precede the start point. - * We also need to adjust the limit calculations, but without - * overflowing if the limit is near ULONG_MAX. - */ - bic src1, src1, #7 - bic src2, src2, #7 - ldr data1, [src1], #8 - neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ - ldr data2, [src2], #8 - mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ - - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/ - add limit, limit, tmp1 - add tmp3, tmp3, tmp1 - orr data1, data1, tmp2 - orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 - b .Lstart_realigned - -/*when src1 offset is not equal to src2 offset...*/ -.Lmisaligned8: - cmp limit, #8 - b.lo .Ltiny8proc /*limit < 8... */ - /* - * Get the align offset length to compare per byte first. - * After this process, one string's address will be aligned.*/ - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8 - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8 - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ - /* - * Here, limit is not less than 8, so directly run .Ltinycmp - * without checking the limit.*/ - sub limit, limit, pos -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. 
*/ - b.eq .Ltinycmp - cbnz pos, 1f /*find the null or unequal...*/ - cmp data1w, #1 - ccmp data1w, data2w, #0, cs - b.eq .Lstart_align /*the last bytes are equal....*/ -1: - sub result, data1, data2 - ret - -.Lstart_align: - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - /*process more leading bytes to make str1 aligned...*/ - ands xzr, src1, #7 - b.eq .Lrecal_offset - add src1, src1, tmp3 /*tmp3 is positive in this branch.*/ - add src2, src2, tmp3 - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub limit, limit, tmp3 - lsr limit_wd, limit, #3 - subs limit_wd, limit_wd, #1 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - bics has_nul, tmp1, tmp2 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ - b.ne .Lunequal_proc - /*How far is the current str2 from the alignment boundary...*/ - and tmp3, tmp3, #7 -.Lrecal_offset: - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes from the SRC2 alignment - * boundary,then compare with the relative bytes from SRC1. - * If all 8 bytes are equal,then start the second part's comparison. - * Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, eq - cbnz endloop, .Lunequal_proc - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - subs limit_wd, limit_wd, #1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - bics has_nul, tmp1, tmp2 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ - b.eq .Lloopcmp_proc - -.Lunequal_proc: - orr syndrome, diff, has_nul - cbz syndrome, .Lremain8 -.Lcal_cmpresult: - /* - * reversed the byte-order as big-endian,then CLZ can find the most - * significant zero bits. - */ -CPU_LE( rev syndrome, syndrome ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - /* - * For big-endian we cannot use the trick with the syndrome value - * as carry-propagation can corrupt the upper bits if the trailing - * bytes in the string contain 0x01. - * However, if there is no NUL byte in the dword, we can generate - * the result directly. We can't just subtract the bytes as the - * MSB might be significant. - */ -CPU_BE( cbnz has_nul, 1f ) -CPU_BE( cmp data1, data2 ) -CPU_BE( cset result, ne ) -CPU_BE( cneg result, result, lo ) -CPU_BE( ret ) -CPU_BE( 1: ) - /* Re-compute the NUL-byte detection, using a byte-reversed value.*/ -CPU_BE( rev tmp3, data1 ) -CPU_BE( sub tmp1, tmp3, zeroones ) -CPU_BE( orr tmp2, tmp3, #REP8_7f ) -CPU_BE( bic has_nul, tmp1, tmp2 ) -CPU_BE( rev has_nul, has_nul ) -CPU_BE( orr syndrome, diff, has_nul ) - /* - * The MS-non-zero bit of the syndrome marks either the first bit - * that is different, or the top bit of the first zero byte. - * Shifting left now will bring the critical information into the - * top bits. 
- */ - clz pos, syndrome - lsl data1, data1, pos - lsl data2, data2, pos - /* - * But we need to zero-extend (char is unsigned) the value and then - * perform a signed 32-bit subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret - -.Lremain8: - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq .Lret0 -.Ltiny8proc: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq .Ltiny8proc - sub result, data1, data2 - ret - -.Lret0: - mov result, #0 - ret -SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S deleted file mode 100644 index b72913a990389a22be61fc981a730816e9a427b6..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strnlen.S +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * determine the length of a fixed-size string - * - * Parameters: - * x0 - const string pointer - * x1 - maximal string length - * Returns: - * x0 - the return length of specific string - */ - -/* Arguments and results. */ -srcin .req x0 -len .req x0 -limit .req x1 - -/* Locals and temporaries. */ -src .req x2 -data1 .req x3 -data2 .req x4 -data2a .req x5 -has_nul1 .req x6 -has_nul2 .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -tmp4 .req x11 -zeroones .req x12 -pos .req x13 -limit_wd .req x14 - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -SYM_FUNC_START_WEAK_PI(strnlen) - cbz limit, .Lhit_limit - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ - /* - * The inner loop deals with two Dwords at a time. This has a - * slightly higher start-up cost, but we should win quite quickly, - * especially on cores with a high number of issue slots per - * cycle, as we get much better parallelism out of the operations. - */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - subs limit_wd, limit_wd, #1 - orr tmp1, has_nul1, has_nul2 - ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ - b.eq .Lloop - - cbz tmp1, .Lhit_limit /* No null in final Qword. */ - - /* - * We know there's a null in the final Qword. The easiest thing - * to do now is work out the length of the string and return - * MIN (len, limit). 
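The remainder of strnlen above is exactly that MIN(len, limit). For reference, the whole function's contract in plain C (strnlen_ref is a hypothetical name; the assembly gets the same answer a word or two at a time):

#include <stddef.h>

/* Length of s, capped at limit. */
static size_t strnlen_ref(const char *s, size_t limit)
{
	size_t len;

	for (len = 0; len < limit && s[len] != '\0'; len++)
		;
	return len;
}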
- */ - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/ - - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: - /* - * For big-endian, carry propagation (if the final byte in the - * string is 0x01) means we cannot use has_nul directly. The - * easiest way to get the correct byte is to byte-swap the data - * and calculate the syndrome a second time. - */ -CPU_BE( rev data2, data2 ) -CPU_BE( sub tmp1, data2, zeroones ) -CPU_BE( orr tmp2, data2, #REP8_7f ) -CPU_BE( bic has_nul2, tmp1, tmp2 ) - - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ - cmp len, limit - csel len, len, limit, ls /* Return the lower value. */ - ret - -.Lmisaligned: - /* - * Deal with a partial first word. - * We're doing two things in parallel here; - * 1) Calculate the number of words (but avoiding overflow if - * limit is near ULONG_MAX) - to do this we need to work out - * limit + tmp1 - 1 as a 65-bit value before shifting it; - * 2) Load and mask the initial data words - we force the bytes - * before the ones we are interested in to 0xff - this ensures - * early bytes will not hit any zero detection. - */ - ldp data1, data2, [src], #16 - - sub limit_wd, limit, #1 - and tmp3, limit_wd, #15 - lsr limit_wd, limit_wd, #4 - - add tmp3, tmp3, tmp1 - add limit_wd, limit_wd, tmp3, lsr #4 - - neg tmp4, tmp1 - lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ - - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ - - cmp tmp1, #8 - - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned - -.Lhit_limit: - mov len, limit - ret -SYM_FUNC_END_PI(strnlen) -EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S deleted file mode 100644 index 13132d1ed6d127913883f3215a3c0819cbb5598e..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strrchr.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/strrchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find the last occurrence of a character in a string. - * - * Parameters: - * x0 - str - * x1 - c - * Returns: - * x0 - address of last occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK_PI(strrchr) - mov x3, #0 - and w1, w1, #0xff -1: ldrb w2, [x0], #1 - cbz w2, 2f - cmp w2, w1 - b.ne 1b - sub x3, x0, #1 - b 1b -2: mov x0, x3 - ret -SYM_FUNC_END_PI(strrchr) -EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S deleted file mode 100644 index a88613834fb07b350031390cb3ecf4595bc877f5..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/tishift.S +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) - * - * Copyright (C) 2017-2018 Jason A. Donenfeld . All Rights Reserved. 
- */ - -#include - -#include - -SYM_FUNC_START(__ashlti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsl x1, x1, x2 - lsr x3, x0, x3 - lsl x2, x0, x2 - orr x1, x1, x3 - mov x0, x2 -1: - ret -2: - neg w1, w3 - mov x2, #0 - lsl x1, x0, x1 - mov x0, x2 - ret -SYM_FUNC_END(__ashlti3) -EXPORT_SYMBOL(__ashlti3) - -SYM_FUNC_START(__ashrti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsr x0, x0, x2 - lsl x3, x1, x3 - asr x2, x1, x2 - orr x0, x0, x3 - mov x1, x2 -1: - ret -2: - neg w0, w3 - asr x2, x1, #63 - asr x0, x1, x0 - mov x1, x2 - ret -SYM_FUNC_END(__ashrti3) -EXPORT_SYMBOL(__ashrti3) - -SYM_FUNC_START(__lshrti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsr x0, x0, x2 - lsl x3, x1, x3 - lsr x2, x1, x2 - orr x0, x0, x3 - mov x1, x2 -1: - ret -2: - neg w0, w3 - mov x2, #0 - lsr x0, x1, x0 - mov x1, x2 - ret -SYM_FUNC_END(__lshrti3) -EXPORT_SYMBOL(__lshrti3) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S deleted file mode 100644 index db767b072601e36fddb8ee7991d801d6f1f2f6d6..0000000000000000000000000000000000000000 --- a/arch/arm64/mm/cache.S +++ /dev/null @@ -1,246 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Cache maintenance - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * flush_icache_range(start,end) - * - * Ensure that the I and D caches are coherent within specified region. - * This is typically used when code has been written to a memory region, - * and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(__flush_icache_range) - /* FALLTHROUGH */ - -/* - * __flush_cache_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified region. - * This is typically used when code has been written to a memory region, - * and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3, x4 -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - b 7f -alternative_else_nop_endif - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x0, x3 -1: -user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE - add x4, x4, x2 - cmp x4, x1 - b.lo 1b - dsb ish - -7: -alternative_if ARM64_HAS_CACHE_DIC - isb - b 8f -alternative_else_nop_endif - invalidate_icache_by_line x0, x1, x2, x3, 9f -8: mov x0, #0 -1: - uaccess_ttbr0_disable x1, x2 - ret -9: - mov x0, #-EFAULT - b 1b -ENDPROC(__flush_icache_range) -ENDPROC(__flush_cache_user_range) - -/* - * invalidate_icache_range(start,end) - * - * Ensure that the I cache is invalid within specified region. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(invalidate_icache_range) -alternative_if ARM64_HAS_CACHE_DIC - mov x0, xzr - isb - ret -alternative_else_nop_endif - - uaccess_ttbr0_enable x2, x3, x4 - - invalidate_icache_by_line x0, x1, x2, x3, 2f - mov x0, xzr -1: - uaccess_ttbr0_disable x1, x2 - ret -2: - mov x0, #-EFAULT - b 1b -ENDPROC(invalidate_icache_range) - -/* - * __flush_dcache_area(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned and invalidated to the PoC. 
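The __ashlti3/__ashrti3/__lshrti3 helpers a few hunks above implement 128-bit shifts on a pair of 64-bit registers. A C sketch of the logical-left case, using a hypothetical two-word struct; the n >= 64 branch corresponds to the b.le 2f path in the assembly:

#include <stdint.h>

struct u128 {
	uint64_t lo;	/* x0 in the assembly */
	uint64_t hi;	/* x1 in the assembly */
};

static struct u128 ashl_ti(struct u128 v, unsigned int n)
{
	struct u128 r;

	if (n == 0)
		return v;
	if (n < 64) {
		/* Bits carried from lo into hi, as in the main path. */
		r.hi = (v.hi << n) | (v.lo >> (64 - n));
		r.lo = v.lo << n;
	} else {
		/* Shift of 64..127: lo lands in hi, lo becomes zero. */
		r.hi = v.lo << (n - 64);
		r.lo = 0;
	}
	return r;
}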
- * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__flush_dcache_area) - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__flush_dcache_area) - -/* - * __clean_dcache_area_pou(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoU. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_pou) -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - ret -alternative_else_nop_endif - dcache_by_line_op cvau, ish, x0, x1, x2, x3 - ret -ENDPROC(__clean_dcache_area_pou) - -/* - * __inval_dcache_area(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are invalidated. Any partial lines at the ends of the interval are - * also cleaned to PoC to prevent data loss. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__inval_dcache_area) - /* FALLTHROUGH */ - -/* - * __dma_inv_area(start, size) - * - start - virtual start address of region - * - size - size in question - */ -__dma_inv_area: - add x1, x1, x0 - dcache_line_size x2, x3 - sub x3, x2, #1 - tst x1, x3 // end cache line aligned? - bic x1, x1, x3 - b.eq 1f - dc civac, x1 // clean & invalidate D / U line -1: tst x0, x3 // start cache line aligned? - bic x0, x0, x3 - b.eq 2f - dc civac, x0 // clean & invalidate D / U line - b 3f -2: dc ivac, x0 // invalidate D / U line -3: add x0, x0, x2 - cmp x0, x1 - b.lo 2b - dsb sy - ret -ENDPIPROC(__inval_dcache_area) -ENDPROC(__dma_inv_area) - -/* - * __clean_dcache_area_poc(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoC. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_poc) - /* FALLTHROUGH */ - -/* - * __dma_clean_area(start, size) - * - start - virtual start address of region - * - size - size in question - */ -__dma_clean_area: - dcache_by_line_op cvac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__clean_dcache_area_poc) -ENDPROC(__dma_clean_area) - -/* - * __clean_dcache_area_pop(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoP. 
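__dma_inv_area above is deliberate about partial lines: a cache line that straddles the start or end of the range is cleaned and invalidated (DC CIVAC) rather than just invalidated, so bystander data sharing the line is written back before the line is dropped. The loop's shape in C, under the assumption that dc_ivac, dc_civac, and dsb_sy are hypothetical wrappers around the corresponding instructions:

#include <stddef.h>
#include <stdint.h>

extern void dc_ivac(uintptr_t va);	/* hypothetical: DC IVAC  */
extern void dc_civac(uintptr_t va);	/* hypothetical: DC CIVAC */
extern void dsb_sy(void);		/* hypothetical: DSB SY   */

static void dma_inv_area(uintptr_t start, size_t size, size_t line)
{
	uintptr_t end = start + size;
	uintptr_t mask = line - 1;

	if (end & mask)			/* partial line at the end */
		dc_civac(end & ~mask);
	end &= ~mask;

	if (start & mask) {		/* partial line at the start */
		dc_civac(start & ~mask);
		start = (start & ~mask) + line;
	}
	for (; start < end; start += line)
		dc_ivac(start);		/* whole lines: invalidate only */
	dsb_sy();
}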
- * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_pop) - alternative_if_not ARM64_HAS_DCPOP - b __clean_dcache_area_poc - alternative_else_nop_endif - dcache_by_line_op cvap, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__clean_dcache_area_pop) - -/* - * __dma_flush_area(start, size) - * - * clean & invalidate D / U line - * - * - start - virtual start address of region - * - size - size in question - */ -ENTRY(__dma_flush_area) - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__dma_flush_area) - -/* - * __dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(__dma_map_area) - cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_area - b __dma_clean_area -ENDPIPROC(__dma_map_area) - -/* - * __dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(__dma_unmap_area) - cmp w2, #DMA_TO_DEVICE - b.ne __dma_inv_area - ret -ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S deleted file mode 100644 index a1e0592d1fbcd8833e41b2486869d7ae9898981b..0000000000000000000000000000000000000000 --- a/arch/arm64/mm/proc.S +++ /dev/null @@ -1,478 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/mm/proc.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_ARM64_64K_PAGES -#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K -#elif defined(CONFIG_ARM64_16K_PAGES) -#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K -#else /* CONFIG_ARM64_4K_PAGES */ -#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K -#endif - -#ifdef CONFIG_RANDOMIZE_BASE -#define TCR_KASLR_FLAGS TCR_NFD1 -#else -#define TCR_KASLR_FLAGS 0 -#endif - -#define TCR_SMP_FLAGS TCR_SHARED - -/* PTWs cacheable, inner/outer WBWA */ -#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA - -#ifdef CONFIG_KASAN_SW_TAGS -#define TCR_KASAN_FLAGS TCR_TBI1 -#else -#define TCR_KASAN_FLAGS 0 -#endif - -#define MAIR(attr, mt) ((attr) << ((mt) * 8)) - -#ifdef CONFIG_CPU_PM -/** - * cpu_do_suspend - save CPU registers context - * - * x0: virtual address of context pointer - */ -ENTRY(cpu_do_suspend) - mrs x2, tpidr_el0 - mrs x3, tpidrro_el0 - mrs x4, contextidr_el1 - mrs x5, osdlr_el1 - mrs x6, cpacr_el1 - mrs x7, tcr_el1 - mrs x8, vbar_el1 - mrs x9, mdscr_el1 - mrs x10, oslsr_el1 - mrs x11, sctlr_el1 -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x12, tpidr_el1 -alternative_else - mrs x12, tpidr_el2 -alternative_endif - mrs x13, sp_el0 - stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - stp x12, x13, [x0, #80] - ret -ENDPROC(cpu_do_suspend) - -/** - * cpu_do_resume - restore CPU register context - * - * x0: Address of context pointer - */ - .pushsection ".idmap.text", "awx" -ENTRY(cpu_do_resume) - ldp x2, x3, [x0] - ldp x4, x5, [x0, #16] - ldp x6, x8, [x0, #32] - ldp x9, x10, [x0, #48] - ldp x11, x12, [x0, #64] - ldp x13, x14, [x0, #80] - msr tpidr_el0, x2 - msr tpidrro_el0, x3 - msr contextidr_el1, x4 - msr cpacr_el1, x6 - - /* Don't change t0sz here, mask those bits when restoring */ - mrs x7, tcr_el1 - bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH - - msr tcr_el1, x8 - msr vbar_el1, x9 - - /* - * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking - * 
debug exceptions. By restoring MDSCR_EL1 here, we may take a debug - * exception. Mask them until local_daif_restore() in cpu_suspend() - * resets them. - */ - disable_daif - msr mdscr_el1, x10 - - msr sctlr_el1, x12 -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - msr tpidr_el1, x13 -alternative_else - msr tpidr_el2, x13 -alternative_endif - msr sp_el0, x14 - /* - * Restore oslsr_el1 by writing oslar_el1 - */ - msr osdlr_el1, x5 - ubfx x11, x11, #1, #1 - msr oslar_el1, x11 - reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - -alternative_if ARM64_HAS_RAS_EXTN - msr_s SYS_DISR_EL1, xzr -alternative_else_nop_endif - - isb - ret -ENDPROC(cpu_do_resume) - .popsection -#endif - -/* - * cpu_do_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys. - * - * - pgd_phys - physical address of new TTB - */ -ENTRY(cpu_do_switch_mm) - mrs x2, ttbr1_el1 - mmid x1, x1 // get mm->context.id - phys_to_ttbr x3, x0 - -alternative_if ARM64_HAS_CNP - cbz x1, 1f // skip CNP for reserved ASID - orr x3, x3, #TTBR_CNP_BIT -1: -alternative_else_nop_endif -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - bfi x3, x1, #48, #16 // set the ASID field in TTBR0 -#endif - bfi x2, x1, #48, #16 // set the ASID - msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) - isb - msr ttbr0_el1, x3 // now update TTBR0 - isb - b post_ttbr_update_workaround // Back to C code... -ENDPROC(cpu_do_switch_mm) - - .pushsection ".idmap.text", "awx" - -.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 - adrp \tmp1, empty_zero_page - phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2, \tmp1 - msr ttbr1_el1, \tmp2 - isb - tlbi vmalle1 - dsb nsh - isb -.endm - -/* - * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) - * - * This is the low-level counterpart to cpu_replace_ttbr1, and should not be - * called by anything else. It can only be executed from a TTBR0 mapping. - */ -ENTRY(idmap_cpu_replace_ttbr1) - save_and_disable_daif flags=x2 - - __idmap_cpu_set_reserved_ttbr1 x1, x3 - - offset_ttbr1 x0, x3 - msr ttbr1_el1, x0 - isb - - restore_daif x2 - - ret -ENDPROC(idmap_cpu_replace_ttbr1) - .popsection - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - .pushsection ".idmap.text", "awx" - - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries - .endm - - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. - .endm - -/* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) - * - * Called exactly once from stop_machine context by each CPU found during boot. - */ -__idmap_kpti_flag: - .long 1 -ENTRY(idmap_kpti_install_ng_mappings) - cpu .req w0 - num_cpus .req w1 - swapper_pa .req x2 - swapper_ttb .req x3 - flag_ptr .req x4 - cur_pgdp .req x5 - end_pgdp .req x6 - pgd .req x7 - cur_pudp .req x8 - end_pudp .req x9 - pud .req x10 - cur_pmdp .req x11 - end_pmdp .req x12 - pmd .req x13 - cur_ptep .req x14 - end_ptep .req x15 - pte .req x16 - - mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb - adr flag_ptr, __idmap_kpti_flag - - cbnz cpu, __idmap_kpti_secondary - - /* We're the boot CPU. 
Wait for the others to catch up */ - sevl -1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b - - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 - isb - - /* Everybody is enjoying the idmap, so we can rewrite swapper. */ - /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd - - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb - - /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 - isb - - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - - /* Set the flag to zero to indicate that we're all done */ - str wzr, [flag_ptr] - ret - - /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 - pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd - .endif - - /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 - pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud - .endif - - /* PTE */ -walk_ptes: - pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd - - /* Secondary CPUs end up here */ -__idmap_kpti_secondary: - /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 - - /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] - cbnz w17, 1b - - /* Wait for the boot CPU to finish messing around with swapper */ - sevl -1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b - - /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x18 - msr ttbr1_el1, swapper_ttb - isb - ret - - .unreq cpu - .unreq num_cpus - .unreq swapper_pa - .unreq swapper_ttb - .unreq flag_ptr - .unreq cur_pgdp - .unreq end_pgdp - .unreq pgd - .unreq cur_pudp - .unreq end_pudp - .unreq pud - .unreq cur_pmdp - .unreq end_pmdp - .unreq pmd - .unreq cur_ptep - .unreq end_ptep - .unreq pte -ENDPROC(idmap_kpti_install_ng_mappings) - .popsection -#endif - -/* - * __cpu_setup - * - * Initialise the processor for turning the MMU on. Return in x0 the - * value of the SCTLR_EL1 register. 
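The idmap_kpti_install_ng_mappings rendezvous above is a small barrier protocol around the page-table rewrite: each secondary increments a flag and spins until it returns to zero, while the boot CPU waits for the flag to reach the CPU count, rewrites swapper with the MMU off, and finally clears the flag. The same protocol sketched with C11 atomics (the ldaxr/stxr pairs become atomic read-modify-writes; the wfe/sevl event waiting is elided):

#include <stdatomic.h>

static atomic_int kpti_flag = 1;	/* the boot CPU is pre-counted */

void kpti_secondary(void)
{
	atomic_fetch_add(&kpti_flag, 1);	/* report "ready" */
	while (atomic_load(&kpti_flag) != 0)
		;	/* wait for the boot CPU to finish the rewrite */
}

void kpti_boot(int num_cpus)
{
	while (atomic_load(&kpti_flag) != num_cpus)
		;	/* wait until every CPU has checked in */
	/* ... MMU off, walk swapper setting PTE_NG, MMU back on ... */
	atomic_store(&kpti_flag, 0);		/* release the secondaries */
}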
- */ - .pushsection ".idmap.text", "awx" -ENTRY(__cpu_setup) - tlbi vmalle1 // Invalidate local TLB - dsb nsh - - mov x0, #3 << 20 - msr cpacr_el1, x0 // Enable FP/ASIMD - mov x0, #1 << 12 // Reset mdscr_el1 and disable - msr mdscr_el1, x0 // access to the DCC from EL0 - isb // Unmask debug exceptions now, - enable_dbg // since this is per-cpu - reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - /* - * Memory region attributes for LPAE: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * DEVICE_nGnRE 001 00000100 - * DEVICE_GRE 010 00001100 - * NORMAL_NC 011 01000100 - * NORMAL 100 11111111 - * NORMAL_WT 101 10111011 - */ - ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ - MAIR(0x04, MT_DEVICE_nGnRE) | \ - MAIR(0x0c, MT_DEVICE_GRE) | \ - MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) | \ - MAIR(0xbb, MT_NORMAL_WT) - msr mair_el1, x5 - /* - * Prepare SCTLR - */ - mov_q x0, SCTLR_EL1_SET - /* - * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for - * both user and kernel. - */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS - tcr_clear_errata_bits x10, x9, x5 - -#ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x9, vabits_actual - sub x9, xzr, x9 - add x9, x9, #64 - tcr_set_t1sz x10, x9 -#else - ldr_l x9, idmap_t0sz -#endif - tcr_set_t0sz x10, x9 - - /* - * Set the IPS bits in TCR_EL1. - */ - tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 -#ifdef CONFIG_ARM64_HW_AFDBM - /* - * Enable hardware update of the Access Flags bit. - * Hardware dirty bit management is enabled later, - * via capabilities. - */ - mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, #0xf - cbz x9, 1f - orr x10, x10, #TCR_HA // hardware Access flag update -1: -#endif /* CONFIG_ARM64_HW_AFDBM */ - msr tcr_el1, x10 - ret // return to head.S -ENDPROC(__cpu_setup) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S deleted file mode 100644 index c5f05c4a4d00883422ed6e211135302cff3be14f..0000000000000000000000000000000000000000 --- a/arch/arm64/xen/hypercall.S +++ /dev/null @@ -1,112 +0,0 @@ -/****************************************************************************** - * hypercall.S - * - * Xen hypercall wrappers - * - * Stefano Stabellini , Citrix, 2012 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * The Xen hypercall calling convention is very similar to the procedure - * call standard for the ARM 64-bit architecture: the first parameter is - * passed in x0, the second in x1, the third in x2, the fourth in x3 and - * the fifth in x4. - * - * The hypercall number is passed in x16. - * - * The return value is in x0. - * - * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM - * hypercall tag. - * - * Parameter structs passed to hypercalls are laid out according to - * the ARM 64-bit EABI standard. - */ - -#include -#include -#include -#include - - -#define XEN_IMM 0xEA1 - -#define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - mov x16, #__HYPERVISOR_##hypercall; \ - hvc XEN_IMM; \ - ret; \ -ENDPROC(HYPERVISOR_##hypercall) - -#define HYPERCALL0 HYPERCALL_SIMPLE -#define HYPERCALL1 HYPERCALL_SIMPLE -#define HYPERCALL2 HYPERCALL_SIMPLE -#define HYPERCALL3 HYPERCALL_SIMPLE -#define HYPERCALL4 HYPERCALL_SIMPLE -#define HYPERCALL5 HYPERCALL_SIMPLE - - .text - -HYPERCALL2(xen_version); -HYPERCALL3(console_io); -HYPERCALL3(grant_table_op); -HYPERCALL2(sched_op); -HYPERCALL2(event_channel_op); -HYPERCALL2(hvm_op); -HYPERCALL2(memory_op); -HYPERCALL2(physdev_op); -HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); -HYPERCALL1(platform_op_raw); -HYPERCALL2(multicall); -HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); - -ENTRY(privcmd_call) - mov x16, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - mov x3, x4 - mov x4, x5 - /* - * Privcmd calls are issued by the userspace. The kernel needs to - * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 - * translations to user memory via AT instructions. Since AT - * instructions are not affected by the PAN bit (ARMv8.1), we only - * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation - * is enabled (it implies that hardware UAO and PAN disabled). - */ - uaccess_ttbr0_enable x6, x7, x8 - hvc XEN_IMM - - /* - * Disable userspace access from kernel once the hyp call completed. - */ - uaccess_ttbr0_disable x6, x7 - ret -ENDPROC(privcmd_call); diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S deleted file mode 100644 index 4332a10aec6c79e137d8d4166328978ee6598366..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/entry.S +++ /dev/null @@ -1,736 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004-2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@virtuallogix.com) -; Updated for 2.6.34: Mark Salter -; - -#include -#include -#include -#include -#include -#include - -; Registers naming -#define DP B14 -#define SP B15 - -#ifndef CONFIG_PREEMPT -#define resume_kernel restore_all -#endif - - .altmacro - - .macro MASK_INT reg - MVC .S2 CSR,reg - CLR .S2 reg,0,0,reg - MVC .S2 reg,CSR - .endm - - .macro UNMASK_INT reg - MVC .S2 CSR,reg - SET .S2 reg,0,0,reg - MVC .S2 reg,CSR - .endm - - .macro GET_THREAD_INFO reg - SHR .S1X SP,THREAD_SHIFT,reg - SHL .S1 reg,THREAD_SHIFT,reg - .endm - - ;; - ;; This defines the normal kernel pt_regs layout. 
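Back in hypercall.S, the calling convention described there reduces to: arguments in x0-x4, hypercall number in x16, hvc with the Xen immediate 0xEA1, result in x0. A two-argument example written as C inline assembly (xen_hypercall2 is a hypothetical wrapper; the real file generates these stubs with the HYPERCALL_SIMPLE macro):

/* Hypothetical C equivalent of HYPERCALL2(name). */
static inline long xen_hypercall2(unsigned long nr, long a0, long a1)
{
	register long x0 asm("x0") = a0;
	register long x1 asm("x1") = a1;
	register unsigned long x16 asm("x16") = nr;

	asm volatile("hvc #0xEA1"	/* Xen-specific hypercall tag */
		     : "+r" (x0)
		     : "r" (x1), "r" (x16)
		     : "memory");
	return x0;			/* return value comes back in x0 */
}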
- ;; - .macro SAVE_ALL __rp __tsr - STW .D2T2 B0,*SP--[2] ; save original B0 - MVKL .S2 current_ksp,B0 - MVKH .S2 current_ksp,B0 - LDW .D2T2 *B0,B1 ; KSP - - NOP 3 - STW .D2T2 B1,*+SP[1] ; save original B1 - XOR .D2 SP,B1,B0 ; (SP ^ KSP) - LDW .D2T2 *+SP[1],B1 ; restore B0/B1 - LDW .D2T2 *++SP[2],B0 - SHR .S2 B0,THREAD_SHIFT,B0 ; 0 if already using kstack - [B0] STDW .D2T2 SP:DP,*--B1[1] ; user: save user sp/dp kstack - [B0] MV .S2 B1,SP ; and switch to kstack -||[!B0] STDW .D2T2 SP:DP,*--SP[1] ; kernel: save on current stack - - SUBAW .D2 SP,2,SP - - ADD .D1X SP,-8,A15 - || STDW .D2T1 A15:A14,*SP--[16] ; save A15:A14 - - STDW .D2T2 B13:B12,*SP--[1] - || STDW .D1T1 A13:A12,*A15--[1] - || MVC .S2 __rp,B13 - - STDW .D2T2 B11:B10,*SP--[1] - || STDW .D1T1 A11:A10,*A15--[1] - || MVC .S2 CSR,B12 - - STDW .D2T2 B9:B8,*SP--[1] - || STDW .D1T1 A9:A8,*A15--[1] - || MVC .S2 RILC,B11 - STDW .D2T2 B7:B6,*SP--[1] - || STDW .D1T1 A7:A6,*A15--[1] - || MVC .S2 ILC,B10 - - STDW .D2T2 B5:B4,*SP--[1] - || STDW .D1T1 A5:A4,*A15--[1] - - STDW .D2T2 B3:B2,*SP--[1] - || STDW .D1T1 A3:A2,*A15--[1] - || MVC .S2 __tsr,B5 - - STDW .D2T2 B1:B0,*SP--[1] - || STDW .D1T1 A1:A0,*A15--[1] - || MV .S1X B5,A5 - - STDW .D2T2 B31:B30,*SP--[1] - || STDW .D1T1 A31:A30,*A15--[1] - STDW .D2T2 B29:B28,*SP--[1] - || STDW .D1T1 A29:A28,*A15--[1] - STDW .D2T2 B27:B26,*SP--[1] - || STDW .D1T1 A27:A26,*A15--[1] - STDW .D2T2 B25:B24,*SP--[1] - || STDW .D1T1 A25:A24,*A15--[1] - STDW .D2T2 B23:B22,*SP--[1] - || STDW .D1T1 A23:A22,*A15--[1] - STDW .D2T2 B21:B20,*SP--[1] - || STDW .D1T1 A21:A20,*A15--[1] - STDW .D2T2 B19:B18,*SP--[1] - || STDW .D1T1 A19:A18,*A15--[1] - STDW .D2T2 B17:B16,*SP--[1] - || STDW .D1T1 A17:A16,*A15--[1] - - STDW .D2T2 B13:B12,*SP--[1] ; save PC and CSR - - STDW .D2T2 B11:B10,*SP--[1] ; save RILC and ILC - STDW .D2T1 A5:A4,*SP--[1] ; save TSR and orig A4 - - ;; We left an unused word on the stack just above pt_regs. - ;; It is used to save whether or not this frame is due to - ;; a syscall. It is cleared here, but the syscall handler - ;; sets it to a non-zero value. 
- MVK .L2 0,B1 - STW .D2T2 B1,*+SP(REGS__END+8) ; clear syscall flag - .endm - - .macro RESTORE_ALL __rp __tsr - LDDW .D2T2 *++SP[1],B9:B8 ; get TSR (B9) - LDDW .D2T2 *++SP[1],B11:B10 ; get RILC (B11) and ILC (B10) - LDDW .D2T2 *++SP[1],B13:B12 ; get PC (B13) and CSR (B12) - - ADDAW .D1X SP,30,A15 - - LDDW .D1T1 *++A15[1],A17:A16 - || LDDW .D2T2 *++SP[1],B17:B16 - LDDW .D1T1 *++A15[1],A19:A18 - || LDDW .D2T2 *++SP[1],B19:B18 - LDDW .D1T1 *++A15[1],A21:A20 - || LDDW .D2T2 *++SP[1],B21:B20 - LDDW .D1T1 *++A15[1],A23:A22 - || LDDW .D2T2 *++SP[1],B23:B22 - LDDW .D1T1 *++A15[1],A25:A24 - || LDDW .D2T2 *++SP[1],B25:B24 - LDDW .D1T1 *++A15[1],A27:A26 - || LDDW .D2T2 *++SP[1],B27:B26 - LDDW .D1T1 *++A15[1],A29:A28 - || LDDW .D2T2 *++SP[1],B29:B28 - LDDW .D1T1 *++A15[1],A31:A30 - || LDDW .D2T2 *++SP[1],B31:B30 - - LDDW .D1T1 *++A15[1],A1:A0 - || LDDW .D2T2 *++SP[1],B1:B0 - - LDDW .D1T1 *++A15[1],A3:A2 - || LDDW .D2T2 *++SP[1],B3:B2 - || MVC .S2 B9,__tsr - LDDW .D1T1 *++A15[1],A5:A4 - || LDDW .D2T2 *++SP[1],B5:B4 - || MVC .S2 B11,RILC - LDDW .D1T1 *++A15[1],A7:A6 - || LDDW .D2T2 *++SP[1],B7:B6 - || MVC .S2 B10,ILC - - LDDW .D1T1 *++A15[1],A9:A8 - || LDDW .D2T2 *++SP[1],B9:B8 - || MVC .S2 B13,__rp - - LDDW .D1T1 *++A15[1],A11:A10 - || LDDW .D2T2 *++SP[1],B11:B10 - || MVC .S2 B12,CSR - - LDDW .D1T1 *++A15[1],A13:A12 - || LDDW .D2T2 *++SP[1],B13:B12 - - MV .D2X A15,SP - || MVKL .S1 current_ksp,A15 - MVKH .S1 current_ksp,A15 - || ADDAW .D1X SP,6,A14 - STW .D1T1 A14,*A15 ; save kernel stack pointer - - LDDW .D2T1 *++SP[1],A15:A14 - - B .S2 __rp ; return from interruption - LDDW .D2T2 *+SP[1],SP:DP - NOP 4 - .endm - - .section .text - - ;; - ;; Jump to schedule() then return to ret_from_exception - ;; -_reschedule: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule,A0 - MVKH .S1 schedule,A0 - B .S2X A0 -#else - B .S1 schedule -#endif - ADDKPC .S2 ret_from_exception,B3,4 - - ;; - ;; Called before syscall handler when process is being debugged - ;; -tracesys_on: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 syscall_trace_entry,A0 - MVKH .S1 syscall_trace_entry,A0 - B .S2X A0 -#else - B .S1 syscall_trace_entry -#endif - ADDKPC .S2 ret_from_syscall_trace,B3,3 - ADD .S1X 8,SP,A4 - -ret_from_syscall_trace: - ;; tracing returns (possibly new) syscall number - MV .D2X A4,B0 - || MVK .S2 __NR_syscalls,B1 - CMPLTU .L2 B0,B1,B1 - - [!B1] BNOP .S2 ret_from_syscall_function,5 - || MVK .S1 -ENOSYS,A4 - - ;; reload syscall args from (possibly modified) stack frame - ;; and get syscall handler addr from sys_call_table: - LDW .D2T2 *+SP(REGS_B4+8),B4 - || MVKL .S2 sys_call_table,B1 - LDW .D2T1 *+SP(REGS_A6+8),A6 - || MVKH .S2 sys_call_table,B1 - LDW .D2T2 *+B1[B0],B0 - || MVKL .S2 ret_from_syscall_function,B3 - LDW .D2T2 *+SP(REGS_B6+8),B6 - || MVKH .S2 ret_from_syscall_function,B3 - LDW .D2T1 *+SP(REGS_A8+8),A8 - LDW .D2T2 *+SP(REGS_B8+8),B8 - NOP - ; B0 = sys_call_table[__NR_*] - BNOP .S2 B0,5 ; branch to syscall handler - || LDW .D2T1 *+SP(REGS_ORIG_A4+8),A4 - -syscall_exit_work: - AND .D1 _TIF_SYSCALL_TRACE,A2,A0 - [!A0] BNOP .S1 work_pending,5 - [A0] B .S2 syscall_trace_exit - ADDKPC .S2 resume_userspace,B3,1 - MVC .S2 CSR,B1 - SET .S2 B1,0,0,B1 - MVC .S2 B1,CSR ; enable ints - -work_pending: - AND .D1 _TIF_NEED_RESCHED,A2,A0 - [!A0] BNOP .S1 work_notifysig,5 - -work_resched: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule,A1 - MVKH .S1 schedule,A1 - B .S2X A1 -#else - B .S2 schedule -#endif - ADDKPC .S2 work_rescheduled,B3,4 -work_rescheduled: - ;; make sure we don't miss an interrupt setting need_resched or - ;; 
sigpending between sampling and the rti - MASK_INT B2 - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_WORK_MASK,A1 - MVK .S1 _TIF_NEED_RESCHED,A3 - NOP 2 - AND .D1 A1,A2,A0 - || AND .S1 A3,A2,A1 - [!A0] BNOP .S1 restore_all,5 - [A1] BNOP .S1 work_resched,5 - -work_notifysig: - ;; enable interrupts for do_notify_resume() - UNMASK_INT B2 - B .S2 do_notify_resume - LDW .D2T1 *+SP(REGS__END+8),A6 ; syscall flag - ADDKPC .S2 resume_userspace,B3,1 - ADD .S1X 8,SP,A4 ; pt_regs pointer is first arg - MV .D2X A2,B4 ; thread_info flags is second arg - - ;; - ;; On C64x+, the return path from exceptions and interrupts - ;; is slightly different - ;; -ENTRY(ret_from_exception) -#ifdef CONFIG_PREEMPT - MASK_INT B2 -#endif - -ENTRY(ret_from_interrupt) - ;; - ;; Check if we are coming from user mode. - ;; - LDW .D2T2 *+SP(REGS_TSR+8),B0 - MVK .S2 0x40,B1 - NOP 3 - AND .D2 B0,B1,B0 - [!B0] BNOP .S2 resume_kernel,5 - -resume_userspace: - ;; make sure we don't miss an interrupt setting need_resched or - ;; sigpending between sampling and the rti - MASK_INT B2 - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_WORK_MASK,A1 - MVK .S1 _TIF_NEED_RESCHED,A3 - NOP 2 - AND .D1 A1,A2,A0 - [A0] BNOP .S1 work_pending,5 - BNOP .S1 restore_all,5 - - ;; - ;; System call handling - ;; B0 = syscall number (in sys_call_table) - ;; A4,B4,A6,B6,A8,B8 = arguments of the syscall function - ;; A4 is the return value register - ;; -system_call_saved: - MVK .L2 1,B2 - STW .D2T2 B2,*+SP(REGS__END+8) ; set syscall flag - MVC .S2 B2,ECR ; ack the software exception - - UNMASK_INT B2 ; re-enable global IT - -system_call_saved_noack: - ;; Check system call number - MVK .S2 __NR_syscalls,B1 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_ni_syscall,A0 -#endif - CMPLTU .L2 B0,B1,B1 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKH .S1 sys_ni_syscall,A0 -#endif - - ;; Check for ptrace - GET_THREAD_INFO A12 - -#ifdef CONFIG_C6X_BIG_KERNEL - [!B1] B .S2X A0 -#else - [!B1] B .S2 sys_ni_syscall -#endif - [!B1] ADDKPC .S2 ret_from_syscall_function,B3,4 - - ;; Get syscall handler addr from sys_call_table - ;; call tracesys_on or call syscall handler - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - || MVKL .S2 sys_call_table,B1 - MVKH .S2 sys_call_table,B1 - LDW .D2T2 *+B1[B0],B0 - NOP 2 - ; A2 = thread_info flags - AND .D1 _TIF_SYSCALL_TRACE,A2,A2 - [A2] BNOP .S1 tracesys_on,5 - ;; B0 = _sys_call_table[__NR_*] - B .S2 B0 - ADDKPC .S2 ret_from_syscall_function,B3,4 - -ret_from_syscall_function: - STW .D2T1 A4,*+SP(REGS_A4+8) ; save return value in A4 - ; original A4 is in orig_A4 -syscall_exit: - ;; make sure we don't miss an interrupt setting need_resched or - ;; sigpending between sampling and the rti - MASK_INT B2 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_ALLWORK_MASK,A1 - NOP 3 - AND .D1 A1,A2,A2 ; check for work to do - [A2] BNOP .S1 syscall_exit_work,5 - -restore_all: - RESTORE_ALL NRP,NTSR - - ;; - ;; After a fork we jump here directly from resume, - ;; so that A4 contains the previous task structure.
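The range check and table dispatch that system_call_saved_noack performs have a compact C shape. A sketch with illustrative names (the pt_regs field names and the six-argument prototype are assumptions for illustration, not the kernel's exact declarations):

/* Sketch of the dispatch done above; the tracing flag is checked first. */
long dispatch_syscall(unsigned int nr, struct pt_regs *regs)
{
	long (*handler)(long, long, long, long, long, long);

	if (nr >= __NR_syscalls)        /* the CMPLTU B0,B1 test */
		return sys_ni_syscall();

	handler = sys_call_table[nr];   /* the LDW *+B1[B0] load */
	/* arguments arrive in A4,B4,A6,B6,A8,B8; A4 carries the result */
	return handler(regs->a4, regs->b4, regs->a6,
		       regs->b6, regs->a8, regs->b8);
}

The fork path below is the same exit sequence with one twist: schedule_tail() runs first, then a zero is stored into the frame's A4 slot so the child observes a 0 return value.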
- ;; -ENTRY(ret_from_fork) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule_tail,A0 - MVKH .S1 schedule_tail,A0 - B .S2X A0 -#else - B .S2 schedule_tail -#endif - ADDKPC .S2 ret_from_fork_2,B3,4 -ret_from_fork_2: - ;; return 0 in A4 for child process - GET_THREAD_INFO A12 - BNOP .S2 syscall_exit,3 - MVK .L2 0,B0 - STW .D2T2 B0,*+SP(REGS_A4+8) -ENDPROC(ret_from_fork) - -ENTRY(ret_from_kernel_thread) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule_tail,A0 - MVKH .S1 schedule_tail,A0 - B .S2X A0 -#else - B .S2 schedule_tail -#endif - LDW .D2T2 *+SP(REGS_A0+8),B10 /* get fn */ - ADDKPC .S2 0f,B3,3 -0: - B .S2 B10 /* call fn */ - LDW .D2T1 *+SP(REGS_A1+8),A4 /* get arg */ - ADDKPC .S2 ret_from_fork_2,B3,3 -ENDPROC(ret_from_kernel_thread) - - ;; - ;; These are the interrupt handlers, responsible for calling c6x_do_IRQ() - ;; - .macro SAVE_ALL_INT - SAVE_ALL IRP,ITSR - .endm - - .macro CALL_INT int -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 c6x_do_IRQ,A0 - MVKH .S1 c6x_do_IRQ,A0 - BNOP .S2X A0,1 - MVK .S1 int,A4 - ADDAW .D2 SP,2,B4 - MVKL .S2 ret_from_interrupt,B3 - MVKH .S2 ret_from_interrupt,B3 -#else - CALLP .S2 c6x_do_IRQ,B3 - || MVK .S1 int,A4 - || ADDAW .D2 SP,2,B4 - B .S1 ret_from_interrupt - NOP 5 -#endif - .endm - -ENTRY(_int4_handler) - SAVE_ALL_INT - CALL_INT 4 -ENDPROC(_int4_handler) - -ENTRY(_int5_handler) - SAVE_ALL_INT - CALL_INT 5 -ENDPROC(_int5_handler) - -ENTRY(_int6_handler) - SAVE_ALL_INT - CALL_INT 6 -ENDPROC(_int6_handler) - -ENTRY(_int7_handler) - SAVE_ALL_INT - CALL_INT 7 -ENDPROC(_int7_handler) - -ENTRY(_int8_handler) - SAVE_ALL_INT - CALL_INT 8 -ENDPROC(_int8_handler) - -ENTRY(_int9_handler) - SAVE_ALL_INT - CALL_INT 9 -ENDPROC(_int9_handler) - -ENTRY(_int10_handler) - SAVE_ALL_INT - CALL_INT 10 -ENDPROC(_int10_handler) - -ENTRY(_int11_handler) - SAVE_ALL_INT - CALL_INT 11 -ENDPROC(_int11_handler) - -ENTRY(_int12_handler) - SAVE_ALL_INT - CALL_INT 12 -ENDPROC(_int12_handler) - -ENTRY(_int13_handler) - SAVE_ALL_INT - CALL_INT 13 -ENDPROC(_int13_handler) - -ENTRY(_int14_handler) - SAVE_ALL_INT - CALL_INT 14 -ENDPROC(_int14_handler) - -ENTRY(_int15_handler) - SAVE_ALL_INT - CALL_INT 15 -ENDPROC(_int15_handler) - - ;; - ;; Handler for uninitialized and spurious interrupts - ;; -ENTRY(_bad_interrupt) - B .S2 IRP - NOP 5 -ENDPROC(_bad_interrupt) - - ;; - ;; Entry for NMI/exceptions/syscall - ;; -ENTRY(_nmi_handler) - SAVE_ALL NRP,NTSR - - MVC .S2 EFR,B2 - CMPEQ .L2 1,B2,B2 - || MVC .S2 TSR,B1 - CLR .S2 B1,10,10,B1 - MVC .S2 B1,TSR -#ifdef CONFIG_C6X_BIG_KERNEL - [!B2] MVKL .S1 process_exception,A0 - [!B2] MVKH .S1 process_exception,A0 - [!B2] B .S2X A0 -#else - [!B2] B .S2 process_exception -#endif - [B2] B .S2 system_call_saved - [!B2] ADDAW .D2 SP,2,B1 - [!B2] MV .D1X B1,A4 - ADDKPC .S2 ret_from_trap,B3,2 - -ret_from_trap: - MV .D2X A4,B0 - [!B0] BNOP .S2 ret_from_exception,5 - -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S2 system_call_saved_noack,B3 - MVKH .S2 system_call_saved_noack,B3 -#endif - LDW .D2T2 *+SP(REGS_B0+8),B0 - LDW .D2T1 *+SP(REGS_A4+8),A4 - LDW .D2T2 *+SP(REGS_B4+8),B4 - LDW .D2T1 *+SP(REGS_A6+8),A6 - LDW .D2T2 *+SP(REGS_B6+8),B6 - LDW .D2T1 *+SP(REGS_A8+8),A8 -#ifdef CONFIG_C6X_BIG_KERNEL - || B .S2 B3 -#else - || B .S2 system_call_saved_noack -#endif - LDW .D2T2 *+SP(REGS_B8+8),B8 - NOP 4 -ENDPROC(_nmi_handler) - - ;; - ;; Jump to schedule() then return to ret_from_isr - ;; -#ifdef CONFIG_PREEMPT -resume_kernel: - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_PREEMPT_COUNT),A1 - NOP 4 - [A1] BNOP .S2 restore_all,5 - -preempt_schedule: - GET_THREAD_INFO A2 
- LDW .D1T1 *+A2(THREAD_INFO_FLAGS),A1 -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S2 preempt_schedule_irq,B0 - MVKH .S2 preempt_schedule_irq,B0 - NOP 2 -#else - NOP 4 -#endif - AND .D1 _TIF_NEED_RESCHED,A1,A1 - [!A1] BNOP .S2 restore_all,5 -#ifdef CONFIG_C6X_BIG_KERNEL - B .S2 B0 -#else - B .S2 preempt_schedule_irq -#endif - ADDKPC .S2 preempt_schedule,B3,4 -#endif /* CONFIG_PREEMPT */ - -ENTRY(enable_exception) - DINT - MVC .S2 TSR,B0 - MVC .S2 B3,NRP - MVK .L2 0xc,B1 - OR .D2 B0,B1,B0 - MVC .S2 B0,TSR ; Set GEE and XEN in TSR - B .S2 NRP - NOP 5 -ENDPROC(enable_exception) - - ;; - ;; Special system calls - ;; return address is in B3 - ;; -ENTRY(sys_rt_sigreturn) - ADD .D1X SP,8,A4 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 do_rt_sigreturn,A0 - MVKH .S1 do_rt_sigreturn,A0 - BNOP .S2X A0,5 -#else - || B .S2 do_rt_sigreturn - NOP 5 -#endif -ENDPROC(sys_rt_sigreturn) - -ENTRY(sys_pread_c6x) - MV .D2X A8,B7 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_pread64,A0 - MVKH .S1 sys_pread64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_pread64 - NOP 5 -#endif -ENDPROC(sys_pread_c6x) - -ENTRY(sys_pwrite_c6x) - MV .D2X A8,B7 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_pwrite64,A0 - MVKH .S1 sys_pwrite64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_pwrite64 - NOP 5 -#endif -ENDPROC(sys_pwrite_c6x) - -;; On Entry -;; A4 - path -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -ENTRY(sys_truncate64_c6x) -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .S2 B4,B5 - MV .D2X A6,B4 -#else - MV .D2X A6,B5 -#endif -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_truncate64,A0 - MVKH .S1 sys_truncate64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_truncate64 - NOP 5 -#endif -ENDPROC(sys_truncate64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -ENTRY(sys_ftruncate64_c6x) -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .S2 B4,B5 - MV .D2X A6,B4 -#else - MV .D2X A6,B5 -#endif -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_ftruncate64,A0 - MVKH .S1 sys_ftruncate64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_ftruncate64 - NOP 5 -#endif -ENDPROC(sys_ftruncate64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -;; B6 - len_lo (LE), len_hi (BE) -;; A8 - len_lo (BE), len_hi (LE) -;; B8 - advice -ENTRY(sys_fadvise64_64_c6x) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 sys_fadvise64_64,A0 - MVKH .S1 sys_fadvise64_64,A0 - BNOP .S2X A0,2 -#else - B .S2 sys_fadvise64_64 - NOP 2 -#endif -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .L2 B4,B5 - || MV .D2X A6,B4 - MV .L1 A8,A6 - || MV .D1X B6,A7 -#else - MV .D2X A6,B5 - MV .L1 A8,A7 - || MV .D1X B6,A6 -#endif - MV .L2 B8,B6 -ENDPROC(sys_fadvise64_64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - mode -;; A6 - offset_hi -;; B6 - offset_lo -;; A8 - len_hi -;; B8 - len_lo -ENTRY(sys_fallocate_c6x) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 sys_fallocate,A0 - MVKH .S1 sys_fallocate,A0 - BNOP .S2X A0,1 -#else - B .S2 sys_fallocate - NOP -#endif - MV .D1 A6,A7 - MV .D1X B6,A6 - MV .D2X A8,B7 - MV .D2 B8,B6 -ENDPROC(sys_fallocate_c6x) - - ;; put this in .neardata for faster access when using DSBT mode - .section .neardata,"aw",@progbits - .global current_ksp - .hidden current_ksp -current_ksp: - .word init_thread_union + THREAD_START_SP diff --git a/arch/c6x/kernel/head.S b/arch/c6x/kernel/head.S deleted file mode 100644 index fecbeef827bc186491a9fba7ad91927128c84f13..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/head.S +++ /dev/null @@ -1,81 +0,0 @@ -; 
SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -#include -#include -#include - - __HEAD -ENTRY(_c_int00) - ;; Save magic and pointer - MV .S1 A4,A10 - MV .S2 B4,B10 - MVKL .S2 __bss_start,B5 - MVKH .S2 __bss_start,B5 - MVKL .S2 __bss_stop,B6 - MVKH .S2 __bss_stop,B6 - SUB .L2 B6,B5,B6 ; bss size - - ;; Set the stack pointer - MVKL .S2 current_ksp,B0 - MVKH .S2 current_ksp,B0 - LDW .D2T2 *B0,B15 - - ;; clear bss - SHR .S2 B6,3,B0 ; number of dwords to clear - ZERO .L2 B13 - ZERO .L2 B12 -bss_loop: - BDEC .S2 bss_loop,B0 - NOP 3 - CMPLT .L2 B0,0,B1 - [!B1] STDW .D2T2 B13:B12,*B5++[1] - - NOP 4 - AND .D2 ~7,B15,B15 - - ;; Clear GIE and PGIE - MVC .S2 CSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,CSR - MVC .S2 TSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,TSR - MVC .S2 ITSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,ITSR - MVC .S2 NTSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,NTSR - - ;; pass DTB pointer to machine_init (or zero if none) - MVKL .S1 OF_DT_HEADER,A0 - MVKH .S1 OF_DT_HEADER,A0 - CMPEQ .L1 A10,A0,A0 - [A0] MV .S1X B10,A4 - [!A0] MVK .S1 0,A4 - -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 machine_init,A0 - MVKH .S1 machine_init,A0 - B .S2X A0 - ADDKPC .S2 0f,B3,4 -0: -#else - CALLP .S2 machine_init,B3 -#endif - - ;; Jump to Linux init -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 start_kernel,A0 - MVKH .S1 start_kernel,A0 - B .S2X A0 -#else - B .S2 start_kernel -#endif - NOP 5 -L1: BNOP .S2 L1,5 diff --git a/arch/c6x/kernel/switch_to.S b/arch/c6x/kernel/switch_to.S deleted file mode 100644 index b7f9f607042e68a5c6ffeec1168e666aa5bac00e..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/switch_to.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2011 Texas Instruments Incorporated - * Author: Mark Salter (msalter@redhat.com) - */ - -#include -#include - -#define SP B15 - - /* - * void __switch_to(struct thread_info *prev, - * struct thread_info *next, - * struct task_struct *tsk) ; - */ -ENTRY(__switch_to) - LDDW .D2T2 *+B4(THREAD_B15_14),B7:B6 - || MV .L2X A4,B5 ; prev - || MV .L1X B4,A5 ; next - || MVC .S2 RILC,B1 - - STW .D2T2 B3,*+B5(THREAD_PC) - || STDW .D1T1 A13:A12,*+A4(THREAD_A13_12) - || MVC .S2 ILC,B0 - - LDW .D2T2 *+B4(THREAD_PC),B3 - || LDDW .D1T1 *+A5(THREAD_A13_12),A13:A12 - - STDW .D1T1 A11:A10,*+A4(THREAD_A11_10) - || STDW .D2T2 B1:B0,*+B5(THREAD_RICL_ICL) -#ifndef __DSBT__ - || MVKL .S2 current_ksp,B1 -#endif - - STDW .D2T2 B15:B14,*+B5(THREAD_B15_14) - || STDW .D1T1 A15:A14,*+A4(THREAD_A15_14) -#ifndef __DSBT__ - || MVKH .S2 current_ksp,B1 -#endif - - ;; Switch to next SP - MV .S2 B7,SP -#ifdef __DSBT__ - || STW .D2T2 B7,*+B14(current_ksp) -#else - || STW .D2T2 B7,*B1 - || MV .L2 B6,B14 -#endif - || LDDW .D1T1 *+A5(THREAD_RICL_ICL),A1:A0 - - STDW .D2T2 B11:B10,*+B5(THREAD_B11_10) - || LDDW .D1T1 *+A5(THREAD_A15_14),A15:A14 - - STDW .D2T2 B13:B12,*+B5(THREAD_B13_12) - || LDDW .D1T1 *+A5(THREAD_A11_10),A11:A10 - - B .S2 B3 ; return in next E1 - || LDDW .D2T2 *+B4(THREAD_B13_12),B13:B12 - - LDDW .D2T2 *+B4(THREAD_B11_10),B11:B10 - NOP - - MV .L2X A0,B0 - || MV .S1 A6,A4 - - MVC .S2 B0,ILC - || MV .L2X A1,B1 - - MVC .S2 B1,RILC -ENDPROC(__switch_to) diff --git a/arch/c6x/kernel/vectors.S b/arch/c6x/kernel/vectors.S deleted file mode 100644 index ad3dc006a6d3a952c866e90edf38a5a00efd074f..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/vectors.S 
+++ /dev/null @@ -1,78 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -; This section handles all the interrupt vector routines. -; At RESET the processor sets up the DRAM timing parameters and -; branches to the label _c_int00 which handles initialization for the C code. -; - -#define ALIGNMENT 5 - - .macro IRQVEC name, handler - .align ALIGNMENT - .hidden \name - .global \name -\name: -#ifdef CONFIG_C6X_BIG_KERNEL - STW .D2T1 A0,*B15--[2] - || MVKL .S1 \handler,A0 - MVKH .S1 \handler,A0 - B .S2X A0 - LDW .D2T1 *++B15[2],A0 - NOP 4 - NOP - NOP - .endm -#else /* CONFIG_C6X_BIG_KERNEL */ - B .S2 \handler - NOP - NOP - NOP - NOP - NOP - NOP - NOP - .endm -#endif /* CONFIG_C6X_BIG_KERNEL */ - - .sect ".vectors","ax" - .align ALIGNMENT - .global RESET - .hidden RESET -RESET: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 _c_int00,A0 ; branch to _c_int00 - MVKH .S1 _c_int00,A0 - B .S2X A0 -#else - B .S2 _c_int00 - NOP - NOP -#endif - NOP - NOP - NOP - NOP - NOP - - - IRQVEC NMI,_nmi_handler ; NMI interrupt - IRQVEC AINT,_bad_interrupt ; reserved - IRQVEC MSGINT,_bad_interrupt ; reserved - - IRQVEC INT4,_int4_handler - IRQVEC INT5,_int5_handler - IRQVEC INT6,_int6_handler - IRQVEC INT7,_int7_handler - IRQVEC INT8,_int8_handler - IRQVEC INT9,_int9_handler - IRQVEC INT10,_int10_handler - IRQVEC INT11,_int11_handler - IRQVEC INT12,_int12_handler - IRQVEC INT13,_int13_handler - IRQVEC INT14,_int14_handler - IRQVEC INT15,_int15_handler diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S deleted file mode 100644 index 584bab2bace6e22d59619434f86f95ee41a04e62..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/vmlinux.lds.S +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ld script for the c6x kernel - * - * Copyright (C) 2010, 2011 Texas Instruments Incorporated - * Mark Salter - */ -#include -#include -#include - -ENTRY(_c_int00) - -#if defined(CONFIG_CPU_BIG_ENDIAN) -jiffies = jiffies_64 + 4; -#else -jiffies = jiffies_64; -#endif - -#define READONLY_SEGMENT_START \ - . = PAGE_OFFSET; -#define READWRITE_SEGMENT_START \ - . = ALIGN(128); \ - _data_lma = .; - -SECTIONS -{ - /* - * Start kernel read only segment - */ - READONLY_SEGMENT_START - - .vectors : - { - _vectors_start = .; - *(.vectors) - . = ALIGN(0x400); - _vectors_end = .; - } - - /* - * This section contains data which may be shared with other - * cores. It needs to be a fixed offset from PAGE_OFFSET - * regardless of kernel configuration. - */ - .virtio_ipc_dev : - { - *(.virtio_ipc_dev) - } - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init : - { - _sinittext = .; - HEAD_TEXT - INIT_TEXT - _einittext = .; - } - - INIT_DATA_SECTION(16) - - PERCPU_SECTION(128) - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : - { - _text = .; - _stext = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } - - EXCEPTION_TABLE(16) - NOTES - - RO_DATA_SECTION(PAGE_SIZE) - .const : - { - *(.const .const.* .gnu.linkonce.r.*) - *(.switch) - } - - _etext = .; - - /* - * Start kernel read-write segment. 
- */ - READWRITE_SEGMENT_START - _sdata = .; - - .fardata : AT(ADDR(.fardata) - LOAD_OFFSET) - { - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - PAGE_ALIGNED_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(128) - READ_MOSTLY_DATA(128) - DATA_DATA - CONSTRUCTORS - *(.data1) - *(.fardata .fardata.*) - *(.data.debug_bpt) - } - - .neardata ALIGN(8) : AT(ADDR(.neardata) - LOAD_OFFSET) - { - *(.neardata2 .neardata2.* .gnu.linkonce.s2.*) - *(.neardata .neardata.* .gnu.linkonce.s.*) - . = ALIGN(8); - } - - BUG_TABLE - - _edata = .; - - __bss_start = .; - SBSS(8) - BSS(8) - .far : - { - . = ALIGN(8); - *(.dynfar) - *(.far .far.* .gnu.linkonce.b.*) - . = ALIGN(8); - } - __bss_stop = .; - - _end = .; - - DWARF_DEBUG - - /DISCARD/ : - { - EXIT_TEXT - EXIT_DATA - EXIT_CALL - *(.discard) - *(.discard.*) - *(.interp) - } -} diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S deleted file mode 100644 index 8e625a30fd435a676ec27804ce2618209fd77829..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/csum_64plus.S +++ /dev/null @@ -1,416 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; linux/arch/c6x/lib/csum_64plus.s -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -#include - -; -;unsigned int csum_partial_copy(const char *src, char * dst, -; int len, int sum) -; -; A4: src -; B4: dst -; A6: len -; B6: sum -; return csum in A4 -; - - .text -ENTRY(csum_partial_copy) - MVC .S2 ILC,B30 - - MV .D1X B6,A31 ; given csum - ZERO .D1 A9 ; csum (a side) -|| ZERO .D2 B9 ; csum (b side) -|| SHRU .S2X A6,2,B5 ; len / 4 - - ;; Check alignment and size - AND .S1 3,A4,A1 -|| AND .S2 3,B4,B0 - OR .L2X B0,A1,B0 ; non aligned condition -|| MVC .S2 B5,ILC -|| MVK .D2 1,B2 -|| MV .D1X B5,A1 ; words condition - [!A1] B .S1 L8 - [B0] BNOP .S1 L6,5 - - SPLOOP 1 - - ;; Main loop for aligned words - LDW .D1T1 *A4++,A7 - NOP 4 - MV .S2X A7,B7 -|| EXTU .S1 A7,0,16,A16 - STW .D2T2 B7,*B4++ -|| MPYU .M2 B7,B2,B8 -|| ADD .L1 A16,A9,A9 - NOP - SPKERNEL 8,0 -|| ADD .L2 B8,B9,B9 - - ZERO .D1 A1 -|| ADD .L1X A9,B9,A9 ; add csum from a and b sides - -L6: - [!A1] BNOP .S1 L8,5 - - ;; Main loop for non-aligned words - SPLOOP 2 - || MVK .L1 1,A2 - - LDNW .D1T1 *A4++,A7 - NOP 3 - - NOP - MV .S2X A7,B7 - || EXTU .S1 A7,0,16,A16 - || MPYU .M1 A7,A2,A8 - - ADD .L1 A16,A9,A9 - SPKERNEL 6,0 - || STNW .D2T2 B7,*B4++ - || ADD .L1 A8,A9,A9 - -L8: AND .S2X 2,A6,B5 - CMPGT .L2 B5,0,B0 - [!B0] BNOP .S1 L82,4 - - ;; Manage half-word - ZERO .L1 A7 -|| ZERO .D1 A8 - -#ifdef CONFIG_CPU_BIG_ENDIAN - - LDBU .D1T1 *A4++,A7 - LDBU .D1T1 *A4++,A8 - NOP 3 - SHL .S1 A7,8,A0 - ADD .S1 A8,A9,A9 - STB .D2T1 A7,*B4++ -|| ADD .S1 A0,A9,A9 - STB .D2T1 A8,*B4++ - -#else - - LDBU .D1T1 *A4++,A7 - LDBU .D1T1 *A4++,A8 - NOP 3 - ADD .S1 A7,A9,A9 - SHL .S1 A8,8,A0 - - STB .D2T1 A7,*B4++ -|| ADD .S1 A0,A9,A9 - STB .D2T1 A8,*B4++ - -#endif - - ;; Manage eventually the last byte -L82: AND .S2X 1,A6,B0 - [!B0] BNOP .S1 L9,5 - -|| ZERO .L1 A7 - -L83: LDBU .D1T1 *A4++,A7 - NOP 4 - - MV .L2X A7,B7 - -#ifdef CONFIG_CPU_BIG_ENDIAN - - STB .D2T2 B7,*B4++ -|| SHL .S1 A7,8,A7 - ADD .S1 A7,A9,A9 - -#else - - STB .D2T2 B7,*B4++ -|| ADD .S1 A7,A9,A9 - -#endif - - ;; Fold the csum -L9: SHRU .S2X A9,16,B0 - [!B0] BNOP .S1 L10,5 - -L91: SHRU .S2X A9,16,B4 -|| EXTU .S1 A9,16,16,A3 - ADD .D1X A3,B4,A9 - - SHRU .S1 A9,16,A0 - [A0] BNOP .S1 L91,5 - -L10: ADD .D1 A31,A9,A9 - MV .D1 A9,A4 - - BNOP .S2 B3,4 - MVC .S2 B30,ILC 
-ENDPROC(csum_partial_copy) - -; -;unsigned short -;ip_fast_csum(unsigned char *iph, unsigned int ihl) -;{ -; unsigned int checksum = 0; -; unsigned short *tosum = (unsigned short *) iph; -; int len; -; -; len = ihl*4; -; -; if (len <= 0) -; return 0; -; -; while(len) { -; len -= 2; -; checksum += *tosum++; -; } -; if (len & 1) -; checksum += *(unsigned char*) tosum; -; -; while(checksum >> 16) -; checksum = (checksum & 0xffff) + (checksum >> 16); -; -; return ~checksum; -;} -; -; A4: iph -; B4: ihl -; return checksum in A4 -; - .text - -ENTRY(ip_fast_csum) - ZERO .D1 A5 - || MVC .S2 ILC,B30 - SHL .S2 B4,2,B0 - CMPGT .L2 B0,0,B1 - [!B1] BNOP .S1 L15,4 - [!B1] ZERO .D1 A3 - - [!B0] B .S1 L12 - SHRU .S2 B0,1,B0 - MVC .S2 B0,ILC - NOP 3 - - SPLOOP 1 - LDHU .D1T1 *A4++,A3 - NOP 3 - NOP - SPKERNEL 5,0 - || ADD .L1 A3,A5,A5 - -L12: SHRU .S1 A5,16,A0 - [!A0] BNOP .S1 L14,5 - -L13: SHRU .S2X A5,16,B4 - EXTU .S1 A5,16,16,A3 - ADD .D1X A3,B4,A5 - SHRU .S1 A5,16,A0 - [A0] BNOP .S1 L13,5 - -L14: NOT .D1 A5,A3 - EXTU .S1 A3,16,16,A3 - -L15: BNOP .S2 B3,3 - MVC .S2 B30,ILC - MV .D1 A3,A4 -ENDPROC(ip_fast_csum) - -; -;unsigned short -;do_csum(unsigned char *buff, unsigned int len) -;{ -; int odd, count; -; unsigned int result = 0; -; -; if (len <= 0) -; goto out; -; odd = 1 & (unsigned long) buff; -; if (odd) { -;#ifdef __LITTLE_ENDIAN -; result += (*buff << 8); -;#else -; result = *buff; -;#endif -; len--; -; buff++; -; } -; count = len >> 1; /* nr of 16-bit words.. */ -; if (count) { -; if (2 & (unsigned long) buff) { -; result += *(unsigned short *) buff; -; count--; -; len -= 2; -; buff += 2; -; } -; count >>= 1; /* nr of 32-bit words.. */ -; if (count) { -; unsigned int carry = 0; -; do { -; unsigned int w = *(unsigned int *) buff; -; count--; -; buff += 4; -; result += carry; -; result += w; -; carry = (w > result); -; } while (count); -; result += carry; -; result = (result & 0xffff) + (result >> 16); -; } -; if (len & 2) { -; result += *(unsigned short *) buff; -; buff += 2; -; } -; } -; if (len & 1) -;#ifdef __LITTLE_ENDIAN -; result += *buff; -;#else -; result += (*buff << 8); -;#endif -; result = (result & 0xffff) + (result >> 16); -; /* add up carry.. 
*/ - result = (result & 0xffff) + (result >> 16); - if (odd) - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); -out: - return result; -} -; -; A4: buff -; B4: len -; return checksum in A4 -; - -ENTRY(do_csum) - CMPGT .L2 B4,0,B0 - [!B0] BNOP .S1 L26,3 - EXTU .S1 A4,31,31,A0 - - MV .L1 A0,A3 -|| MV .S1X B3,A5 -|| MV .L2 B4,B3 -|| ZERO .D1 A1 - -#ifdef CONFIG_CPU_BIG_ENDIAN - [A0] SUB .L2 B3,1,B3 -|| [A0] LDBU .D1T1 *A4++,A1 -#else - [!A0] BNOP .S1 L21,5 -|| [A0] LDBU .D1T1 *A4++,A0 - SUB .L2 B3,1,B3 -|| SHL .S1 A0,8,A1 -L21: -#endif - SHR .S2 B3,1,B0 - [!B0] BNOP .S1 L24,3 - MVK .L1 2,A0 - AND .L1 A4,A0,A0 - - [!A0] BNOP .S1 L22,5 -|| [A0] LDHU .D1T1 *A4++,A0 - SUB .L2 B0,1,B0 -|| SUB .S2 B3,2,B3 -|| ADD .L1 A0,A1,A1 -L22: - SHR .S2 B0,1,B0 -|| ZERO .L1 A0 - - [!B0] BNOP .S1 L23,5 -|| [B0] MVC .S2 B0,ILC - - SPLOOP 3 - SPMASK L1 -|| MV .L1 A1,A2 -|| LDW .D1T1 *A4++,A1 - - NOP 4 - ADD .L1 A0,A1,A0 - ADD .L1 A2,A0,A2 - - SPKERNEL 1,2 -|| CMPGTU .L1 A1,A2,A0 - - ADD .L1 A0,A2,A6 - EXTU .S1 A6,16,16,A7 - SHRU .S2X A6,16,B0 - NOP 1 - ADD .L1X A7,B0,A1 -L23: - MVK .L2 2,B0 - AND .L2 B3,B0,B0 - [B0] LDHU .D1T1 *A4++,A0 - NOP 4 - [B0] ADD .L1 A0,A1,A1 -L24: - EXTU .S2 B3,31,31,B0 -#ifdef CONFIG_CPU_BIG_ENDIAN - [!B0] BNOP .S1 L25,4 -|| [B0] LDBU .D1T1 *A4,A0 - SHL .S1 A0,8,A0 - ADD .L1 A0,A1,A1 -L25: -#else - [B0] LDBU .D1T1 *A4,A0 - NOP 4 - [B0] ADD .L1 A0,A1,A1 -#endif - EXTU .S1 A1,16,16,A0 - SHRU .S2X A1,16,B0 - NOP 1 - ADD .L1X A0,B0,A0 - SHRU .S1 A0,16,A1 - ADD .L1 A0,A1,A0 - EXTU .S1 A0,16,16,A1 - EXTU .S1 A1,16,24,A2 - - EXTU .S1 A1,24,16,A0 -|| MV .L2X A3,B0 - - [B0] OR .L1 A0,A2,A1 -L26: - NOP 1 - BNOP .S2X A5,4 - MV .L1 A1,A4 -ENDPROC(do_csum) - -;__wsum csum_partial(const void *buff, int len, __wsum wsum) -;{ -; unsigned int sum = (__force unsigned int)wsum; -; unsigned int result = do_csum(buff, len); -; -; /* add in old sum, and carry.. */ -; result += sum; -; if (sum > result) -; result += 1; -; return (__force __wsum)result; -;} -; -ENTRY(csum_partial) - MV .L1X B3,A9 -|| CALLP .S2 do_csum,B3 -|| MV .S1 A6,A8 - BNOP .S2X A9,2 - ADD .L1 A8,A4,A1 - CMPGTU .L1 A8,A1,A0 - ADD .L1 A1,A0,A4 -ENDPROC(csum_partial) - -;unsigned short -;ip_compute_csum(unsigned char *buff, unsigned int len) -; -; A4: buff -; B4: len -; return checksum in A4 - -ENTRY(ip_compute_csum) - MV .L1X B3,A9 -|| CALLP .S2 do_csum,B3 - BNOP .S2X A9,3 - NOT .S1 A4,A4 - CLR .S1 A4,16,31,A4 -ENDPROC(ip_compute_csum) diff --git a/arch/c6x/lib/divi.S b/arch/c6x/lib/divi.S deleted file mode 100644 index d1764ae0b519e027502d70d12f0180ff4b9aed0e..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divi.S +++ /dev/null @@ -1,41 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack.
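As the ABI notes above say, the signed divide is a thin wrapper that fixes up signs around the unsigned routine. In C the wrapper amounts to the following sketch, where divu() stands in for __c6xabi_divu:

int divi(int a, int b)
{
	int negative = (a < 0) ^ (b < 0);   /* the XOR of the two cmpgt tests */
	unsigned int q = divu(a < 0 ? -(unsigned int)a : (unsigned int)a,
			      b < 0 ? -(unsigned int)b : (unsigned int)b);

	return negative ? -(int)q : (int)q;
}

The assembly below gets the same effect without a stack frame: it parks the return address in B5, conditionally negates the operands, and negates the quotient on the way out only when exactly one input was negative.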
- - .text -ENTRY(__c6xabi_divi) - call .s2 __c6xabi_divu -|| mv .d2 B3, B5 -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B1 - - [A1] neg .l1 A4, A4 -|| [B1] neg .l2 B4, B4 -|| xor .s1x A1, B1, A1 - [A1] addkpc .s2 _divu_ret, B3, 4 -_divu_ret: - neg .l1 A4, A4 -|| mv .l2 B3,B5 -|| ret .s2 B5 - nop 5 -ENDPROC(__c6xabi_divi) diff --git a/arch/c6x/lib/divremi.S b/arch/c6x/lib/divremi.S deleted file mode 100644 index 575fc57a8a7673e23eb9232c6940cb2d873bf6db..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divremi.S +++ /dev/null @@ -1,34 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text -ENTRY(__c6xabi_divremi) - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - - [B0] addkpc .s2 _divu_ret_1, B3, 1 - [!B0] addkpc .s2 _divu_ret_2, B3, 1 - nop 2 -_divu_ret_1: - neg .l1 A4, A4 -_divu_ret_2: - ldw .d2t2 *++B15[2], B3 - - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, A5 - nop 4 -ENDPROC(__c6xabi_divremi) diff --git a/arch/c6x/lib/divremu.S b/arch/c6x/lib/divremu.S deleted file mode 100644 index 5f6a6a2997ae9806004fab70590605a3fc8a66fa..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divremu.S +++ /dev/null @@ -1,75 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2011 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text -ENTRY(__c6xabi_divremu) - ;; We use a series of up to 31 subc instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the top of the register, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. Setting B4 to 1 - ;; is a trick to allow us to leave the following insns in the jump - ;; delay slot without affecting the result. - mv .s2x A4, B1 - - [b1] lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -||[!b1] mvk .d2 1, B4 - -||[!b1] zero .s1 A5 - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here.
- cmpltu .l1x A4, B4, A2 - [!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -__divremu0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 __divremu0 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 -|| extu .s1 A4, A6, A5 - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop -ENDPROC(__c6xabi_divremu) diff --git a/arch/c6x/lib/divu.S b/arch/c6x/lib/divu.S deleted file mode 100644 index f0f6082944c23917fdb3099d609f11dfbffe34b8..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divu.S +++ /dev/null @@ -1,86 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - .text -ENTRY(__c6xabi_divu) - ;; We use a series of up to 31 subc instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the top of the register, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. - mv .s2x A4, B1 - [B1] lmbd .l2 1, B4, B1 -|| [!B1] b .s2 B3 ; RETURN A -|| [!B1] mvk .d2 1, B4 - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here.
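Both unsigned divides follow the scheme the comments describe: lmbd counts the divisor's leading zeros, the divisor is shifted up until its most significant bit reaches bit 31, and one conditional-subtract step runs per remaining bit. A C model of the value computed (this models the result, not the subc dataflow; the zero-divisor guard exists only to keep the model well defined, since division by zero is undefined for the assembly too):

unsigned int divu_model(unsigned int n, unsigned int d)
{
	unsigned int q = 0, steps;

	if (d == 0)
		return 0;
	steps = __builtin_clz(d) + 1;   /* like lmbd 1, d, plus the top step */
	d <<= steps - 1;                /* align the divisor MSB with bit 31 */
	while (steps--) {
		q <<= 1;
		if (n >= d) {           /* one restoring-division step */
			n -= d;
			q |= 1;
		}
		d >>= 1;
	}
	return q;                       /* n now holds the remainder */
}

For example, divu_model(100, 7) runs 30 steps and yields 14, with 2 left in n.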
- cmpltu .l1x A4, B4, A2 - [!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -_divu_loop: - cmpgt .l2 B1, 7, B0 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 -|| [B0] b .s1 _divu_loop - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop -ENDPROC(__c6xabi_divu) diff --git a/arch/c6x/lib/llshl.S b/arch/c6x/lib/llshl.S deleted file mode 100644 index 3272499618e0b2b4a2715c92758cc2648922ccbc..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshl.S +++ /dev/null @@ -1,25 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -;; uint64_t __c6xabi_llshl(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshl) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; just return if zero shift - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shru .s1 A4,A0,A0 - [!A2] neg .l1 A0,A5 -|| [A2] shl .s1 A5,A1,A5 - [!A2] shl .s1 A4,A5,A5 -|| [A2] or .d1 A5,A0,A5 -|| [!A2] mvk .l1 0,A4 - [A2] shl .s1 A4,A1,A4 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshl) diff --git a/arch/c6x/lib/llshr.S b/arch/c6x/lib/llshr.S deleted file mode 100644 index 6bfaacd15e73573efc7ef7154992e4057314bef0..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshr.S +++ /dev/null @@ -1,26 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -;; uint64_t __c6xabi_llshr(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshr) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; return if zero shift count - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shl .s1 A5,A0,A0 - nop - [!A2] neg .l1 A0,A4 -|| [A2] shru .s1 A4,A1,A4 - [!A2] shr .s1 A5,A4,A4 -|| [A2] or .d1 A4,A0,A4 - [!A2] shr .s1 A5,0x1f,A5 - [A2] shr .s1 A5,A1,A5 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshr) diff --git a/arch/c6x/lib/llshru.S b/arch/c6x/lib/llshru.S deleted file mode 100644 index 103128f50770abd88a9ff8970f9abd893c90bcd4..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshru.S +++ /dev/null @@ -1,26 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . 
-;; - -;; uint64_t __c6xabi_llshru(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshru) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; return if zero shift count - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shl .s1 A5,A0,A0 - nop - [!A2] neg .l1 A0,A4 -|| [A2] shru .s1 A4,A1,A4 - [!A2] shru .s1 A5,A4,A4 -|| [A2] or .d1 A4,A0,A4 -|| [!A2] mvk .l1 0,A5 - [A2] shru .s1 A5,A1,A5 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshru) diff --git a/arch/c6x/lib/memcpy_64plus.S b/arch/c6x/lib/memcpy_64plus.S deleted file mode 100644 index 157a30486bfd0837c330224643eeffb92ad4c191..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/memcpy_64plus.S +++ /dev/null @@ -1,43 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2006, 2009, 2010 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; - -#include - - .text - -ENTRY(memcpy) - AND .L1 0x1,A6,A0 - || AND .S1 0x2,A6,A1 - || AND .L2X 0x4,A6,B0 - || MV .D1 A4,A3 - || MVC .S2 ILC,B2 - - [A0] LDB .D2T1 *B4++,A5 - [A1] LDB .D2T1 *B4++,A7 - [A1] LDB .D2T1 *B4++,A8 - [B0] LDNW .D2T1 *B4++,A9 - || SHRU .S2X A6,0x3,B1 - [!B1] BNOP .S2 B3,1 - - [A0] STB .D1T1 A5,*A3++ - ||[B1] MVC .S2 B1,ILC - [A1] STB .D1T1 A7,*A3++ - [A1] STB .D1T1 A8,*A3++ - [B0] STNW .D1T1 A9,*A3++ ; return when len < 8 - - SPLOOP 2 - - LDNDW .D2T1 *B4++,A9:A8 - NOP 3 - - NOP - SPKERNEL 0,0 - || STNDW .D1T1 A9:A8,*A3++ - - BNOP .S2 B3,4 - MVC .S2 B2,ILC -ENDPROC(memcpy) diff --git a/arch/c6x/lib/mpyll.S b/arch/c6x/lib/mpyll.S deleted file mode 100644 index d07c13ec4fd4c1a54c522b4d5be4b77b201d7946..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/mpyll.S +++ /dev/null @@ -1,37 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -#include - - ;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y) - ;; - ;; 64x64 multiply - ;; First compute partial results using 32-bit parts of x and y: - ;; - ;; b63 b32 b31 b0 - ;; ----------------------------- - ;; | 1 | 0 | - ;; ----------------------------- - ;; - ;; P0 = X0*Y0 - ;; P1 = X0*Y1 + X1*Y0 - ;; P2 = X1*Y1 - ;; - ;; result = (P2 << 64) + (P1 << 32) + P0 - ;; - ;; Since the result is also 64-bit, we can skip the P2 term. - - .text -ENTRY(__c6xabi_mpyll) - mpy32u .m1x A4,B4,A1:A0 ; X0*Y0 - b .s2 B3 - || mpy32u .m2x B5,A4,B1:B0 ; X0*Y1 (don't need upper 32-bits) - || mpy32u .m1x A5,B4,A3:A2 ; X1*Y0 (don't need upper 32-bits) - nop - nop - mv .s1 A0,A4 - add .l1x A2,B0,A5 - add .s1 A1,A5,A5 -ENDPROC(__c6xabi_mpyll) diff --git a/arch/c6x/lib/negll.S b/arch/c6x/lib/negll.S deleted file mode 100644 index 9ba434db5366308a1e0c2e19b2b8c3df503ae791..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/negll.S +++ /dev/null @@ -1,19 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . 
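The partial-product diagram in the __c6xabi_mpyll comment above maps directly onto C. A sketch (uses stdint.h types; since the result is truncated to 64 bits, the X1*Y1 term and the high halves of the cross products drop out, exactly as the comment says):

#include <stdint.h>

uint64_t mpyll(uint64_t x, uint64_t y)
{
	uint32_t x0 = (uint32_t)x, x1 = (uint32_t)(x >> 32);
	uint32_t y0 = (uint32_t)y, y1 = (uint32_t)(y >> 32);
	uint64_t p0 = (uint64_t)x0 * y0;     /* full 64-bit X0*Y0 */
	uint32_t p1 = x0 * y1 + x1 * y0;     /* only the low 32 bits matter */

	return p0 + ((uint64_t)p1 << 32);    /* X1*Y1 would shift out entirely */
}

This is why the assembly can use three mpy32u instructions and simply ignore the upper halves of two of the products.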
-;; - -;; int64_t __c6xabi_negll(int64_t val) - -#include - - .text -ENTRY(__c6xabi_negll) - b .s2 B3 - mvk .l1 0,A0 - subu .l1 A0,A4,A3:A2 - sub .l1 A0,A5,A0 -|| ext .s1 A3,24,24,A5 - add .l1 A5,A0,A5 - mv .s1 A2,A4 -ENDPROC(__c6xabi_negll) diff --git a/arch/c6x/lib/pop_rts.S b/arch/c6x/lib/pop_rts.S deleted file mode 100644 index f129e32943c57b370f5f3bcbc66ad92f10ba81a2..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/pop_rts.S +++ /dev/null @@ -1,20 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_pop_rts) - lddw .d2t2 *++B15, B3:B2 - lddw .d2t1 *++B15, A11:A10 - lddw .d2t2 *++B15, B11:B10 - lddw .d2t1 *++B15, A13:A12 - lddw .d2t2 *++B15, B13:B12 - lddw .d2t1 *++B15, A15:A14 -|| b .s2 B3 - ldw .d2t2 *++B15[2], B14 - nop 4 -ENDPROC(__c6xabi_pop_rts) diff --git a/arch/c6x/lib/push_rts.S b/arch/c6x/lib/push_rts.S deleted file mode 100644 index 40b0a4fe937c4014eaffed3cc3921f6f079721fd..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/push_rts.S +++ /dev/null @@ -1,19 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_push_rts) - stw .d2t2 B14, *B15--[2] - stdw .d2t1 A15:A14, *B15-- -|| b .s2x A3 - stdw .d2t2 B13:B12, *B15-- - stdw .d2t1 A13:A12, *B15-- - stdw .d2t2 B11:B10, *B15-- - stdw .d2t1 A11:A10, *B15-- - stdw .d2t2 B3:B2, *B15-- -ENDPROC(__c6xabi_push_rts) diff --git a/arch/c6x/lib/remi.S b/arch/c6x/lib/remi.S deleted file mode 100644 index 96a1335eac202ddd9d42e278ce841cf2c410620a..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/remi.S +++ /dev/null @@ -1,52 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - .text - -ENTRY(__c6xabi_remi) - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - - [B0] addkpc .s2 _divu_ret_1, B3, 1 - [!B0] addkpc .s2 _divu_ret_2, B3, 1 - nop 2 -_divu_ret_1: - neg .l1 A4, A4 -_divu_ret_2: - ldw .d2t2 *++B15[2], B3 - - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, A4 - nop 4 -ENDPROC(__c6xabi_remi) diff --git a/arch/c6x/lib/remu.S b/arch/c6x/lib/remu.S deleted file mode 100644 index 428feb9c06c06616c5fd1847235f8bd04a3f7761..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/remu.S +++ /dev/null @@ -1,70 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt .
-;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - - .text - -ENTRY(__c6xabi_remu) - ;; The ABI seems designed to prevent these functions calling each other, - ;; so we duplicate most of the divsi3 code here. - mv .s2x A4, B1 - lmbd .l2 1, B4, B1 -|| [!B1] b .s2 B3 ; RETURN A -|| [!B1] mvk .d2 1, B4 - - mv .l1x B1, A7 -|| shl .s2 B4, B1, B4 - - cmpltu .l1x A4, B4, A1 - [!A1] sub .l1x A4, B4, A4 - shru .s2 B4, 1, B4 - -_remu_loop: - cmpgt .l2 B1, 7, B0 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; RETURN A may happen here (note: must happen before the next branch) - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 -|| [B0] b .s1 _remu_loop - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 - - extu .s1 A4, A7, A4 - nop 2 -ENDPROC(__c6xabi_remu) diff --git a/arch/c6x/lib/strasgi.S b/arch/c6x/lib/strasgi.S deleted file mode 100644 index 715aeb2007924d0ab4584f294f5a87cde95cd8e4..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/strasgi.S +++ /dev/null @@ -1,77 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_strasgi) - ;; This is essentially memcpy, with alignment known to be at least - ;; 4, and the size a multiple of 4 greater than or equal to 28.
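The contract in the comment above makes __c6xabi_strasgi a compiler-assisted word copy. Under the stated preconditions it is equivalent to this C sketch:

void strasgi(unsigned int *dst, const unsigned int *src, unsigned int nbytes)
{
	/* caller guarantees: both pointers at least 4-byte aligned,
	 * nbytes a multiple of 4, and nbytes >= 28
	 */
	unsigned int i;

	for (i = 0; i < nbytes / 4; i++)
		dst[i] = src[i];
}

Knowing the minimum size is what lets the assembly software-pipeline the loop: six words are preloaded before the store loop starts, and the trailing stores drain them afterwards.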
- ldw .d2t1 *B4++, A0 -|| mvk .s2 16, B1 - ldw .d2t1 *B4++, A1 -|| mvk .s2 20, B2 -|| sub .d1 A6, 24, A6 - ldw .d2t1 *B4++, A5 - ldw .d2t1 *B4++, A7 -|| mv .l2x A6, B7 - ldw .d2t1 *B4++, A8 - ldw .d2t1 *B4++, A9 -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - -_strasgi_loop: - stw .d1t2 B5, *A4++ -|| [B0] ldw .d2t1 *B4++, A0 -|| mv .s2x A1, B5 -|| mv .l2 B7, B6 - - [B0] sub .d2 B6, 24, B7 -|| [B0] b .s2 _strasgi_loop -|| cmpltu .l2 B1, B6, B0 - - [B0] ldw .d2t1 *B4++, A1 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A5, B5 -|| cmpltu .l2 12, B6, B0 - - [B0] ldw .d2t1 *B4++, A5 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A7, B5 -|| cmpltu .l2 8, B6, B0 - - [B0] ldw .d2t1 *B4++, A7 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A8, B5 -|| cmpltu .l2 4, B6, B0 - - [B0] ldw .d2t1 *B4++, A8 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A9, B5 -|| cmpltu .l2 0, B6, B0 - - [B0] ldw .d2t1 *B4++, A9 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - - ;; loop back branch happens here - - cmpltu .l2 B1, B6, B0 -|| ret .s2 b3 - - [B0] stw .d1t1 A1, *A4++ -|| cmpltu .l2 12, B6, B0 - [B0] stw .d1t1 A5, *A4++ -|| cmpltu .l2 8, B6, B0 - [B0] stw .d1t1 A7, *A4++ -|| cmpltu .l2 4, B6, B0 - [B0] stw .d1t1 A8, *A4++ -|| cmpltu .l2 0, B6, B0 - [B0] stw .d1t1 A9, *A4++ - - ;; return happens here -ENDPROC(__c6xabi_strasgi) diff --git a/arch/c6x/lib/strasgi_64plus.S b/arch/c6x/lib/strasgi_64plus.S deleted file mode 100644 index d10aa2dc32498b6b8b62f19e11aafe3388603e1e..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/strasgi_64plus.S +++ /dev/null @@ -1,27 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_strasgi_64plus) - shru .s2x a6, 2, b31 -|| mv .s1 a4, a30 -|| mv .d2 b4, b30 - - add .s2 -4, b31, b31 - - sploopd 1 -|| mvc .s2 b31, ilc - ldw .d2t2 *b30++, b31 - nop 4 - mv .s1x b31,a31 - spkernel 6, 0 -|| stw .d1t1 a31, *a30++ - - ret .s2 b3 - nop 5 -ENDPROC(__c6xabi_strasgi_64plus) diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S deleted file mode 100644 index 5078eb5169faebb1b9c5b62fbcb29411494b8664..0000000000000000000000000000000000000000 --- a/arch/csky/abiv1/memcpy.S +++ /dev/null @@ -1,347 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
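A note on the copy loops below: when the source is misaligned, the code loads aligned words and stitches neighbouring pairs together with the GET_FRONT_BITS/GET_AFTER_BITS macros defined just after this header. For a 1-byte offset on a little-endian core, the combination they compute is, as a sketch:

unsigned int stitch1(unsigned int prev, unsigned int next)
{
	/* GET_FRONT_BITS prev,8 then GET_AFTER_BITS next,24, OR'd together:
	 * the top three bytes of prev joined with the low byte of next
	 */
	return (prev >> 8) | (next << 24);
}

The 2- and 3-byte cases use shift pairs of 16/16 and 24/8; on big-endian parts the shift directions swap, which is all the __cskyLE__ conditional in the macros does.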
- -#include - -.macro GET_FRONT_BITS rx y -#ifdef __cskyLE__ - lsri \rx, \y -#else - lsli \rx, \y -#endif -.endm - -.macro GET_AFTER_BITS rx y -#ifdef __cskyLE__ - lsli \rx, \y -#else - lsri \rx, \y -#endif -.endm - -/* void *memcpy(void *dest, const void *src, size_t n); */ -ENTRY(memcpy) - mov r7, r2 - cmplti r4, 4 - bt .L_copy_by_byte - mov r6, r2 - andi r6, 3 - cmpnei r6, 0 - jbt .L_dest_not_aligned - mov r6, r3 - andi r6, 3 - cmpnei r6, 0 - jbt .L_dest_aligned_but_src_not_aligned -.L0: - cmplti r4, 16 - jbt .L_aligned_and_len_less_16bytes - subi sp, 8 - stw r8, (sp, 0) -.L_aligned_and_len_larger_16bytes: - ldw r1, (r3, 0) - ldw r5, (r3, 4) - ldw r8, (r3, 8) - stw r1, (r7, 0) - ldw r1, (r3, 12) - stw r5, (r7, 4) - stw r8, (r7, 8) - stw r1, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L_aligned_and_len_larger_16bytes - ldw r8, (sp, 0) - addi sp, 8 - cmpnei r4, 0 - jbf .L_return - -.L_aligned_and_len_less_16bytes: - cmplti r4, 4 - bt .L_copy_by_byte -.L1: - ldw r1, (r3, 0) - stw r1, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - jbf .L1 - br .L_copy_by_byte - -.L_return: - rts - -.L_copy_by_byte: /* len less than 4 bytes */ - cmpnei r4, 0 - jbf .L_return -.L4: - ldb r1, (r3, 0) - stb r1, (r7, 0) - addi r3, 1 - addi r7, 1 - decne r4 - jbt .L4 - rts - -/* - * If dest is not aligned, just copying some bytes makes the dest align. - * After that, we check whether the src is aligned. - */ -.L_dest_not_aligned: - mov r5, r3 - rsub r5, r5, r7 - abs r5, r5 - cmplt r5, r4 - bt .L_copy_by_byte - mov r5, r7 - sub r5, r3 - cmphs r5, r4 - bf .L_copy_by_byte - mov r5, r6 -.L5: - ldb r1, (r3, 0) /* makes the dest align. */ - stb r1, (r7, 0) - addi r5, 1 - subi r4, 1 - addi r3, 1 - addi r7, 1 - cmpnei r5, 4 - jbt .L5 - cmplti r4, 4 - jbt .L_copy_by_byte - mov r6, r3 /* check whether the src is aligned. */ - andi r6, 3 - cmpnei r6, 0 - jbf .L0 - -/* Check the misalignment: 1, 2 or 3 bytes? */ -.L_dest_aligned_but_src_not_aligned: - mov r5, r3 - rsub r5, r5, r7 - abs r5, r5 - cmplt r5, r4 - bt .L_copy_by_byte - bclri r3, 0 - bclri r3, 1 - ldw r1, (r3, 0) - addi r3, 4 - cmpnei r6, 2 - bf .L_dest_aligned_but_src_not_aligned_2bytes - cmpnei r6, 3 - bf .L_dest_aligned_but_src_not_aligned_3bytes - -.L_dest_aligned_but_src_not_aligned_1byte: - mov r5, r7 - sub r5, r3 - cmphs r5, r4 - bf .L_copy_by_byte - cmplti r4, 16 - bf .L11 -.L10: /* If the len is less than 16 bytes */ - GET_FRONT_BITS r1 8 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 24 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L10 - subi r3, 3 - br .L_copy_by_byte -.L11: - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) -.L12: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 8 /* little or big endian?
*/ - mov r10, r5 - GET_AFTER_BITS r5 24 - or r5, r1 - - GET_FRONT_BITS r10 8 - mov r1, r11 - GET_AFTER_BITS r11 24 - or r11, r10 - - GET_FRONT_BITS r1 8 - mov r10, r8 - GET_AFTER_BITS r8 24 - or r8, r1 - - GET_FRONT_BITS r10 8 - mov r1, r9 - GET_AFTER_BITS r9 24 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L12 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp , 16 - cmplti r4, 4 - bf .L10 - subi r3, 3 - br .L_copy_by_byte - -.L_dest_aligned_but_src_not_aligned_2bytes: - cmplti r4, 16 - bf .L21 -.L20: - GET_FRONT_BITS r1 16 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 16 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L20 - subi r3, 2 - br .L_copy_by_byte - rts - -.L21: /* n > 16 */ - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) - -.L22: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 16 - mov r10, r5 - GET_AFTER_BITS r5 16 - or r5, r1 - - GET_FRONT_BITS r10 16 - mov r1, r11 - GET_AFTER_BITS r11 16 - or r11, r10 - - GET_FRONT_BITS r1 16 - mov r10, r8 - GET_AFTER_BITS r8 16 - or r8, r1 - - GET_FRONT_BITS r10 16 - mov r1, r9 - GET_AFTER_BITS r9 16 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L22 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp, 16 - cmplti r4, 4 - bf .L20 - subi r3, 2 - br .L_copy_by_byte - - -.L_dest_aligned_but_src_not_aligned_3bytes: - cmplti r4, 16 - bf .L31 -.L30: - GET_FRONT_BITS r1 24 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 8 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L30 - subi r3, 1 - br .L_copy_by_byte -.L31: - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) -.L32: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 24 - mov r10, r5 - GET_AFTER_BITS r5 8 - or r5, r1 - - GET_FRONT_BITS r10 24 - mov r1, r11 - GET_AFTER_BITS r11 8 - or r11, r10 - - GET_FRONT_BITS r1 24 - mov r10, r8 - GET_AFTER_BITS r8 8 - or r8, r1 - - GET_FRONT_BITS r10 24 - mov r1, r9 - GET_AFTER_BITS r9 8 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L32 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp, 16 - cmplti r4, 4 - bf .L30 - subi r3, 1 - br .L_copy_by_byte diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S deleted file mode 100644 index 326402e65f9e0bc610215eb63a78fd4dbaa54e55..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/mcount.S +++ /dev/null @@ -1,159 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
- -#include -#include - -/* - * csky-gcc with -pg will put the following asm after prologue: - * push r15 - * jsri _mcount - * - * stack layout after mcount_enter in _mcount(): - * - * current sp => 0:+-------+ - * | a0-a3 | -> must save all argument regs - * +16:+-------+ - * | lr | -> _mcount lr (instrumented function's pc) - * +20:+-------+ - * | fp=r8 | -> instrumented function fp - * +24:+-------+ - * | plr | -> instrumented function lr (parent's pc) - * +-------+ - */ - -.macro mcount_enter - subi sp, 24 - stw a0, (sp, 0) - stw a1, (sp, 4) - stw a2, (sp, 8) - stw a3, (sp, 12) - stw lr, (sp, 16) - stw r8, (sp, 20) -.endm - -.macro mcount_exit - ldw a0, (sp, 0) - ldw a1, (sp, 4) - ldw a2, (sp, 8) - ldw a3, (sp, 12) - ldw t1, (sp, 16) - ldw r8, (sp, 20) - ldw lr, (sp, 24) - addi sp, 28 - jmp t1 -.endm - -.macro save_return_regs - subi sp, 16 - stw a0, (sp, 0) - stw a1, (sp, 4) - stw a2, (sp, 8) - stw a3, (sp, 12) -.endm - -.macro restore_return_regs - mov lr, a0 - ldw a0, (sp, 0) - ldw a1, (sp, 4) - ldw a2, (sp, 8) - ldw a3, (sp, 12) - addi sp, 16 -.endm - -.macro nop32_stub - nop32 - nop32 - nop32 -.endm - -ENTRY(ftrace_stub) - jmp lr -END(ftrace_stub) - -#ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(_mcount) - mcount_enter - - /* r26 is link register, only used with jsri translation */ - lrw r26, ftrace_trace_function - ldw r26, (r26, 0) - lrw a1, ftrace_stub - cmpne r26, a1 - bf skip_ftrace - - mov a0, lr - subi a0, 4 - ldw a1, (sp, 24) - - jsr r26 - -#ifndef CONFIG_FUNCTION_GRAPH_TRACER -skip_ftrace: - mcount_exit -#else -skip_ftrace: - lrw a0, ftrace_graph_return - ldw a0, (a0, 0) - lrw a1, ftrace_stub - cmpne a0, a1 - bt ftrace_graph_caller - - lrw a0, ftrace_graph_entry - ldw a0, (a0, 0) - lrw a1, ftrace_graph_entry_stub - cmpne a0, a1 - bt ftrace_graph_caller - - mcount_exit -#endif -END(_mcount) -#else /* CONFIG_DYNAMIC_FTRACE */ -ENTRY(_mcount) - mov t1, lr - ldw lr, (sp, 0) - addi sp, 4 - jmp t1 -ENDPROC(_mcount) - -ENTRY(ftrace_caller) - mcount_enter - - ldw a0, (sp, 16) - subi a0, 4 - ldw a1, (sp, 24) - - nop -GLOBAL(ftrace_call) - nop32_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - nop -GLOBAL(ftrace_graph_call) - nop32_stub -#endif - - mcount_exit -ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - mov a0, sp - addi a0, 24 - ldw a1, (sp, 16) - subi a1, 4 - mov a2, r8 - lrw r26, prepare_ftrace_return - jsr r26 - mcount_exit -END(ftrace_graph_caller) - -ENTRY(return_to_handler) - save_return_regs - mov a0, r8 - jsri ftrace_return_to_handler - restore_return_regs - jmp lr -END(return_to_handler) -#endif diff --git a/arch/csky/abiv2/memcmp.S b/arch/csky/abiv2/memcmp.S deleted file mode 100644 index bf0d809f09e225fd9600993f893ea8de7394a60c..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memcmp.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(memcmp) - /* Test if len less than 4 bytes. */ - mov r3, r0 - movi r0, 0 - mov r12, r4 - cmplti r2, 4 - bt .L_compare_by_byte - - andi r13, r0, 3 - movi r19, 4 - - /* Test if s1 is not 4 bytes aligned. */ - bnez r13, .L_s1_not_aligned - - LABLE_ALIGN -.L_s1_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_compare_by_word - -.L_compare_by_4word: - /* If aligned, load word each time.
*/ - ldw r20, (r3, 0) - ldw r21, (r1, 0) - /* If s1[i] != s2[i], goto .L_byte_check. */ - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 4) - ldw r21, (r1, 4) - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 8) - ldw r21, (r1, 8) - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 12) - ldw r21, (r1, 12) - cmpne r20, r21 - bt .L_byte_check - - PRE_BNEZAD (r18) - addi a3, 16 - addi a1, 16 - - BNEZAD (r18, .L_compare_by_4word) - -.L_compare_by_word: - zext r18, r2, 3, 2 - bez r18, .L_compare_by_byte -.L_compare_by_word_loop: - ldw r20, (r3, 0) - ldw r21, (r1, 0) - addi r3, 4 - PRE_BNEZAD (r18) - cmpne r20, r21 - addi r1, 4 - bt .L_byte_check - BNEZAD (r18, .L_compare_by_word_loop) - -.L_compare_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_compare_by_byte_loop: - ldb r0, (r3, 0) - ldb r4, (r1, 0) - addi r3, 1 - subu r0, r4 - PRE_BNEZAD (r18) - addi r1, 1 - bnez r0, .L_return - BNEZAD (r18, .L_compare_by_byte_loop) - -.L_return: - mov r4, r12 - rts - -# ifdef __CSKYBE__ -/* d[i] != s[i] in word, so we check byte 0. */ -.L_byte_check: - xtrb0 r0, r20 - xtrb0 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 1 */ - xtrb1 r0, r20 - xtrb1 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 2 */ - xtrb2 r0, r20 - xtrb2 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 3 */ - xtrb3 r0, r20 - xtrb3 r2, r21 - subu r0, r2 -# else -/* s1[i] != s2[i] in word, so we check byte 3. */ -.L_byte_check: - xtrb3 r0, r20 - xtrb3 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 2 */ - xtrb2 r0, r20 - xtrb2 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 1 */ - xtrb1 r0, r20 - xtrb1 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 0 */ - xtrb0 r0, r20 - xtrb0 r2, r21 - subu r0, r2 - br .L_return -# endif /* !__CSKYBE__ */ - -/* Compare when s1 is not aligned. */ -.L_s1_not_aligned: - sub r13, r19, r13 - sub r2, r13 -.L_s1_not_aligned_loop: - ldb r0, (r3, 0) - ldb r4, (r1, 0) - addi r3, 1 - subu r0, r4 - PRE_BNEZAD (r13) - addi r1, 1 - bnez r0, .L_return - BNEZAD (r13, .L_s1_not_aligned_loop) - br .L_s1_aligned -ENDPROC(memcmp) diff --git a/arch/csky/abiv2/memcpy.S b/arch/csky/abiv2/memcpy.S deleted file mode 100644 index 145bf3a9360ee18636a4c66862ef34d4591e14fa..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memcpy.S +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(__memcpy) -ENTRY(memcpy) - /* Test if len less than 4 bytes. */ - mov r12, r0 - cmplti r2, 4 - bt .L_copy_by_byte - - andi r13, r0, 3 - movi r19, 4 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - -/* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - - /* Test if len less than 16 bytes. 
*/ - bez r18, .L_len_less_16bytes - movi r19, 0 - - LABLE_ALIGN -.L_len_larger_16bytes: -#if defined(__CK860__) - ldw r3, (r1, 0) - stw r3, (r0, 0) - ldw r3, (r1, 4) - stw r3, (r0, 4) - ldw r3, (r1, 8) - stw r3, (r0, 8) - ldw r3, (r1, 12) - addi r1, 16 - stw r3, (r0, 12) - addi r0, 16 -#else - ldw r20, (r1, 0) - ldw r21, (r1, 4) - ldw r22, (r1, 8) - ldw r23, (r1, 12) - stw r20, (r0, 0) - stw r21, (r0, 4) - stw r22, (r0, 8) - stw r23, (r0, 12) - PRE_BNEZAD (r18) - addi r1, 16 - addi r0, 16 -#endif - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - bez r18, .L_copy_by_byte -.L_len_less_16bytes_loop: - ldw r3, (r1, 0) - PRE_BNEZAD (r18) - addi r1, 4 - stw r3, (r0, 0) - addi r0, 4 - BNEZAD (r18, .L_len_less_16bytes_loop) - -/* Test if len less than 4 bytes. */ -.L_copy_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_copy_by_byte_loop: - ldb r3, (r1, 0) - PRE_BNEZAD (r18) - addi r1, 1 - stb r3, (r0, 0) - addi r0, 1 - BNEZAD (r18, .L_copy_by_byte_loop) - -.L_return: - mov r0, r12 - rts - -/* - * If dest is not aligned, just copying some bytes makes the - * dest align. - */ -.L_dest_not_aligned: - sub r13, r19, r13 - sub r2, r13 - -/* Makes the dest align. */ -.L_dest_not_aligned_loop: - ldb r3, (r1, 0) - PRE_BNEZAD (r13) - addi r1, 1 - stb r3, (r0, 0) - addi r0, 1 - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 4 - bt .L_copy_by_byte - - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(__memcpy) diff --git a/arch/csky/abiv2/memmove.S b/arch/csky/abiv2/memmove.S deleted file mode 100644 index 5721e73ad3d8f4185059952a903337be80698d36..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memmove.S +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - - .weak memmove -ENTRY(__memmove) -ENTRY(memmove) - subu r3, r0, r1 - cmphs r3, r2 - bt memcpy - - mov r12, r0 - addu r0, r0, r2 - addu r1, r1, r2 - - /* Test if len less than 4 bytes. */ - cmplti r2, 4 - bt .L_copy_by_byte - - andi r13, r0, 3 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - /* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_len_less_16bytes - movi r19, 0 - - /* len > 16 bytes */ - LABLE_ALIGN -.L_len_larger_16bytes: - subi r1, 16 - subi r0, 16 -#if defined(__CK860__) - ldw r3, (r1, 12) - stw r3, (r0, 12) - ldw r3, (r1, 8) - stw r3, (r0, 8) - ldw r3, (r1, 4) - stw r3, (r0, 4) - ldw r3, (r1, 0) - stw r3, (r0, 0) -#else - ldw r20, (r1, 0) - ldw r21, (r1, 4) - ldw r22, (r1, 8) - ldw r23, (r1, 12) - stw r20, (r0, 0) - stw r21, (r0, 4) - stw r22, (r0, 8) - stw r23, (r0, 12) - PRE_BNEZAD (r18) -#endif - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - bez r18, .L_copy_by_byte -.L_len_less_16bytes_loop: - subi r1, 4 - subi r0, 4 - ldw r3, (r1, 0) - PRE_BNEZAD (r18) - stw r3, (r0, 0) - BNEZAD (r18, .L_len_less_16bytes_loop) - - /* Test if len less than 4 bytes. */ -.L_copy_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_copy_by_byte_loop: - subi r1, 1 - subi r0, 1 - ldb r3, (r1, 0) - PRE_BNEZAD (r18) - stb r3, (r0, 0) - BNEZAD (r18, .L_copy_by_byte_loop) - -.L_return: - mov r0, r12 - rts - - /* If dest is not aligned, just copy some bytes makes the dest - align. 
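memmove above opens with subu/cmphs/bt: if the destination does not start inside the source window, it simply tail-calls memcpy; otherwise it copies from the top down so overlapping bytes are read before they are overwritten. The test is one unsigned comparison in C (memmove_model is an illustrative name, and the byte-wise backward loop stands in for the word-wise one above):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

void *memmove_model(void *dst, const void *src, size_t n)
{
	/* subu r3, r0, r1; cmphs r3, r2; bt memcpy: unsigned wraparound
	 * makes dst < src land in the "no destructive overlap" case too. */
	if ((uintptr_t)dst - (uintptr_t)src >= n)
		return memcpy(dst, src, n);

	unsigned char *d = (unsigned char *)dst + n;
	const unsigned char *s = (const unsigned char *)src + n;
	while (n--)
		*--d = *--s;	/* highest address first */
	return dst;
}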
*/ -.L_dest_not_aligned: - sub r2, r13 -.L_dest_not_aligned_loop: - subi r1, 1 - subi r0, 1 - /* Makes the dest align. */ - ldb r3, (r1, 0) - PRE_BNEZAD (r13) - stb r3, (r0, 0) - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 4 - bt .L_copy_by_byte - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(memmove) -ENDPROC(__memmove) diff --git a/arch/csky/abiv2/memset.S b/arch/csky/abiv2/memset.S deleted file mode 100644 index a7e7d994b667d48916aa696e2803c7f93e7346b3..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memset.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - - .weak memset -ENTRY(__memset) -ENTRY(memset) - /* Test if len less than 4 bytes. */ - mov r12, r0 - cmplti r2, 8 - bt .L_set_by_byte - - andi r13, r0, 3 - movi r19, 4 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - /* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - zextb r3, r1 - lsli r1, 8 - or r1, r3 - lsli r3, r1, 16 - or r3, r1 - - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_len_less_16bytes - - LABLE_ALIGN -.L_len_larger_16bytes: - stw r3, (r0, 0) - stw r3, (r0, 4) - stw r3, (r0, 8) - stw r3, (r0, 12) - PRE_BNEZAD (r18) - addi r0, 16 - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - andi r2, 3 - bez r18, .L_set_by_byte -.L_len_less_16bytes_loop: - stw r3, (r0, 0) - PRE_BNEZAD (r18) - addi r0, 4 - BNEZAD (r18, .L_len_less_16bytes_loop) - - /* Test if len less than 4 bytes. */ -.L_set_by_byte: - zext r18, r2, 2, 0 - bez r18, .L_return -.L_set_by_byte_loop: - stb r1, (r0, 0) - PRE_BNEZAD (r18) - addi r0, 1 - BNEZAD (r18, .L_set_by_byte_loop) - -.L_return: - mov r0, r12 - rts - - /* If dest is not aligned, just set some bytes makes the dest - align. */ - -.L_dest_not_aligned: - sub r13, r19, r13 - sub r2, r13 -.L_dest_not_aligned_loop: - /* Makes the dest align. */ - stb r1, (r0, 0) - PRE_BNEZAD (r13) - addi r0, 1 - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 8 - bt .L_set_by_byte - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(memset) -ENDPROC(__memset) diff --git a/arch/csky/abiv2/strcmp.S b/arch/csky/abiv2/strcmp.S deleted file mode 100644 index f8403f4d8c2beb75b91da2e97c7d5161182ef7b3..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strcmp.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strcmp) - mov a3, a0 - /* Check if the s1 addr is aligned. */ - xor a2, a3, a1 - andi a2, 0x3 - bnez a2, 7f - andi t1, a0, 0x3 - bnez t1, 5f - -1: - /* If aligned, load word each time. */ - ldw t0, (a3, 0) - ldw t1, (a1, 0) - /* If s1[i] != s2[i], goto 2f. */ - cmpne t0, t1 - bt 2f - /* If s1[i] == s2[i], check if s1 or s2 is at the end. */ - tstnbz t0 - /* If at the end, goto 3f (finish comparing). 
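The strcmp loop above hinges on tstnbz, which tests whether every byte of a word is nonzero, so a single load both compares four characters and checks for the terminator. A hedged C model, with the classic carry-propagation mask standing in for tstnbz (strcmp_model and has_zero_byte are illustrative names):

#include <stdint.h>
#include <string.h>

/* Software stand-in for tstnbz: true iff some byte of x is zero. */
static int has_zero_byte(uint32_t x)
{
	return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

int strcmp_model(const char *s1, const char *s2)
{
	/* The word path is taken only when s1 and s2 share the same
	 * 4-byte phase (the xor/andi test at entry); s1 is byte-aligned
	 * first, as the "5:" loop does. */
	if ((((uintptr_t)s1 ^ (uintptr_t)s2) & 3) == 0) {
		while ((uintptr_t)s1 & 3) {
			if (*s1 != *s2 || !*s1)
				return (unsigned char)*s1 - (unsigned char)*s2;
			s1++; s2++;
		}
		for (;;) {
			uint32_t a, b;
			memcpy(&a, s1, 4);	/* ldw t0, (a3, 0) */
			memcpy(&b, s2, 4);	/* ldw t1, (a1, 0) */
			if (a != b || has_zero_byte(a))
				break;		/* settle it per byte */
			s1 += 4; s2 += 4;
		}
	}
	while (*s1 && *s1 == *s2) {
		s1++; s2++;
	}
	return (unsigned char)*s1 - (unsigned char)*s2;
}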
*/ - bf 3f - - ldw t0, (a3, 4) - ldw t1, (a1, 4) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 8) - ldw t1, (a1, 8) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 12) - ldw t1, (a1, 12) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 16) - ldw t1, (a1, 16) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 20) - ldw t1, (a1, 20) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 24) - ldw t1, (a1, 24) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 28) - ldw t1, (a1, 28) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - addi a3, 32 - addi a1, 32 - - br 1b - -# ifdef __CSKYBE__ - /* d[i] != s[i] in word, so we check byte 0. */ -2: - xtrb0 a0, t0 - xtrb0 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 1 */ - xtrb1 a0, t0 - xtrb1 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 2 */ - xtrb2 a0, t0 - xtrb2 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 3 */ - xtrb3 a0, t0 - xtrb3 a2, t1 - subu a0, a2 -# else - /* s1[i] != s2[i] in word, so we check byte 3. */ -2: - xtrb3 a0, t0 - xtrb3 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 2 */ - xtrb2 a0, t0 - xtrb2 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 1 */ - xtrb1 a0, t0 - xtrb1 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 0 */ - xtrb0 a0, t0 - xtrb0 a2, t1 - subu a0, a2 - -# endif /* !__CSKYBE__ */ - jmp lr -3: - movi a0, 0 -4: - jmp lr - - /* Compare when s1 or s2 is not aligned. */ -5: - subi t1, 4 -6: - ldb a0, (a3, 0) - ldb a2, (a1, 0) - subu a0, a2 - bez a2, 4b - bnez a0, 4b - addi t1, 1 - addi a1, 1 - addi a3, 1 - bnez t1, 6b - br 1b - -7: - ldb a0, (a3, 0) - addi a3, 1 - ldb a2, (a1, 0) - addi a1, 1 - subu a0, a2 - bnez a0, 4b - bnez a2, 7b - jmp r15 -ENDPROC(strcmp) diff --git a/arch/csky/abiv2/strcpy.S b/arch/csky/abiv2/strcpy.S deleted file mode 100644 index 3c6d3f6a573a1edcfb6efd22f61216a8b157ef03..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strcpy.S +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strcpy) - mov a3, a0 - /* Check if the src addr is aligned. */ - andi t0, a1, 3 - bnez t0, 11f -1: - /* Check if all the bytes in the word are not zero. 
*/ - ldw a2, (a1) - tstnbz a2 - bf 9f - stw a2, (a3) - - ldw a2, (a1, 4) - tstnbz a2 - bf 2f - stw a2, (a3, 4) - - ldw a2, (a1, 8) - tstnbz a2 - bf 3f - stw a2, (a3, 8) - - ldw a2, (a1, 12) - tstnbz a2 - bf 4f - stw a2, (a3, 12) - - ldw a2, (a1, 16) - tstnbz a2 - bf 5f - stw a2, (a3, 16) - - ldw a2, (a1, 20) - tstnbz a2 - bf 6f - stw a2, (a3, 20) - - ldw a2, (a1, 24) - tstnbz a2 - bf 7f - stw a2, (a3, 24) - - ldw a2, (a1, 28) - tstnbz a2 - bf 8f - stw a2, (a3, 28) - - addi a3, 32 - addi a1, 32 - br 1b - - -2: - addi a3, 4 - br 9f - -3: - addi a3, 8 - br 9f - -4: - addi a3, 12 - br 9f - -5: - addi a3, 16 - br 9f - -6: - addi a3, 20 - br 9f - -7: - addi a3, 24 - br 9f - -8: - addi a3, 28 -9: -# ifdef __CSKYBE__ - xtrb0 t0, a2 - st.b t0, (a3) - bez t0, 10f - xtrb1 t0, a2 - st.b t0, (a3, 1) - bez t0, 10f - xtrb2 t0, a2 - st.b t0, (a3, 2) - bez t0, 10f - stw a2, (a3) -# else - xtrb3 t0, a2 - st.b t0, (a3) - bez t0, 10f - xtrb2 t0, a2 - st.b t0, (a3, 1) - bez t0, 10f - xtrb1 t0, a2 - st.b t0, (a3, 2) - bez t0, 10f - stw a2, (a3) -# endif /* !__CSKYBE__ */ -10: - jmp lr - -11: - subi t0, 4 -12: - ld.b a2, (a1) - st.b a2, (a3) - bez a2, 10b - addi t0, 1 - addi a1, a1, 1 - addi a3, a3, 1 - bnez t0, 12b - jbr 1b -ENDPROC(strcpy) diff --git a/arch/csky/abiv2/strlen.S b/arch/csky/abiv2/strlen.S deleted file mode 100644 index bcdd70764d086441eeb80414e37f99e4d4781eb6..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strlen.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strlen) - /* Check if the start addr is aligned. */ - mov r3, r0 - andi r1, r0, 3 - movi r2, 4 - movi r0, 0 - bnez r1, .L_start_not_aligned - - LABLE_ALIGN -.L_start_addr_aligned: - /* Check if all the bytes in the word are not zero. */ - ldw r1, (r3) - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 4) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 8) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 12) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 16) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 20) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 24) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 28) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - addi r0, 4 - addi r3, 32 - br .L_start_addr_aligned - -.L_string_tail: -# ifdef __CSKYBE__ - xtrb0 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb1 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb2 r3, r1 - bez r3, .L_return - addi r0, 1 -# else - xtrb3 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb2 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb1 r3, r1 - bez r3, .L_return - addi r0, 1 -# endif /* !__CSKYBE__ */ - -.L_return: - rts - -.L_start_not_aligned: - sub r2, r2, r1 -.L_start_not_aligned_loop: - ldb r1, (r3) - PRE_BNEZAD (r2) - addi r3, 1 - bez r1, .L_return - addi r0, 1 - BNEZAD (r2, .L_start_not_aligned_loop) - br .L_start_addr_aligned -ENDPROC(strlen) diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S deleted file mode 100644 index 3821ef9b75672d8a5af90839ffa7f95cfdb4da50..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/atomic.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include - -.text - -/* - * int csky_cmpxchg(int oldval, int newval, int *ptr) - * - * If *ptr != oldval && return 1, - * else *ptr = newval return 0. 
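Read as C, the comment above says: csky_cmpxchg is a compare-and-swap that returns 0 on success and 1 on mismatch. Cores with ldex/stex retry until the store-exclusive succeeds; on older cores the plain ldw/stw window between labels 1 and 2 is what csky_cmpxchg_fixup (below) rewinds from the TLB-modified handler. A hedged model of the semantics only, ignoring the trap-context epc/epsr/usp juggling:

int csky_cmpxchg_model(int oldval, int newval, int *ptr)
{
	if (*ptr != oldval)	/* cmpne a0, a3 */
		return 1;	/* no store; C flag reaches a0 via mvc */
	*ptr = newval;		/* stex (or plain stw) */
	return 0;
}

User space traps into this with oldval/newval/ptr in a0-a2; the sequence must appear atomic, which the kernel guarantees by restarting the window if a fault interrupts it.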
- */ -ENTRY(csky_cmpxchg) - USPTOKSP - mfcr a3, epc - addi a3, TRAP0_SIZE - - subi sp, 16 - stw a3, (sp, 0) - mfcr a3, epsr - stw a3, (sp, 4) - mfcr a3, usp - stw a3, (sp, 8) - - psrset ee -#ifdef CONFIG_CPU_HAS_LDSTEX -1: - ldex a3, (a2) - cmpne a0, a3 - bt16 2f - mov a3, a1 - stex a3, (a2) - bez a3, 1b -2: - sync.is -#else -1: - ldw a3, (a2) - cmpne a0, a3 - bt16 3f -2: - stw a1, (a2) -3: -#endif - mvc a0 - ldw a3, (sp, 0) - mtcr a3, epc - ldw a3, (sp, 4) - mtcr a3, epsr - ldw a3, (sp, 8) - mtcr a3, usp - addi sp, 16 - KSPTOUSP - rte -END(csky_cmpxchg) - -#ifndef CONFIG_CPU_HAS_LDSTEX -/* - * Called from tlbmodified exception - */ -ENTRY(csky_cmpxchg_fixup) - mfcr a0, epc - lrw a1, 2b - cmpne a1, a0 - bt 1f - subi a1, (2b - 1b) - stw a1, (sp, LSAVE_PC) -1: - rts -END(csky_cmpxchg_fixup) -#endif diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S deleted file mode 100644 index 4349528fbf38a59432911a22dd26c665409d11a2..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/entry.S +++ /dev/null @@ -1,345 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PTE_INDX_MSK 0xffc -#define PTE_INDX_SHIFT 10 -#define _PGDIR_SHIFT 22 - -.macro zero_fp -#ifdef CONFIG_STACKTRACE - movi r8, 0 -#endif -.endm - -.macro tlbop_begin name, val0, val1, val2 -ENTRY(csky_\name) - mtcr a3, ss2 - mtcr r6, ss3 - mtcr a2, ss4 - - RD_PGDR r6 - RD_MEH a3 -#ifdef CONFIG_CPU_HAS_TLBI - tlbi.vaas a3 - sync.is - - btsti a3, 31 - bf 1f - RD_PGDR_K r6 -1: -#else - bgeni a2, 31 - WR_MCIR a2 - bgeni a2, 25 - WR_MCIR a2 -#endif - bclri r6, 0 - lrw a2, va_pa_offset - ld.w a2, (a2, 0) - subu r6, a2 - bseti r6, 31 - - mov a2, a3 - lsri a2, _PGDIR_SHIFT - lsli a2, 2 - addu r6, a2 - ldw r6, (r6) - - lrw a2, va_pa_offset - ld.w a2, (a2, 0) - subu r6, a2 - bseti r6, 31 - - lsri a3, PTE_INDX_SHIFT - lrw a2, PTE_INDX_MSK - and a3, a2 - addu r6, a3 - ldw a3, (r6) - - movi a2, (_PAGE_PRESENT | \val0) - and a3, a2 - cmpne a3, a2 - bt \name - - /* First read/write the page, just update the flags */ - ldw a3, (r6) - bgeni a2, PAGE_VALID_BIT - bseti a2, PAGE_ACCESSED_BIT - bseti a2, \val1 - bseti a2, \val2 - or a3, a2 - stw a3, (r6) - - /* Some cpu tlb-hardrefill bypass the cache */ -#ifdef CONFIG_CPU_NEED_TLBSYNC - movi a2, 0x22 - bseti a2, 6 - mtcr r6, cr22 - mtcr a2, cr17 - sync -#endif - - mfcr a3, ss2 - mfcr r6, ss3 - mfcr a2, ss4 - rte -\name: - mfcr a3, ss2 - mfcr r6, ss3 - mfcr a2, ss4 - SAVE_ALL 0 -.endm -.macro tlbop_end is_write - zero_fp - RD_MEH a2 - psrset ee, ie - mov a0, sp - movi a1, \is_write - jbsr do_page_fault - jmpi ret_from_exception -.endm - -.text - -tlbop_begin tlbinvalidl, _PAGE_READ, PAGE_VALID_BIT, PAGE_ACCESSED_BIT -tlbop_end 0 - -tlbop_begin tlbinvalids, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT -tlbop_end 1 - -tlbop_begin tlbmodified, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT -#ifndef CONFIG_CPU_HAS_LDSTEX -jbsr csky_cmpxchg_fixup -#endif -tlbop_end 1 - -ENTRY(csky_systemcall) - SAVE_ALL TRAP0_SIZE - zero_fp - - psrset ee, ie - - lrw r11, __NR_syscalls - cmphs syscallid, r11 /* Check nr of syscall */ - bt ret_from_exception - - lrw r13, sys_call_table - ixw r13, syscallid - ldw r11, (r13) - cmpnei r11, 0 - bf ret_from_exception - - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - ldw r12, (r9, TINFO_FLAGS) - ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - 
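csky_systemcall's fast path, sketched in C: bounds-check the syscall number, skip NULL table slots, call through sys_call_table, and write the result into the saved a0 slot (LSAVE_A0); the ABIv2 branch additionally spills arguments 5 and 6 to the stack before the call. The table size and names below are assumptions for the sketch; on the failure paths the assembly simply falls through to ret_from_exception:

#define NR_SYSCALLS_SKETCH 436		/* stand-in for __NR_syscalls */

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Returns 1 if dispatched (*ret filled in), 0 if the number was bad. */
int dispatch_syscall(syscall_fn *table, unsigned long nr,
		     const long a[6], long *ret)
{
	if (nr >= NR_SYSCALLS_SKETCH)	/* cmphs syscallid, r11 */
		return 0;
	if (!table[nr])			/* cmpnei r11, 0 */
		return 0;
	*ret = table[nr](a[0], a[1], a[2], a[3], a[4], a[5]);
	return 1;			/* stw a0, (sp, LSAVE_A0) */
}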
cmpnei r12, 0 - bt csky_syscall_trace -#if defined(__CSKYABIV2__) - subi sp, 8 - stw r5, (sp, 0x4) - stw r4, (sp, 0x0) - jsr r11 /* Do system call */ - addi sp, 8 -#else - jsr r11 -#endif - stw a0, (sp, LSAVE_A0) /* Save return value */ - jmpi ret_from_exception - -csky_syscall_trace: - mov a0, sp /* sp = pt_regs pointer */ - jbsr syscall_trace_enter - /* Prepare args before do system call */ - ldw a0, (sp, LSAVE_A0) - ldw a1, (sp, LSAVE_A1) - ldw a2, (sp, LSAVE_A2) - ldw a3, (sp, LSAVE_A3) -#if defined(__CSKYABIV2__) - subi sp, 8 - ldw r9, (sp, LSAVE_A4) - stw r9, (sp, 0x0) - ldw r9, (sp, LSAVE_A5) - stw r9, (sp, 0x4) -#else - ldw r6, (sp, LSAVE_A4) - ldw r7, (sp, LSAVE_A5) -#endif - jsr r11 /* Do system call */ -#if defined(__CSKYABIV2__) - addi sp, 8 -#endif - stw a0, (sp, LSAVE_A0) /* Save return value */ - - mov a0, sp /* right now, sp --> pt_regs */ - jbsr syscall_trace_exit - br ret_from_exception - -ENTRY(ret_from_kernel_thread) - jbsr schedule_tail - mov a0, r10 - jsr r9 - jbsr ret_from_exception - -ENTRY(ret_from_fork) - jbsr schedule_tail - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - ldw r12, (r9, TINFO_FLAGS) - ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r12, 0 - bf ret_from_exception - mov a0, sp /* sp = pt_regs pointer */ - jbsr syscall_trace_exit - -ret_from_exception: - ld syscallid, (sp, LSAVE_PSR) - btsti syscallid, 31 - bt 1f - - /* - * Load address of current->thread_info, Then get address of task_struct - * Get task_needreshed in task_struct - */ - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - - ldw r12, (r9, TINFO_FLAGS) - andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) - cmpnei r12, 0 - bt exit_work -1: - RESTORE_ALL - -exit_work: - lrw syscallid, ret_from_exception - mov lr, syscallid - - btsti r12, TIF_NEED_RESCHED - bt work_resched - - mov a0, sp - mov a1, r12 - jmpi do_notify_resume - -work_resched: - jmpi schedule - -ENTRY(csky_trap) - SAVE_ALL 0 - zero_fp - psrset ee - mov a0, sp /* Push Stack pointer arg */ - jbsr trap_c /* Call C-level trap handler */ - jmpi ret_from_exception - -/* - * Prototype from libc for abiv1: - * register unsigned int __result asm("a0"); - * asm( "trap 3" :"=r"(__result)::); - */ -ENTRY(csky_get_tls) - USPTOKSP - - /* increase epc for continue */ - mfcr a0, epc - addi a0, TRAP0_SIZE - mtcr a0, epc - - /* get current task thread_info with kernel 8K stack */ - bmaski a0, THREAD_SHIFT - not a0 - subi sp, 1 - and a0, sp - addi sp, 1 - - /* get tls */ - ldw a0, (a0, TINFO_TP_VALUE) - - KSPTOUSP - rte - -ENTRY(csky_irq) - SAVE_ALL 0 - zero_fp - psrset ee - -#ifdef CONFIG_PREEMPT - mov r9, sp /* Get current stack pointer */ - bmaski r10, THREAD_SHIFT - andn r9, r10 /* Get thread_info */ - - /* - * Get task_struct->stack.preempt_count for current, - * and increase 1. 
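In C, the CONFIG_PREEMPT bookkeeping csky_irq wraps around the handler: raise the preempt count so the interrupted context cannot be preempted mid-IRQ, then, once it drops back to zero, reschedule if the need-resched flag was set. The structure and flag value below are illustrative, not the kernel's real preempt API:

struct ti_sketch {
	int preempt_count;	/* TINFO_PREEMPT */
	unsigned long flags;	/* TINFO_FLAGS */
};

#define NEED_RESCHED_SK (1ul << 3)	/* placeholder for TIF_NEED_RESCHED */

void csky_irq_model(struct ti_sketch *ti, void (*do_irq)(void),
		    void (*preempt_schedule_irq)(void))
{
	ti->preempt_count++;		/* addi r12, 1; stw r12, ... */
	do_irq();			/* jbsr csky_do_IRQ */
	if (--ti->preempt_count == 0 &&
	    (ti->flags & NEED_RESCHED_SK))
		preempt_schedule_irq();	/* irq en/disable done inside */
}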
- */ - ldw r12, (r9, TINFO_PREEMPT) - addi r12, 1 - stw r12, (r9, TINFO_PREEMPT) -#endif - - mov a0, sp - jbsr csky_do_IRQ - -#ifdef CONFIG_PREEMPT - subi r12, 1 - stw r12, (r9, TINFO_PREEMPT) - cmpnei r12, 0 - bt 2f - ldw r12, (r9, TINFO_FLAGS) - btsti r12, TIF_NEED_RESCHED - bf 2f - jbsr preempt_schedule_irq /* irq en/disable is done inside */ -#endif -2: - jmpi ret_from_exception - -/* - * a0 = prev task_struct * - * a1 = next task_struct * - * a0 = return next - */ -ENTRY(__switch_to) - lrw a3, TASK_THREAD - addu a3, a0 - - mfcr a2, psr /* Save PSR value */ - stw a2, (a3, THREAD_SR) /* Save PSR in task struct */ - - SAVE_SWITCH_STACK - - stw sp, (a3, THREAD_KSP) - - /* Set up next process to run */ - lrw a3, TASK_THREAD - addu a3, a1 - - ldw sp, (a3, THREAD_KSP) /* Set next kernel sp */ - - ldw a2, (a3, THREAD_SR) /* Set next PSR */ - mtcr a2, psr - -#if defined(__CSKYABIV2__) - addi r7, a1, TASK_THREAD_INFO - ldw tls, (r7, TINFO_TP_VALUE) -#endif - - RESTORE_SWITCH_STACK - - rts -ENDPROC(__switch_to) diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S deleted file mode 100644 index 17ed9d2504807dfa385f5da0ef380b635a388c99..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/head.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include - -__HEAD -ENTRY(_start) - SETUP_MMU - - /* set stack point */ - lrw r6, init_thread_union + THREAD_SIZE - mov sp, r6 - - jmpi csky_start -END(_start) - -#ifdef CONFIG_SMP -.align 10 -ENTRY(_start_smp_secondary) - SETUP_MMU - - /* copy msa1 from CPU0 */ - lrw r6, secondary_msa1 - ld.w r6, (r6, 0) - mtcr r6, cr<31, 15> - - /* set stack point */ - lrw r6, secondary_stack - ld.w r6, (r6, 0) - mov sp, r6 - - jmpi csky_start_secondary -END(_start_smp_secondary) -#endif diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S deleted file mode 100644 index ae7961b973f26e8794a7597773fb9758b335a2bc..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/vmlinux.lds.S +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include - -OUTPUT_ARCH(csky) -ENTRY(_start) - -#ifndef __cskyBE__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -#define VBR_BASE \ - . = ALIGN(1024); \ - vec_base = .; \ - . += 512; - -SECTIONS -{ - . = PAGE_OFFSET + PHYS_OFFSET_OFFSET; - - _stext = .; - __init_begin = .; - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(PAGE_SIZE) - PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } = 0 - _etext = .; - - /* __init_begin __init_end must be page aligned for free_initmem */ - . = ALIGN(PAGE_SIZE); - - - _sdata = .; - RO_DATA_SECTION(PAGE_SIZE) - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - _edata = .; - - NOTES - EXCEPTION_TABLE(L1_CACHE_BYTES) - BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES) - VBR_BASE - _end = . 
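One detail worth unpacking from the top of this linker script: the 32-bit jiffies symbol is aliased onto the low word of jiffies_64, which sits at byte offset 0 on little-endian C-SKY but at offset 4 on big-endian, hence the two cases. A small self-contained host check of that offset, nothing kernel-specific:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t jiffies_64 = 0x44332211u;	/* value in the low word only */
	const unsigned char *p = (const unsigned char *)&jiffies_64;

	/* Low word at offset 0 => little endian (jiffies = jiffies_64);
	 * low word at offset 4 => big endian (jiffies = jiffies_64 + 4). */
	printf("low-word offset: %d\n", p[0] == 0x11 ? 0 : 4);
	return 0;
}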
; - - STABS_DEBUG - DWARF_DEBUG - - DISCARDS -} diff --git a/arch/h8300/boot/compressed/head.S b/arch/h8300/boot/compressed/head.S deleted file mode 100644 index 11ef509579cfa651ca3b087c38a97e0ada253fbb..0000000000000000000000000000000000000000 --- a/arch/h8300/boot/compressed/head.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/h8300/boot/compressed/head.S - * - * Copyright (C) 2006 Yoshinori Sato - */ - -#include - - .section .text..startup,"ax" - .global startup -startup: - mov.l #startup, sp - mov.l er0, er4 - mov.l #__sbss, er0 - mov.l #__ebss, er1 - sub.l er0, er1 - shlr er1 - shlr er1 - sub.l er2, er2 -1: - mov.l er2, @er0 - adds #4, er0 - dec.l #1, er1 - bne 1b - jsr @decompress_kernel - mov.l er4, er0 - jmp @output - - .align 9 -fake_headers_as_bzImage: - .word 0 - .ascii "HdrS" ; header signature - .word 0x0202 ; header version number (>= 0x0105) - ; or else old loadlin-1.5 will fail) - .word 0 ; default_switch - .word 0 ; SETUPSEG - .word 0x1000 - .word 0 ; pointing to kernel version string - .byte 0 ; = 0, old one (LILO, Loadlin, - ; 0xTV: T=0 for LILO - ; V = version - .byte 1 ; Load flags bzImage=1 - .word 0x8000 ; size to move, when setup is not - .long 0x100000 ; 0x100000 = default for big kernel - .long 0 ; address of loaded ramdisk image - .long 0 ; its size in bytes - - .end diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S deleted file mode 100644 index 4ade5f8299baed742c24b839d5bb3bec867b3d78..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/entry.S +++ /dev/null @@ -1,434 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * linux/arch/h8300/kernel/entry.S - * - * Yoshinori Sato - * David McCullough - * - */ - -/* - * entry.S - * include exception/interrupt gateway - * system call entry - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_H8300H) -#define USERRET 8 -INTERRUPTS = 64 - .h8300h - .macro SHLL2 reg - shll.l \reg - shll.l \reg - .endm - .macro SHLR2 reg - shlr.l \reg - shlr.l \reg - .endm - .macro SAVEREGS - mov.l er0,@-sp - mov.l er1,@-sp - mov.l er2,@-sp - mov.l er3,@-sp - .endm - .macro RESTOREREGS - mov.l @sp+,er3 - mov.l @sp+,er2 - .endm - .macro SAVEEXR - .endm - .macro RESTOREEXR - .endm -#endif -#if defined(CONFIG_CPU_H8S) -#define USERRET 10 -#define USEREXR 8 -INTERRUPTS = 128 - .h8300s - .macro SHLL2 reg - shll.l #2,\reg - .endm - .macro SHLR2 reg - shlr.l #2,\reg - .endm - .macro SAVEREGS - stm.l er0-er3,@-sp - .endm - .macro RESTOREREGS - ldm.l @sp+,er2-er3 - .endm - .macro SAVEEXR - mov.w @(USEREXR:16,er0),r1 - mov.w r1,@(LEXR-LER3:16,sp) /* copy EXR */ - .endm - .macro RESTOREEXR - mov.w @(LEXR-LER1:16,sp),r1 /* restore EXR */ - mov.b r1l,r1h - mov.w r1,@(USEREXR:16,er0) - .endm -#endif - - -/* CPU context save/restore macros. */ - - .macro SAVE_ALL - mov.l er0,@-sp - stc ccr,r0l /* check kernel mode */ - btst #4,r0l - bne 5f - - /* user mode */ - mov.l sp,@_sw_usp - mov.l @sp,er0 /* restore saved er0 */ - orc #0x10,ccr /* switch kernel stack */ - mov.l @_sw_ksp,sp - sub.l #(LRET-LORIG),sp /* allocate LORIG - LRET */ - SAVEREGS - mov.l @_sw_usp,er0 - mov.l @(USERRET:16,er0),er1 /* copy the RET addr */ - mov.l er1,@(LRET-LER3:16,sp) - SAVEEXR - - mov.l @(LORIG-LER3:16,sp),er0 - mov.l er0,@(LER0-LER3:16,sp) /* copy ER0 */ - mov.w e1,r1 /* e1 highbyte = ccr */ - and #0xef,r1h /* mask mode? 
flag */ - bra 6f -5: - /* kernel mode */ - mov.l @sp,er0 /* restore saved er0 */ - subs #2,sp /* set dummy ccr */ - subs #4,sp /* set dummp sp */ - SAVEREGS - mov.w @(LRET-LER3:16,sp),r1 /* copy old ccr */ -6: - mov.b r1h,r1l - mov.b #0,r1h - mov.w r1,@(LCCR-LER3:16,sp) /* set ccr */ - mov.l @_sw_usp,er2 - mov.l er2,@(LSP-LER3:16,sp) /* set usp */ - mov.l er6,@-sp /* syscall arg #6 */ - mov.l er5,@-sp /* syscall arg #5 */ - mov.l er4,@-sp /* syscall arg #4 */ - .endm /* r1 = ccr */ - - .macro RESTORE_ALL - mov.l @sp+,er4 - mov.l @sp+,er5 - mov.l @sp+,er6 - RESTOREREGS - mov.w @(LCCR-LER1:16,sp),r0 /* check kernel mode */ - btst #4,r0l - bne 7f - - orc #0xc0,ccr - mov.l @(LSP-LER1:16,sp),er0 - mov.l @(LER0-LER1:16,sp),er1 /* restore ER0 */ - mov.l er1,@er0 - RESTOREEXR - mov.w @(LCCR-LER1:16,sp),r1 /* restore the RET addr */ - mov.b r1l,r1h - mov.b @(LRET+1-LER1:16,sp),r1l - mov.w r1,e1 - mov.w @(LRET+2-LER1:16,sp),r1 - mov.l er1,@(USERRET:16,er0) - - mov.l @sp+,er1 - add.l #(LRET-LER1),sp /* remove LORIG - LRET */ - mov.l sp,@_sw_ksp - andc #0xef,ccr /* switch to user mode */ - mov.l er0,sp - bra 8f -7: - mov.l @sp+,er1 - add.l #10,sp -8: - mov.l @sp+,er0 - adds #4,sp /* remove the sw created LVEC */ - rte - .endm - -.globl _system_call -.globl ret_from_exception -.globl ret_from_fork -.globl ret_from_kernel_thread -.globl ret_from_interrupt -.globl _interrupt_redirect_table -.globl _sw_ksp,_sw_usp -.globl _resume -.globl _interrupt_entry -.globl _trace_break -.globl _nmi - -#if defined(CONFIG_ROMKERNEL) - .section .int_redirect,"ax" -_interrupt_redirect_table: -#if defined(CONFIG_CPU_H8300H) - .rept 7 - .long 0 - .endr -#endif -#if defined(CONFIG_CPU_H8S) - .rept 5 - .long 0 - .endr - jmp @_trace_break - .long 0 -#endif - - jsr @_interrupt_entry /* NMI */ - jmp @_system_call /* TRAPA #0 (System call) */ - .long 0 -#if defined(CONFIG_KGDB) - jmp @_kgdb_trap -#else - .long 0 -#endif - jmp @_trace_break /* TRAPA #3 (breakpoint) */ - .rept INTERRUPTS-12 - jsr @_interrupt_entry - .endr -#endif -#if defined(CONFIG_RAMKERNEL) -.globl _interrupt_redirect_table - .section .bss -_interrupt_redirect_table: - .space 4 -#endif - - .section .text - .align 2 -_interrupt_entry: - SAVE_ALL -/* r1l is saved ccr */ - mov.l sp,er0 - add.l #LVEC,er0 - btst #4,r1l - bne 1f - /* user LVEC */ - mov.l @_sw_usp,er0 - adds #4,er0 -1: - mov.l @er0,er0 /* LVEC address */ -#if defined(CONFIG_ROMKERNEL) - sub.l #_interrupt_redirect_table,er0 -#endif -#if defined(CONFIG_RAMKERNEL) - mov.l @_interrupt_redirect_table,er1 - sub.l er1,er0 -#endif - SHLR2 er0 - dec.l #1,er0 - mov.l sp,er1 - subs #4,er1 /* adjust ret_pc */ -#if defined(CONFIG_CPU_H8S) - orc #7,exr -#endif - jsr @do_IRQ - jmp @ret_from_interrupt - -_system_call: - subs #4,sp /* dummy LVEC */ - SAVE_ALL - /* er0: syscall nr */ - andc #0xbf,ccr - mov.l er0,er4 - - /* save top of frame */ - mov.l sp,er0 - jsr @set_esp0 - andc #0x3f,ccr - mov.l sp,er2 - and.w #0xe000,r2 - mov.l @(TI_FLAGS:16,er2),er2 - and.w #_TIF_WORK_SYSCALL_MASK,r2 - beq 1f - mov.l sp,er0 - jsr @do_syscall_trace_enter -1: - cmp.l #__NR_syscalls,er4 - bcc badsys - SHLL2 er4 - mov.l #_sys_call_table,er0 - add.l er4,er0 - mov.l @er0,er4 - beq ret_from_exception:16 - mov.l @(LER1:16,sp),er0 - mov.l @(LER2:16,sp),er1 - mov.l @(LER3:16,sp),er2 - jsr @er4 - mov.l er0,@(LER0:16,sp) /* save the return value */ - mov.l sp,er2 - and.w #0xe000,r2 - mov.l @(TI_FLAGS:16,er2),er2 - and.w #_TIF_WORK_SYSCALL_MASK,r2 - beq 2f - mov.l sp,er0 - jsr @do_syscall_trace_leave -2: - orc #0xc0,ccr - bra resume_userspace 
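resume_userspace, work_pending and work_resched below form a loop that runs until no work bit is left: reschedule first if need-resched is set, otherwise deliver signal/notify work, re-checking the flags each time around. The same control flow in C (flag values and names are placeholders for the sketch):

#define SIGPENDING_SK	 (1u << 1)	/* placeholder TIF_* values */
#define NEED_RESCHED_SK	 (1u << 2)
#define NOTIFY_RESUME_SK (1u << 3)
#define WORK_MASK_SK (SIGPENDING_SK | NEED_RESCHED_SK | NOTIFY_RESUME_SK)

void resume_userspace_model(const unsigned long *tif_flags,
			    void (*schedule)(void),
			    void (*do_notify_resume)(void))
{
	for (;;) {
		unsigned long work = *tif_flags & WORK_MASK_SK;
		if (!work)
			return;			/* restore_all: RESTORE_ALL / rte */
		if (work & NEED_RESCHED_SK)
			schedule();		/* work_resched */
		else
			do_notify_resume();	/* work notifysig */
	}
}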
- -badsys: - mov.l #-ENOSYS,er0 - mov.l er0,@(LER0:16,sp) - bra resume_userspace - -#if !defined(CONFIG_PREEMPT) -#define resume_kernel restore_all -#endif - -ret_from_exception: -#if defined(CONFIG_PREEMPT) - orc #0xc0,ccr -#endif -ret_from_interrupt: - mov.b @(LCCR+1:16,sp),r0l - btst #4,r0l - bne resume_kernel:16 /* return from kernel */ -resume_userspace: - andc #0xbf,ccr - mov.l sp,er4 - and.w #0xe000,r4 /* er4 <- current thread info */ - mov.l @(TI_FLAGS:16,er4),er1 - and.l #_TIF_WORK_MASK,er1 - beq restore_all:8 -work_pending: - btst #TIF_NEED_RESCHED,r1l - bne work_resched:8 - /* work notifysig */ - mov.l sp,er0 - subs #4,er0 /* er0: pt_regs */ - jsr @do_notify_resume - bra resume_userspace:8 -work_resched: - mov.l sp,er0 - jsr @set_esp0 - jsr @schedule - bra resume_userspace:8 -restore_all: - RESTORE_ALL /* Does RTE */ - -#if defined(CONFIG_PREEMPT) -resume_kernel: - mov.l @(TI_PRE_COUNT:16,er4),er0 - bne restore_all:8 -need_resched: - mov.l @(TI_FLAGS:16,er4),er0 - btst #TIF_NEED_RESCHED,r0l - beq restore_all:8 - mov.b @(LCCR+1:16,sp),r0l /* Interrupt Enabled? */ - bmi restore_all:8 - mov.l sp,er0 - jsr @set_esp0 - jsr @preempt_schedule_irq - bra need_resched:8 -#endif - -ret_from_fork: - mov.l er2,er0 - jsr @schedule_tail - jmp @ret_from_exception - -ret_from_kernel_thread: - mov.l er2,er0 - jsr @schedule_tail - mov.l @(LER4:16,sp),er0 - mov.l @(LER5:16,sp),er1 - jsr @er1 - jmp @ret_from_exception - -_resume: - /* - * Beware - when entering resume, offset of tss is in d1, - * prev (the current task) is in a0, next (the new task) - * is in a1 and d2.b is non-zero if the mm structure is - * shared between the tasks, so don't change these - * registers until their contents are no longer needed. - */ - - /* save sr */ - sub.w r3,r3 - stc ccr,r3l - mov.w r3,@(THREAD_CCR+2:16,er0) - - /* disable interrupts */ - orc #0xc0,ccr - mov.l @_sw_usp,er3 - mov.l er3,@(THREAD_USP:16,er0) - mov.l sp,@(THREAD_KSP:16,er0) - - /* Skip address space switching if they are the same. */ - /* FIXME: what did we hack out of here, this does nothing! 
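_resume, which this FIXME sits in, is the h8300 context switch: it saves the outgoing task's CCR, user SP and kernel SP into its thread struct, then installs the incoming task's saved values, with interrupts held off while both stacks are in flux. An outline in C with illustrative field names:

struct h8300_thread_sk {
	unsigned short ccr;	/* THREAD_CCR */
	unsigned long usp;	/* THREAD_USP, mirrored in _sw_usp */
	unsigned long ksp;	/* THREAD_KSP */
};

void resume_model(struct h8300_thread_sk *prev,
		  const struct h8300_thread_sk *next,
		  unsigned long *sw_usp, unsigned long *kernel_sp,
		  unsigned short cur_ccr)
{
	prev->ccr = cur_ccr;	/* stc ccr,r3l */
	/* orc #0xc0,ccr: interrupts off for the stack swap */
	prev->usp = *sw_usp;	/* mov.l @_sw_usp,er3 */
	prev->ksp = *kernel_sp;	/* mov.l sp,@(THREAD_KSP:16,er0) */
	*sw_usp = next->usp;
	*kernel_sp = next->ksp;
	/* ldc next->ccr then restores the incoming task's interrupt state */
}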
*/ - - mov.l @(THREAD_USP:16,er1),er0 - mov.l er0,@_sw_usp - mov.l @(THREAD_KSP:16,er1),sp - - /* restore status register */ - mov.w @(THREAD_CCR+2:16,er1),r3 - - ldc r3l,ccr - rts - -_trace_break: - subs #4,sp - SAVE_ALL - sub.l er1,er1 - dec.l #1,er1 - mov.l er1,@(LORIG,sp) - mov.l sp,er0 - jsr @set_esp0 - mov.l @_sw_usp,er0 - mov.l @er0,er1 - mov.w @(-2:16,er1),r2 - cmp.w #0x5730,r2 - beq 1f - subs #2,er1 - mov.l er1,@er0 -1: - and.w #0xff,e1 - mov.l er1,er0 - jsr @trace_trap - jmp @ret_from_exception - -_nmi: - subs #4, sp - mov.l er0, @-sp - mov.l @_interrupt_redirect_table, er0 - add.l #8*4, er0 - mov.l er0, @(4,sp) - mov.l @sp+, er0 - jmp @_interrupt_entry - -#if defined(CONFIG_KGDB) -_kgdb_trap: - subs #4,sp - SAVE_ALL - mov.l sp,er0 - add.l #LRET,er0 - mov.l er0,@(LSP,sp) - jsr @set_esp0 - mov.l sp,er0 - subs #4,er0 - jsr @h8300_kgdb_trap - jmp @ret_from_exception -#endif - - .section .bss -_sw_ksp: - .space 4 -_sw_usp: - .space 4 - - .end diff --git a/arch/h8300/kernel/head_ram.S b/arch/h8300/kernel/head_ram.S deleted file mode 100644 index dbf8429f5fab5f91071242a059f159fbe92c0c62..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/head_ram.S +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#define SYSCR 0xfee012 -#define IRAMTOP 0xffff20 -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#define INTCR 0xffff31 -#define IRAMTOP 0xffc000 -#endif - - __HEAD - .global _start -_start: - mov.l #IRAMTOP,sp - /* .bss clear */ - mov.l #_sbss,er5 - mov.l #_ebss,er4 - sub.l er5,er4 - shlr er4 - shlr er4 - sub.l er2,er2 -1: - mov.l er2,@er5 - adds #4,er5 - dec.l #1,er4 - bne 1b - jsr @h8300_fdt_init - - /* linux kernel start */ -#if defined(CONFIG_CPU_H8300H) - ldc #0xd0,ccr /* running kernel */ - mov.l #SYSCR,er0 - bclr #3,@er0 -#endif -#if defined(CONFIG_CPU_H8S) - ldc #0x07,exr - bclr #4,@INTCR:8 - bset #5,@INTCR:8 /* Interrupt mode 2 */ - ldc #0x90,ccr /* running kernel */ -#endif - mov.l #init_thread_union,sp - add.l #0x2000,sp - jsr @start_kernel - -1: - bra 1b - - .end diff --git a/arch/h8300/kernel/head_rom.S b/arch/h8300/kernel/head_rom.S deleted file mode 100644 index ab55a9cb2f367cf2cdbe9c2dd18c845b70ca5a82..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/head_rom.S +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#define SYSCR 0xfee012 -#define IRAMTOP 0xffff20 -#define NR_INT 64 -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#define INTCR 0xffff31 -#define IRAMTOP 0xffc000 -#define NR_INT 128 -#endif - - __HEAD - .global _start -_start: - mov.l #IRAMTOP,sp -#if !defined(CONFIG_H8300H_SIM) && \ - !defined(CONFIG_H8S_SIM) - jsr @lowlevel_init - - /* copy .data */ - mov.l #_begin_data,er5 - mov.l #_sdata,er6 - mov.l #_edata,er4 - sub.l er6,er4 - shlr.l er4 - shlr.l er4 -1: - mov.l @er5+,er0 - mov.l er0,@er6 - adds #4,er6 - dec.l #1,er4 - bne 1b - /* .bss clear */ - mov.l #_sbss,er5 - mov.l #_ebss,er4 - sub.l er5,er4 - shlr er4 - shlr er4 - sub.l er0,er0 -1: - mov.l er0,@er5 - adds #4,er5 - dec.l #1,er4 - bne 1b -#else - /* get cmdline from gdb */ - jsr @0xcc - ;; er0 - argc - ;; er1 - argv - mov.l #command_line,er3 - adds #4,er1 - dec.l #1,er0 - beq 4f -1: - mov.l @er1+,er2 -2: - mov.b @er2+,r4l - beq 3f - mov.b r4l,@er3 - adds #1,er3 - bra 2b -3: - mov.b #' ',r4l - mov.b r4l,@er3 - adds #1,er3 - dec.l #1,er0 
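The startup loops in head_ram.S and head_rom.S share one idiom: a byte span is turned into a longword count with two single-bit right shifts (so the section bounds are assumed 4-byte aligned), then walked with a dec/bne pair. The .bss clear, for instance, is just this in C (names are illustrative):

#include <stdint.h>

void clear_bss_model(uint32_t *sbss, const uint32_t *ebss)
{
	/* The pointer difference already counts words here; the assembly
	 * gets the same count with sub.l er5,er4 and two shlr. */
	uint32_t count = (uint32_t)(ebss - sbss);

	while (count--)		/* mov.l er2,@er5; adds #4,er5; dec/bne */
		*sbss++ = 0;
}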
- bne 1b - subs #1,er3 - mov.b #0,r4l - mov.b r4l,@er3 -4: -#endif - sub.l er0,er0 - jsr @h8300_fdt_init - /* linux kernel start */ -#if defined(CONFIG_CPU_H8300H) - ldc #0xd0,ccr /* running kernel */ - mov.l #SYSCR,er0 - bclr #3,@er0 -#endif -#if defined(CONFIG_CPU_H8S) - ldc #0x07,exr - bclr #4,@INTCR:8 - bset #5,@INTCR:8 /* Interrupt mode 2 */ - ldc #0x90,ccr /* running kernel */ -#endif - mov.l #init_thread_union,sp - add.l #0x2000,sp - jsr @start_kernel - -1: - bra 1b - -#if defined(CONFIG_ROMKERNEL) - /* interrupt vector */ - .section .vectors,"ax" - .long _start - .long _start -vector = 2 - .rept NR_INT - 2 - .long _interrupt_redirect_table+vector*4 -vector = vector + 1 - .endr -#endif - .end diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S deleted file mode 100644 index 49f716c0a1df977d5981c6b80dc4e126c282cb83..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/vmlinux.lds.S +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include - -#define ROMTOP 0x000000 -#define RAMTOP 0x400000 - -jiffies = jiffies_64 + 4; - -ENTRY(_start) - -SECTIONS -{ -#if defined(CONFIG_ROMKERNEL) - . = ROMTOP; - .vectors : - { - _vector = . ; - *(.vector*) - } -#else - . = RAMTOP; - _ramstart = .; - . = . + CONFIG_OFFSET; -#endif - _text = .; - HEAD_TEXT_SECTION - .text : { - _stext = . ; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT -#if defined(CONFIG_ROMKERNEL) - *(.int_redirect) -#endif - _etext = . ; - } - EXCEPTION_TABLE(16) - NOTES - RO_DATA_SECTION(4) - ROMEND = .; -#if defined(CONFIG_ROMKERNEL) - . = RAMTOP; - _ramstart = .; -#define ADDR(x) ROMEND -#endif - _sdata = . ; - __data_start = . ; - RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE) -#if defined(CONFIG_ROMKERNEL) -#undef ADDR -#endif - . = ALIGN(0x4) ; - __init_begin = .; - INIT_TEXT_SECTION(4) - INIT_DATA_SECTION(4) - __init_end = .; - _edata = . 
; - _begin_data = LOADADDR(.data); - _sbss =.; - BSS_SECTION(0, 0 ,0) - _ebss =.; - _ramend = .; - _end = .; - DISCARDS -} diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S deleted file mode 100644 index 6e1a4ed3af53369871ac530a282953bea89c20d0..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/abs.S +++ /dev/null @@ -1,21 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; abs.S - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text -.global _abs - -;;; int abs(int n) -_abs: - mov.l er0,er0 - bpl 1f - neg.l er0 -1: - rts diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S deleted file mode 100644 index f1cd67d5e3ea68e165402673f01bab686bc32b81..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/memcpy.S +++ /dev/null @@ -1,86 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; memcpy.S - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text -.global memcpy - -;;; void *memcpy(void *to, void *from, size_t n) -memcpy: - mov.l er2,er2 - bne 1f - rts -1: - ;; address check - bld #0,r0l - bxor #0,r1l - bcs 4f - mov.l er4,@-sp - mov.l er0,@-sp - btst #0,r0l - beq 1f - ;; (aligned even) odd address - mov.b @er1,r3l - mov.b r3l,@er0 - adds #1,er1 - adds #1,er0 - dec.l #1,er2 - beq 3f -1: - ;; n < sizeof(unsigned long) check - sub.l er4,er4 - adds #4,er4 ; loop count check value - cmp.l er4,er2 - blo 2f - ;; unsigned long copy -1: - mov.l @er1,er3 - mov.l er3,@er0 - adds #4,er0 - adds #4,er1 - subs #4,er2 - cmp.l er4,er2 - bcc 1b - ;; rest -2: - mov.l er2,er2 - beq 3f -1: - mov.b @er1,r3l - mov.b r3l,@er0 - adds #1,er1 - adds #1,er0 - dec.l #1,er2 - bne 1b -3: - mov.l @sp+,er0 - mov.l @sp+,er4 - rts - - ;; odd <- even / even <- odd -4: - mov.l er4,er3 - mov.l er2,er4 - mov.l er5,er2 - mov.l er1,er5 - mov.l er6,er1 - mov.l er0,er6 -1: - eepmov.w - mov.w r4,r4 - bne 1b - dec.w #1,e4 - bpl 1b - mov.l er1,er6 - mov.l er2,er5 - mov.l er3,er4 - rts - - .end diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S deleted file mode 100644 index 2d1abc37fd08b0a78856190fdb00e41f4a264fca..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/memset.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* memset.S */ - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text - -.global memset -.global clear_user - -;;void *memset(*ptr, int c, size_t count) -;; ptr = er0 -;; c = er1(r1l) -;; count = er2 -memset: - btst #0,r0l - beq 2f - - ;; odd address -1: - mov.b r1l,@er0 - adds #1,er0 - dec.l #1,er2 - beq 6f - - ;; even address -2: - mov.l er2,er3 - cmp.l #4,er2 - blo 4f - ;; count>=4 -> count/4 -#if defined(CONFIG_CPU_H8300H) - shlr.l er2 - shlr.l er2 -#endif -#if defined(CONFIG_CPU_H8S) - shlr.l #2,er2 -#endif - ;; byte -> long - mov.b r1l,r1h - mov.w r1,e1 -3: - mov.l er1,@er0 - adds #4,er0 - dec.l #1,er2 - bne 3b -4: - ;; count % 4 - and.b #3,r3l - beq 6f -5: - mov.b r1l,@er0 - adds #1,er0 - dec.b r3l - bne 5b -6: - rts - -clear_user: - mov.l er1, er2 - sub.l er1, er1 - bra memset - - .end diff --git a/arch/h8300/lib/moddivsi3.S b/arch/h8300/lib/moddivsi3.S deleted file mode 100644 index 9e33ab0456c7501d89e9fedba025b50e1ed584ea..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/moddivsi3.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - -; numerator in A0/A1 -; denominator in A2/A3 - .global 
__modsi3 -__modsi3: - PUSHP S2P - bsr modnorm - bsr __divsi3 - mov.l er3,er0 - bra exitdiv - - .global __umodsi3 -__umodsi3: - bsr __udivsi3:16 - mov.l er3,er0 - rts - - .global __divsi3 -__divsi3: - PUSHP S2P - bsr divnorm - bsr __udivsi3:16 - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve - neg.l A0P - -reti: - POPP S2P - rts - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. -modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - - .end diff --git a/arch/h8300/lib/modsi3.S b/arch/h8300/lib/modsi3.S deleted file mode 100644 index bdc8a002921df129a5ff9a584b87a96035cb483d..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/modsi3.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - -; numerator in A0/A1 -; denominator in A2/A3 - .global __modsi3 -__modsi3: - PUSHP S2P - bsr modnorm - bsr __divsi3 - mov.l er3,er0 - bra exitdiv - - .global __umodsi3 -__umodsi3: - bsr __udivsi3 - mov.l er3,er0 - rts - - .global __divsi3 -__divsi3: - PUSHP S2P - jsr divnorm - bsr __udivsi3 - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve - neg.l A0P - -reti: - POPP S2P - rts - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
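divnorm and modnorm reduce signed division to the unsigned __udivsi3 and fix the sign afterwards: the quotient is negated when exactly one operand was negative (divnorm's xor.b into S2L), while the remainder follows the numerator's sign only, since modnorm never toggles for the divisor; that is exactly C's % rule, whatever the comment above suggests. As C, with udivsi3_model as a trivial stand-in for __udivsi3:

static unsigned udivsi3_model(unsigned n, unsigned d)
{
	return n / d;		/* stands in for __udivsi3 */
}

int divsi3_model(int a, int b)
{
	int neg = (a < 0) ^ (b < 0);			/* xor.b #0x08,S2L */
	unsigned ua = a < 0 ? 0u - (unsigned)a : (unsigned)a;
	unsigned ub = b < 0 ? 0u - (unsigned)b : (unsigned)b;
	unsigned q = udivsi3_model(ua, ub);
	return neg ? -(int)q : (int)q;			/* exitdiv: neg.l A0P */
}

int modsi3_model(int a, int b)
{
	unsigned ua = a < 0 ? 0u - (unsigned)a : (unsigned)a;
	unsigned ub = b < 0 ? 0u - (unsigned)b : (unsigned)b;
	unsigned r = ua - udivsi3_model(ua, ub) * ub;	/* er3 after the bsr */
	return a < 0 ? -(int)r : (int)r;	/* numerator's sign only */
}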
-modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - - .end diff --git a/arch/h8300/lib/mulsi3.S b/arch/h8300/lib/mulsi3.S deleted file mode 100644 index 5a062fd298d128b0d7f505655e908448b6557388..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/mulsi3.S +++ /dev/null @@ -1,39 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0 -; -; mulsi3 for H8/300H - based on Renesas SH implementation -; -; by Toshiyasu Morita -; -; Old code: -; -; 16b * 16b = 372 states (worst case) -; 32b * 32b = 724 states (worst case) -; -; New code: -; -; 16b * 16b = 48 states -; 16b * 32b = 72 states -; 32b * 32b = 92 states -; - - .global __mulsi3 -__mulsi3: - mov.w r1,r2 ; ( 2 states) b * d - mulxu r0,er2 ; (22 states) - - mov.w e0,r3 ; ( 2 states) a * d - beq L_skip1 ; ( 4 states) - mulxu r1,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip1: - mov.w e1,r3 ; ( 2 states) c * b - beq L_skip2 ; ( 4 states) - mulxu r0,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip2: - mov.l er2,er0 ; ( 2 states) - rts ; (10 states) - - .end diff --git a/arch/h8300/lib/strncpy.S b/arch/h8300/lib/strncpy.S deleted file mode 100644 index 8b65d7c4727b280ee30b406a1091642683369f04..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/strncpy.S +++ /dev/null @@ -1,35 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; strncpy.S - -#include - - .text -.global strncpy_from_user - -;;; long strncpy_from_user(void *to, void *from, size_t n) -strncpy_from_user: - mov.l er2,er2 - bne 1f - sub.l er0,er0 - rts -1: - mov.l er4,@-sp - sub.l er3,er3 -2: - mov.b @er1+,r4l - mov.b r4l,@er0 - adds #1,er0 - beq 3f - inc.l #1,er3 - dec.l #1,er2 - bne 2b -3: - dec.l #1,er2 -4: - mov.b r4l,@er0 - adds #1,er0 - dec.l #1,er2 - bne 4b - mov.l er3,er0 - mov.l @sp+,er4 - rts diff --git a/arch/h8300/lib/udivsi3.S b/arch/h8300/lib/udivsi3.S deleted file mode 100644 index b810aba8e1009603c5cf07b88ad3c5ebd3333f4b..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/udivsi3.S +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - - ;; This function also computes the remainder and stores it in er3. - .global __udivsi3 -__udivsi3: - mov.w A1E,A1E ; denominator top word 0? 
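__mulsi3 above builds a 32x32->32 multiply from the 16x16->32 mulxu instruction. Writing a = (a_hi<<16)+a_lo and b = (b_hi<<16)+b_lo, only three partial products can touch the low 32 bits: a_hi*b_hi lands entirely above bit 31 and is never computed, and each cross term is skipped when its high half is zero (the beq L_skip paths behind the state counts). In C:

#include <stdint.h>

uint32_t mulsi3_model(uint32_t a, uint32_t b)
{
	uint32_t a_lo = a & 0xffff, a_hi = a >> 16;
	uint32_t b_lo = b & 0xffff, b_hi = b >> 16;

	uint32_t r = a_lo * b_lo;		/* mulxu r0,er2 */
	if (a_hi)				/* beq L_skip1 */
		r += (a_hi * b_lo) << 16;	/* add.w r3,e2: low 16 survive */
	if (b_hi)				/* beq L_skip2 */
		r += (b_hi * a_lo) << 16;
	return r;
}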
- bne DenHighNonZero - - ; do it the easy way, see page 107 in manual - mov.w A0E,A2 - extu.l A2P - divxu.w A1,A2P - mov.w A2E,A0E - divxu.w A1,A0P - mov.w A0E,A3 - mov.w A2,A0E - extu.l A3P - rts - - ; er0 = er0 / er1 - ; er3 = er0 % er1 - ; trashes er1 er2 - ; expects er1 >= 2^16 -DenHighNonZero: - mov.l er0,er3 - mov.l er1,er2 -#ifdef CONFIG_CPU_H8300H -divmod_L21: - shlr.l er0 - shlr.l er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -#else - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - beq divmod_L22A -divmod_L21: - shlr.l #2,er0 -divmod_L22: - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -divmod_L22A: - rotxl.w r2 - bcs divmod_L23 - shlr.l er0 - bra divmod_L24 -divmod_L23: - rotxr.w r2 - shlr.l #2,er0 -divmod_L24: -#endif - ;; At this point, - ;; er0 contains shifted dividend - ;; er1 contains divisor - ;; er2 contains shifted divisor - ;; er3 contains dividend, later remainder - divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) - extu.l er0 - beq divmod_L25 - subs #1,er0 ; er0 = AQ - 1 - mov.w e1,r2 - mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor - sub.w r2,e3 ; dividend - 65536 * er2 - mov.w r1,r2 - mulxu.w r0,er2 ; compute er3 = remainder (tentative) - sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor -divmod_L25: - cmp.l er1,er3 ; is divisor < remainder? - blo divmod_L26 - adds #1,er0 - sub.l er1,er3 ; correct the remainder -divmod_L26: - rts - - .end diff --git a/arch/hexagon/kernel/head.S b/arch/hexagon/kernel/head.S deleted file mode 100644 index 0b016308cc79f041579cfcf8929afe1e5b2b6b5f..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/head.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Early kernel startup code for Hexagon - * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define SEGTABLE_ENTRIES #0x0e0 - - __INIT -ENTRY(stext) - /* - * VMM will already have set up true vector page, MMU, etc. - * To set up initial kernel identity map, we have to pass - * the VMM a pointer to some canonical page tables. In - * this implementation, we're assuming that we've got - * them precompiled. Generate value in R24, as we'll need - * it again shortly. - */ - r24.L = #LO(swapper_pg_dir) - r24.H = #HI(swapper_pg_dir) - - /* - * Symbol is kernel segment address, but we need - * the logical/physical address. - */ - r25 = pc; - r2.h = #0xffc0; - r2.l = #0x0000; - r25 = and(r2,r25); /* R25 holds PHYS_OFFSET now */ - r1.h = #HI(PAGE_OFFSET); - r1.l = #LO(PAGE_OFFSET); - r24 = sub(r24,r1); /* swapper_pg_dir - PAGE_OFFSET */ - r24 = add(r24,r25); /* + PHYS_OFFSET */ - - r0 = r24; /* aka __pa(swapper_pg_dir) */ - - /* - * Initialize page dir to make the virtual and physical - * addresses where the kernel was loaded be identical. - * Done in 4MB chunks. 
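The arithmetic in this block, as C: count how many 4MB huge-page entries cover [stext, _end) (add 4M-1, shift right by 22), round the physical base down to a 4MB boundary, and write one PTE per region so that VA == PA while the MMU is brought up. The PTE bits and names below are placeholders, not Hexagon's real encodings:

#include <stdint.h>

#define SZ_4M	(1u << 22)

void map_identity_model(uint32_t *pgdir, uint32_t stext_pa,
			uint32_t end_pa, uint32_t pte_bits)
{
	/* r26 = (_end - stext + (4M - 1)) >> 22 : entry count, rounded up */
	uint32_t n = (end_pa - stext_pa + SZ_4M - 1) >> 22;
	uint32_t pa = stext_pa & ~(SZ_4M - 1);	/* 4MB-aligned base */

	for (uint32_t i = 0; i < n; i++, pa += SZ_4M)
		pgdir[pa >> 22] = pa | pte_bits;	/* memw(r0++ #4) = r1 */
}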
- */ -#define PTE_BITS ( __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_WB_L2 << 6 \ - | __HVM_PDE_S_4MB) - - /* - * Get number of VA=PA entries; only really needed for jump - * to hyperspace; gets blown away immediately after - */ - - { - r1.l = #LO(_end); - r2.l = #LO(stext); - r3 = #1; - } - { - r1.h = #HI(_end); - r2.h = #HI(stext); - r3 = asl(r3, #22); - } - { - r1 = sub(r1, r2); - r3 = add(r3, #-1); - } /* r1 = _end - stext */ - r1 = add(r1, r3); /* + (4M-1) */ - r26 = lsr(r1, #22); /* / 4M = # of entries */ - - r1 = r25; - r2.h = #0xffc0; - r2.l = #0x0000; /* round back down to 4MB boundary */ - r1 = and(r1,r2); - r2 = lsr(r1, #22) /* 4MB page number */ - r2 = asl(r2, #2) /* times sizeof(PTE) (4bytes) */ - r0 = add(r0,r2) /* r0 = address of correct PTE */ - r2 = #PTE_BITS - r1 = add(r1,r2) /* r1 = 4MB PTE for the first entry */ - r2.h = #0x0040 - r2.l = #0x0000 /* 4MB increments */ - loop0(1f,r26); -1: - memw(r0 ++ #4) = r1 - { r1 = add(r1, r2); } :endloop0 - - /* Also need to overwrite the initial 0xc0000000 entries */ - /* PAGE_OFFSET >> (4MB shift - 4 bytes per entry shift) */ - R1.H = #HI(PAGE_OFFSET >> (22 - 2)) - R1.L = #LO(PAGE_OFFSET >> (22 - 2)) - - r0 = add(r1, r24); /* advance to 0xc0000000 entry */ - r1 = r25; - r2.h = #0xffc0; - r2.l = #0x0000; /* round back down to 4MB boundary */ - r1 = and(r1,r2); /* for huge page */ - r2 = #PTE_BITS - r1 = add(r1,r2); - r2.h = #0x0040 - r2.l = #0x0000 /* 4MB increments */ - - loop0(1f,SEGTABLE_ENTRIES); -1: - memw(r0 ++ #4) = r1; - { r1 = add(r1,r2); } :endloop0 - - r0 = r24; - - /* - * The subroutine wrapper around the virtual instruction touches - * no memory, so we should be able to use it even here. - * Note that in this version, R1 and R2 get "clobbered"; see - * vm_ops.S - */ - r1 = #VM_TRANS_TYPE_TABLE - call __vmnewmap; - - /* Jump into virtual address range. */ - - r31.h = #hi(__head_s_vaddr_target) - r31.l = #lo(__head_s_vaddr_target) - jumpr r31 - - /* Insert trippy space effects. */ - -__head_s_vaddr_target: - /* - * Tear down VA=PA translation now that we are running - * in kernel virtual space. - */ - r0 = #__HVM_PDE_S_INVALID - - r1.h = #0xffc0; - r1.l = #0x0000; - r2 = r25; /* phys_offset */ - r2 = and(r1,r2); - - r1.l = #lo(swapper_pg_dir) - r1.h = #hi(swapper_pg_dir) - r2 = lsr(r2, #22) /* 4MB page number */ - r2 = asl(r2, #2) /* times sizeof(PTE) (4bytes) */ - r1 = add(r1,r2); - loop0(1f,r26) - -1: - { - memw(R1 ++ #4) = R0 - }:endloop0 - - r0 = r24 - r1 = #VM_TRANS_TYPE_TABLE - call __vmnewmap - - /* Go ahead and install the trap0 return so angel calls work */ - r0.h = #hi(_K_provisional_vec) - r0.l = #lo(_K_provisional_vec) - call __vmsetvec - - /* - * OK, at this point we should start to be much more careful, - * we're going to enter C code and start touching memory - * in all sorts of places. - * This means: - * SGP needs to be OK - * Need to lock shared resources - * A bunch of other things that will cause - * all kinds of painful bugs - */ - - /* - * Stack pointer should be pointed at the init task's - * thread stack, which should have been declared in arch/init_task.c. - * So uhhhhh... - * It's accessible via the init_thread_union, which is a union - * of a thread_info struct and a stack; of course, the top - * of the stack is not for you. The end of the stack - * is simply init_thread_union + THREAD_SIZE. 
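The two packet assignments below compute, in C terms, nothing more than this (the symbol is an extern placeholder and THREAD_SIZE's value is whatever the config sets):

extern unsigned char init_thread_union[];	/* thread_info + stack block */

#define THREAD_SIZE_SK	16384			/* placeholder value */

static inline unsigned long initial_sp(void)
{
	/* r29 = init_thread_union + THREAD_SIZE: the first address past
	 * the block, since the stack grows down from its top. */
	return (unsigned long)init_thread_union + THREAD_SIZE_SK;
}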
- */ - - {r29.H = #HI(init_thread_union); r0.H = #HI(_THREAD_SIZE); } - {r29.L = #LO(init_thread_union); r0.L = #LO(_THREAD_SIZE); } - - /* initialize the register used to point to current_thread_info */ - /* Fixme: THREADINFO_REG can't be R2 because of that memset thing. */ - {r29 = add(r29,r0); THREADINFO_REG = r29; } - - /* Hack: zero bss; */ - { r0.L = #LO(__bss_start); r1 = #0; r2.l = #LO(__bss_stop); } - { r0.H = #HI(__bss_start); r2.h = #HI(__bss_stop); } - - r2 = sub(r2,r0); - call memset; - - /* Set PHYS_OFFSET; should be in R25 */ -#ifdef CONFIG_HEXAGON_PHYS_OFFSET - r0.l = #LO(__phys_offset); - r0.h = #HI(__phys_offset); - memw(r0) = r25; -#endif - - /* Time to make the doughnuts. */ - call start_kernel - - /* - * Should not reach here. - */ -1: - jump 1b - -.p2align PAGE_SHIFT -ENTRY(external_cmdline_buffer) - .fill _PAGE_SIZE,1,0 - -.data -.p2align PAGE_SHIFT -ENTRY(empty_zero_page) - .fill _PAGE_SIZE,1,0 diff --git a/arch/hexagon/kernel/trampoline.S b/arch/hexagon/kernel/trampoline.S deleted file mode 100644 index 58f631870f7e3ae780da2f16df052c4d4df533cc..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/trampoline.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * Trampoline sequences to be copied onto user stack. - * This consumes a little more space than hand-assembling - * immediate constants for use in C, but is more portable - * to future tweaks to the Hexagon instruction set. - */ - -#include - -/* Sig trampolines - call sys_sigreturn or sys_rt_sigreturn as appropriate */ - -/* plain sigreturn is gone. */ - - .globl __rt_sigtramp_template -__rt_sigtramp_template: - r6 = #__NR_rt_sigreturn; - trap0(#1); diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S deleted file mode 100644 index 4023fdbea4902e090e0798cf2c2f1006b1d2b259..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_entry.S +++ /dev/null @@ -1,380 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Event entry/exit for Hexagon - * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. - */ - -#include /* assembly-safer versions of C defines */ -#include /* sigh, except for page_offset */ -#include -#include - -/* - * Entry into guest-mode Linux under Hexagon Virtual Machine. - * Stack pointer points to event record - build pt_regs on top of it, - * set up a plausible C stack frame, and dispatch to the C handler. - * On return, do vmrte virtual instruction with SP where we started. - * - * VM Spec 0.5 uses a trap to fetch HVM record now. - */ - -/* - * Save full register state, while setting up thread_info struct - * pointer derived from kernel stack pointer in THREADINFO_REG - * register, putting prior thread_info.regs pointer in a callee-save - * register (R24, which had better not ever be assigned to THREADINFO_REG), - * and updating thread_info.regs to point to current stack frame, - * so as to support nested events in kernel mode. - * - * As this is common code, we set the pt_regs system call number - * to -1 for all events. It will be replaced with the system call - * number in the case where we decode a system call (trap0(#1)). 
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define save_pt_regs()\ - memd(R0 + #_PT_R3130) = R31:30; \ - { memw(R0 + #_PT_R2928) = R28; \ - R31 = memw(R0 + #_PT_ER_VMPSP); }\ - { memw(R0 + #(_PT_R2928 + 4)) = R31; \ - R31 = ugp; } \ - { memd(R0 + #_PT_R2726) = R27:26; \ - R30 = gp ; } \ - memd(R0 + #_PT_R2524) = R25:24; \ - memd(R0 + #_PT_R2322) = R23:22; \ - memd(R0 + #_PT_R2120) = R21:20; \ - memd(R0 + #_PT_R1918) = R19:18; \ - memd(R0 + #_PT_R1716) = R17:16; \ - memd(R0 + #_PT_R1514) = R15:14; \ - memd(R0 + #_PT_R1312) = R13:12; \ - { memd(R0 + #_PT_R1110) = R11:10; \ - R15 = lc0; } \ - { memd(R0 + #_PT_R0908) = R9:8; \ - R14 = sa0; } \ - { memd(R0 + #_PT_R0706) = R7:6; \ - R13 = lc1; } \ - { memd(R0 + #_PT_R0504) = R5:4; \ - R12 = sa1; } \ - { memd(R0 + #_PT_GPUGP) = R31:30; \ - R11 = m1; \ - R2.H = #HI(_THREAD_SIZE); } \ - { memd(R0 + #_PT_LC0SA0) = R15:14; \ - R10 = m0; \ - R2.L = #LO(_THREAD_SIZE); } \ - { memd(R0 + #_PT_LC1SA1) = R13:12; \ - R15 = p3:0; \ - R2 = neg(R2); } \ - { memd(R0 + #_PT_M1M0) = R11:10; \ - R14 = usr; \ - R2 = and(R0,R2); } \ - { memd(R0 + #_PT_PREDSUSR) = R15:14; \ - THREADINFO_REG = R2; } \ - { r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \ - memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \ - R2 = #-1; } \ - { memw(R0 + #_PT_SYSCALL_NR) = R2; \ - R30 = #0; } -#else -/* V4+ */ -/* the # ## # syntax inserts a literal ## */ -#define save_pt_regs()\ - { memd(R0 + #_PT_R3130) = R31:30; \ - R30 = memw(R0 + #_PT_ER_VMPSP); }\ - { memw(R0 + #_PT_R2928) = R28; \ - memw(R0 + #(_PT_R2928 + 4)) = R30; }\ - { R31:30 = C11:10; \ - memd(R0 + #_PT_R2726) = R27:26; \ - memd(R0 + #_PT_R2524) = R25:24; }\ - { memd(R0 + #_PT_R2322) = R23:22; \ - memd(R0 + #_PT_R2120) = R21:20; }\ - { memd(R0 + #_PT_R1918) = R19:18; \ - memd(R0 + #_PT_R1716) = R17:16; }\ - { memd(R0 + #_PT_R1514) = R15:14; \ - memd(R0 + #_PT_R1312) = R13:12; \ - R17:16 = C13:12; }\ - { memd(R0 + #_PT_R1110) = R11:10; \ - memd(R0 + #_PT_R0908) = R9:8; \ - R15:14 = C1:0; } \ - { memd(R0 + #_PT_R0706) = R7:6; \ - memd(R0 + #_PT_R0504) = R5:4; \ - R13:12 = C3:2; } \ - { memd(R0 + #_PT_GPUGP) = R31:30; \ - memd(R0 + #_PT_LC0SA0) = R15:14; \ - R11:10 = C7:6; }\ - { THREADINFO_REG = and(R0, # ## #-_THREAD_SIZE); \ - memd(R0 + #_PT_LC1SA1) = R13:12; \ - R15 = p3:0; }\ - { memd(R0 + #_PT_M1M0) = R11:10; \ - memw(R0 + #_PT_PREDSUSR + 4) = R15; }\ - { r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \ - memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \ - R2 = #-1; } \ - { memw(R0 + #_PT_SYSCALL_NR) = R2; \ - memd(R0 + #_PT_CS1CS0) = R17:16; \ - R30 = #0; } -#endif - -/* - * Restore registers and thread_info.regs state. THREADINFO_REG - * is assumed to still be sane, and R24 to have been correctly - * preserved. Don't restore R29 (SP) until later. 
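Both save_pt_regs variants above derive THREADINFO_REG with a single and(R0, #-_THREAD_SIZE). The trick, which the csky entry code's bmaski/andn pairs use as well: kernel stacks are THREAD_SIZE-sized and THREAD_SIZE-aligned, so masking the low bits off any address inside the stack yields the thread_info at its base. In C (the size and struct layout are placeholders):

#include <stdint.h>

#define THREAD_SIZE_SK	16384u		/* must be a power of two */

struct thread_info_sk {
	void *pt_regs;			/* _THREAD_INFO_PT_REGS */
	unsigned long flags;		/* _THREAD_INFO_FLAGS */
};

static inline struct thread_info_sk *ti_from_sp(uintptr_t sp)
{
	/* and(R0, # ## #-_THREAD_SIZE) in save_pt_regs() */
	return (struct thread_info_sk *)(sp & ~(uintptr_t)(THREAD_SIZE_SK - 1));
}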
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define restore_pt_regs() \ - { memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \ - R15:14 = memd(R0 + #_PT_PREDSUSR); } \ - { R11:10 = memd(R0 + #_PT_M1M0); \ - p3:0 = R15; } \ - { R13:12 = memd(R0 + #_PT_LC1SA1); \ - usr = R14; } \ - { R15:14 = memd(R0 + #_PT_LC0SA0); \ - m1 = R11; } \ - { R3:2 = memd(R0 + #_PT_R0302); \ - m0 = R10; } \ - { R5:4 = memd(R0 + #_PT_R0504); \ - lc1 = R13; } \ - { R7:6 = memd(R0 + #_PT_R0706); \ - sa1 = R12; } \ - { R9:8 = memd(R0 + #_PT_R0908); \ - lc0 = R15; } \ - { R11:10 = memd(R0 + #_PT_R1110); \ - sa0 = R14; } \ - { R13:12 = memd(R0 + #_PT_R1312); \ - R15:14 = memd(R0 + #_PT_R1514); } \ - { R17:16 = memd(R0 + #_PT_R1716); \ - R19:18 = memd(R0 + #_PT_R1918); } \ - { R21:20 = memd(R0 + #_PT_R2120); \ - R23:22 = memd(R0 + #_PT_R2322); } \ - { R25:24 = memd(R0 + #_PT_R2524); \ - R27:26 = memd(R0 + #_PT_R2726); } \ - R31:30 = memd(R0 + #_PT_GPUGP); \ - { R28 = memw(R0 + #_PT_R2928); \ - ugp = R31; } \ - { R31:30 = memd(R0 + #_PT_R3130); \ - gp = R30; } -#else -/* V4+ */ -#define restore_pt_regs() \ - { memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \ - R15:14 = memd(R0 + #_PT_PREDSUSR); } \ - { R11:10 = memd(R0 + #_PT_M1M0); \ - R13:12 = memd(R0 + #_PT_LC1SA1); \ - p3:0 = R15; } \ - { R15:14 = memd(R0 + #_PT_LC0SA0); \ - R3:2 = memd(R0 + #_PT_R0302); \ - usr = R14; } \ - { R5:4 = memd(R0 + #_PT_R0504); \ - R7:6 = memd(R0 + #_PT_R0706); \ - C7:6 = R11:10; }\ - { R9:8 = memd(R0 + #_PT_R0908); \ - R11:10 = memd(R0 + #_PT_R1110); \ - C3:2 = R13:12; }\ - { R13:12 = memd(R0 + #_PT_R1312); \ - R15:14 = memd(R0 + #_PT_R1514); \ - C1:0 = R15:14; }\ - { R17:16 = memd(R0 + #_PT_R1716); \ - R19:18 = memd(R0 + #_PT_R1918); } \ - { R21:20 = memd(R0 + #_PT_R2120); \ - R23:22 = memd(R0 + #_PT_R2322); } \ - { R25:24 = memd(R0 + #_PT_R2524); \ - R27:26 = memd(R0 + #_PT_R2726); } \ - R31:30 = memd(R0 + #_PT_CS1CS0); \ - { C13:12 = R31:30; \ - R31:30 = memd(R0 + #_PT_GPUGP) ; \ - R28 = memw(R0 + #_PT_R2928); }\ - { C11:10 = R31:30; \ - R31:30 = memd(R0 + #_PT_R3130); } -#endif - - /* - * Clears off enough space for the rest of pt_regs; evrec is a part - * of pt_regs in HVM mode. Save R0/R1, set handler's address in R1. - * R0 is the address of pt_regs and is the parameter to save_pt_regs. - */ - -/* - * Since the HVM isn't automagically pushing the EVREC onto the stack anymore, - * we'll subract the entire size out and then fill it in ourselves. - * Need to save off R0, R1, R2, R3 immediately. 
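One subtlety in the vm_event_entry() macros that follow: the store of R1:0 shares an instruction packet with the stack-pointer decrement, and Hexagon packet semantics have every instruction read the pre-packet register values (absent .new forms). The store therefore sees the old R29 and must fold the decrement into its displacement, which is why it is written as #(_PT_R0100 + -_PT_REGS_SIZE). A sequential C equivalent, with illustrative stand-ins for the asm-offsets values:

    #include <stdint.h>
    #include <string.h>

    #define PT_REGS_SIZE 256             /* illustrative */
    #define PT_R0100     0               /* illustrative offset of R1:0 */

    static void reserve_and_save(uint8_t **sp, uint64_t r1r0)
    {
        uint8_t *old_sp = *sp;               /* value the packet reads  */
        *sp = old_sp - PT_REGS_SIZE;         /* R29 = add(R29, #-SIZE)  */
        /* new_sp + PT_R0100 == old_sp + (PT_R0100 - PT_REGS_SIZE) */
        memcpy(old_sp + PT_R0100 - PT_REGS_SIZE, &r1r0, sizeof(r1r0));
    }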
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define vm_event_entry(CHandler) \ - { \ - R29 = add(R29, #-(_PT_REGS_SIZE)); \ - memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \ - } \ - { \ - memd(R29 +#_PT_R0302) = R3:2; \ - } \ - trap1(#HVM_TRAP1_VMGETREGS); \ - { \ - memd(R29 + #_PT_ER_VMEL) = R1:0; \ - R0 = R29; \ - R1.L = #LO(CHandler); \ - } \ - { \ - memd(R29 + #_PT_ER_VMPSP) = R3:2; \ - R1.H = #HI(CHandler); \ - jump event_dispatch; \ - } -#else -/* V4+ */ -/* turn on I$ prefetch early */ -/* the # ## # syntax inserts a literal ## */ -#define vm_event_entry(CHandler) \ - { \ - R29 = add(R29, #-(_PT_REGS_SIZE)); \ - memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \ - memd(R29 + #(_PT_R0302 + -_PT_REGS_SIZE)) = R3:2; \ - R0 = usr; \ - } \ - { \ - memw(R29 + #_PT_PREDSUSR) = R0; \ - R0 = setbit(R0, #16); \ - } \ - usr = R0; \ - R1:0 = G1:0; \ - { \ - memd(R29 + #_PT_ER_VMEL) = R1:0; \ - R1 = # ## #(CHandler); \ - R3:2 = G3:2; \ - } \ - { \ - R0 = R29; \ - memd(R29 + #_PT_ER_VMPSP) = R3:2; \ - jump event_dispatch; \ - } -#endif - -.text - /* - * Do bulk save/restore in one place. - * Adds a jump to dispatch latency, but - * saves hundreds of bytes. - */ - -event_dispatch: - save_pt_regs() - callr r1 - - /* - * Coming back from the C-world, our thread info pointer - * should be in the designated register (usually R19) - * - * If we were in kernel mode, we don't need to check scheduler - * or signals if CONFIG_PREEMPT is not set. If set, then it has - * to jump to a need_resched kind of block. - * BTW, CONFIG_PREEMPT is not supported yet. - */ - -#ifdef CONFIG_PREEMPT - R0 = #VM_INT_DISABLE - trap1(#HVM_TRAP1_VMSETIE) -#endif - - /* "Nested control path" -- if the previous mode was kernel */ - { - R0 = memw(R29 + #_PT_ER_VMEST); - R26.L = #LO(do_work_pending); - } - { - P0 = tstbit(R0, #HVM_VMEST_UM_SFT); - if (!P0.new) jump:nt restore_all; - R26.H = #HI(do_work_pending); - R0 = #VM_INT_DISABLE; - } - - /* - * Check also the return from fork/system call, normally coming back from - * user mode - * - * R26 needs to have do_work_pending, and R0 should have VM_INT_DISABLE - */ - -check_work_pending: - /* Disable interrupts while checking TIF */ - trap1(#HVM_TRAP1_VMSETIE) - { - R0 = R29; /* regs should still be at top of stack */ - R1 = memw(THREADINFO_REG + #_THREAD_INFO_FLAGS); - callr R26; - } - - { - P0 = cmp.eq(R0, #0); if (!P0.new) jump:nt check_work_pending; - R0 = #VM_INT_DISABLE; - } - -restore_all: - /* - * Disable interrupts, if they weren't already, before reg restore. - * R0 gets preloaded with #VM_INT_DISABLE before we get here. 
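The check_work_pending loop above has a simple C shape: disable interrupts, sample the TIF flags, and keep calling the work handler until it reports that nothing was done (handling work re-enables interrupts, so new work may arrive in between). A sketch with hypothetical helper names standing in for the trap1 sequences and the thread_info load:

    struct pt_regs;

    /* Hypothetical helpers; the assembly uses trap1(#HVM_TRAP1_VMSETIE)
     * and a load from THREADINFO_REG respectively. */
    extern void vm_int_disable(void);
    extern unsigned long current_tif_flags(void);
    extern int do_work_pending(struct pt_regs *regs, unsigned long flags);

    static void exit_to_user(struct pt_regs *regs)
    {
        for (;;) {
            vm_int_disable();
            if (!do_work_pending(regs, current_tif_flags()))
                break;              /* nothing done: go to restore_all */
            /* nonzero return: work was handled with interrupts enabled,
             * so re-check before leaving the kernel. */
        }
    }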
- */ - trap1(#HVM_TRAP1_VMSETIE) - - /* do the setregs here for VM 0.5 */ - /* R29 here should already be pointing at pt_regs */ - { - R1:0 = memd(R29 + #_PT_ER_VMEL); - R3:2 = memd(R29 + #_PT_ER_VMPSP); - } -#if CONFIG_HEXAGON_ARCH_VERSION < 4 - trap1(#HVM_TRAP1_VMSETREGS); -#else - G1:0 = R1:0; - G3:2 = R3:2; -#endif - - R0 = R29 - restore_pt_regs() - { - R1:0 = memd(R29 + #_PT_R0100); - R29 = add(R29, #_PT_REGS_SIZE); - } - trap1(#HVM_TRAP1_VMRTE) - /* Notreached */ - - - .globl _K_enter_genex -_K_enter_genex: - vm_event_entry(do_genex) - - .globl _K_enter_interrupt -_K_enter_interrupt: - vm_event_entry(arch_do_IRQ) - - .globl _K_enter_trap0 -_K_enter_trap0: - vm_event_entry(do_trap0) - - .globl _K_enter_machcheck -_K_enter_machcheck: - vm_event_entry(do_machcheck) - - .globl _K_enter_debug -_K_enter_debug: - vm_event_entry(do_debug_exception) - - .globl ret_from_fork -ret_from_fork: - { - call schedule_tail - R26.H = #HI(do_work_pending); - } - { - P0 = cmp.eq(R24, #0); - R26.L = #LO(do_work_pending); - R0 = #VM_INT_DISABLE; - } - if (P0) jump check_work_pending - { - R0 = R25; - callr R24 - } - { - jump check_work_pending - R0 = #VM_INT_DISABLE; - } diff --git a/arch/hexagon/kernel/vm_init_segtable.S b/arch/hexagon/kernel/vm_init_segtable.S deleted file mode 100644 index 2638a090636101b7c5cd63193c85c3d39060197c..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_init_segtable.S +++ /dev/null @@ -1,429 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Initial page table for Linux kernel under Hexagon VM, - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * These tables are pre-computed and linked into kernel. - */ - -#include -/* #include */ - -/* - * Start with mapping PA=0 to both VA=0x0 and VA=0xc000000 as 16MB large pages. - * No user mode access, RWX, write-back cache. The entry needs - * to be replicated for all 4 virtual segments mapping to the page. 
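The BKP() macro defined below packs one page-directory entry: the physical address masked to the block boundary, OR'd with read/write/execute permission bits, a cacheability code shifted up to bit 6, and the block-size field. The same packing in C, with made-up bit positions standing in for the HVM header constants:

    #include <stdint.h>

    #define PDE_S_16MB  6u               /* size code in bits 2:0 (example)   */
    #define PTE_R       (1u << 3)        /* illustrative permission layout    */
    #define PTE_W       (1u << 4)
    #define PTE_X       (1u << 5)
    #define C_WB_L2     7u               /* write-back, L2 cacheable (example)*/
    #define PGMASK_4MB  0xffc00000u

    static inline uint32_t big_kernel_page(uint32_t pa)
    {
        return (pa & PGMASK_4MB) | PTE_R | PTE_W | PTE_X
               | (C_WB_L2 << 6) | PDE_S_16MB;
    }

One visible consequence of the replication rule stated above: the UART_PTE_ENTRY row further down repeats a single 16MB device mapping across four consecutive 4MB table slots.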
- */ - -/* "Big Kernel Page" */ -#define BKP(pa) (((pa) & __HVM_PTE_PGMASK_4MB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_WB_L2 << 6 \ - | __HVM_PDE_S_16MB) - -/* No cache version */ - -#define BKPG_IO(pa) (((pa) & __HVM_PTE_PGMASK_16MB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HVM_PDE_S_16MB | __HEXAGON_C_DEV << 6 ) - -#define FOURK_IO(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_DEV << 6 ) - -#define L2_PTR(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \ - | __HVM_PDE_S_4KB ) - -#define X __HVM_PDE_S_INVALID - - .p2align 12 - .globl swapper_pg_dir - .globl _K_init_segtable -swapper_pg_dir: -/* VA 0x00000000 */ - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/* VA 0x40000000 */ - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/* VA 0x80000000 */ - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/*0xa8*/.word X,X,X,X -#ifdef CONFIG_COMET_EARLY_UART_DEBUG -UART_PTE_ENTRY: -/*0xa9*/.word BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000) -#else -/*0xa9*/.word X,X,X,X -#endif -/*0xaa*/.word X,X,X,X -/*0xab*/.word X,X,X,X -/*0xac*/.word X,X,X,X -/*0xad*/.word X,X,X,X -/*0xae*/.word X,X,X,X -/*0xaf*/.word X,X,X,X -/*0xb0*/.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -_K_init_segtable: -/* VA 0xC0000000 */ - .word BKP(0x00000000), BKP(0x00400000), BKP(0x00800000), BKP(0x00c00000) - .word BKP(0x01000000), BKP(0x01400000), BKP(0x01800000), BKP(0x01c00000) - .word BKP(0x02000000), BKP(0x02400000), BKP(0x02800000), BKP(0x02c00000) - .word BKP(0x03000000), BKP(0x03400000), BKP(0x03800000), BKP(0x03c00000) - .word BKP(0x04000000), BKP(0x04400000), BKP(0x04800000), BKP(0x04c00000) - .word BKP(0x05000000), BKP(0x05400000), BKP(0x05800000), BKP(0x05c00000) - .word BKP(0x06000000), BKP(0x06400000), BKP(0x06800000), BKP(0x06c00000) - .word BKP(0x07000000), BKP(0x07400000), BKP(0x07800000), BKP(0x07c00000) - - .word BKP(0x08000000), BKP(0x08400000), BKP(0x08800000), BKP(0x08c00000) - .word BKP(0x09000000), BKP(0x09400000), 
BKP(0x09800000), BKP(0x09c00000) - .word BKP(0x0a000000), BKP(0x0a400000), BKP(0x0a800000), BKP(0x0ac00000) - .word BKP(0x0b000000), BKP(0x0b400000), BKP(0x0b800000), BKP(0x0bc00000) - .word BKP(0x0c000000), BKP(0x0c400000), BKP(0x0c800000), BKP(0x0cc00000) - .word BKP(0x0d000000), BKP(0x0d400000), BKP(0x0d800000), BKP(0x0dc00000) - .word BKP(0x0e000000), BKP(0x0e400000), BKP(0x0e800000), BKP(0x0ec00000) - .word BKP(0x0f000000), BKP(0x0f400000), BKP(0x0f800000), BKP(0x0fc00000) - - .word BKP(0x10000000), BKP(0x10400000), BKP(0x10800000), BKP(0x10c00000) - .word BKP(0x11000000), BKP(0x11400000), BKP(0x11800000), BKP(0x11c00000) - .word BKP(0x12000000), BKP(0x12400000), BKP(0x12800000), BKP(0x12c00000) - .word BKP(0x13000000), BKP(0x13400000), BKP(0x13800000), BKP(0x13c00000) - .word BKP(0x14000000), BKP(0x14400000), BKP(0x14800000), BKP(0x14c00000) - .word BKP(0x15000000), BKP(0x15400000), BKP(0x15800000), BKP(0x15c00000) - .word BKP(0x16000000), BKP(0x16400000), BKP(0x16800000), BKP(0x16c00000) - .word BKP(0x17000000), BKP(0x17400000), BKP(0x17800000), BKP(0x17c00000) - - .word BKP(0x18000000), BKP(0x18400000), BKP(0x18800000), BKP(0x18c00000) - .word BKP(0x19000000), BKP(0x19400000), BKP(0x19800000), BKP(0x19c00000) - .word BKP(0x1a000000), BKP(0x1a400000), BKP(0x1a800000), BKP(0x1ac00000) - .word BKP(0x1b000000), BKP(0x1b400000), BKP(0x1b800000), BKP(0x1bc00000) - .word BKP(0x1c000000), BKP(0x1c400000), BKP(0x1c800000), BKP(0x1cc00000) - .word BKP(0x1d000000), BKP(0x1d400000), BKP(0x1d800000), BKP(0x1dc00000) - .word BKP(0x1e000000), BKP(0x1e400000), BKP(0x1e800000), BKP(0x1ec00000) - .word BKP(0x1f000000), BKP(0x1f400000), BKP(0x1f800000), BKP(0x1fc00000) - - .word BKP(0x20000000), BKP(0x20400000), BKP(0x20800000), BKP(0x20c00000) - .word BKP(0x21000000), BKP(0x21400000), BKP(0x21800000), BKP(0x21c00000) - .word BKP(0x22000000), BKP(0x22400000), BKP(0x22800000), BKP(0x22c00000) - .word BKP(0x23000000), BKP(0x23400000), BKP(0x23800000), BKP(0x23c00000) - .word BKP(0x24000000), BKP(0x24400000), BKP(0x24800000), BKP(0x24c00000) - .word BKP(0x25000000), BKP(0x25400000), BKP(0x25800000), BKP(0x25c00000) - .word BKP(0x26000000), BKP(0x26400000), BKP(0x26800000), BKP(0x26c00000) - .word BKP(0x27000000), BKP(0x27400000), BKP(0x27800000), BKP(0x27c00000) - - .word BKP(0x28000000), BKP(0x28400000), BKP(0x28800000), BKP(0x28c00000) - .word BKP(0x29000000), BKP(0x29400000), BKP(0x29800000), BKP(0x29c00000) - .word BKP(0x2a000000), BKP(0x2a400000), BKP(0x2a800000), BKP(0x2ac00000) - .word BKP(0x2b000000), BKP(0x2b400000), BKP(0x2b800000), BKP(0x2bc00000) - .word BKP(0x2c000000), BKP(0x2c400000), BKP(0x2c800000), BKP(0x2cc00000) - .word BKP(0x2d000000), BKP(0x2d400000), BKP(0x2d800000), BKP(0x2dc00000) - .word BKP(0x2e000000), BKP(0x2e400000), BKP(0x2e800000), BKP(0x2ec00000) - .word BKP(0x2f000000), BKP(0x2f400000), BKP(0x2f800000), BKP(0x2fc00000) - - .word BKP(0x30000000), BKP(0x30400000), BKP(0x30800000), BKP(0x30c00000) - .word BKP(0x31000000), BKP(0x31400000), BKP(0x31800000), BKP(0x31c00000) - .word BKP(0x32000000), BKP(0x32400000), BKP(0x32800000), BKP(0x32c00000) - .word BKP(0x33000000), BKP(0x33400000), BKP(0x33800000), BKP(0x33c00000) - .word BKP(0x34000000), BKP(0x34400000), BKP(0x34800000), BKP(0x34c00000) - .word BKP(0x35000000), BKP(0x35400000), BKP(0x35800000), BKP(0x35c00000) - .word BKP(0x36000000), BKP(0x36400000), BKP(0x36800000), BKP(0x36c00000) - .word BKP(0x37000000), BKP(0x37400000), BKP(0x37800000), BKP(0x37c00000) - - .word BKP(0x38000000), BKP(0x38400000), BKP(0x38800000), 
BKP(0x38c00000) - .word BKP(0x39000000), BKP(0x39400000), BKP(0x39800000), BKP(0x39c00000) - .word BKP(0x3a000000), BKP(0x3a400000), BKP(0x3a800000), BKP(0x3ac00000) - .word BKP(0x3b000000), BKP(0x3b400000), BKP(0x3b800000), BKP(0x3bc00000) - .word BKP(0x3c000000), BKP(0x3c400000), BKP(0x3c800000), BKP(0x3cc00000) - .word BKP(0x3d000000), BKP(0x3d400000), BKP(0x3d800000), BKP(0x3dc00000) -_K_io_map: - .word X,X,X,X /* 0x3e000000 - device IO early remap */ - .word X,X,X,X /* 0x3f000000 - hypervisor space*/ - -#if 0 -/* - * This is in here as an example for devices which need to be mapped really - * early. - */ - .p2align 12 - .globl _K_io_kmap - .globl _K_init_devicetable -_K_init_devicetable: /* Should be 4MB worth of entries */ - .word FOURK_IO(MSM_GPIO1_PHYS),FOURK_IO(MSM_GPIO2_PHYS),FOURK_IO(MSM_SIRC_PHYS),X - .word FOURK_IO(TLMM_GPIO1_PHYS),X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - 
.word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X -#endif diff --git a/arch/hexagon/kernel/vm_ops.S b/arch/hexagon/kernel/vm_ops.S deleted file mode 100644 index f61c04d485f6f9111a89048960c14029b909c321..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_ops.S +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hexagon VM instruction support - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#include -#include - -/* - * C wrappers for virtual machine "instructions". These - * could be, and perhaps some day will be, handled as in-line - * macros, but for tracing/debugging it's handy to have - * a single point of invocation for each of them. - * Conveniently, they take parameters and return values - * consistent with the ABI calling convention. 
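Because the trap wrappers that follow preserve the standard calling convention, C code can declare them as ordinary functions: arguments go in where the caller put them, and the virtual machine's result comes back in R0. Hypothetical prototypes for a few of them (the exact signatures are not spelled out in this file and are assumptions here):

    /* Hypothetical prototypes; each wrapper is a single trap1 plus
     * return, so it behaves like a normal leaf function. */
    long __vmgetie(void);                /* query interrupt-enable state   */
    long __vmsetie(long enable);         /* set it; prior state in return  */
    long __vmwait(void);                 /* idle until the next interrupt  */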
- */ - -ENTRY(__vmrte) - trap1(#HVM_TRAP1_VMRTE); - jumpr R31; - -ENTRY(__vmsetvec) - trap1(#HVM_TRAP1_VMSETVEC); - jumpr R31; - -ENTRY(__vmsetie) - trap1(#HVM_TRAP1_VMSETIE); - jumpr R31; - -ENTRY(__vmgetie) - trap1(#HVM_TRAP1_VMGETIE); - jumpr R31; - -ENTRY(__vmintop) - trap1(#HVM_TRAP1_VMINTOP); - jumpr R31; - -ENTRY(__vmclrmap) - trap1(#HVM_TRAP1_VMCLRMAP); - jumpr R31; - -ENTRY(__vmnewmap) - r1 = #VM_NEWMAP_TYPE_PGTABLES; - trap1(#HVM_TRAP1_VMNEWMAP); - jumpr R31; - -ENTRY(__vmcache) - trap1(#HVM_TRAP1_VMCACHE); - jumpr R31; - -ENTRY(__vmgettime) - trap1(#HVM_TRAP1_VMGETTIME); - jumpr R31; - -ENTRY(__vmsettime) - trap1(#HVM_TRAP1_VMSETTIME); - jumpr R31; - -ENTRY(__vmwait) - trap1(#HVM_TRAP1_VMWAIT); - jumpr R31; - -ENTRY(__vmyield) - trap1(#HVM_TRAP1_VMYIELD); - jumpr R31; - -ENTRY(__vmstart) - trap1(#HVM_TRAP1_VMSTART); - jumpr R31; - -ENTRY(__vmstop) - trap1(#HVM_TRAP1_VMSTOP); - jumpr R31; - -ENTRY(__vmvpid) - trap1(#HVM_TRAP1_VMVPID); - jumpr R31; - -/* Probably not actually going to use these; see vm_entry.S */ - -ENTRY(__vmsetregs) - trap1(#HVM_TRAP1_VMSETREGS); - jumpr R31; - -ENTRY(__vmgetregs) - trap1(#HVM_TRAP1_VMGETREGS); - jumpr R31; diff --git a/arch/hexagon/kernel/vm_switch.S b/arch/hexagon/kernel/vm_switch.S deleted file mode 100644 index 5ec2d43fee1f94a3931a87a9fada10e6918c40e6..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_switch.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Context switch support for Hexagon - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#include - -.text - -/* - * The register used as a fast-path thread information pointer - * is determined as a kernel configuration option. If it happens - * to be a callee-save register, we're going to be saving and - * restoring it twice here. - * - * This code anticipates a revised ABI where R20-23 are added - * to the set of callee-save registers, but this should be - * backward compatible to legacy tools. - */ - - -/* - * void switch_to(struct task_struct *prev, - * struct task_struct *next, struct task_struct *last); - */ - .p2align 2 - .globl __switch_to - .type __switch_to, @function - -/* - * When we exit the wormhole, we need to store the previous task - * in the new R0's pointer. Technically it should be R2, but they should - * be the same; seems like a legacy thing. In short, don't butcher - * R0, let it go back out unmolested. - */ - -__switch_to: - /* - * Push callee-saves onto "prev" stack. - * Here, we're sneaky because the LR and FP - * storage of the thread_stack structure - * is automagically allocated by allocframe, - * so we pass struct size less 8. 
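At the C level, the __switch_to sequence below is a stack swap with bookkeeping: push callee-saves on the outgoing stack, record that stack pointer in the outgoing thread_struct, adopt the incoming thread's saved stack pointer, and pop its callee-saves, reloading THREADINFO_REG last since it may itself be one of the saved registers. A structural sketch with hypothetical helpers standing in for the allocframe/memd save and restore:

    struct thread_struct { void *switch_sp; };
    struct task_struct {
        void *thread_info;
        struct thread_struct thread;
    };

    /* Hypothetical: stand-ins for the register save/restore done in
     * hand-written assembly. */
    extern void *push_callee_saves(void);
    extern void pop_callee_saves(void *sp);

    static void switch_stacks(struct task_struct *prev,
                              struct task_struct *next)
    {
        prev->thread.switch_sp = push_callee_saves();
        pop_callee_saves(next->thread.switch_sp);
        /* execution now continues on next's kernel stack */
    }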
- */ - allocframe(#(_SWITCH_STACK_SIZE - 8)); - memd(R29+#(_SWITCH_R2726))=R27:26; - memd(R29+#(_SWITCH_R2524))=R25:24; - memd(R29+#(_SWITCH_R2322))=R23:22; - memd(R29+#(_SWITCH_R2120))=R21:20; - memd(R29+#(_SWITCH_R1918))=R19:18; - memd(R29+#(_SWITCH_R1716))=R17:16; - /* Stash thread_info pointer in task_struct */ - memw(R0+#_TASK_THREAD_INFO) = THREADINFO_REG; - memw(R0 +#(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP)) = R29; - /* Switch to "next" stack and restore callee saves from there */ - R29 = memw(R1 + #(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP)); - { - R27:26 = memd(R29+#(_SWITCH_R2726)); - R25:24 = memd(R29+#(_SWITCH_R2524)); - } - { - R23:22 = memd(R29+#(_SWITCH_R2322)); - R21:20 = memd(R29+#(_SWITCH_R2120)); - } - { - R19:18 = memd(R29+#(_SWITCH_R1918)); - R17:16 = memd(R29+#(_SWITCH_R1716)); - } - { - /* THREADINFO_REG is currently one of the callee-saved regs - * above, and so be sure to re-load it last. - */ - THREADINFO_REG = memw(R1 + #_TASK_THREAD_INFO); - R31:30 = memd(R29+#_SWITCH_FP); - } - { - R29 = add(R29,#_SWITCH_STACK_SIZE); - jumpr R31; - } - .size __switch_to, .-__switch_to diff --git a/arch/hexagon/kernel/vm_vectors.S b/arch/hexagon/kernel/vm_vectors.S deleted file mode 100644 index fba33745ce579c4071730c181f76345f15c21724..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_vectors.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Event jump tables - * - * Copyright (c) 2010-2012,2013, The Linux Foundation. All rights reserved. - */ - -#include - -.text - -/* This is registered early on to allow angel */ -.global _K_provisional_vec -_K_provisional_vec: - jump 1f; - jump 1f; - jump 1f; - jump 1f; - jump 1f; - trap1(#HVM_TRAP1_VMRTE) - jump 1f; - jump 1f; - - -.global _K_VM_event_vector -_K_VM_event_vector: -1: - jump 1b; /* Reset */ - jump _K_enter_machcheck; - jump _K_enter_genex; - jump _K_enter_debug; - jump 1b; /* 4 Rsvd */ - jump _K_enter_trap0; - jump 1b; /* 6 Rsvd */ - jump _K_enter_interrupt; diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S deleted file mode 100644 index 78f2418e97c8425dc2898d5d1fee76b4da30f708..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Linker script for Hexagon kernel - * - * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. - */ - -#include -#include /* Most of the kernel defines are here */ -#include /* except for page_offset */ -#include /* and now we're pulling cache line size */ -#include /* and we need THREAD_SIZE too */ - -OUTPUT_ARCH(hexagon) -ENTRY(stext) - -jiffies = jiffies_64; - -/* -See asm-generic/vmlinux.lds.h for expansion of some of these macros. -See asm-generic/sections.h for seemingly required labels. -*/ - -#define PAGE_SIZE _PAGE_SIZE - -SECTIONS -{ - . = PAGE_OFFSET; - - __init_begin = .; - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(PAGE_SIZE) - PERCPU_SECTION(L1_CACHE_BYTES) - __init_end = .; - - . 
= ALIGN(_PAGE_SIZE); - _stext = .; - .text : AT(ADDR(.text)) { - _text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - } - _etext = .; - - INIT_DATA_SECTION(PAGE_SIZE) - - _sdata = .; - RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE) - RO_DATA_SECTION(PAGE_SIZE) - _edata = .; - - EXCEPTION_TABLE(16) - NOTES - - BSS_SECTION(_PAGE_SIZE, _PAGE_SIZE, _PAGE_SIZE) - - _end = .; - - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - EXIT_CALL - } - - STABS_DEBUG - DWARF_DEBUG - -} diff --git a/arch/hexagon/lib/memcpy.S b/arch/hexagon/lib/memcpy.S deleted file mode 100644 index f8b3c02a8ad1bdb154ec9b82b9cabd57f72256ed..0000000000000000000000000000000000000000 --- a/arch/hexagon/lib/memcpy.S +++ /dev/null @@ -1,529 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * Description - * - * library function for memcpy where length bytes are copied from - * ptr_in to ptr_out. ptr_out is returned unchanged. - * Allows any combination of alignment on input and output pointers - * and length from 0 to 2^32-1 - * - * Restrictions - * The arrays should not overlap, the program will produce undefined output - * if they do. - * For blocks less than 16 bytes a byte by byte copy is performed. For - * 8byte alignments, and length multiples, a dword copy is performed up to - * 96bytes - * History - * - * DJH 5/15/09 Initial version 1.0 - * DJH 6/ 1/09 Version 1.1 modified ABI to inlcude R16-R19 - * DJH 7/12/09 Version 1.2 optimized codesize down to 760 was 840 - * DJH 10/14/09 Version 1.3 added special loop for aligned case, was - * overreading bloated codesize back up to 892 - * DJH 4/20/10 Version 1.4 fixed Ldword_loop_epilog loop to prevent loads - * occurring if only 1 left outstanding, fixes bug - * # 3888, corrected for all alignments. Peeled off - * 1 32byte chunk from kernel loop and extended 8byte - * loop at end to solve all combinations and prevent - * over read. Fixed Ldword_loop_prolog to prevent - * overread for blocks less than 48bytes. Reduced - * codesize to 752 bytes - * DJH 4/21/10 version 1.5 1.4 fix broke code for input block ends not - * aligned to dword boundaries,underwriting by 1 - * byte, added detection for this and fixed. A - * little bloat. 
- * DJH 4/23/10 version 1.6 corrected stack error, R20 was not being restored - * always, fixed the error of R20 being modified - * before it was being saved - * Natural c model - * =============== - * void * memcpy(char * ptr_out, char * ptr_in, int length) { - * int i; - * if(length) for(i=0; i < length; i++) { ptr_out[i] = ptr_in[i]; } - * return(ptr_out); - * } - * - * Optimized memcpy function - * ========================= - * void * memcpy(char * ptr_out, char * ptr_in, int len) { - * int i, prolog, kernel, epilog, mask; - * u8 offset; - * s64 data0, dataF8, data70; - * - * s64 * ptr8_in; - * s64 * ptr8_out; - * s32 * ptr4; - * s16 * ptr2; - * - * offset = ((int) ptr_in) & 7; - * ptr8_in = (s64 *) &ptr_in[-offset]; //read in the aligned pointers - * - * data70 = *ptr8_in++; - * dataF8 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * - * prolog = 32 - ((int) ptr_out); - * mask = 0x7fffffff >> HEXAGON_R_cl0_R(len); - * prolog = prolog & mask; - * kernel = len - prolog; - * epilog = kernel & 0x1F; - * kernel = kernel>>5; - * - * if (prolog & 1) { ptr_out[0] = (u8) data0; data0 >>= 8; ptr_out += 1;} - * ptr2 = (s16 *) &ptr_out[0]; - * if (prolog & 2) { ptr2[0] = (u16) data0; data0 >>= 16; ptr_out += 2;} - * ptr4 = (s32 *) &ptr_out[0]; - * if (prolog & 4) { ptr4[0] = (u32) data0; data0 >>= 32; ptr_out += 4;} - * - * offset = offset + (prolog & 7); - * if (offset >= 8) { - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * offset = offset & 0x7; - * - * prolog = prolog >> 3; - * if (prolog) for (i=0; i < prolog; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * if(kernel) { kernel -= 1; epilog += 32; } - * if(kernel) for(i=0; i < kernel; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * dataF8 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * dataF8 = *ptr8_in++; - * } - * epilogdws = epilog >> 3; - * if (epilogdws) for (i=0; i < epilogdws; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * - * ptr4 = (s32 *) &ptr_out[0]; - * if (epilog & 4) { ptr4[0] = (u32) data0; data0 >>= 32; ptr_out += 4;} - * ptr2 = (s16 *) &ptr_out[0]; - * if (epilog & 2) { ptr2[0] = (u16) data0; data0 >>= 16; ptr_out += 2;} - * if (epilog & 1) { *ptr_out++ = (u8) data0; } - * - * return(ptr_out - length); - * } - * - * Codesize : 784 bytes - */ - - -#define ptr_out R0 /* destination pounter */ -#define ptr_in R1 /* source pointer */ -#define len R2 /* length of copy in bytes */ - -#define data70 R13:12 /* lo 8 bytes of non-aligned transfer */ -#define dataF8 R11:10 /* hi 8 bytes of non-aligned transfer */ -#define ldata0 R7:6 /* even 8 bytes chunks */ -#define ldata1 R25:24 /* odd 8 bytes chunks */ -#define data1 R7 /* lower 8 bytes of ldata1 */ -#define 
data0 R6 /* lower 8 bytes of ldata0 */ - -#define ifbyte p0 /* if transfer has bytes in epilog/prolog */ -#define ifhword p0 /* if transfer has shorts in epilog/prolog */ -#define ifword p0 /* if transfer has words in epilog/prolog */ -#define noprolog p0 /* no prolog, xfer starts at 32byte */ -#define nokernel p1 /* no 32byte multiple block in the transfer */ -#define noepilog p0 /* no epilog, xfer ends on 32byte boundary */ -#define align p2 /* alignment of input rel to 8byte boundary */ -#define kernel1 p0 /* kernel count == 1 */ - -#define dalign R25 /* rel alignment of input to output data */ -#define star3 R16 /* number bytes in prolog - dwords */ -#define rest R8 /* length - prolog bytes */ -#define back R7 /* nr bytes > dword boundary in src block */ -#define epilog R3 /* bytes in epilog */ -#define inc R15:14 /* inc kernel by -1 and defetch ptr by 32 */ -#define kernel R4 /* number of 32byte chunks in kernel */ -#define ptr_in_p_128 R5 /* pointer for prefetch of input data */ -#define mask R8 /* mask used to determine prolog size */ -#define shift R8 /* used to work a shifter to extract bytes */ -#define shift2 R5 /* in epilog to workshifter to extract bytes */ -#define prolog R15 /* bytes in prolog */ -#define epilogdws R15 /* number dwords in epilog */ -#define shiftb R14 /* used to extract bytes */ -#define offset R9 /* same as align in reg */ -#define ptr_out_p_32 R17 /* pointer to output dczero */ -#define align888 R14 /* if simple dword loop can be used */ -#define len8 R9 /* number of dwords in length */ -#define over R20 /* nr of bytes > last inp buf dword boundary */ - -#define ptr_in_p_128kernel R5:4 /* packed fetch pointer & kernel cnt */ - - .section .text - .p2align 4 - .global memcpy - .type memcpy, @function -memcpy: -{ - p2 = cmp.eq(len, #0); /* =0 */ - align888 = or(ptr_in, ptr_out); /* %8 < 97 */ - p0 = cmp.gtu(len, #23); /* %1, <24 */ - p1 = cmp.eq(ptr_in, ptr_out); /* attempt to overwrite self */ -} -{ - p1 = or(p2, p1); - p3 = cmp.gtu(len, #95); /* %8 < 97 */ - align888 = or(align888, len); /* %8 < 97 */ - len8 = lsr(len, #3); /* %8 < 97 */ -} -{ - dcfetch(ptr_in); /* zero/ptrin=ptrout causes fetch */ - p2 = bitsclr(align888, #7); /* %8 < 97 */ - if(p1) jumpr r31; /* =0 */ -} -{ - p2 = and(p2,!p3); /* %8 < 97 */ - if (p2.new) len = add(len, #-8); /* %8 < 97 */ - if (p2.new) jump:NT .Ldwordaligned; /* %8 < 97 */ -} -{ - if(!p0) jump .Lbytes23orless; /* %1, <24 */ - mask.l = #LO(0x7fffffff); - /* all bytes before line multiples of data */ - prolog = sub(#0, ptr_out); -} -{ - /* save r31 on stack, decrement sp by 16 */ - allocframe(#24); - mask.h = #HI(0x7fffffff); - ptr_in_p_128 = add(ptr_in, #32); - back = cl0(len); -} -{ - memd(sp+#0) = R17:16; /* save r16,r17 on stack6 */ - r31.l = #LO(.Lmemcpy_return); /* set up final return pointer */ - prolog &= lsr(mask, back); - offset = and(ptr_in, #7); -} -{ - memd(sp+#8) = R25:24; /* save r25,r24 on stack */ - dalign = sub(ptr_out, ptr_in); - r31.h = #HI(.Lmemcpy_return); /* set up final return pointer */ -} -{ - /* see if there if input buffer end if aligned */ - over = add(len, ptr_in); - back = add(len, offset); - memd(sp+#16) = R21:20; /* save r20,r21 on stack */ -} -{ - noprolog = bitsclr(prolog, #7); - prolog = and(prolog, #31); - dcfetch(ptr_in_p_128); - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - kernel = sub(len, prolog); - shift = asl(prolog, #3); - star3 = and(prolog, #7); - ptr_in = and(ptr_in, #-8); -} -{ - prolog = lsr(prolog, #3); - epilog = and(kernel, #31); - ptr_out_p_32 = add(ptr_out, prolog); - 
over = and(over, #7); -} -{ - p3 = cmp.gtu(back, #8); - kernel = lsr(kernel, #5); - dcfetch(ptr_in_p_128); - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - p1 = cmp.eq(prolog, #0); - if(!p1.new) prolog = add(prolog, #1); - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - nokernel = cmp.eq(kernel,#0); - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); - shiftb = and(shift, #8); -} -{ - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); - if(nokernel) jump .Lskip64; - p2 = cmp.eq(kernel, #1); /* skip ovr if kernel == 0 */ -} -{ - dczeroa(ptr_out_p_32); - /* don't advance pointer */ - if(!p2) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dalign = and(dalign, #31); - dczeroa(ptr_out_p_32); -} -.Lskip64: -{ - data70 = memd(ptr_in++#16); - if(p3) dataF8 = memd(ptr_in+#8); - if(noprolog) jump .Lnoprolog32; - align = offset; -} -/* upto initial 7 bytes */ -{ - ldata0 = valignb(dataF8, data70, align); - ifbyte = tstbit(shift,#3); - offset = add(offset, star3); -} -{ - if(ifbyte) memb(ptr_out++#1) = data0; - ldata0 = lsr(ldata0, shiftb); - shiftb = and(shift, #16); - ifhword = tstbit(shift,#4); -} -{ - if(ifhword) memh(ptr_out++#2) = data0; - ldata0 = lsr(ldata0, shiftb); - ifword = tstbit(shift,#5); - p2 = cmp.gtu(offset, #7); -} -{ - if(ifword) memw(ptr_out++#4) = data0; - if(p2) data70 = dataF8; - if(p2) dataF8 = memd(ptr_in++#8); /* another 8 bytes */ - align = offset; -} -.Lnoprolog32: -{ - p3 = sp1loop0(.Ldword_loop_prolog, prolog) - rest = sub(len, star3); /* whats left after the loop */ - p0 = cmp.gt(over, #0); -} - if(p0) rest = add(rest, #16); -.Ldword_loop_prolog: -{ - if(p3) memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - p0 = cmp.gt(rest, #16); -} -{ - data70 = dataF8; - if(p0) dataF8 = memd(ptr_in++#8); - rest = add(rest, #-8); -}:endloop0 -.Lkernel: -{ - /* kernel is at least 32bytes */ - p3 = cmp.gtu(kernel, #0); - /* last itn. 
remove edge effects */ - if(p3.new) kernel = add(kernel, #-1); - /* dealt with in last dword loop */ - if(p3.new) epilog = add(epilog, #32); -} -{ - nokernel = cmp.eq(kernel, #0); /* after adjustment, recheck */ - if(nokernel.new) jump:NT .Lepilog; /* likely not taken */ - inc = combine(#32, #-1); - p3 = cmp.gtu(dalign, #24); -} -{ - if(p3) jump .Lodd_alignment; -} -{ - loop0(.Loword_loop_25to31, kernel); - kernel1 = cmp.gtu(kernel, #1); - rest = kernel; -} - .falign -.Loword_loop_25to31: -{ - dcfetch(ptr_in_p_128); /* prefetch 4 lines ahead */ - if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dczeroa(ptr_out_p_32); /* reserve the next 32bytes in cache */ - p3 = cmp.eq(kernel, rest); -} -{ - /* kernel -= 1 */ - ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); - /* kill write on first iteration */ - if(!p3) memd(ptr_out++#8) = ldata1; - ldata1 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata1; - ldata1 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); - kernel1 = cmp.gtu(kernel, #1); -}:endloop0 -{ - memd(ptr_out++#8) = ldata1; - jump .Lepilog; -} -.Lodd_alignment: -{ - loop0(.Loword_loop_00to24, kernel); - kernel1 = cmp.gtu(kernel, #1); - rest = add(kernel, #-1); -} - .falign -.Loword_loop_00to24: -{ - dcfetch(ptr_in_p_128); /* prefetch 4 lines ahead */ - ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); - if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dczeroa(ptr_out_p_32); /* reserve the next 32bytes in cache */ -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); - kernel1 = cmp.gtu(kernel, #1); -}:endloop0 -.Lepilog: -{ - noepilog = cmp.eq(epilog,#0); - epilogdws = lsr(epilog, #3); - kernel = and(epilog, #7); -} -{ - if(noepilog) jumpr r31; - if(noepilog) ptr_out = sub(ptr_out, len); - p3 = cmp.eq(epilogdws, #0); - shift2 = asl(epilog, #3); -} -{ - shiftb = and(shift2, #32); - ifword = tstbit(epilog,#2); - if(p3) jump .Lepilog60; - if(!p3) epilog = add(epilog, #-16); -} -{ - loop0(.Ldword_loop_epilog, epilogdws); - /* stop criteria is lsbs unless = 0 then its 8 */ - p3 = cmp.eq(kernel, #0); - if(p3.new) kernel= #8; - p1 = cmp.gt(over, #0); -} - /* if not aligned to end of buffer execute 1 more iteration */ - if(p1) kernel= #0; -.Ldword_loop_epilog: -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - p3 = cmp.gt(epilog, kernel); -} -{ - data70 = dataF8; - if(p3) dataF8 = memd(ptr_in++#8); - epilog = add(epilog, #-8); -}:endloop0 -/* copy last 7 bytes */ -.Lepilog60: -{ - if(ifword) memw(ptr_out++#4) = data0; - ldata0 = lsr(ldata0, shiftb); - ifhword = tstbit(epilog,#1); - shiftb = and(shift2, #16); -} -{ - if(ifhword) memh(ptr_out++#2) = data0; - ldata0 = lsr(ldata0, shiftb); - ifbyte = tstbit(epilog,#0); - if(ifbyte.new) len = add(len, #-1); -} -{ - if(ifbyte) memb(ptr_out) = data0; - ptr_out = sub(ptr_out, len); /* return dest pointer */ - jumpr r31; -} -/* do byte copy for small n */ 
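Throughout these loops, valignb(hi, lo, off) does the heavy lifting for misaligned input: it extracts the eight bytes starting off bytes into the 16-byte pair lo:hi, i.e. a byte-granular 64-bit funnel shift. Reference semantics in C (little-endian, as Hexagon is; a sketch, not the kernel's code):

    #include <stdint.h>

    static inline uint64_t valignb(uint64_t hi, uint64_t lo, unsigned off)
    {
        off &= 7;
        if (off == 0)
            return lo;               /* avoid an undefined 64-bit shift */
        return (lo >> (8 * off)) | (hi << (8 * (8 - off)));
    }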
-.Lbytes23orless: -{ - p3 = sp1loop0(.Lbyte_copy, len); - len = add(len, #-1); -} -.Lbyte_copy: -{ - data0 = memb(ptr_in++#1); - if(p3) memb(ptr_out++#1) = data0; -}:endloop0 -{ - memb(ptr_out) = data0; - ptr_out = sub(ptr_out, len); - jumpr r31; -} -/* do dword copies for aligned in, out and length */ -.Ldwordaligned: -{ - p3 = sp1loop0(.Ldword_copy, len8); -} -.Ldword_copy: -{ - if(p3) memd(ptr_out++#8) = ldata0; - ldata0 = memd(ptr_in++#8); -}:endloop0 -{ - memd(ptr_out) = ldata0; - ptr_out = sub(ptr_out, len); - jumpr r31; /* return to function caller */ -} -.Lmemcpy_return: - r21:20 = memd(sp+#16); /* restore r20+r21 */ -{ - r25:24 = memd(sp+#8); /* restore r24+r25 */ - r17:16 = memd(sp+#0); /* restore r16+r17 */ -} - deallocframe; /* restore r31 and incrment stack by 16 */ - jumpr r31 diff --git a/arch/hexagon/lib/memset.S b/arch/hexagon/lib/memset.S deleted file mode 100644 index e67304e3f7cfc6372a6642db251d1ebb3e193729..0000000000000000000000000000000000000000 --- a/arch/hexagon/lib/memset.S +++ /dev/null @@ -1,302 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2011, The Linux Foundation. All rights reserved. - */ - - -/* HEXAGON assembly optimized memset */ -/* Replaces the standard library function memset */ - - - .macro HEXAGON_OPT_FUNC_BEGIN name - .text - .p2align 4 - .globl \name - .type \name, @function -\name: - .endm - - .macro HEXAGON_OPT_FUNC_FINISH name - .size \name, . - \name - .endm - -/* FUNCTION: memset (v2 version) */ -#if __HEXAGON_ARCH__ < 3 -HEXAGON_OPT_FUNC_BEGIN memset - { - r6 = #8 - r7 = extractu(r0, #3 , #0) - p0 = cmp.eq(r2, #0) - p1 = cmp.gtu(r2, #7) - } - { - r4 = vsplatb(r1) - r8 = r0 /* leave r0 intact for return val */ - r9 = sub(r6, r7) /* bytes until double alignment */ - if p0 jumpr r31 /* count == 0, so return */ - } - { - r3 = #0 - r7 = #0 - p0 = tstbit(r9, #0) - if p1 jump 2f /* skip byte loop */ - } - -/* less than 8 bytes to set, so just set a byte at a time and return */ - - loop0(1f, r2) /* byte loop */ - .falign -1: /* byte loop */ - { - memb(r8++#1) = r4 - }:endloop0 - jumpr r31 - .falign -2: /* skip byte loop */ - { - r6 = #1 - p0 = tstbit(r9, #1) - p1 = cmp.eq(r2, #1) - if !p0 jump 3f /* skip initial byte store */ - } - { - memb(r8++#1) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -3: /* skip initial byte store */ - { - r6 = #2 - p0 = tstbit(r9, #2) - p1 = cmp.eq(r2, #2) - if !p0 jump 4f /* skip initial half store */ - } - { - memh(r8++#2) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -4: /* skip initial half store */ - { - r6 = #4 - p0 = cmp.gtu(r2, #7) - p1 = cmp.eq(r2, #4) - if !p0 jump 5f /* skip initial word store */ - } - { - memw(r8++#4) = r4 - r3:2 = sub(r3:2, r7:6) - p0 = cmp.gtu(r2, #11) - if p1 jumpr r31 - } - .falign -5: /* skip initial word store */ - { - r10 = lsr(r2, #3) - p1 = cmp.eq(r3, #1) - if !p0 jump 7f /* skip double loop */ - } - { - r5 = r4 - r6 = #8 - loop0(6f, r10) /* double loop */ - } - -/* set bytes a double word at a time */ - - .falign -6: /* double loop */ - { - memd(r8++#8) = r5:4 - r3:2 = sub(r3:2, r7:6) - p1 = cmp.eq(r2, #8) - }:endloop0 - .falign -7: /* skip double loop */ - { - p0 = tstbit(r2, #2) - if p1 jumpr r31 - } - { - r6 = #4 - p0 = tstbit(r2, #1) - p1 = cmp.eq(r2, #4) - if !p0 jump 8f /* skip final word store */ - } - { - memw(r8++#4) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -8: /* skip final word store */ - { - p1 = cmp.eq(r2, #2) - if !p0 jump 9f /* skip final half store */ - } - { - memh(r8++#2) = r4 - if p1 
jumpr r31 - } - .falign -9: /* skip final half store */ - { - memb(r8++#1) = r4 - jumpr r31 - } -HEXAGON_OPT_FUNC_FINISH memset -#endif - - -/* FUNCTION: memset (v3 and higher version) */ -#if __HEXAGON_ARCH__ >= 3 -HEXAGON_OPT_FUNC_BEGIN memset - { - r7=vsplatb(r1) - r6 = r0 - if (r2==#0) jump:nt .L1 - } - { - r5:4=combine(r7,r7) - p0 = cmp.gtu(r2,#8) - if (p0.new) jump:nt .L3 - } - { - r3 = r0 - loop0(.L47,r2) - } - .falign -.L47: - { - memb(r3++#1) = r1 - }:endloop0 /* start=.L47 */ - jumpr r31 -.L3: - { - p0 = tstbit(r0,#0) - if (!p0.new) jump:nt .L8 - p1 = cmp.eq(r2, #1) - } - { - r6 = add(r0, #1) - r2 = add(r2,#-1) - memb(r0) = r1 - if (p1) jump .L1 - } -.L8: - { - p0 = tstbit(r6,#1) - if (!p0.new) jump:nt .L10 - } - { - r2 = add(r2,#-2) - memh(r6++#2) = r7 - p0 = cmp.eq(r2, #2) - if (p0.new) jump:nt .L1 - } -.L10: - { - p0 = tstbit(r6,#2) - if (!p0.new) jump:nt .L12 - } - { - r2 = add(r2,#-4) - memw(r6++#4) = r7 - p0 = cmp.eq(r2, #4) - if (p0.new) jump:nt .L1 - } -.L12: - { - p0 = cmp.gtu(r2,#127) - if (!p0.new) jump:nt .L14 - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } -.L17: - { - r3 = lsr(r2,#5) - if (r1!=#0) jump:nt .L18 - } - { - r8 = r3 - r3 = r6 - loop0(.L46,r3) - } - .falign -.L46: - { - dczeroa(r6) - r6 = add(r6,#32) - r2 = add(r2,#-32) - }:endloop0 /* start=.L46 */ -.L14: - { - p0 = cmp.gtu(r2,#7) - if (!p0.new) jump:nt .L28 - r8 = lsr(r2,#3) - } - loop0(.L44,r8) - .falign -.L44: - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - }:endloop0 /* start=.L44 */ -.L28: - { - p0 = tstbit(r2,#2) - if (!p0.new) jump:nt .L33 - } - { - r2 = add(r2,#-4) - memw(r6++#4) = r7 - } -.L33: - { - p0 = tstbit(r2,#1) - if (!p0.new) jump:nt .L35 - } - { - r2 = add(r2,#-2) - memh(r6++#2) = r7 - } -.L35: - p0 = cmp.eq(r2,#1) - if (p0) memb(r6) = r1 -.L1: - jumpr r31 -.L18: - loop0(.L45,r3) - .falign -.L45: - dczeroa(r6) - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-32) - } - memd(r6++#8) = r5:4 - memd(r6++#8) = r5:4 - { - memd(r6++#8) = r5:4 - }:endloop0 /* start=.L45 */ - jump .L14 -HEXAGON_OPT_FUNC_FINISH memset -#endif diff --git a/arch/hexagon/mm/copy_from_user.S b/arch/hexagon/mm/copy_from_user.S deleted file mode 100644 index 1a49bf24f68bc1104ff2610020ae1d19d5d251ef..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_from_user.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User memory copy functions for kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * The right way to do this involves valignb - * The easy way to do this is only speed up src/dest similar alignment. - */ - -/* - * Copy to/from user are the same, except that for packets with a load and - * a store, I don't know how to tell which kind of exception we got. - * Therefore, we duplicate the function, and handle faulting addresses - * differently for each function - */ - -/* - * copy from user: loads can fault - */ -#define src_sav r13 -#define dst_sav r12 -#define src_dst_sav r13:12 -#define d_dbuf r15:14 -#define w_dbuf r15 - -#define dst r0 -#define src r1 -#define bytes r2 -#define loopcount r5 - -#define FUNCNAME raw_copy_from_user -#include "copy_user_template.S" - - /* LOAD FAULTS from COPY_FROM_USER */ - - /* Alignment loop. r2 has been updated. Return it. 
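The fault fixups that follow all compute the return value required of raw_copy_from_user(): the number of bytes not copied. src_sav (r13) holds the source address the in-flight chunk was loaded from, and the identity noted in the code, X - (A - B) == X + B - A, turns the adjustment into a single add, r2 += sub(src_sav, src). In C terms (a sketch):

    /* remaining: running byte count at fault time (r2)
     * src:       post-increment source pointer (r1)
     * src_sav:   source address of the pending chunk (r13) */
    static unsigned long not_copied(unsigned long remaining,
                                    const char *src, const char *src_sav)
    {
        return remaining + (src_sav - src);  /* == remaining - (src - src_sav) */
    }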
*/ - .falign -1009: -2009: -4009: - { - r0 = r2 - jumpr r31 - } - /* Normal copy loops. Do epilog. Use src-src_sav to compute distance */ - /* X - (A - B) == X + B - A */ - .falign -8089: - { - memd(dst) = d_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -4089: - { - memw(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -2089: - { - memh(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -1089: - { - memb(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - - /* COPY FROM USER: only loads can fail */ - - .section __ex_table,"a" - .long 1000b,1009b - .long 2000b,2009b - .long 4000b,4009b - .long 8080b,8089b - .long 4080b,4089b - .long 2080b,2089b - .long 1080b,1089b - .previous diff --git a/arch/hexagon/mm/copy_to_user.S b/arch/hexagon/mm/copy_to_user.S deleted file mode 100644 index ed8e3cafb36e4219f9f6caa0768dbc21d11e282e..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_to_user.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User memory copying routines for the Hexagon Kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* The right way to do this involves valignb - * The easy way to do this is only speed up src/dest similar alignment. - */ - -/* - * Copy to/from user are the same, except that for packets with a load and - * a store, I don't know how to tell which kind of exception we got. - * Therefore, we duplicate the function, and handle faulting addresses - * differently for each function - */ - -/* - * copy to user: stores can fault - */ -#define src_sav r13 -#define dst_sav r12 -#define src_dst_sav r13:12 -#define d_dbuf r15:14 -#define w_dbuf r15 - -#define dst r0 -#define src r1 -#define bytes r2 -#define loopcount r5 - -#define FUNCNAME raw_copy_to_user -#include "copy_user_template.S" - - /* STORE FAULTS from COPY_TO_USER */ - .falign -1109: -2109: -4109: - /* Alignment loop. r2 has been updated. Return it. */ - { - r0 = r2 - jumpr r31 - } - /* Normal copy loops. Use dst-dst_sav to compute distance */ - /* dst holds best write, no need to unwind any loops */ - /* X - (A - B) == X + B - A */ - .falign -8189: -8199: -4189: -4199: -2189: -2199: -1189: -1199: - { - r2 += sub(dst_sav,dst) - } - { - r0 = r2 - jumpr r31 - } - - /* COPY TO USER: only stores can fail */ - .section __ex_table,"a" - .long 1100b,1109b - .long 2100b,2109b - .long 4100b,4109b - .long 8180b,8189b - .long 8190b,8199b - .long 4180b,4189b - .long 4190b,4199b - .long 2180b,2189b - .long 2190b,2199b - .long 1180b,1189b - .long 1190b,1199b - .previous diff --git a/arch/hexagon/mm/copy_user_template.S b/arch/hexagon/mm/copy_user_template.S deleted file mode 100644 index d297df01b43f941fe28684b500332bc1dbe7cca8..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_user_template.S +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. 
- */ - -/* Numerology: - * WXYZ - * W: width in bytes - * X: Load=0, Store=1 - * Y: Location 0=preamble,8=loop,9=epilog - * Z: Location=0,handler=9 - */ - .text - .global FUNCNAME - .type FUNCNAME, @function - .p2align 5 -FUNCNAME: - { - p0 = cmp.gtu(bytes,#0) - if (!p0.new) jump:nt .Ldone - r3 = or(dst,src) - r4 = xor(dst,src) - } - { - p1 = cmp.gtu(bytes,#15) - p0 = bitsclr(r3,#7) - if (!p0.new) jump:nt .Loop_not_aligned_8 - src_dst_sav = combine(src,dst) - } - - { - loopcount = lsr(bytes,#3) - if (!p1) jump .Lsmall - } - p3=sp1loop0(.Loop8,loopcount) -.Loop8: -8080: -8180: - { - if (p3) memd(dst++#8) = d_dbuf - d_dbuf = memd(src++#8) - }:endloop0 -8190: - { - memd(dst++#8) = d_dbuf - bytes -= asl(loopcount,#3) - jump .Lsmall - } - -.Loop_not_aligned_8: - { - p0 = bitsclr(r4,#7) - if (p0.new) jump:nt .Lalign - } - { - p0 = bitsclr(r3,#3) - if (!p0.new) jump:nt .Loop_not_aligned_4 - p1 = cmp.gtu(bytes,#7) - } - - { - if (!p1) jump .Lsmall - loopcount = lsr(bytes,#2) - } - p3=sp1loop0(.Loop4,loopcount) -.Loop4: -4080: -4180: - { - if (p3) memw(dst++#4) = w_dbuf - w_dbuf = memw(src++#4) - }:endloop0 -4190: - { - memw(dst++#4) = w_dbuf - bytes -= asl(loopcount,#2) - jump .Lsmall - } - -.Loop_not_aligned_4: - { - p0 = bitsclr(r3,#1) - if (!p0.new) jump:nt .Loop_not_aligned - p1 = cmp.gtu(bytes,#3) - } - - { - if (!p1) jump .Lsmall - loopcount = lsr(bytes,#1) - } - p3=sp1loop0(.Loop2,loopcount) -.Loop2: -2080: -2180: - { - if (p3) memh(dst++#2) = w_dbuf - w_dbuf = memuh(src++#2) - }:endloop0 -2190: - { - memh(dst++#2) = w_dbuf - bytes -= asl(loopcount,#1) - jump .Lsmall - } - -.Loop_not_aligned: /* Works for as small as one byte */ - p3=sp1loop0(.Loop1,bytes) -.Loop1: -1080: -1180: - { - if (p3) memb(dst++#1) = w_dbuf - w_dbuf = memub(src++#1) - }:endloop0 - /* Done */ -1190: - { - memb(dst) = w_dbuf - jumpr r31 - r0 = #0 - } - -.Lsmall: - { - p0 = cmp.gtu(bytes,#0) - if (p0.new) jump:nt .Loop_not_aligned - } -.Ldone: - { - r0 = #0 - jumpr r31 - } - .falign -.Lalign: -1000: - { - if (p0.new) w_dbuf = memub(src) - p0 = tstbit(src,#0) - if (!p1) jump .Lsmall - } -1100: - { - if (p0) memb(dst++#1) = w_dbuf - if (p0) bytes = add(bytes,#-1) - if (p0) src = add(src,#1) - } -2000: - { - if (p0.new) w_dbuf = memuh(src) - p0 = tstbit(src,#1) - if (!p1) jump .Lsmall - } -2100: - { - if (p0) memh(dst++#2) = w_dbuf - if (p0) bytes = add(bytes,#-2) - if (p0) src = add(src,#2) - } -4000: - { - if (p0.new) w_dbuf = memw(src) - p0 = tstbit(src,#2) - if (!p1) jump .Lsmall - } -4100: - { - if (p0) memw(dst++#4) = w_dbuf - if (p0) bytes = add(bytes,#-4) - if (p0) src = add(src,#4) - jump FUNCNAME - } - .size FUNCNAME,.-FUNCNAME diff --git a/arch/hexagon/mm/strnlen_user.S b/arch/hexagon/mm/strnlen_user.S deleted file mode 100644 index 4b5574a7cc9cfac19eb953606db48147fae10e8c..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/strnlen_user.S +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User string length functions for kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#define isrc r0 -#define max r1 /* Do not change! */ - -#define end r2 -#define tmp1 r3 - -#define obo r6 /* off-by-one */ -#define start r7 -#define mod8 r8 -#define dbuf r15:14 -#define dcmp r13:12 - -/* - * The vector mask version of this turned out *really* badly. - * The hardware loop version also turned out *really* badly. - * Seems straight pointer arithmetic basically wins here. 
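The doubleword loop below finds a NUL without touching bytes one at a time: vcmpb.eq compares all eight bytes of dbuf against zero at once, the predicate is moved into a scalar register, and ct0 (count trailing zeros) yields the index of the first matching byte, with 32 meaning no match in this word. Equivalent C, as a sketch using a GCC builtin:

    #include <stdint.h>

    /* Returns the byte index of the first NUL in 'word', or -1 if
     * there is none (the assembly's ct0 yields 32 in that case). */
    static int first_nul_byte(uint64_t word)
    {
        unsigned mask = 0;
        for (int i = 0; i < 8; i++)              /* vcmpb.eq(dbuf, #0) */
            if (((word >> (8 * i)) & 0xff) == 0)
                mask |= 1u << i;
        return mask ? __builtin_ctz(mask) : -1;  /* ct0 of the mask */
    }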
- */ - -#define fname __strnlen_user - - .text - .global fname - .type fname, @function - .p2align 5 /* why? */ -fname: - { - mod8 = and(isrc,#7); - end = add(isrc,max); - start = isrc; - } - { - P0 = cmp.eq(mod8,#0); - mod8 = and(end,#7); - dcmp = #0; - if (P0.new) jump:t dw_loop; /* fire up the oven */ - } - -alignment_loop: -fail_1: { - tmp1 = memb(start++#1); - } - { - P0 = cmp.eq(tmp1,#0); - if (P0.new) jump:nt exit_found; - P1 = cmp.gtu(end,start); - mod8 = and(start,#7); - } - { - if (!P1) jump exit_error; /* hit the end */ - P0 = cmp.eq(mod8,#0); - } - { - if (!P0) jump alignment_loop; - } - - - -dw_loop: -fail_2: { - dbuf = memd(start); - obo = add(start,#1); - } - { - P0 = vcmpb.eq(dbuf,dcmp); - } - { - tmp1 = P0; - P0 = cmp.gtu(end,start); - } - { - tmp1 = ct0(tmp1); - mod8 = and(end,#7); - if (!P0) jump end_check; - } - { - P0 = cmp.eq(tmp1,#32); - if (!P0.new) jump:nt exit_found; - if (!P0.new) start = add(obo,tmp1); - } - { - start = add(start,#8); - jump dw_loop; - } /* might be nice to combine these jumps... */ - - -end_check: - { - P0 = cmp.gt(tmp1,mod8); - if (P0.new) jump:nt exit_error; /* neverfound! */ - start = add(obo,tmp1); - } - -exit_found: - { - R0 = sub(start,isrc); - jumpr R31; - } - -exit_error: - { - R0 = add(max,#1); - jumpr R31; - } - - /* Uh, what does the "fixup" return here? */ - .falign -fix_1: - { - R0 = #0; - jumpr R31; - } - - .size fname,.-fname - - -.section __ex_table,"a" -.long fail_1,fix_1 -.long fail_2,fix_1 -.previous diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S deleted file mode 100644 index 58233bb7976dfa46651f80ef2aaa67df9f8627b4..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/efi_stub.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub. - * - * Copyright (C) 1999-2001 Hewlett-Packard Co - * David Mosberger - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. We need this because we can't call SetVirtualMap() until - * the kernel has booted far enough to allow allocation of struct vma_struct - * entries (which we would need to map stuff with memory attributes other - * than uncached or writeback...). Since the GetTime() service gets called - * earlier than that, we need to be able to make physical mode EFI calls from - * the kernel. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. 
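efi_call_phys (below) flips the processor into physical mode by rewriting the PSR in two steps: OR in the bits that must be set, then clear the must-be-clear set with andcm (and-complement), the exact or/andcm pair visible in the code. The same arithmetic in C, with made-up bit values in place of the IA64_PSR_* constants:

    #include <stdint.h>

    #define PSR_SET_BITS    (1ull << 44)                  /* illustrative */
    #define PSR_CLEAR_BITS  ((1ull << 13) | (1ull << 36)) /* illustrative */

    static inline uint64_t phys_mode_psr(uint64_t psr)
    {
        psr |= PSR_SET_BITS;         /* or    loc3 = loc3, r17 */
        psr &= ~PSR_CLEAR_BITS;      /* andcm r16  = loc3, r16 */
        return psr;
    }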
- */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of EFI routine to call - * in1..in7 = arguments to routine - * - * Outputs: - * r8 = EFI_STATUS returned by called function - */ - -GLOBAL_ENTRY(efi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,7,7,0 - ld8 r2=[in0],8 // load EFI function's entry point - mov loc0=rp - .body - ;; - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load EFI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov out4=in5 - mov out0=in1 - mov out1=in2 - mov out2=in3 - mov out3=in4 - mov out5=in6 - mov out6=in7 - mov loc5=r19 - mov loc6=r20 - br.call.sptk.many rp=b6 // call the EFI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(efi_call_phys) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S deleted file mode 100644 index a9992be5718b88a9e7e31e00db188fb03baeadfe..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/entry.S +++ /dev/null @@ -1,1435 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/entry.S - * - * Kernel entry points. - * - * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999, 2002-2003 - * Asit Mallick - * Don Dugger - * Suresh Siddha - * Fenghua Yu - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - */ -/* - * ia64_switch_to now places correct virtual mapping in in TR2 for - * kernel stack. This allows us to handle interrupts without changing - * to physical mode. - * - * Jonathan Nicklin - * Patrick O'Rourke - * 11/07/2000 - */ -/* - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - */ -/* - * Global (preserved) predicate usage on syscall entry/exit path: - * - * pKStk: See entry.h. - * pUStk: See entry.h. - * pSys: See entry.h. - * pNonSys: !pSys - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minstate.h" - - /* - * execve() is special because in case of success, we need to - * setup a null register window frame. 
- */ -ENTRY(ia64_execve) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 - mov loc0=rp - .body - mov out0=in0 // filename - ;; // stop bit between alloc and call - mov out1=in1 // argv - mov out2=in2 // envp - br.call.sptk.many rp=sys_execve -.ret0: - cmp4.ge p6,p7=r8,r0 - mov ar.pfs=loc1 // restore ar.pfs - sxt4 r8=r8 // return 64-bit result - ;; - stf.spill [sp]=f0 - mov rp=loc0 -(p6) mov ar.pfs=r0 // clear ar.pfs on success -(p7) br.ret.sptk.many rp - - /* - * In theory, we'd have to zap this state only to prevent leaking of - * security sensitive state (e.g., if current->mm->dumpable is zero). However, - * this executes in less than 20 cycles even on Itanium, so it's not worth - * optimizing for...). - */ - mov ar.unat=0; mov ar.lc=0 - mov r4=0; mov f2=f0; mov b1=r0 - mov r5=0; mov f3=f0; mov b2=r0 - mov r6=0; mov f4=f0; mov b3=r0 - mov r7=0; mov f5=f0; mov b4=r0 - ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 - ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 - ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 - ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 - ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 - ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 - ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 - br.ret.sptk.many rp -END(ia64_execve) - -/* - * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, - * u64 tls) - */ -GLOBAL_ENTRY(sys_clone2) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out2=in2 - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out3=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() - mov out4=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret1: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone2) - -/* - * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) - * Deprecated. Use sys_clone2() instead. - */ -GLOBAL_ENTRY(sys_clone) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out2=16 // stacksize (compensates for 16-byte scratch area) - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out3=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in4 // store TLS in r13 (tp) - mov out4=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret2: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone) - -/* - * prev_task <- ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. 
The rest of the code - * doesn't care about the interrupt masking status. - */ -GLOBAL_ENTRY(ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - DO_SAVE_SWITCH_STACK - .body - - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - mov r27=IA64_KR(CURRENT_STACK) - adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 - dep r20=0,in0,61,3 // physical address of "next" - ;; - st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,IA64_GRANULE_SHIFT - cmp.eq p7,p6=r25,in0 - ;; - /* - * If we've already mapped this task's page, we can skip doing it again. - */ -(p6) cmp.eq p7,p6=r26,r27 -(p6) br.cond.dpnt .map - ;; -.done: - ld8 sp=[r21] // load kernel stack pointer of new task - MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context - -.map: - RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here - movl r25=PAGE_KERNEL - ;; - srlz.d - or r23=r25,r20 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; - MOV_TO_ITIR(p0, r25, r8) - MOV_TO_IFA(in0, r8) // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit - br.cond.sptk .done -END(ia64_switch_to) - -/* - * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This - * means that we may get an interrupt with "sp" pointing to the new kernel stack while - * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, - * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a - * problem. Also, we don't need to specify unwind information for preserved registers - * that are not modified in save_switch_stack as the right unwind information is already - * specified at the call-site of save_switch_stack. 
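The SW() offsets used throughout save_switch_stack and load_switch_stack below index into struct switch_stack; a hedged reconstruction of the layout they imply (the authoritative definition lives in asm/ptrace.h) looks roughly like this:

    /* Hedged sketch of the preserved-state layout implied by the SW()
     * offsets below; scratch state lives in struct pt_regs instead. */
    struct switch_stack_sketch {
        unsigned long caller_unat;              /* NaT bits of caller's spills */
        unsigned long ar_fpsr;                  /* FP status register */
        unsigned char f2_f31[30][16];           /* 16-byte spill slots for f2-f31 */
        unsigned long r4, r5, r6, r7;           /* preserved general registers */
        unsigned long b0, b1, b2, b3, b4, b5;   /* branch registers */
        unsigned long ar_pfs;                   /* previous function state */
        unsigned long ar_lc;                    /* loop counter */
        unsigned long ar_unat;                  /* NaT bits for r4-r7 */
        unsigned long ar_rnat;                  /* RSE NaT collection */
        unsigned long ar_bspstore;              /* RSE backing-store pointer */
        unsigned long pr;                       /* predicate registers */
    };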
- */ - -/* - * save_switch_stack: - * - r16 holds ar.pfs - * - b7 holds address to return to - * - rp (b0) holds return address to save - */ -GLOBAL_ENTRY(save_switch_stack) - .prologue - .altrp b7 - flushrs // flush dirty regs to backing store (must be first in insn group) - .save @priunat,r17 - mov r17=ar.unat // preserve caller's - .body -#ifdef CONFIG_ITANIUM - adds r2=16+128,sp - adds r3=16+64,sp - adds r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,16 // spill r4 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl.nt1 [r2],128 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl [r2] - lfetch.fault.excl [r3] - adds r15=SW(R5)+16,sp -#else - add r2=16+3*128,sp - add r3=16,sp - add r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 - ;; - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 - lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 - ;; - lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 - lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 - adds r15=SW(R5)+16,sp -#endif - ;; - st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 - mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 - add r2=SW(F2)+16,sp // r2 = &sw->f2 - ;; - st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 - mov.m r18=ar.fpsr // preserve fpsr - add r3=SW(F3)+16,sp // r3 = &sw->f3 - ;; - stf.spill [r2]=f2,32 - mov.m r19=ar.rnat - mov r21=b0 - - stf.spill [r3]=f3,32 - st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 - mov r22=b1 - ;; - // since we're done with the spills, read and save ar.unat: - mov.m r29=ar.unat - mov.m r20=ar.bspstore - mov r23=b2 - stf.spill [r2]=f4,32 - stf.spill [r3]=f5,32 - mov r24=b3 - ;; - st8 [r14]=r21,SW(B1)-SW(B0) // save b0 - st8 [r15]=r23,SW(B3)-SW(B2) // save b2 - mov r25=b4 - mov r26=b5 - ;; - st8 [r14]=r22,SW(B4)-SW(B1) // save b1 - st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 - mov r21=ar.lc // I-unit - stf.spill [r2]=f12,32 - stf.spill [r3]=f13,32 - ;; - st8 [r14]=r25,SW(B5)-SW(B4) // save b4 - st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs - stf.spill [r2]=f14,32 - stf.spill [r3]=f15,32 - ;; - st8 [r14]=r26 // save b5 - st8 [r15]=r21 // save ar.lc - stf.spill [r2]=f16,32 - stf.spill [r3]=f17,32 - ;; - stf.spill [r2]=f18,32 - stf.spill [r3]=f19,32 - ;; - stf.spill [r2]=f20,32 - stf.spill [r3]=f21,32 - ;; - stf.spill [r2]=f22,32 - stf.spill [r3]=f23,32 - ;; - stf.spill [r2]=f24,32 - stf.spill [r3]=f25,32 - ;; - stf.spill [r2]=f26,32 - stf.spill [r3]=f27,32 - ;; - stf.spill [r2]=f28,32 - stf.spill [r3]=f29,32 - ;; - stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) - stf.spill [r3]=f31,SW(PR)-SW(F31) - add r14=SW(CALLER_UNAT)+16,sp - ;; - st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat - st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat - mov r21=pr - ;; - st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat - st8 [r3]=r21 // save predicate registers - ;; - st8 [r2]=r20 // save ar.bspstore - st8 [r14]=r18 // save fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(save_switch_stack) - -/* - * load_switch_stack: - * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) - * - b7 holds address to return to - * - must not touch r8-r11 - */ -GLOBAL_ENTRY(load_switch_stack) - .prologue - .altrp b7 - - .body - lfetch.fault.nt1 [sp] - adds r2=SW(AR_BSPSTORE)+16,sp - adds r3=SW(AR_UNAT)+16,sp - mov ar.rsc=0 // put RSE into enforced lazy mode - adds r14=SW(CALLER_UNAT)+16,sp - adds r15=SW(AR_FPSR)+16,sp 
- ;; - ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore - ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat - ;; - ld8 r21=[r2],16 // restore b0 - ld8 r22=[r3],16 // restore b1 - ;; - ld8 r23=[r2],16 // restore b2 - ld8 r24=[r3],16 // restore b3 - ;; - ld8 r25=[r2],16 // restore b4 - ld8 r26=[r3],16 // restore b5 - ;; - ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs - ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc - ;; - ld8 r28=[r2] // restore pr - ld8 r30=[r3] // restore rnat - ;; - ld8 r18=[r14],16 // restore caller's unat - ld8 r19=[r15],24 // restore fpsr - ;; - ldf.fill f2=[r14],32 - ldf.fill f3=[r15],32 - ;; - ldf.fill f4=[r14],32 - ldf.fill f5=[r15],32 - ;; - ldf.fill f12=[r14],32 - ldf.fill f13=[r15],32 - ;; - ldf.fill f14=[r14],32 - ldf.fill f15=[r15],32 - ;; - ldf.fill f16=[r14],32 - ldf.fill f17=[r15],32 - ;; - ldf.fill f18=[r14],32 - ldf.fill f19=[r15],32 - mov b0=r21 - ;; - ldf.fill f20=[r14],32 - ldf.fill f21=[r15],32 - mov b1=r22 - ;; - ldf.fill f22=[r14],32 - ldf.fill f23=[r15],32 - mov b2=r23 - ;; - mov ar.bspstore=r27 - mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 - mov b3=r24 - ;; - ldf.fill f24=[r14],32 - ldf.fill f25=[r15],32 - mov b4=r25 - ;; - ldf.fill f26=[r14],32 - ldf.fill f27=[r15],32 - mov b5=r26 - ;; - ldf.fill f28=[r14],32 - ldf.fill f29=[r15],32 - mov ar.pfs=r16 - ;; - ldf.fill f30=[r14],32 - ldf.fill f31=[r15],24 - mov ar.lc=r17 - ;; - ld8.fill r4=[r14],16 - ld8.fill r5=[r15],16 - mov pr=r28,-1 - ;; - ld8.fill r6=[r14],16 - ld8.fill r7=[r15],16 - - mov ar.unat=r18 // restore caller's unat - mov ar.rnat=r30 // must restore after bspstore but before rsc! - mov ar.fpsr=r19 // restore fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(load_switch_stack) - - /* - * Invoke a system call, but do some tracing before and after the call. - * We MUST preserve the current register frame throughout this routine - * because some system calls (such as ia64_execve) directly - * manipulate ar.pfs. - */ -GLOBAL_ENTRY(ia64_trace_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * We need to preserve the scratch registers f6-f11 in case the system - * call is sigreturn. - */ - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args - cmp.lt p6,p0=r8,r0 // check tracehook - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - // the syscall number may have changed, so re-load it and re-calculate the - // syscall entry-point: - adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) - ;; - ld8 r15=[r15] - mov r3=NR_syscalls - 1 - ;; - adds r15=-1024,r15 - movl r16=sys_call_table - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - ;; - mov b6=r20 - br.call.sptk.many rp=b6 // do the syscall -.strace_check_retval: - cmp.lt p6,p0=r8,r0 // syscall failed? 
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - ;; // avoid RAW on r10 -.strace_save_retval: -.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 -.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -(pUStk) rsm psr.i // disable interrupts - br.cond.sptk ia64_work_pending_syscall_end - -strace_error: - ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno value - ;; - cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? - adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r10=-1 -(p6) mov r8=r9 - br.cond.sptk .strace_save_retval -END(ia64_trace_syscall) - - /* - * When traced and returning from sigreturn, we invoke syscall_trace but then - * go straight to ia64_leave_kernel rather than ia64_leave_syscall. - */ -GLOBAL_ENTRY(ia64_strace_leave_kernel) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -} -.ret4: br.cond.sptk ia64_leave_kernel -END(ia64_strace_leave_kernel) - -ENTRY(call_payload) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0) - /* call the kernel_thread payload; fn is in r4, arg - in r5 */ - alloc loc1=ar.pfs,0,3,1,0 - mov loc0=rp - mov loc2=gp - mov out0=r5 // arg - ld8 r14 = [r4], 8 // fn.address - ;; - mov b6 = r14 - ld8 gp = [r4] // fn.gp - ;; - br.call.sptk.many rp=b6 // fn(arg) -.ret12: mov gp=loc2 - mov rp=loc0 - mov ar.pfs=loc1 - /* ... and if it has returned, we are going to userland */ - cmp.ne pKStk,pUStk=r0,r0 - br.ret.sptk.many rp -END(call_payload) - -GLOBAL_ENTRY(ia64_ret_from_clone) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - /* - * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the - * address of the previously executing task. - */ - br.call.sptk.many rp=ia64_invoke_schedule_tail -} -.ret8: -(pKStk) br.call.sptk.many rp=call_payload - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] - ;; - mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; - cmp.ne p6,p0=r2,r0 -(p6) br.cond.spnt .strace_check_retval - ;; // added stop bits to prevent r8 dependency -END(ia64_ret_from_clone) - // fall through -GLOBAL_ENTRY(ia64_ret_from_syscall) - PT_REGS_UNWIND_INFO(0) - cmp.ge p6,p7=r8,r0 // syscall executed successfully? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - mov r10=r0 // clear error indication in r10 -(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure -END(ia64_ret_from_syscall) - // fall through - -/* - * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't - * need to switch to bank 0 and doesn't restore the scratch registers. 
- * To avoid leaking kernel bits, the scratch registers are set to - * the following known-to-be-safe values: - * - * r1: restored (global pointer) - * r2: cleared - * r3: 1 (when returning to user-level) - * r8-r11: restored (syscall return value(s)) - * r12: restored (user-level stack pointer) - * r13: restored (user-level thread pointer) - * r14: set to __kernel_syscall_via_epc - * r15: restored (syscall #) - * r16-r17: cleared - * r18: user-level b6 - * r19: cleared - * r20: user-level ar.fpsr - * r21: user-level b0 - * r22: cleared - * r23: user-level ar.bspstore - * r24: user-level ar.rnat - * r25: user-level ar.unat - * r26: user-level ar.pfs - * r27: user-level ar.rsc - * r28: user-level ip - * r29: user-level psr - * r30: user-level cfm - * r31: user-level pr - * f6-f11: cleared - * pr: restored (user-level pr) - * b0: restored (user-level rp) - * b6: restored - * b7: set to __kernel_syscall_via_epc - * ar.unat: restored (user-level ar.unat) - * ar.pfs: restored (user-level ar.pfs) - * ar.rsc: restored (user-level ar.rsc) - * ar.rnat: restored (user-level ar.rnat) - * ar.bspstore: restored (user-level ar.bspstore) - * ar.fpsr: restored (user-level ar.fpsr) - * ar.ccv: cleared - * ar.csd: cleared - * ar.ssd: cleared - */ -GLOBAL_ENTRY(ia64_leave_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - RSM_PSR_I(p0, r2, r18) // disable interrupts - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ - RSM_PSR_I(pUStk, r2, r18) - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.global ia64_work_processed_syscall; -ia64_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - adds r2=PT(LOADRS)+16,r12 - MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - adds r3=PT(AR_BSPSTORE)+16,r12 // deferred - ;; -#else - adds r2=PT(LOADRS)+16,r12 - adds r3=PT(AR_BSPSTORE)+16,r12 - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - nop.i 0 - ;; -#endif - mov r16=ar.bsp // M2 get existing backing store pointer - ld8 r18=[r2],PT(R9)-PT(B6) // load b6 -(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) -(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? 
-(p6) br.cond.spnt .work_pending_syscall - ;; - // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r9=[r2],PT(CR_IPSR)-PT(R9) - ld8 r11=[r3],PT(CR_IIP)-PT(R11) -(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! - ;; - invala // M0|1 invalidate ALAT - RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection - cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - - ld8 r29=[r2],16 // M0|1 load cr.ipsr - ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#else - mov r22=r0 // A clear r22 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#endif - ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs - MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled - nop 0 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc - mov f6=f0 // F clear f6 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates - mov f7=f0 // F clear f7 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr - ld8.fill r1=[r3],16 // M0|1 load r1 -(pUStk) mov r17=1 // A - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) st1 [r15]=r17 // M2|3 -#else -(pUStk) st1 [r14]=r17 // M2|3 -#endif - ld8.fill r13=[r3],16 // M0|1 - mov f8=f0 // F clear f8 - ;; - ld8.fill r12=[r2] // M0|1 restore r12 (sp) - ld8.fill r15=[r3] // M0|1 restore r15 - mov b6=r18 // I0 restore b6 - - LOAD_PHYS_STACK_REG_SIZE(r17) - mov f9=f0 // F clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch // B - - srlz.d // M0 ensure interruption collection is off (for cover) - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - COVER // B add current frame into dirty partition & set cr.ifs - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - mov r19=ar.bsp // M2 get new backing store pointer - st8 [r14]=r22 // M save time at leave - mov f10=f0 // F clear f10 - - mov r22=r0 // A clear r22 - movl r14=__kernel_syscall_via_epc // X - ;; -#else - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // F clear f10 - - nop.m 0 - movl r14=__kernel_syscall_via_epc // X - ;; -#endif - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // M2 clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // F clear f11 - br.cond.sptk.many rbs_switch // B -END(ia64_leave_syscall) - -GLOBAL_ENTRY(ia64_leave_kernel) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. 
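The predicate setup in the two prologues (here and in ia64_leave_syscall above) reduces to a simple condition on when the extra-work check runs; a hedged C restatement with illustrative names:

    /* Hedged sketch of the condition computed into p6 above: extra work
     * (resched, signals, ...) is checked when returning to user mode,
     * and also in-kernel under CONFIG_PREEMPT once preempt_count has
     * dropped to zero. */
    static int check_extra_work_sketch(int returning_to_user,
                                       int preempt_configured,
                                       int preempt_count)
    {
        if (returning_to_user)
            return 1;
        return preempt_configured && preempt_count == 0;
    }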
- */ -#ifdef CONFIG_PREEMPT - RSM_PSR_I(p0, r17, r31) // disable interrupts - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else - RSM_PSR_I(pUStk, r17, r31) - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_kernel: - adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r17] // load current_thread_info()->flags - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) - adds r30=PT(AR_CCV)+16,r12 -(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv -(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd -(p6) br.cond.spnt .work_pending - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) - mov ar.ccv=r15 - ;; - ldf.fill f11=[r2] - BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) 
- ;; -(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - .pred.rel.mutex pUStk,pKStk - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave - nop.i 0 - ;; -#else - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - nop.i 0 - nop.i 0 - ;; -#endif - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16 // load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r21=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 -#else -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 -#endif - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred -#endif - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] -(pUStk) mov r17=1 - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; - // mib : mov add br -> mib : ld8 add br - // bbb_ : br nop cover;; mbb_ : mov br cover;; - // - // no one require bsp in r16 if (pKStk) branch is selected. -(pUStk) st8 [r3]=r22 // save time at leave -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - ld8.fill r3=[r16] // deferred - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch - mov r16=ar.bsp // get existing backing store pointer -#else - ld8.fill r3=[r16] -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - mov r16=ar.bsp // get existing backing store pointer - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch -#endif - - /* - * Restore user backing store. - * - * NOTE: alloc, loadrs, and cover can't be predicated. - */ -(pNonSys) br.cond.dpnt dont_preserve_current_frame - COVER // add current frame into dirty partition and set cr.ifs - ;; - mov r19=ar.bsp // get new backing store pointer -rbs_switch: - sub r16=r16,r18 // krbs = old bsp - size of dirty partition - cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs - ;; - sub r19=r19,r16 // calculate total byte size of dirty partition - add r18=64,r18 // don't force in0-in7 into memory... - ;; - shl r19=r19,16 // shift size of dirty partition into loadrs position - ;; -dont_preserve_current_frame: - /* - * To prevent leaking bits between the kernel and user-space, - * we must clear the stacked registers in the "invalid" partition here. - * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, - * 5 registers/cycle on McKinley). 
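The clearing code that follows is easier to see in C: each recursion level allocates a fresh register frame, zeroes Nregs stacked registers by writing its locals, and recurses while more than Nregs remain. A hedged model, treating the register count as a plain byte count:

    /* Hedged model of rse_clear_invalid below; the real code zeroes the
     * frame's loc registers rather than touching memory. */
    #define NREGS_SKETCH 14                /* 10 on Itanium, 14 on McKinley */

    static void rse_clear_invalid_sketch(long bytes_to_clear, long depth)
    {
        /* this frame accounts for NREGS_SKETCH * 8 bytes of stacked regs */
        if (bytes_to_clear > NREGS_SKETCH * 8)
            rse_clear_invalid_sketch(bytes_to_clear - NREGS_SKETCH * 8,
                                     depth + 1);
        /* every level with depth != 0 then returns, popping its frame */
    }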
- */
-# define pRecurse p6
-# define pReturn p7
-#ifdef CONFIG_ITANIUM
-# define Nregs 10
-#else
-# define Nregs 14
-#endif
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r17
- mov in1=0
- ;;
- TEXT_ALIGN(32)
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
- // cycle 0
- { .mii
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
-}{ .mfb
- add out1=1,in1 // increment recursion count
- nop.f 0
- nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
- ;;
-}{ .mfi // cycle 1
- mov loc1=0
- nop.f 0
- mov loc2=0
-}{ .mib
- mov loc3=0
- mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi // cycle 2
- mov loc5=0
- nop.f 0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
-}{ .mib
- mov loc6=0
- mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-# undef pRecurse
-# undef pReturn
- ;;
- alloc r17=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
-skip_rbs_switch:
- mov ar.unat=r25 // M2
-(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
- ;;
-(pUStk) mov ar.bspstore=r23 // M2
-(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
- ;;
- MOV_TO_IPSR(p0, r29, r25) // M2
- mov ar.pfs=r26 // I0
-(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
-
- MOV_TO_IFS(p9, r30, r25) // M2
- mov b0=r21 // I0
-(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
-
- mov ar.fpsr=r20 // M2
- MOV_TO_IIP(r28, r25) // M2
- nop 0
- ;;
-(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
- nop 0
-(pLvSys)mov r2=r0
-
- mov ar.rsc=r27 // M2
- mov pr=r31,-1 // I0
- RFI // B
-
- /*
- * On entry:
- * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
- * r31 = current->thread_info->flags
- * On exit:
- * p6 = TRUE if work-pending-check needs to be redone
- *
- * Interrupts are disabled on entry, reenabled depending on work, and
- * disabled on exit.
- */
-.work_pending_syscall:
- add r2=-8,r2
- add r3=-8,r3
- ;;
- st8 [r2]=r8
- st8 [r3]=r10
-.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed?
-(p6) br.cond.sptk.few .notify - br.call.spnt.many rp=preempt_schedule_irq -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.notify: -(pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.global ia64_work_pending_syscall_end; -ia64_work_pending_syscall_end: - adds r2=PT(R8)+16,r12 - adds r3=PT(R10)+16,r12 - ;; - ld8 r8=[r2] - ld8 r10=[r3] - br.cond.sptk.many ia64_work_processed_syscall -END(ia64_leave_kernel) - -ENTRY(handle_syscall_error) - /* - * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could - * lead us to mistake a negative return value as a failed syscall. Those syscall - * must deposit a non-zero value in pt_regs.r8 to indicate an error. If - * pt_regs.r8 is zero, we assume that the call completed successfully. - */ - PT_REGS_UNWIND_INFO(0) - ld8 r3=[r2] // load pt_regs.r8 - ;; - cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? - ;; -(p7) mov r10=-1 -(p7) sub r8=0,r8 // negate return value to get errno - br.cond.sptk ia64_leave_syscall -END(handle_syscall_error) - - /* - * Invoke schedule_tail(task) while preserving in0-in7, which may be needed - * in case a system call gets restarted. - */ -GLOBAL_ENTRY(ia64_invoke_schedule_tail) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,1,0 - mov loc0=rp - mov out0=r8 // Address of previous task - ;; - br.call.sptk.many rp=schedule_tail -.ret11: mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(ia64_invoke_schedule_tail) - - /* - * Setup stack and call do_notify_resume_user(), keeping interrupts - * disabled. - * - * Note that pSys and pNonSys need to be set up by the caller. - * We declare 8 input registers so the system call args get preserved, - * in case we need to restart a system call. - */ -GLOBAL_ENTRY(notify_resume_user) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=0 // there is no "oldset" - adds out1=8,sp // out1=&sigscratch->ar_pfs -(pSys) mov out2=1 // out2==1 => we're in a syscall - ;; -(pNonSys) mov out2=0 // out2==0 => not a syscall - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch - .body - br.call.sptk.many rp=do_notify_resume_user -.ret15: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sigscratch->scratch_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(notify_resume_user) - -ENTRY(sys_rt_sigreturn) - PT_REGS_UNWIND_INFO(0) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - alloc r2=ar.pfs,8,0,1,0 - .prologue - PT_REGS_SAVES(16) - adds sp=-16,sp - .body - cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... - ;; - /* - * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined - * syscall-entry path does not save them we save them here instead. Note: we - * don't need to save any other registers that are not saved by the stream-lined - * syscall path, because restore_sigcontext() restores them. 
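Stepping back to handle_syscall_error above: it encodes the ia64 user-visible error convention, in which a failing syscall leaves the positive errno in r8 and sets r10 to -1, while syscalls that may legitimately return negative values signal failure by depositing a non-zero pt_regs.r8 beforehand. A hedged C restatement, names illustrative:

    /* Hedged sketch of the r8/r10 fixup done by handle_syscall_error
     * and the strace_error path earlier in this file. */
    static void fixup_syscall_error_sketch(long *r8, long *r10,
                                           long saved_r8)
    {
        if (saved_r8 != 0) {       /* handler marked this as a real error */
            *r10 = -1;             /* error indication for user space */
            *r8  = -*r8;           /* negative retval -> positive errno */
        }                          /* else: negative value was legitimate */
    }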
- */ - adds r16=PT(F6)+32,sp - adds r17=PT(F7)+32,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - adds out0=16,sp // out0 = &sigscratch - br.call.sptk.many rp=ia64_rt_sigreturn -.ret19: .restore sp,0 - adds sp=16,sp - ;; - ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,ia64_leave_kernel - ;; - mov ar.unat=r9 - br.many b7 -END(sys_rt_sigreturn) - -GLOBAL_ENTRY(ia64_prepare_handle_unaligned) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt -.ret21: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_unaligned) - - // - // unw_init_running(void (*callback)(info, arg), void *arg) - // -# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) - -GLOBAL_ENTRY(unw_init_running) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,3,3,0 - ;; - ld8 loc2=[in0],8 - mov loc0=rp - mov r16=loc1 - DO_SAVE_SWITCH_STACK - .body - - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE - SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) - adds sp=-EXTRA_FRAME_SIZE,sp - .body - ;; - adds out0=16,sp // &info - mov out1=r13 // current - adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack - br.call.sptk.many rp=unw_init_frame_info -1: adds out0=16,sp // &info - mov b6=loc2 - mov loc2=gp // save gp across indirect function call - ;; - ld8 gp=[in0] - mov out1=in1 // arg - br.call.sptk.many rp=b6 // invoke the callback function -1: mov gp=loc2 // restore gp - - // For now, we don't allow changing registers from within - // unw_init_running; if we ever want to allow that, we'd - // have to do a load_switch_stack here: - .restore sp - adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp - - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(unw_init_running) -EXPORT_SYMBOL(unw_init_running) - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -GLOBAL_ENTRY(_mcount) - br ftrace_stub -END(_mcount) -EXPORT_SYMBOL(_mcount) - -.here: - br.ret.sptk.many b0 - -GLOBAL_ENTRY(ftrace_caller) - alloc out0 = ar.pfs, 8, 0, 4, 0 - mov out3 = r0 - ;; - mov out2 = b0 - add r3 = 0x20, r3 - mov out1 = r1; - br.call.sptk.many b0 = ftrace_patch_gp - //this might be called from module, so we must patch gp -ftrace_patch_gp: - movl gp=__gp - mov b0 = r3 - ;; -.global ftrace_call; -ftrace_call: -{ - .mlx - nop.m 0x0 - movl r3 = .here;; -} - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(ftrace_caller) - -#else -GLOBAL_ENTRY(_mcount) - movl r2 = ftrace_stub - movl r3 = ftrace_trace_function;; - ld8 r3 = [r3];; - ld8 r3 = [r3];; - cmp.eq p7,p0 = r2, r3 -(p7) br.sptk.many ftrace_stub - ;; - - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(_mcount) -#endif - -GLOBAL_ENTRY(ftrace_stub) - mov r3 = b0 - movl r2 = 
_mcount_ret_helper - ;; - mov b6 = r2 - mov b7 = r3 - br.ret.sptk.many b6 - -_mcount_ret_helper: - mov b0 = r42 - mov r1 = r41 - mov ar.pfs = r40 - br b7 -END(ftrace_stub) - -#endif /* CONFIG_FUNCTION_TRACER */ - -#define __SYSCALL(nr, entry, nargs) data8 entry - .rodata - .align 8 - .globl sys_call_table -sys_call_table: -#include -#undef __SYSCALL diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S deleted file mode 100644 index 821e68d1059874454efb1cf0a845a668de4990ef..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/esi_stub.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ESI call stub. - * - * Copyright (C) 2005 Hewlett-Packard Co - * Alex Williamson - * - * Based on EFI call stub by David Mosberger. The stub is virtually - * identical to the one for EFI phys-mode calls, except that ESI - * calls may have up to 8 arguments, so they get passed to this routine - * through memory. - * - * This stub allows us to make ESI calls in physical mode with interrupts - * turned off. ESI calls may not support calling from virtual mode. - * - * Google for "Extensible SAL specification" for a document describing the - * ESI standard. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. - */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of ESI routine to call - * in1 = address of array of ESI parameters - * - * Outputs: - * r8 = result returned by called function - */ -GLOBAL_ENTRY(esi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,7,8,0 - ld8 r2=[in0],8 // load ESI function's entry point - mov loc0=rp - .body - ;; - ld8 out0=[in1],8 // ESI params loaded from array - ;; // passing all as inputs doesn't work - ld8 out1=[in1],8 - ;; - ld8 out2=[in1],8 - ;; - ld8 out3=[in1],8 - ;; - ld8 out4=[in1],8 - ;; - ld8 out5=[in1],8 - ;; - ld8 out6=[in1],8 - ;; - ld8 out7=[in1] - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load ESI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov loc5=r19 // old ar.bsp - mov loc6=r20 // old sp - br.call.sptk.many rp=b6 // call the ESI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // save virtual mode psr - mov r19=loc5 // save virtual mode bspstore - mov r20=loc6 // save virtual mode sp - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(esi_call_phys) 
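Because ESI calls can take up to eight arguments, the stub above receives them through memory rather than registers. A hedged sketch of the marshalling a C caller would do; the prototype shown is illustrative (the real declaration and return type live in the ia64 ESI support code):

    #include <stdint.h>

    extern long esi_call_phys(void *fdesc, uint64_t *args); /* stub above */

    static long esi_call_sketch(void *fdesc,
                                uint64_t a0, uint64_t a1, uint64_t a2,
                                uint64_t a3, uint64_t a4, uint64_t a5,
                                uint64_t a6, uint64_t a7)
    {
        /* the stub loads args[0..7] into out0-out7 with eight ld8s */
        uint64_t args[8] = { a0, a1, a2, a3, a4, a5, a6, a7 };

        return esi_call_phys(fdesc, args);
    }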
-EXPORT_SYMBOL_GPL(esi_call_phys) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S deleted file mode 100644 index 0750a716adc72a5f5d5f6cb502448a74da2d18db..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/fsys.S +++ /dev/null @@ -1,837 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the light-weight system call handlers (fsyscall-handlers). - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). - * 18-Feb-03 louisk Implement fsys_gettimeofday(). - * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, - * probably broke it along the way... ;-) - * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make - * it capable of using memory based clocks without falling back to C code. - * 08-Feb-07 Fenghua Yu Implement fsys_getcpu. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "entry.h" -#include - -/* - * See Documentation/ia64/fsys.rst for details on fsyscalls. - * - * On entry to an fsyscall handler: - * r10 = 0 (i.e., defaults to "successful syscall return") - * r11 = saved ar.pfs (a user-level value) - * r15 = system call number - * r16 = "current" task pointer (in normal kernel-mode, this is in r13) - * r32-r39 = system call arguments - * b6 = return address (a user-level value) - * ar.pfs = previous frame-state (a user-level value) - * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) - * all other registers may contain values passed in from user-mode - * - * On return from an fsyscall handler: - * r11 = saved ar.pfs (as passed into the fsyscall handler) - * r15 = system call number (as passed into the fsyscall handler) - * r32-r39 = system call arguments (as passed into the fsyscall handler) - * b6 = return address (as passed into the fsyscall handler) - * ar.pfs = previous frame-state (as passed into the fsyscall handler) - */ - -ENTRY(fsys_ni_syscall) - .prologue - .altrp b6 - .body - mov r8=ENOSYS - mov r10=-1 - FSYS_RETURN -END(fsys_ni_syscall) - -ENTRY(fsys_getpid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_SIGNAL_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->signal - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - ld4 r9=[r9] - add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 - ;; - and r9=TIF_ALLWORK_MASK,r9 - ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] - ;; - add r8=IA64_PID_LEVEL_OFFSET,r17 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - mov r17=0 - ;; - cmp.ne p8,p0=0,r9 -(p8) br.spnt.many fsys_fallback_syscall - FSYS_RETURN -END(fsys_getpid) - -ENTRY(fsys_set_tid_address) - .prologue - .altrp b6 - .body - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - add r17=IA64_TASK_THREAD_PID_OFFSET,r16 - ;; - ld4 r9=[r9] - tnat.z p6,p7=r32 // check argument register for being NaT - ld8 r17=[r17] // r17 = current->thread_pid - ;; - and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_PID_LEVEL_OFFSET,r17 - add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - cmp.ne p8,p0=0,r9 - mov r17=-1 - ;; -(p6) 
st8 [r18]=r32
-(p7) st8 [r18]=r17
-(p8) br.spnt.many fsys_fallback_syscall
- ;;
- mov r17=0 // I must not leak kernel bits...
- mov r18=0 // I must not leak kernel bits...
- FSYS_RETURN
-END(fsys_set_tid_address)
-
-#if IA64_GTOD_SEQ_OFFSET !=0
-#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
-#endif
-#if IA64_ITC_JITTER_OFFSET !=0
-#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
-#endif
-#define CLOCK_REALTIME 0
-#define CLOCK_MONOTONIC 1
-#define CLOCK_DIVIDE_BY_1000 0x4000
-#define CLOCK_ADD_MONOTONIC 0x8000
-
-ENTRY(fsys_gettimeofday)
- .prologue
- .altrp b6
- .body
- mov r31 = r32
- tnat.nz p6,p0 = r33 // guard against NaT argument
-(p6) br.cond.spnt.few .fail_einval
- mov r30 = CLOCK_DIVIDE_BY_1000
- ;;
-.gettime:
- // Register map
- // Incoming r31 = pointer to address where to place result
- // r30 = flags determining how time is processed
- // r2,r3 = temp r4-r7 preserved
- // r8 = result nanoseconds
- // r9 = result seconds
- // r10 = temporary storage for clock difference
- // r11 = preserved: saved ar.pfs
- // r12 = preserved: memory stack
- // r13 = preserved: thread pointer
- // r14 = address of mask / mask value
- // r15 = preserved: system call number
- // r16 = preserved: current task pointer
- // r17 = (not used)
- // r18 = (not used)
- // r19 = address of itc_lastcycle
- // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
- // r21 = address of mmio_ptr
- // r22 = address of wall_time or monotonic_time
- // r23 = address of shift / value
- // r24 = address mult factor / cycle_last value
- // r25 = itc_lastcycle value
- // r26 = address clocksource cycle_last
- // r27 = (not used)
- // r28 = sequence number at the beginning of critical section
- // r29 = address of itc_jitter
- // r30 = time processing flags / memory address
- // r31 = pointer to result
- // Predicates
- // p6,p7 short term use
- // p8 = timesource ar.itc
- // p9 = timesource mmio64
- // p10 = timesource mmio32 - not used
- // p11 = timesource not to be handled by asm code
- // p12 = memory time source ( = p9 | p10) - not used
- // p13 = do cmpxchg with itc_lastcycle
- // p14 = Divide by 1000
- // p15 = Add monotonic
- //
- // Note that instructions are optimized for McKinley. McKinley can
- // process two bundles simultaneously and therefore we continuously
- // try to feed the CPU two bundles and then a stop.
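The critical section that follows is a hand-scheduled seqlock read; here is a hedged C model of its control flow. Field and helper names are illustrative, and unlike the real code, which keeps the nanosecond base pre-shifted and shifts the sum, this version shifts only the scaled cycle delta:

    #include <stdint.h>

    struct gtod_sketch {
        volatile uint32_t seq;                   /* gtod_lock.sequence */
        uint64_t cycle_last, mask, mult, shift;
        uint64_t sec, nsec;
    };

    static uint64_t read_counter_sketch(void) { return 0; } /* stand-in for ar.itc / MMIO */

    static void gettime_sketch(struct gtod_sketch *g,
                               uint64_t *sec, uint64_t *nsec)
    {
        uint32_t seq;

        do {
            seq = g->seq & ~1u;          /* odd (writer active) never matches -> retry */
            uint64_t delta = (read_counter_sketch() - g->cycle_last) & g->mask;
            *sec  = g->sec;
            *nsec = g->nsec + ((delta * g->mult) >> g->shift);
        } while (g->seq != seq);         /* sequence changed -> redo the read */
    }

The CLOCK_DIVIDE_BY_1000 path further down then converts nanoseconds to microseconds without a divide: ns/1000 = (ns/8)/125, and the multiply by 2361183241434822607 (that is, ceil(2^68/125)) followed by the two right shifts (64 implicit in xmpy.hu, 4 explicit) is exactly that division by 125.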
- - add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r31 // guard against Nat argument -(p6) br.cond.spnt.few .fail_einval - movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address - ;; - ld4 r2 = [r2] // process work pending flags - movl r29 = itc_jitter_data // itc_jitter - add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - mov pr = r30,0xc000 // Set predicates according to function - ;; - and r2 = TIF_ALLWORK_MASK,r2 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time - ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last - cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall - ;; - // Begin critical section -.time_redo: - ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first - ;; - and r28 = ~1,r28 // And make sequence even to force retry if odd - ;; - ld8 r30 = [r21] // clocksource->mmio_ptr - add r24 = IA64_CLKSRC_MULT_OFFSET,r20 - ld4 r2 = [r29] // itc_jitter value - add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 - add r14 = IA64_CLKSRC_MASK_OFFSET,r20 - ;; - ld4 r3 = [r24] // clocksource mult value - ld8 r14 = [r14] // clocksource mask value - cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr - ;; - setf.sig f7 = r3 // Setup for mult scaling of counter -(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 - ld4 r23 = [r23] // clocksource shift value - ld8 r24 = [r26] // get clksrc_cycle_last value -(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control - ;; - .pred.rel.mutex p8,p9 - MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! -(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. -(p13) ld8 r25 = [r19] // get itc_lastcycle value - ld8 r9 = [r22],IA64_TIME_SN_SPEC_SNSEC_OFFSET // sec - ;; - ld8 r8 = [r22],-IA64_TIME_SN_SPEC_SNSEC_OFFSET // snsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) - ;; -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r24 // current_cycle - last_cycle - ;; -(p6) sub r10 = r25,r24 // time we got was less than last_cycle -(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg - ;; -(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv - ;; -(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful - ;; -(p7) sub r10 = r3,r24 // then use new last_cycle instead - ;; - and r10 = r10,r14 // Apply mask - ;; - setf.sig f8 = r10 - nop.i 123 - ;; - // fault check takes 5 cycles and we have spare time -EX(.fail_efault, probe.w.fault r31, 3) - xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) - ;; - getf.sig r2 = f8 - mf - ;; - ld4 r10 = [r20] // gtod_lock.sequence - add r8 = r8,r2 // Add xtime.nsecs - ;; - shr.u r8 = r8,r23 // shift by factor - cmp4.ne p7,p0 = r28,r10 -(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo - // End critical section. - // Now r8=tv->tv_nsec and r9=tv->tv_sec - mov r10 = r0 - movl r2 = 1000000000 - add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 -(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack - ;; -.time_normalize: - mov r21 = r8 - cmp.ge p6,p0 = r8,r2 -(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time - ;; -(p14) setf.sig f8 = r20 -(p6) sub r8 = r8,r2 -(p6) add r9 = 1,r9 // two nops before the branch. -(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod -(p6) br.cond.dpnt.few .time_normalize - ;; - // Divided by 8 though shift. 
Now divide by 125 - // The compiler was able to do that with a multiply - // and a shift and we do the same -EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles -(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it - ;; -(p14) getf.sig r2 = f8 - ;; - mov r8 = r0 -(p14) shr.u r21 = r2, 4 - ;; -EX(.fail_efault, st8 [r31] = r9) -EX(.fail_efault, st8 [r23] = r21) - FSYS_RETURN -.fail_einval: - mov r8 = EINVAL - mov r10 = -1 - FSYS_RETURN -.fail_efault: - mov r8 = EFAULT - mov r10 = -1 - FSYS_RETURN -END(fsys_gettimeofday) - -ENTRY(fsys_clock_gettime) - .prologue - .altrp b6 - .body - cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 - // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC -(p6) br.spnt.few fsys_fallback_syscall - mov r31 = r33 - shl r30 = r32,15 - br.many .gettime -END(fsys_clock_gettime) - -/* - * fsys_getcpu doesn't use the third parameter in this implementation. It reads - * current_thread_info()->cpu and corresponding node in cpu_to_node_map. - */ -ENTRY(fsys_getcpu) - .prologue - .altrp b6 - .body - ;; - add r2=TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r32 // guard against NaT argument - add r3=TI_CPU+IA64_TASK_SIZE,r16 - ;; - ld4 r3=[r3] // M r3 = thread_info->cpu - ld4 r2=[r2] // M r2 = thread_info->flags -(p6) br.cond.spnt.few .fail_einval // B - ;; - tnat.nz p7,p0 = r33 // I guard against NaT argument -(p7) br.cond.spnt.few .fail_einval // B - ;; - cmp.ne p6,p0=r32,r0 - cmp.ne p7,p0=r33,r0 - ;; -#ifdef CONFIG_NUMA - movl r17=cpu_to_node_map - ;; -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - shladd r18=r3,1,r17 - ;; - ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r20) - mov r8=0 - ;; -#else -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r0) - mov r8=0 - ;; -#endif - FSYS_RETURN -END(fsys_getcpu) - -ENTRY(fsys_fallback_syscall) - .prologue - .altrp b6 - .body - /* - * We only get here from light-weight syscall handlers. Thus, we already - * know that r15 contains a valid syscall number. No need to re-check. - */ - adds r17=-1024,r15 - movl r14=sys_call_table - ;; - RSM_PSR_I(p0, r26, r27) - shladd r18=r17,3,r14 - ;; - ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point - MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) - mov r27=ar.rsc - mov r21=ar.fpsr - mov r26=ar.pfs -END(fsys_fallback_syscall) - /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) - .prologue - .altrp b6 - .body - /* - * We get here for syscalls that don't have a lightweight - * handler. For those, we need to bubble down into the kernel - * and that requires setting up a minimal pt_regs structure, - * and initializing the CPU state more or less as if an - * interruption had occurred. To make syscall-restarts work, - * we setup pt_regs such that cr_iip points to the second - * instruction in syscall_via_break. Decrementing the IP - * hence will restart the syscall via break and not - * decrementing IP will return us to the caller, as usual. 
- * Note that we preserve the value of psr.pp rather than - * initializing it from dcr.pp. This makes it possible to - * distinguish fsyscall execution from other privileged - * execution. - * - * On entry: - * - normal fsyscall handler register usage, except - * that we also have: - * - r18: address of syscall entry point - * - r21: ar.fpsr - * - r26: ar.pfs - * - r27: ar.rsc - * - r29: psr - * - * We used to clear some PSR bits here but that requires slow - * serialization. Fortuntely, that isn't really necessary. - * The rationale is as follows: we used to clear bits - * ~PSR_PRESERVED_BITS in PSR.L. Since - * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we - * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. - * However, - * - * PSR.BE : already is turned off in __kernel_syscall_via_epc() - * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets - * invoked - * PSR.DFL: always 0 (kernel never turns it on) - * PSR.DFH: don't care --- kernel never touches f32-f127 on its own - * initiative - * PSR.DI : always 0 (kernel never turns it on) - * PSR.SI : always 0 (kernel never turns it on) - * PSR.DB : don't care --- kernel never enables kernel-level - * breakpoints - * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down - * will trigger a taken branch; the taken-trap-handler then - * converts the syscall into a break-based system-call. - */ - /* - * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. - * The rest we have to synthesize. - */ -# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ - | (0x1 << IA64_PSR_RI_BIT) \ - | IA64_PSR_BN | IA64_PSR_I) - - invala // M0|1 - movl r14=ia64_ret_from_syscall // X - - nop.m 0 - movl r28=__kernel_syscall_via_break // X create cr.iip - ;; - - mov r2=r16 // A get task addr to addl-addressable register - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A - mov r31=pr // I0 save pr (2 cyc) - ;; - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag - addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A - ;; - ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store - nop.i 0 - ;; - mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting -#else - nop.m 0 -#endif - nop.i 0 - ;; - mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore - mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) 
- nop.i 0 - ;; - mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS - movl r8=PSR_ONE_BITS // X - ;; - mov r25=ar.unat // M2 (5 cyc) save ar.unat - mov r19=b6 // I0 save b6 (2 cyc) - mov r20=r1 // A save caller's gp in r20 - ;; - or r29=r8,r29 // A construct cr.ipsr value to save - mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack - - mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) - cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 - br.call.sptk.many b7=ia64_syscall_setup // B - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r30,r19 // elapsed time in user mode - ;; - add r20=r20,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r20 // update stime - st8 [r17]=r21 // update utime - ;; -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - mov rp=r14 // I0 set the real return addr - and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A - ;; - SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs - cmp.eq p8,p0=r3,r0 // A -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - nop.m 0 -(p8) br.call.sptk.many b6=b6 // B (ignore return address) - br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) - - .rodata - .align 8 - .globl fsyscall_table - - data8 fsys_bubble_down -fsyscall_table: - data8 fsys_ni_syscall - data8 0 // exit // 1025 - data8 0 // read - data8 0 // write - data8 0 // open - data8 0 // close - data8 0 // creat // 1030 - data8 0 // link - data8 0 // unlink - data8 0 // execve - data8 0 // chdir - data8 0 // fchdir // 1035 - data8 0 // utimes - data8 0 // mknod - data8 0 // chmod - data8 0 // chown - data8 0 // lseek // 1040 - data8 fsys_getpid // getpid - data8 0 // getppid - data8 0 // mount - data8 0 // umount - data8 0 // setuid // 1045 - data8 0 // getuid - data8 0 // geteuid - data8 0 // ptrace - data8 0 // access - data8 0 // sync // 1050 - data8 0 // fsync - data8 0 // fdatasync - data8 0 // kill - data8 0 // rename - data8 0 // mkdir // 1055 - data8 0 // rmdir - data8 0 // dup - data8 0 // pipe - data8 0 // times - data8 0 // brk // 1060 - data8 0 // setgid - data8 0 // getgid - data8 0 // getegid - data8 0 // acct - data8 0 // ioctl // 1065 - data8 0 // fcntl - data8 0 // umask - data8 0 // chroot - data8 0 // ustat - data8 0 // dup2 // 1070 - data8 0 // setreuid - data8 0 // setregid - data8 0 // getresuid - data8 0 // setresuid - data8 0 // getresgid // 1075 - data8 0 // setresgid - data8 0 // getgroups - data8 0 // setgroups - data8 0 // getpgid - data8 0 // setpgid // 1080 - data8 0 // setsid - data8 0 // getsid - data8 0 // sethostname - data8 0 // setrlimit - data8 0 // getrlimit // 1085 - data8 0 // getrusage - data8 fsys_gettimeofday // gettimeofday - data8 0 // settimeofday - data8 0 // select - data8 0 // poll // 1090 - data8 0 // symlink - data8 0 // readlink - data8 0 // uselib - data8 0 // swapon - data8 0 // swapoff // 1095 - data8 0 // reboot - data8 0 // truncate - data8 0 // ftruncate - data8 0 // fchmod 
- data8 0 // fchown // 1100 - data8 0 // getpriority - data8 0 // setpriority - data8 0 // statfs - data8 0 // fstatfs - data8 0 // gettid // 1105 - data8 0 // semget - data8 0 // semop - data8 0 // semctl - data8 0 // msgget - data8 0 // msgsnd // 1110 - data8 0 // msgrcv - data8 0 // msgctl - data8 0 // shmget - data8 0 // shmat - data8 0 // shmdt // 1115 - data8 0 // shmctl - data8 0 // syslog - data8 0 // setitimer - data8 0 // getitimer - data8 0 // 1120 - data8 0 - data8 0 - data8 0 // vhangup - data8 0 // lchown - data8 0 // remap_file_pages // 1125 - data8 0 // wait4 - data8 0 // sysinfo - data8 0 // clone - data8 0 // setdomainname - data8 0 // newuname // 1130 - data8 0 // adjtimex - data8 0 - data8 0 // init_module - data8 0 // delete_module - data8 0 // 1135 - data8 0 - data8 0 // quotactl - data8 0 // bdflush - data8 0 // sysfs - data8 0 // personality // 1140 - data8 0 // afs_syscall - data8 0 // setfsuid - data8 0 // setfsgid - data8 0 // getdents - data8 0 // flock // 1145 - data8 0 // readv - data8 0 // writev - data8 0 // pread64 - data8 0 // pwrite64 - data8 0 // sysctl // 1150 - data8 0 // mmap - data8 0 // munmap - data8 0 // mlock - data8 0 // mlockall - data8 0 // mprotect // 1155 - data8 0 // mremap - data8 0 // msync - data8 0 // munlock - data8 0 // munlockall - data8 0 // sched_getparam // 1160 - data8 0 // sched_setparam - data8 0 // sched_getscheduler - data8 0 // sched_setscheduler - data8 0 // sched_yield - data8 0 // sched_get_priority_max // 1165 - data8 0 // sched_get_priority_min - data8 0 // sched_rr_get_interval - data8 0 // nanosleep - data8 0 // nfsservctl - data8 0 // prctl // 1170 - data8 0 // getpagesize - data8 0 // mmap2 - data8 0 // pciconfig_read - data8 0 // pciconfig_write - data8 0 // perfmonctl // 1175 - data8 0 // sigaltstack - data8 0 // rt_sigaction - data8 0 // rt_sigpending - data8 0 // rt_sigprocmask - data8 0 // rt_sigqueueinfo // 1180 - data8 0 // rt_sigreturn - data8 0 // rt_sigsuspend - data8 0 // rt_sigtimedwait - data8 0 // getcwd - data8 0 // capget // 1185 - data8 0 // capset - data8 0 // sendfile - data8 0 - data8 0 - data8 0 // socket // 1190 - data8 0 // bind - data8 0 // connect - data8 0 // listen - data8 0 // accept - data8 0 // getsockname // 1195 - data8 0 // getpeername - data8 0 // socketpair - data8 0 // send - data8 0 // sendto - data8 0 // recv // 1200 - data8 0 // recvfrom - data8 0 // shutdown - data8 0 // setsockopt - data8 0 // getsockopt - data8 0 // sendmsg // 1205 - data8 0 // recvmsg - data8 0 // pivot_root - data8 0 // mincore - data8 0 // madvise - data8 0 // newstat // 1210 - data8 0 // newlstat - data8 0 // newfstat - data8 0 // clone2 - data8 0 // getdents64 - data8 0 // getunwind // 1215 - data8 0 // readahead - data8 0 // setxattr - data8 0 // lsetxattr - data8 0 // fsetxattr - data8 0 // getxattr // 1220 - data8 0 // lgetxattr - data8 0 // fgetxattr - data8 0 // listxattr - data8 0 // llistxattr - data8 0 // flistxattr // 1225 - data8 0 // removexattr - data8 0 // lremovexattr - data8 0 // fremovexattr - data8 0 // tkill - data8 0 // futex // 1230 - data8 0 // sched_setaffinity - data8 0 // sched_getaffinity - data8 fsys_set_tid_address // set_tid_address - data8 0 // fadvise64_64 - data8 0 // tgkill // 1235 - data8 0 // exit_group - data8 0 // lookup_dcookie - data8 0 // io_setup - data8 0 // io_destroy - data8 0 // io_getevents // 1240 - data8 0 // io_submit - data8 0 // io_cancel - data8 0 // epoll_create - data8 0 // epoll_ctl - data8 0 // epoll_wait // 1245 - data8 0 // restart_syscall - data8 
0 // semtimedop - data8 0 // timer_create - data8 0 // timer_settime - data8 0 // timer_gettime // 1250 - data8 0 // timer_getoverrun - data8 0 // timer_delete - data8 0 // clock_settime - data8 fsys_clock_gettime // clock_gettime - data8 0 // clock_getres // 1255 - data8 0 // clock_nanosleep - data8 0 // fstatfs64 - data8 0 // statfs64 - data8 0 // mbind - data8 0 // get_mempolicy // 1260 - data8 0 // set_mempolicy - data8 0 // mq_open - data8 0 // mq_unlink - data8 0 // mq_timedsend - data8 0 // mq_timedreceive // 1265 - data8 0 // mq_notify - data8 0 // mq_getsetattr - data8 0 // kexec_load - data8 0 // vserver - data8 0 // waitid // 1270 - data8 0 // add_key - data8 0 // request_key - data8 0 // keyctl - data8 0 // ioprio_set - data8 0 // ioprio_get // 1275 - data8 0 // move_pages - data8 0 // inotify_init - data8 0 // inotify_add_watch - data8 0 // inotify_rm_watch - data8 0 // migrate_pages // 1280 - data8 0 // openat - data8 0 // mkdirat - data8 0 // mknodat - data8 0 // fchownat - data8 0 // futimesat // 1285 - data8 0 // newfstatat - data8 0 // unlinkat - data8 0 // renameat - data8 0 // linkat - data8 0 // symlinkat // 1290 - data8 0 // readlinkat - data8 0 // fchmodat - data8 0 // faccessat - data8 0 - data8 0 // 1295 - data8 0 // unshare - data8 0 // splice - data8 0 // set_robust_list - data8 0 // get_robust_list - data8 0 // sync_file_range // 1300 - data8 0 // tee - data8 0 // vmsplice - data8 0 - data8 fsys_getcpu // getcpu // 1304 - - // fill in zeros for the remaining entries - .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S deleted file mode 100644 index b3ef1c72e132b44e00ade63410380370e25114e2..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate-data.S +++ /dev/null @@ -1,3 +0,0 @@ - .section .data..gate, "aw" - - .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S deleted file mode 100644 index 9f235cd551abf0479377943f86a036129c438928..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate.S +++ /dev/null @@ -1,380 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the code that gets mapped at the upper end of each task's text - * region. For now, it contains the signal trampoline code only. - * - * Copyright (C) 1999-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, - * complications with the linker (which likes to create PLT stubs for branches - * to targets outside the shared object) and to avoid multi-phase kernel builds, we - * simply create minimalistic "patch lists" in special ELF sections. - */ - .section ".data..patch.fsyscall_table", "a" - .previous -#define LOAD_FSYSCALL_TABLE(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data..patch.fsyscall_table", 1b-. - - .section ".data..patch.brl_fsys_bubble_down", "a" - .previous -#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ -[1:](pr)brl.cond.sptk 0; \ - ;; \ - .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. - -GLOBAL_ENTRY(__kernel_syscall_via_break) - .prologue - .altrp b6 - .body - /* - * Note: for (fast) syscall restart to work, the break instruction must be - * the first one in the bundle addressed by syscall_via_break. 
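The patch-list machinery above (the .xdata4 records emitted by LOAD_FSYSCALL_TABLE and BRL_COND_FSYS_BUBBLE_DOWN) leaves a "movl reg=0" or "brl 0" placeholder in the text and records its location as a self-relative 32-bit offset in a dedicated section. At boot the kernel walks each list and splices the real address into the placeholder bundle. A rough sketch of that walk, loosely modeled on arch/ia64/kernel/patch.c (the loop shape and the ia64_patch_imm64() helper are the kernel's; the rest is simplified for illustration):

    typedef int s32;
    typedef unsigned long u64;

    /* Kernel helper that rewrites the 64-bit immediate of a movl bundle. */
    extern void ia64_patch_imm64(u64 insn_addr, u64 val);

    /* Each patch-list entry is ".xdata4 ..., 1b-.": the offset from the
     * entry itself back to the bundle that needs patching. */
    static void patch_fsyscall_table(s32 *start, s32 *end, u64 table_addr)
    {
        for (s32 *offp = start; offp < end; ++offp) {
            u64 ip = (u64)((char *)offp + *offp);  /* bundle to patch */
            ia64_patch_imm64(ip, table_addr);
        }
    }

This is what lets the gate page avoid runtime relocations entirely, as the comment above explains.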
- */ -{ .mib - break 0x100000 - nop.i 0 - br.ret.sptk.many b6 -} -END(__kernel_syscall_via_break) - -# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) -# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) -# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) -# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) -# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) - -# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET -# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET -# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET -# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET -# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET -# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET -# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET -# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET -# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET -# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET -# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET -# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET -# define base0 r2 -# define base1 r3 - /* - * When we get here, the memory stack looks like this: - * - * +===============================+ - * | | - * // struct sigframe // - * | | - * +-------------------------------+ <-- sp+16 - * | 16 byte of scratch | - * | space | - * +-------------------------------+ <-- sp - * - * The register stack looks _exactly_ the way it looked at the time the signal - * occurred. In other words, we're treading on a potential mine-field: each - * incoming general register may be a NaT value (including sp, in which case the - * process ends up dying with a SIGSEGV). - * - * The first thing need to do is a cover to get the registers onto the backing - * store. Once that is done, we invoke the signal handler which may modify some - * of the machine state. After returning from the signal handler, we return - * control to the previous context by executing a sigreturn system call. A signal - * handler may call the rt_sigreturn() function to directly return to a given - * sigcontext. However, the user-level sigreturn() needs to do much more than - * calling the rt_sigreturn() system call as it needs to unwind the stack to - * restore preserved registers that may have been saved on the signal handler's - * call stack. - */ - -#define SIGTRAMP_SAVES \ - .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ - .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ - .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ - .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ - .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ - .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ - .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ - .vframesp SP_OFF+SIGCONTEXT_OFF - -GLOBAL_ENTRY(__kernel_sigtramp) - // describe the state that is active when we get here: - .prologue - SIGTRAMP_SAVES - .body - - .label_state 1 - - adds base0=SIGHANDLER_OFF,sp - adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp - br.call.sptk.many rp=1f -1: - ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel - ld8 r15=[base1] // get address of new RBS base (or NULL) - cover // push args in interrupted frame onto backing store - ;; - cmp.ne p1,p0=r15,r0 // do we need to switch rbs? 
(note: pr is saved by kernel) - mov.m r9=ar.bsp // fetch ar.bsp - .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF -(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) -back_from_setup_rbs: - alloc r8=ar.pfs,0,0,3,0 - ld8 out0=[base0],16 // load arg0 (signum) - adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 - ;; - ld8 out1=[base1] // load arg1 (siginfop) - ld8 r10=[r17],8 // get signal handler entry point - ;; - ld8 out2=[base0] // load arg2 (sigcontextp) - ld8 gp=[r17] // get signal handler's global pointer - adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF - st8 [base0]=r9 // save sc_ar_bsp - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - stf.spill [base0]=f6,32 - stf.spill [base1]=f7,32 - ;; - stf.spill [base0]=f8,32 - stf.spill [base1]=f9,32 - mov b6=r10 - ;; - stf.spill [base0]=f10,32 - stf.spill [base1]=f11,32 - ;; - stf.spill [base0]=f12,32 - stf.spill [base1]=f13,32 - ;; - stf.spill [base0]=f14,32 - stf.spill [base1]=f15,32 - br.call.sptk.many rp=b6 // call the signal handler -.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r15=[base0] // fetch sc_ar_bsp - mov r14=ar.bsp - ;; - cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? -(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) - ;; -back_from_restore_rbs: - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - ldf.fill f6=[base0],32 - ldf.fill f7=[base1],32 - ;; - ldf.fill f8=[base0],32 - ldf.fill f9=[base1],32 - ;; - ldf.fill f10=[base0],32 - ldf.fill f11=[base1],32 - ;; - ldf.fill f12=[base0],32 - ldf.fill f13=[base1],32 - ;; - ldf.fill f14=[base0],32 - ldf.fill f15=[base1],32 - mov r15=__NR_rt_sigreturn - .restore sp // pop .prologue - break __BREAK_SYSCALL - - .prologue - SIGTRAMP_SAVES -setup_rbs: - mov ar.rsc=0 // put RSE into enforced lazy mode - ;; - .save ar.rnat, r19 - mov r19=ar.rnat // save RNaT before switching backing store area - adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp - - mov r18=ar.bspstore - mov ar.bspstore=r15 // switch over to new register backing store area - ;; - - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - st8 [r14]=r19 // save sc_ar_rnat - .body - mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 - adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp - ;; - invala - sub r15=r16,r15 - extr.u r20=r18,3,6 - ;; - mov ar.rsc=0xf // set RSE into eager mode, pl 3 - cmp.eq p8,p0=63,r20 - shl r15=r15,16 - ;; - st8 [r14]=r15 // save sc_loadrs -(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now - .restore sp // pop .prologue - br.cond.sptk back_from_setup_rbs - - .prologue - SIGTRAMP_SAVES - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - .body -restore_rbs: - // On input: - // r14 = bsp1 (bsp at the time of return from signal handler) - // r15 = bsp0 (bsp at the time the signal occurred) - // - // Here, we need to calculate bspstore0, the value that ar.bspstore needs - // to be set to, based on bsp0 and the size of the dirty partition on - // the alternate stack (sc_loadrs >> 16). This can be done with the - // following algorithm: - // - // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); - // - // This is what the code below does. 
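The rse_num_regs()/rse_skip_regs() helpers in that formula account for the RSE's NaT-collection slots: every 64th 8-byte slot of a register backing store (the slot whose number, taken from bits 3..8 of the address, is 0x3f) holds collected NaT bits rather than a stacked register, which is why the assembly below ends up dividing by 63. A C rendering of the helpers, following the definitions in the kernel's asm/rse.h (modulo the ia64_ prefixes):

    #include <stdint.h>

    /* Slot number (0..63) of an RSE backing-store address: bits 3..8. */
    static uint64_t rse_slot_num(uint64_t *addr)
    {
        return ((uint64_t)addr >> 3) & 0x3f;
    }

    /* Number of actual registers between bspstore and bsp, excluding the
     * NaT-collection slots that occupy every 64th position. */
    static uint64_t rse_num_regs(uint64_t *bspstore, uint64_t *bsp)
    {
        uint64_t slots = bsp - bspstore;
        return slots - (rse_slot_num(bspstore) + slots) / 0x40;
    }

    /* Address num_regs registers past addr, skipping NaT-collection
     * slots; the "delta -= 0x3e" matches the "adds r18=-62,r18" below. */
    static uint64_t *rse_skip_regs(uint64_t *addr, long num_regs)
    {
        long delta = rse_slot_num(addr) + num_regs;
        if (num_regs < 0)
            delta -= 0x3e;
        return addr + num_regs + delta / 0x3f;
    }

With these, the formula reads: back up from bsp1 by the dirty-partition size, count how many real registers that spans, and step bsp0 back by the same count to recover bspstore0.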
- // - alloc r2=ar.pfs,0,0,0,0 // alloc null frame - adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp - adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r17=[r16] - ld8 r16=[r18] // get new rnat - extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) - ;; - mov ar.rsc=r17 // put RSE into enforced lazy mode - shr.u r17=r17,16 - ;; - sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) - shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) - ;; - loadrs // restore dirty partition - extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) - ;; - add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) - ;; - shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 - ;; - sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) - movl r17=0x8208208208208209 - ;; - add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) - setf.sig f7=r17 - cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? - ;; -(p7) adds r18=-62,r18 // delta -= 62 - ;; - setf.sig f6=r18 - ;; - xmpy.h f6=f6,f7 - ;; - getf.sig r17=f6 - ;; - add r17=r17,r18 - shr r18=r18,63 - ;; - shr r17=r17,5 - ;; - sub r17=r17,r18 // r17 = delta/63 - ;; - add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) - ;; - shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) - ;; - mov ar.bspstore=r15 // switch back to old register backing store area - ;; - mov ar.rnat=r16 // restore RNaT - mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) - // invala not necessary as that will happen when returning to user-mode - br.cond.sptk back_from_restore_rbs -END(__kernel_sigtramp) - -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; - RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 - MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) - ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - nop.i 0 - ;; - SSM_PSR_I(p8, p14, r25) -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. 
- */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif - SSM_PSR_I(p0, p14, r10) - mov r10=-1 -(p10) mov r8=EINVAL -(p9) mov r8=ENOSYS - FSYS_RETURN - -END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S deleted file mode 100644 index 461c7e69d46565771b58dc2aa0070f36b629276f..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate.lds.S +++ /dev/null @@ -1,108 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Linker script for gate DSO. The gate pages are an ELF shared object - * prelinked to its virtual address, with only one read-only segment and - * one execute-only segment (both fit in one page). This script controls - * its layout. - */ - -#include - -SECTIONS -{ - . = GATE_ADDR + SIZEOF_HEADERS; - - .hash : { *(.hash) } :readable - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note*) } :readable :note - - .dynamic : { *(.dynamic) } :readable :dynamic - - /* - * This linker script is used both with -r and with -shared. For - * the layouts to match, we need to skip more than enough space for - * the dynamic symbol table et al. If this amount is insufficient, - * ld -shared will barf. Just increase it here. - */ - . = GATE_ADDR + 0x600; - - .data..patch : { - __start_gate_mckinley_e9_patchlist = .; - *(.data..patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; - - __start_gate_vtop_patchlist = .; - *(.data..patch.vtop) - __end_gate_vtop_patchlist = .; - - __start_gate_fsyscall_patchlist = .; - *(.data..patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; - - __start_gate_brl_fsys_bubble_down_patchlist = .; - *(.data..patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; - } :readable - - .IA_64.unwind_info : { *(.IA_64.unwind_info*) } - .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind -#ifdef HAVE_BUGGY_SEGREL - .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable -#else - . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); - .text : { *(.text) *(.text.*) } :epc -#endif - - /DISCARD/ : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(__ex_table) - *(__mca_table) - } -} - -/* - * ld does not recognize this name token; use the constant. - */ -#define PT_IA_64_UNWIND 0x70000001 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ -#ifndef HAVE_BUGGY_SEGREL - epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ -#endif - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - unwind PT_IA_64_UNWIND; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_syscall_via_break; - __kernel_syscall_via_epc; - __kernel_sigtramp; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. 
*/ -ENTRY(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S deleted file mode 100644 index e6f45170a4b97a80d9f63f4eeace13d0b9e2f64b..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/head.S +++ /dev/null @@ -1,1173 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Here is where the ball gets rolling as far as the kernel is concerned. - * When control is transferred to _start, the bootload has already - * loaded us to the correct address. All that's left to do here is - * to set up the kernel's global pointer and jump to the kernel - * entry point. - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - * Copyright (C) 2002 Fenghua Yu - * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. - * Copyright (C) 2004 Ashok Raj - * Support for CPU Hotplug - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define SAL_PSR_BITS_TO_SET \ - (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) - -#define SAVE_FROM_REG(src, ptr, dest) \ - mov dest=src;; \ - st8 [ptr]=dest,0x08 - -#define RESTORE_REG(reg, ptr, _tmp) \ - ld8 _tmp=[ptr],0x08;; \ - mov reg=_tmp - -#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -1: \ - SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ - add _idx=1,_idx;; \ - br.cloop.sptk.many 1b - -#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ - add _idx=1, _idx;; \ - br.cloop.sptk.many _lbl - -#define SAVE_ONE_RR(num, _reg, _tmp) \ - movl _tmp=(num<<61);; \ - mov _reg=rr[_tmp] - -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - SAVE_ONE_RR(0,_r0, _tmp);; \ - SAVE_ONE_RR(1,_r1, _tmp);; \ - SAVE_ONE_RR(2,_r2, _tmp);; \ - SAVE_ONE_RR(3,_r3, _tmp);; \ - SAVE_ONE_RR(4,_r4, _tmp);; \ - SAVE_ONE_RR(5,_r5, _tmp);; \ - SAVE_ONE_RR(6,_r6, _tmp);; \ - SAVE_ONE_RR(7,_r7, _tmp);; - -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - st8 [ptr]=_r0, 8;; \ - st8 [ptr]=_r1, 8;; \ - st8 [ptr]=_r2, 8;; \ - st8 [ptr]=_r3, 8;; \ - st8 [ptr]=_r4, 8;; \ - st8 [ptr]=_r5, 8;; \ - st8 [ptr]=_r6, 8;; \ - st8 [ptr]=_r7, 8;; - -#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ - mov ar.lc=0x08-1;; \ - movl _idx1=0x00;; \ -RestRR: \ - dep.z _idx2=_idx1,61,3;; \ - ld8 _tmp=[ptr],8;; \ - mov rr[_idx2]=_tmp;; \ - srlz.d;; \ - add _idx1=1,_idx1;; \ - br.cloop.sptk.few RestRR - -#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ - movl reg1=sal_state_for_booting_cpu;; \ - ld8 reg2=[reg1];; - -/* - * Adjust region registers saved before starting to save - * break regs and rest of the states that need to be preserved. 
- */ -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ - SAVE_FROM_REG(b0,_reg1,_reg2);; \ - SAVE_FROM_REG(b1,_reg1,_reg2);; \ - SAVE_FROM_REG(b2,_reg1,_reg2);; \ - SAVE_FROM_REG(b3,_reg1,_reg2);; \ - SAVE_FROM_REG(b4,_reg1,_reg2);; \ - SAVE_FROM_REG(b5,_reg1,_reg2);; \ - st8 [_reg1]=r1,0x08;; \ - st8 [_reg1]=r12,0x08;; \ - st8 [_reg1]=r13,0x08;; \ - SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ - st8 [_reg1]=r4,0x08;; \ - st8 [_reg1]=r5,0x08;; \ - st8 [_reg1]=r6,0x08;; \ - st8 [_reg1]=r7,0x08;; \ - st8 [_reg1]=_pred,0x08;; \ - SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ - stf.spill.nta [_reg1]=f2,16;; \ - stf.spill.nta [_reg1]=f3,16;; \ - stf.spill.nta [_reg1]=f4,16;; \ - stf.spill.nta [_reg1]=f5,16;; \ - stf.spill.nta [_reg1]=f16,16;; \ - stf.spill.nta [_reg1]=f17,16;; \ - stf.spill.nta [_reg1]=f18,16;; \ - stf.spill.nta [_reg1]=f19,16;; \ - stf.spill.nta [_reg1]=f20,16;; \ - stf.spill.nta [_reg1]=f21,16;; \ - stf.spill.nta [_reg1]=f22,16;; \ - stf.spill.nta [_reg1]=f23,16;; \ - stf.spill.nta [_reg1]=f24,16;; \ - stf.spill.nta [_reg1]=f25,16;; \ - stf.spill.nta [_reg1]=f26,16;; \ - stf.spill.nta [_reg1]=f27,16;; \ - stf.spill.nta [_reg1]=f28,16;; \ - stf.spill.nta [_reg1]=f29,16;; \ - stf.spill.nta [_reg1]=f30,16;; \ - stf.spill.nta [_reg1]=f31,16;; - -#else -#define SET_AREA_FOR_BOOTING_CPU(a1, a2) -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#endif - -#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ - movl _tmp1=(num << 61);; \ - mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ - mov rr[_tmp1]=_tmp2 - - __PAGE_ALIGNED_DATA - - .global empty_zero_page -EXPORT_DATA_SYMBOL_GPL(empty_zero_page) -empty_zero_page: - .skip PAGE_SIZE - - .global swapper_pg_dir -swapper_pg_dir: - .skip PAGE_SIZE - - .rodata -halt_msg: - stringz "Halting kernel\n" - - __REF - - .global start_ap - - /* - * Start the kernel. When the bootloader passes control to _start(), r28 - * points to the address of the boot parameter area. Execution reaches - * here in physical mode. 
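The SET_ONE_RR macro above packs a region register value according to the architected layout: the VHPT-walker enable in bit 0, the log2 page size in bits 7..2, and the 24-bit region ID from bit 8 up (ia64_rid() derives the region ID from the kernel region ID and the region number). A small sketch of the encoding; make_rr is an illustrative name:

    typedef unsigned long u64;

    /* Architected region-register layout: bit 0 = ve (VHPT walker
     * enable), bits 7..2 = ps (log2 page size), bits 31..8 = rid. */
    static u64 make_rr(u64 rid, unsigned page_shift, int vhpt_enable)
    {
        return (rid << 8) | ((u64)page_shift << 2) | (vhpt_enable & 1);
    }

So the rr[6]/rr[7] setup in _start installs VHPT-disabled, IA64_GRANULE_SHIFT-sized policies for regions 6 and 7, exactly as the per-register comments there describe.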
- */ -GLOBAL_ENTRY(_start) -start_ap: - .prologue - .save rp, r0 // terminate unwind chain with a NULL rp - .body - - rsm psr.i | psr.ic - ;; - srlz.i - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - /* - * Save the region registers, predicate before they get clobbered - */ - SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); - mov r25=pr;; - - /* - * Initialize kernel region registers: - * rr[0]: VHPT enabled, page size = PAGE_SHIFT - * rr[1]: VHPT enabled, page size = PAGE_SHIFT - * rr[2]: VHPT enabled, page size = PAGE_SHIFT - * rr[3]: VHPT enabled, page size = PAGE_SHIFT - * rr[4]: VHPT enabled, page size = PAGE_SHIFT - * rr[5]: VHPT enabled, page size = PAGE_SHIFT - * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * We initialize all of them to prevent inadvertently assuming - * something about the state of address translation early in boot. - */ - SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; - SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; - /* - * Now pin mappings into the TLB for kernel text and data - */ - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - - /* - * Switch into virtual mode: - */ - movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI) - ;; - mov cr.ipsr=r16 - movl r17=1f - ;; - mov cr.iip=r17 - mov cr.ifs=r0 - ;; - rfi - ;; -1: // now we are in virtual mode - - SET_AREA_FOR_BOOTING_CPU(r2, r16); - - STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); - SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) - ;; - - // set IVT entry point---can't access I/O ports without it - movl r3=ia64_ivt - ;; - mov cr.iva=r3 - movl r2=FPSR_DEFAULT - ;; - srlz.i - movl gp=__gp - - mov ar.fpsr=r2 - ;; - -#define isAP p2 // are we an Application Processor? -#define isBP p3 // are we the Bootstrap Processor? - -#ifdef CONFIG_SMP - /* - * Find the init_task for the currently booting CPU. At poweron, and in - * UP mode, task_for_booting_cpu is NULL. - */ - movl r3=task_for_booting_cpu - ;; - ld8 r3=[r3] - movl r2=init_task - ;; - cmp.eq isBP,isAP=r3,r0 - ;; -(isAP) mov r2=r3 -#else - movl r2=init_task - cmp.eq isBP,isAP=r0,r0 -#endif - ;; - tpa r3=r2 // r3 == phys addr of task struct - mov r16=-1 -(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it - - // load mapping for stack (virtaddr in r2, physaddr in r3) - rsm psr.ic - movl r17=PAGE_KERNEL - ;; - srlz.d - dep r18=0,r3,0,12 - ;; - or r18=r17,r18 - dep r2=-1,r3,61,3 // IMVA of task - ;; - mov r17=rr[r2] - shr.u r16=r3,IA64_GRANULE_SHIFT - ;; - dep r17=0,r17,8,24 - ;; - mov cr.itir=r17 - mov cr.ifa=r2 - - mov r19=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r19]=r18 - ;; - ssm psr.ic - srlz.d - ;; - -.load_current: - // load the "current" pointer (r13) and ar.k6 with the current task - mov IA64_KR(CURRENT)=r2 // virtual address - mov IA64_KR(CURRENT_STACK)=r16 - mov r13=r2 - /* - * Reserve space at the top of the stack for "struct pt_regs". 
Kernel - * threads don't store interesting values in that structure, but the space - * still needs to be there because time-critical stuff such as the context - * switching can be implemented more efficiently (for example, __switch_to() - * always sets the psr.dfh bit of the task it is switching to). - */ - - addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 - addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE - mov ar.rsc=0 // place RSE in enforced lazy mode - ;; - loadrs // clear the dirty partition - movl r19=__phys_per_cpu_start - mov r18=PERCPU_PAGE_SIZE - ;; -#ifndef CONFIG_SMP - add r19=r19,r18 - ;; -#else -(isAP) br.few 2f - movl r20=__cpu0_per_cpu - ;; - shr.u r18=r18,3 -1: - ld8 r21=[r19],8;; - st8[r20]=r21,8 - adds r18=-1,r18;; - cmp4.lt p7,p6=0,r18 -(p7) br.cond.dptk.few 1b - mov r19=r20 - ;; -2: -#endif - tpa r19=r19 - ;; - .pred.rel.mutex isBP,isAP -(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0 -(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base - ;; - mov ar.bspstore=r2 // establish the new RSE stack - ;; - mov ar.rsc=0x3 // place RSE in eager mode - -(isBP) dep r28=-1,r28,61,3 // make address virtual -(isBP) movl r2=ia64_boot_param - ;; -(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader - -#ifdef CONFIG_SMP -(isAP) br.call.sptk.many rp=start_secondary -.ret0: -(isAP) br.cond.sptk self -#endif - - // This is executed by the bootstrap processor (bsp) only: - -#ifdef CONFIG_IA64_FW_EMU - // initialize PAL & SAL emulator: - br.call.sptk.many rp=sys_fw_init -.ret1: -#endif - br.call.sptk.many rp=start_kernel -.ret2: addl r3=@ltoff(halt_msg),gp - ;; - alloc r2=ar.pfs,8,0,2,0 - ;; - ld8 out0=[r3] - br.call.sptk.many b0=console_print - -self: hint @pause - br.sptk.many self // endless loop -END(_start) - - .text - -GLOBAL_ENTRY(ia64_save_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - mov r20=ar.lc // preserve ar.lc - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=0 - add r19=IA64_NUM_DBG_REGS*8,in0 - ;; -1: mov r16=dbr[r18] -#ifdef CONFIG_ITANIUM - ;; - srlz.d -#endif - mov r17=ibr[r18] - add r18=1,r18 - ;; - st8.nta [in0]=r16,8 - st8.nta [r19]=r17,8 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_save_debug_regs) - -GLOBAL_ENTRY(ia64_load_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - lfetch.nta [in0] - mov r20=ar.lc // preserve ar.lc - add r19=IA64_NUM_DBG_REGS*8,in0 - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=-1 - ;; -1: ld8.nta r16=[in0],8 - ld8.nta r17=[r19],8 - add r18=1,r18 - ;; - mov dbr[r18]=r16 -#ifdef CONFIG_ITANIUM - ;; - srlz.d // Errata 132 (NoFix status) -#endif - mov ibr[r18]=r17 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_load_debug_regs) - -GLOBAL_ENTRY(__ia64_save_fpu) - alloc r2=ar.pfs,1,4,0,0 - adds loc0=96*16-16,in0 - adds loc1=96*16-16-128,in0 - ;; - stf.spill.nta [loc0]=f127,-256 - stf.spill.nta [loc1]=f119,-256 - ;; - stf.spill.nta [loc0]=f111,-256 - stf.spill.nta [loc1]=f103,-256 - ;; - stf.spill.nta [loc0]=f95,-256 - stf.spill.nta [loc1]=f87,-256 - ;; - stf.spill.nta [loc0]=f79,-256 - stf.spill.nta [loc1]=f71,-256 - ;; - stf.spill.nta [loc0]=f63,-256 - stf.spill.nta [loc1]=f55,-256 - adds loc2=96*16-32,in0 - ;; - stf.spill.nta [loc0]=f47,-256 - stf.spill.nta [loc1]=f39,-256 - adds loc3=96*16-32-128,in0 - ;; - stf.spill.nta [loc2]=f126,-256 - stf.spill.nta [loc3]=f118,-256 - ;; - stf.spill.nta [loc2]=f110,-256 - stf.spill.nta [loc3]=f102,-256 - ;; - stf.spill.nta [loc2]=f94,-256 - stf.spill.nta [loc3]=f86,-256 - ;; - 
stf.spill.nta [loc2]=f78,-256 - stf.spill.nta [loc3]=f70,-256 - ;; - stf.spill.nta [loc2]=f62,-256 - stf.spill.nta [loc3]=f54,-256 - adds loc0=96*16-48,in0 - ;; - stf.spill.nta [loc2]=f46,-256 - stf.spill.nta [loc3]=f38,-256 - adds loc1=96*16-48-128,in0 - ;; - stf.spill.nta [loc0]=f125,-256 - stf.spill.nta [loc1]=f117,-256 - ;; - stf.spill.nta [loc0]=f109,-256 - stf.spill.nta [loc1]=f101,-256 - ;; - stf.spill.nta [loc0]=f93,-256 - stf.spill.nta [loc1]=f85,-256 - ;; - stf.spill.nta [loc0]=f77,-256 - stf.spill.nta [loc1]=f69,-256 - ;; - stf.spill.nta [loc0]=f61,-256 - stf.spill.nta [loc1]=f53,-256 - adds loc2=96*16-64,in0 - ;; - stf.spill.nta [loc0]=f45,-256 - stf.spill.nta [loc1]=f37,-256 - adds loc3=96*16-64-128,in0 - ;; - stf.spill.nta [loc2]=f124,-256 - stf.spill.nta [loc3]=f116,-256 - ;; - stf.spill.nta [loc2]=f108,-256 - stf.spill.nta [loc3]=f100,-256 - ;; - stf.spill.nta [loc2]=f92,-256 - stf.spill.nta [loc3]=f84,-256 - ;; - stf.spill.nta [loc2]=f76,-256 - stf.spill.nta [loc3]=f68,-256 - ;; - stf.spill.nta [loc2]=f60,-256 - stf.spill.nta [loc3]=f52,-256 - adds loc0=96*16-80,in0 - ;; - stf.spill.nta [loc2]=f44,-256 - stf.spill.nta [loc3]=f36,-256 - adds loc1=96*16-80-128,in0 - ;; - stf.spill.nta [loc0]=f123,-256 - stf.spill.nta [loc1]=f115,-256 - ;; - stf.spill.nta [loc0]=f107,-256 - stf.spill.nta [loc1]=f99,-256 - ;; - stf.spill.nta [loc0]=f91,-256 - stf.spill.nta [loc1]=f83,-256 - ;; - stf.spill.nta [loc0]=f75,-256 - stf.spill.nta [loc1]=f67,-256 - ;; - stf.spill.nta [loc0]=f59,-256 - stf.spill.nta [loc1]=f51,-256 - adds loc2=96*16-96,in0 - ;; - stf.spill.nta [loc0]=f43,-256 - stf.spill.nta [loc1]=f35,-256 - adds loc3=96*16-96-128,in0 - ;; - stf.spill.nta [loc2]=f122,-256 - stf.spill.nta [loc3]=f114,-256 - ;; - stf.spill.nta [loc2]=f106,-256 - stf.spill.nta [loc3]=f98,-256 - ;; - stf.spill.nta [loc2]=f90,-256 - stf.spill.nta [loc3]=f82,-256 - ;; - stf.spill.nta [loc2]=f74,-256 - stf.spill.nta [loc3]=f66,-256 - ;; - stf.spill.nta [loc2]=f58,-256 - stf.spill.nta [loc3]=f50,-256 - adds loc0=96*16-112,in0 - ;; - stf.spill.nta [loc2]=f42,-256 - stf.spill.nta [loc3]=f34,-256 - adds loc1=96*16-112-128,in0 - ;; - stf.spill.nta [loc0]=f121,-256 - stf.spill.nta [loc1]=f113,-256 - ;; - stf.spill.nta [loc0]=f105,-256 - stf.spill.nta [loc1]=f97,-256 - ;; - stf.spill.nta [loc0]=f89,-256 - stf.spill.nta [loc1]=f81,-256 - ;; - stf.spill.nta [loc0]=f73,-256 - stf.spill.nta [loc1]=f65,-256 - ;; - stf.spill.nta [loc0]=f57,-256 - stf.spill.nta [loc1]=f49,-256 - adds loc2=96*16-128,in0 - ;; - stf.spill.nta [loc0]=f41,-256 - stf.spill.nta [loc1]=f33,-256 - adds loc3=96*16-128-128,in0 - ;; - stf.spill.nta [loc2]=f120,-256 - stf.spill.nta [loc3]=f112,-256 - ;; - stf.spill.nta [loc2]=f104,-256 - stf.spill.nta [loc3]=f96,-256 - ;; - stf.spill.nta [loc2]=f88,-256 - stf.spill.nta [loc3]=f80,-256 - ;; - stf.spill.nta [loc2]=f72,-256 - stf.spill.nta [loc3]=f64,-256 - ;; - stf.spill.nta [loc2]=f56,-256 - stf.spill.nta [loc3]=f48,-256 - ;; - stf.spill.nta [loc2]=f40 - stf.spill.nta [loc3]=f32 - br.ret.sptk.many rp -END(__ia64_save_fpu) - -GLOBAL_ENTRY(__ia64_load_fpu) - alloc r2=ar.pfs,1,2,0,0 - adds r3=128,in0 - adds r14=256,in0 - adds r15=384,in0 - mov loc0=512 - mov loc1=-1024+16 - ;; - ldf.fill.nta f32=[in0],loc0 - ldf.fill.nta f40=[ r3],loc0 - ldf.fill.nta f48=[r14],loc0 - ldf.fill.nta f56=[r15],loc0 - ;; - ldf.fill.nta f64=[in0],loc0 - ldf.fill.nta f72=[ r3],loc0 - ldf.fill.nta f80=[r14],loc0 - ldf.fill.nta f88=[r15],loc0 - ;; - ldf.fill.nta f96=[in0],loc1 - ldf.fill.nta f104=[ r3],loc1 - ldf.fill.nta 
f112=[r14],loc1 - ldf.fill.nta f120=[r15],loc1 - ;; - ldf.fill.nta f33=[in0],loc0 - ldf.fill.nta f41=[ r3],loc0 - ldf.fill.nta f49=[r14],loc0 - ldf.fill.nta f57=[r15],loc0 - ;; - ldf.fill.nta f65=[in0],loc0 - ldf.fill.nta f73=[ r3],loc0 - ldf.fill.nta f81=[r14],loc0 - ldf.fill.nta f89=[r15],loc0 - ;; - ldf.fill.nta f97=[in0],loc1 - ldf.fill.nta f105=[ r3],loc1 - ldf.fill.nta f113=[r14],loc1 - ldf.fill.nta f121=[r15],loc1 - ;; - ldf.fill.nta f34=[in0],loc0 - ldf.fill.nta f42=[ r3],loc0 - ldf.fill.nta f50=[r14],loc0 - ldf.fill.nta f58=[r15],loc0 - ;; - ldf.fill.nta f66=[in0],loc0 - ldf.fill.nta f74=[ r3],loc0 - ldf.fill.nta f82=[r14],loc0 - ldf.fill.nta f90=[r15],loc0 - ;; - ldf.fill.nta f98=[in0],loc1 - ldf.fill.nta f106=[ r3],loc1 - ldf.fill.nta f114=[r14],loc1 - ldf.fill.nta f122=[r15],loc1 - ;; - ldf.fill.nta f35=[in0],loc0 - ldf.fill.nta f43=[ r3],loc0 - ldf.fill.nta f51=[r14],loc0 - ldf.fill.nta f59=[r15],loc0 - ;; - ldf.fill.nta f67=[in0],loc0 - ldf.fill.nta f75=[ r3],loc0 - ldf.fill.nta f83=[r14],loc0 - ldf.fill.nta f91=[r15],loc0 - ;; - ldf.fill.nta f99=[in0],loc1 - ldf.fill.nta f107=[ r3],loc1 - ldf.fill.nta f115=[r14],loc1 - ldf.fill.nta f123=[r15],loc1 - ;; - ldf.fill.nta f36=[in0],loc0 - ldf.fill.nta f44=[ r3],loc0 - ldf.fill.nta f52=[r14],loc0 - ldf.fill.nta f60=[r15],loc0 - ;; - ldf.fill.nta f68=[in0],loc0 - ldf.fill.nta f76=[ r3],loc0 - ldf.fill.nta f84=[r14],loc0 - ldf.fill.nta f92=[r15],loc0 - ;; - ldf.fill.nta f100=[in0],loc1 - ldf.fill.nta f108=[ r3],loc1 - ldf.fill.nta f116=[r14],loc1 - ldf.fill.nta f124=[r15],loc1 - ;; - ldf.fill.nta f37=[in0],loc0 - ldf.fill.nta f45=[ r3],loc0 - ldf.fill.nta f53=[r14],loc0 - ldf.fill.nta f61=[r15],loc0 - ;; - ldf.fill.nta f69=[in0],loc0 - ldf.fill.nta f77=[ r3],loc0 - ldf.fill.nta f85=[r14],loc0 - ldf.fill.nta f93=[r15],loc0 - ;; - ldf.fill.nta f101=[in0],loc1 - ldf.fill.nta f109=[ r3],loc1 - ldf.fill.nta f117=[r14],loc1 - ldf.fill.nta f125=[r15],loc1 - ;; - ldf.fill.nta f38 =[in0],loc0 - ldf.fill.nta f46 =[ r3],loc0 - ldf.fill.nta f54 =[r14],loc0 - ldf.fill.nta f62 =[r15],loc0 - ;; - ldf.fill.nta f70 =[in0],loc0 - ldf.fill.nta f78 =[ r3],loc0 - ldf.fill.nta f86 =[r14],loc0 - ldf.fill.nta f94 =[r15],loc0 - ;; - ldf.fill.nta f102=[in0],loc1 - ldf.fill.nta f110=[ r3],loc1 - ldf.fill.nta f118=[r14],loc1 - ldf.fill.nta f126=[r15],loc1 - ;; - ldf.fill.nta f39 =[in0],loc0 - ldf.fill.nta f47 =[ r3],loc0 - ldf.fill.nta f55 =[r14],loc0 - ldf.fill.nta f63 =[r15],loc0 - ;; - ldf.fill.nta f71 =[in0],loc0 - ldf.fill.nta f79 =[ r3],loc0 - ldf.fill.nta f87 =[r14],loc0 - ldf.fill.nta f95 =[r15],loc0 - ;; - ldf.fill.nta f103=[in0] - ldf.fill.nta f111=[ r3] - ldf.fill.nta f119=[r14] - ldf.fill.nta f127=[r15] - br.ret.sptk.many rp -END(__ia64_load_fpu) - -GLOBAL_ENTRY(__ia64_init_fpu) - stf.spill [sp]=f0 // M3 - mov f32=f0 // F - nop.b 0 - - ldfps f33,f34=[sp] // M0 - ldfps f35,f36=[sp] // M1 - mov f37=f0 // F - ;; - - setf.s f38=r0 // M2 - setf.s f39=r0 // M3 - mov f40=f0 // F - - ldfps f41,f42=[sp] // M0 - ldfps f43,f44=[sp] // M1 - mov f45=f0 // F - - setf.s f46=r0 // M2 - setf.s f47=r0 // M3 - mov f48=f0 // F - - ldfps f49,f50=[sp] // M0 - ldfps f51,f52=[sp] // M1 - mov f53=f0 // F - - setf.s f54=r0 // M2 - setf.s f55=r0 // M3 - mov f56=f0 // F - - ldfps f57,f58=[sp] // M0 - ldfps f59,f60=[sp] // M1 - mov f61=f0 // F - - setf.s f62=r0 // M2 - setf.s f63=r0 // M3 - mov f64=f0 // F - - ldfps f65,f66=[sp] // M0 - ldfps f67,f68=[sp] // M1 - mov f69=f0 // F - - setf.s f70=r0 // M2 - setf.s f71=r0 // M3 - mov f72=f0 // F - - ldfps f73,f74=[sp] // M0 - 
ldfps f75,f76=[sp] // M1 - mov f77=f0 // F - - setf.s f78=r0 // M2 - setf.s f79=r0 // M3 - mov f80=f0 // F - - ldfps f81,f82=[sp] // M0 - ldfps f83,f84=[sp] // M1 - mov f85=f0 // F - - setf.s f86=r0 // M2 - setf.s f87=r0 // M3 - mov f88=f0 // F - - /* - * When the instructions are cached, it would be faster to initialize - * the remaining registers with simply mov instructions (F-unit). - * This gets the time down to ~29 cycles. However, this would use up - * 33 bundles, whereas continuing with the above pattern yields - * 10 bundles and ~30 cycles. - */ - - ldfps f89,f90=[sp] // M0 - ldfps f91,f92=[sp] // M1 - mov f93=f0 // F - - setf.s f94=r0 // M2 - setf.s f95=r0 // M3 - mov f96=f0 // F - - ldfps f97,f98=[sp] // M0 - ldfps f99,f100=[sp] // M1 - mov f101=f0 // F - - setf.s f102=r0 // M2 - setf.s f103=r0 // M3 - mov f104=f0 // F - - ldfps f105,f106=[sp] // M0 - ldfps f107,f108=[sp] // M1 - mov f109=f0 // F - - setf.s f110=r0 // M2 - setf.s f111=r0 // M3 - mov f112=f0 // F - - ldfps f113,f114=[sp] // M0 - ldfps f115,f116=[sp] // M1 - mov f117=f0 // F - - setf.s f118=r0 // M2 - setf.s f119=r0 // M3 - mov f120=f0 // F - - ldfps f121,f122=[sp] // M0 - ldfps f123,f124=[sp] // M1 - mov f125=f0 // F - - setf.s f126=r0 // M2 - setf.s f127=r0 // M3 - br.ret.sptk.many rp // F -END(__ia64_init_fpu) - -/* - * Switch execution mode from virtual to physical - * - * Inputs: - * r16 = new psr to establish - * Output: - * r19 = old virtual address of ar.bsp - * r20 = old virtual address of sp - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_phys) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_phys,r15 - - mov r19=ar.bsp - mov r20=sp - mov r14=rp // get return address into a general register - ;; - - // going to physical mode, use tpa to translate virt->phys - tpa r17=r19 - tpa r3=r3 - tpa sp=sp - tpa r14=r14 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r17 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_phys) - -/* - * Switch execution mode from physical to virtual - * - * Inputs: - * r16 = new psr to establish - * r19 = new bspstore to establish - * r20 = new sp to establish - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_virt) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_virt,r15 - - mov r14=rp // get return address into a general register - ;; - - // going to virtual - // - for code addresses, set upper bits of addr to KERNEL_START - // - for stack addresses, copy from input argument - movl r18=KERNEL_START - dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - mov sp=r20 - ;; - or r3=r3,r18 - or r14=r14,r18 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r19 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_virt) - -GLOBAL_ENTRY(ia64_delay_loop) - .prologue -{ nop 0 // work around GAS unwind 
info generation bug... - .save ar.lc,r2 - mov r2=ar.lc - .body - ;; - mov ar.lc=r32 -} - ;; - // force loop to be 32-byte aligned (GAS bug means we cannot use .align - // inside function body without corrupting unwind info). -{ nop 0 } -1: br.cloop.sptk.few 1b - ;; - mov ar.lc=r2 - br.ret.sptk.many rp -END(ia64_delay_loop) - -/* - * Return a CPU-local timestamp in nano-seconds. This timestamp is - * NOT synchronized across CPUs its return value must never be - * compared against the values returned on another CPU. The usage in - * kernel/sched/core.c ensures that. - * - * The return-value of sched_clock() is NOT supposed to wrap-around. - * If it did, it would cause some scheduling hiccups (at the worst). - * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even - * that would happen only once every 5+ years. - * - * The code below basically calculates: - * - * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT - * - * except that the multiplication and the shift are done with 128-bit - * intermediate precision so that we can produce a full 64-bit result. - */ -GLOBAL_ENTRY(ia64_native_sched_clock) - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - mov.m r9=ar.itc // fetch cycle-counter (35 cyc) - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(ia64_native_sched_clock) - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -GLOBAL_ENTRY(cycle_to_nsec) - alloc r16=ar.pfs,1,0,0,0 - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r32 - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(cycle_to_nsec) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -#ifdef CONFIG_IA64_BRL_EMU - -/* - * Assembly routines used by brl_emu.c to set preserved register state. 
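In ia64_native_sched_clock and cycle_to_nsec above, the xmpy.lu/xmpy.hu pair forms the full 128-bit product of the cycle count and nsec_per_cyc, and shrp extracts the 64-bit result window from it. The same computation in C, using GCC's unsigned __int128 (nsec_per_cyc is the per-CPU fixed-point scale factor; the function name and the shift value of 30 are illustrative stand-ins for the kernel's IA64_NSEC_PER_CYC_SHIFT):

    #include <stdint.h>

    /* Fixed-point fraction bits of nsec_per_cyc; 30 is illustrative. */
    #define NSEC_PER_CYC_SHIFT 30

    static uint64_t cycles_to_ns(uint64_t cycles, uint64_t nsec_per_cyc)
    {
        /* xmpy.lu/xmpy.hu: low and high halves of the 128-bit product;
         * shrp: take bits [SHIFT+63 .. SHIFT] of that product. */
        unsigned __int128 prod = (unsigned __int128)cycles * nsec_per_cyc;
        return (uint64_t)(prod >> NSEC_PER_CYC_SHIFT);
    }

The 128-bit intermediate is what keeps the full 64-bit result exact, which a plain 64-bit multiply-then-shift could not do.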
- */ - -#define SET_REG(reg) \ - GLOBAL_ENTRY(ia64_set_##reg); \ - alloc r16=ar.pfs,1,0,0,0; \ - mov reg=r32; \ - ;; \ - br.ret.sptk.many rp; \ - END(ia64_set_##reg) - -SET_REG(b1); -SET_REG(b2); -SET_REG(b3); -SET_REG(b4); -SET_REG(b5); - -#endif /* CONFIG_IA64_BRL_EMU */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU -GLOBAL_ENTRY(ia64_jump_to_sal) - alloc r16=ar.pfs,1,0,0,0;; - rsm psr.i | psr.ic -{ - flushrs - srlz.i -} - tpa r25=in0 - movl r18=tlb_purge_done;; - DATA_VA_TO_PA(r18);; - mov b1=r18 // Return location - movl r18=ia64_do_tlb_purge;; - DATA_VA_TO_PA(r18);; - mov b2=r18 // doing tlb_flush work - mov ar.rsc=0 // Put RSE in enforced lazy, LE mode - movl r17=1f;; - DATA_VA_TO_PA(r17);; - mov cr.iip=r17 - movl r16=SAL_PSR_BITS_TO_SET;; - mov cr.ipsr=r16 - mov cr.ifs=r0;; - rfi;; // note: this unmask MCA/INIT (psr.mc) -1: - /* - * Invalidate all TLB data/inst - */ - br.sptk.many b2;; // jump to tlb purge code - -tlb_purge_done: - RESTORE_REGION_REGS(r25, r17,r18,r19);; - RESTORE_REG(b0, r25, r17);; - RESTORE_REG(b1, r25, r17);; - RESTORE_REG(b2, r25, r17);; - RESTORE_REG(b3, r25, r17);; - RESTORE_REG(b4, r25, r17);; - RESTORE_REG(b5, r25, r17);; - ld8 r1=[r25],0x08;; - ld8 r12=[r25],0x08;; - ld8 r13=[r25],0x08;; - RESTORE_REG(ar.fpsr, r25, r17);; - RESTORE_REG(ar.pfs, r25, r17);; - RESTORE_REG(ar.rnat, r25, r17);; - RESTORE_REG(ar.unat, r25, r17);; - RESTORE_REG(ar.bspstore, r25, r17);; - RESTORE_REG(cr.dcr, r25, r17);; - RESTORE_REG(cr.iva, r25, r17);; - RESTORE_REG(cr.pta, r25, r17);; - srlz.d;; // required not to violate RAW dependency - RESTORE_REG(cr.itv, r25, r17);; - RESTORE_REG(cr.pmv, r25, r17);; - RESTORE_REG(cr.cmcv, r25, r17);; - RESTORE_REG(cr.lrr0, r25, r17);; - RESTORE_REG(cr.lrr1, r25, r17);; - ld8 r4=[r25],0x08;; - ld8 r5=[r25],0x08;; - ld8 r6=[r25],0x08;; - ld8 r7=[r25],0x08;; - ld8 r17=[r25],0x08;; - mov pr=r17,-1;; - RESTORE_REG(ar.lc, r25, r17);; - /* - * Now Restore floating point regs - */ - ldf.fill.nta f2=[r25],16;; - ldf.fill.nta f3=[r25],16;; - ldf.fill.nta f4=[r25],16;; - ldf.fill.nta f5=[r25],16;; - ldf.fill.nta f16=[r25],16;; - ldf.fill.nta f17=[r25],16;; - ldf.fill.nta f18=[r25],16;; - ldf.fill.nta f19=[r25],16;; - ldf.fill.nta f20=[r25],16;; - ldf.fill.nta f21=[r25],16;; - ldf.fill.nta f22=[r25],16;; - ldf.fill.nta f23=[r25],16;; - ldf.fill.nta f24=[r25],16;; - ldf.fill.nta f25=[r25],16;; - ldf.fill.nta f26=[r25],16;; - ldf.fill.nta f27=[r25],16;; - ldf.fill.nta f28=[r25],16;; - ldf.fill.nta f29=[r25],16;; - ldf.fill.nta f30=[r25],16;; - ldf.fill.nta f31=[r25],16;; - - /* - * Now that we have done all the register restores - * we are now ready for the big DIVE to SAL Land - */ - ssm psr.ic;; - srlz.d;; - br.ret.sptk.many b0;; -END(ia64_jump_to_sal) -#endif /* CONFIG_HOTPLUG_CPU */ - -#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S deleted file mode 100644 index 1efcbe5f0c78183f7571a58a5c0fe98767a04d34..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/ivt.S +++ /dev/null @@ -1,1689 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/ivt.S - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick - * Suresh Siddha - * Kenneth Chen - * Fenghua Yu - * - * 00/08/23 Asit Mallick TLB handling for SMP - * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. 
- * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer - * Xen paravirtualization - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - * Yaozu (Eddie) Dong - */ -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define DBG_FAULT(i) -#endif - -#include "minstate.h" - -#define FAULT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_to_fault_handler - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global ia64_ivt - EXPORT_DATA_SYMBOL(ia64_ivt) -ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vhpt_miss) - DBG_FAULT(0) - /* - * The VHPT vector is invoked when the TLB entry for the virtual page table - * is missing. This happens only as a result of a previous - * (the "original") TLB miss, which may either be caused by an instruction - * fetch or a data access (or non-access). - * - * What we do here is normal TLB miss handing for the _original_ miss, - * followed by inserting the TLB entry for the virtual page table page - * that the VHPT walker was attempting to access. The latter gets - * inserted as long as page table entry above pte level have valid - * mappings for the faulting address. The TLB entry for the original - * miss gets inserted only if the pte entry indicates that the page is - * present. 
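The offsets quoted in the per-entry comments follow directly from the layout described above: the first 20 entries are 64 bundles of 16 bytes (0x400 bytes each) and the remaining 48 are 16 bundles (0x100 bytes each), which also confirms the 32KB total. A small self-check of that arithmetic:

    #include <assert.h>

    /* Byte offset of IVT entry n: the first 20 entries are 64 bundles
     * (0x400 bytes) each, the remaining 48 are 16 bundles (0x100). */
    static unsigned ivt_entry_offset(unsigned n)
    {
        return n < 20 ? n * 0x400 : 20 * 0x400 + (n - 20) * 0x100;
    }

    int main(void)
    {
        assert(ivt_entry_offset(0) == 0x0000);          /* vhpt_miss  */
        assert(ivt_entry_offset(1) == 0x0400);          /* itlb_miss  */
        assert(ivt_entry_offset(7) == 0x1c00);          /* the header
                                                           comment's example */
        assert(ivt_entry_offset(67) + 0x100 == 0x8000); /* 32KB table  */
        return 0;
    }

This matches the .org ia64_ivt+0x400 and +0x0800 directives at itlb_miss and dtlb_miss below.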
- * - * do_page_fault gets invoked in the following cases: - * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no valid page table mapping - */ - MOV_FROM_IFA(r16) // get address that caused the TLB miss -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - MOV_FROM_ITIR(r25) -#endif - ;; - RSM_PSR_DT // use physical addressing for data - mov r31=pr // save the predicate registers - mov r19=IA64_KR(PT_BASE) // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - shr.u r22=r21,3 -#ifdef CONFIG_HUGETLB_PAGE - extr.u r26=r25,2,6 - ;; - cmp.ne p8,p0=r18,r26 - sub r27=r26,r18 - ;; -(p8) dep r25=r18,r25,2,6 -(p8) shr r22=r22,r27 -#endif - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r28=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? -#if CONFIG_PGTABLE_LEVELS == 4 - dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) - ;; - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -(p7) ld8 r29=[r28] // get *pud (may be 0) - ;; -(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) -#else - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) -#endif - ;; -(p7) ld8 r20=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) - ;; -(p7) ld8 r18=[r21] // read *pte - MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? - MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss - ;; // avoid RAW on p7 -(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? - dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address - ;; - ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and - // insert the data TLB entry -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - MOV_TO_IFA(r22, r24) - -#ifdef CONFIG_HUGETLB_PAGE - MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT -#endif - - /* - * Now compute and insert the TLB entry for the virtual page table. We never - * execute in a page table page so there is no need to set the exception deferral - * bit. - */ - adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 - ;; - ITC_D(p7, r24, r25) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - /* - * Re-check pagetable entry. 
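[Editor's note] The re-check just mentioned guards against a remote ptc.g purge racing the local itc, as the comment continues to explain below. A compact C analogue of the insert-then-re-check pattern, with stand-in stubs for the itc/ptc.l operations (a sketch of the idea, not the real TLB interface):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static void tlb_insert(uint64_t pte)          { printf("itc   %#llx\n", (unsigned long long)pte); }
static void tlb_purge(uint64_t va, int shift) { printf("ptc.l %#llx,%d\n", (unsigned long long)va, shift); }

static void install_pte(_Atomic uint64_t *ptep, uint64_t va, int page_shift)
{
    uint64_t seen = atomic_load(ptep);
    tlb_insert(seen);                  /* corresponds to ITC_D/ITC_I above */
    if (atomic_load(ptep) != seen)     /* raced with a remote ptc.g? */
        tlb_purge(va, page_shift);     /* drop the possibly stale translation */
}

int main(void)
{
    _Atomic uint64_t pte = 0x1000 | 1;
    install_pte(&pte, 0x2000000000004000ULL, 14);
    return 0;
}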
If they changed, we may have received a ptc.g - * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. At this point, we have: - * - * r28 = equivalent of pud_offset(pgd, ifa) - * r17 = equivalent of pmd_offset(pud, ifa) - * r21 = equivalent of pte_offset(pmd, ifa) - * - * r29 = *pud - * r20 = *pmd - * r18 = *pte - */ - ld8 r25=[r21] // read *pte again - ld8 r26=[r17] // read *pmd again -#if CONFIG_PGTABLE_LEVELS == 4 - ld8 r19=[r28] // read *pud again -#endif - cmp.ne p6,p7=r0,r0 - ;; - cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change -#if CONFIG_PGTABLE_LEVELS == 4 - cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change -#endif - mov r27=PAGE_SHIFT<<2 - ;; -(p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change - ;; -(p6) ptc.l r16,r27 // purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - RFI -END(vhpt_miss) - - .org ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(itlb_miss) - DBG_FAULT(1) - /* - * The ITLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -.itlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; - ITC_I(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(itlb_miss) - - .org ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(dtlb_miss) - DBG_FAULT(2) - /* - * The DTLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -dtlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
-(p6) br.cond.spnt page_fault - ;; - ITC_D(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(dtlb_miss) - - .org ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(alt_itlb_miss) - DBG_FAULT(3) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_IPSR(p0, r21) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r31=pr - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // user mode - ;; - THASH(p8, r17, r16, r23) - ;; - MOV_TO_IHA(p8, r17, r23) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .itlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - shr.u r18=r16,57 // move address bit 61 to bit 4 - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) - cmp.ne p8,p0=r0,r23 // psr.cpl != 0? - or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p8) br.cond.spnt page_fault - ;; - ITC_I(p0, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_itlb_miss) - - .org ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(alt_dtlb_miss) - DBG_FAULT(4) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_ISR(r20) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - MOV_FROM_IPSR(p0, r21) - mov r31=pr - mov r24=PERCPU_ADDR - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // access to region 0-5 - ;; - THASH(p8, r17, r16, r25) - ;; - MOV_TO_IHA(p8, r17, r25) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk dtlb_fault -#endif - cmp.ge p10,p11=r16,r24 // access to per_cpu_data? - tbit.z p12,p0=r16,61 // access to region 6? - mov r25=PERCPU_PAGE_SHIFT << 2 - mov r26=PERCPU_PAGE_SIZE - nop.m 0 - nop.b 0 - ;; -(p10) mov r19=IA64_KR(PER_CPU_DATA) -(p11) and r19=r19,r16 // clear non-ppn fields - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
- ;; -(p10) sub r19=r19,r26 - MOV_TO_ITIR(p10, r25, r24) - cmp.ne p8,p0=r0,r23 -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field -(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr -(p8) br.cond.spnt page_fault - - dep r21=-1,r21,IA64_PSR_ED_BIT,1 - ;; - or r19=r19,r17 // insert PTE control bits into r19 - MOV_TO_IPSR(p6, r21, r24) - ;; - ITC_D(p7, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_dtlb_miss) - - .org ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(nested_dtlb_miss) - /* - * In the absence of kernel bugs, we get here when the virtually mapped linear - * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction - * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page - * table is missing, a nested TLB miss fault is triggered and control is - * transferred to this point. When this happens, we lookup the pte for the - * faulting address by walking the page table in physical mode and return to the - * continuation point passed in register r30 (or call page_fault if the address is - * not mapped). - * - * Input: r16: faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Output: r17: physical address of PTE of faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) - */ - RSM_PSR_DT // switch to using physical data addressing - mov r19=IA64_KR(PT_BASE) // get the page table base address - shl r21=r16,3 // shift bit 60 into sign bit - MOV_FROM_ITIR(r18) - ;; - shr.u r17=r16,61 // get the region number into r17 - extr.u r18=r18,2,6 // get the faulting page size - ;; - cmp.eq p6,p7=5,r17 // is faulting address in region 5? - add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address - add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 - ;; - shr.u r22=r16,r22 - shr.u r18=r16,r18 -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r18=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) - ;; -#if CONFIG_PGTABLE_LEVELS == 4 -(p7) ld8 r17=[r17] // get *pud (may be 0) - shr.u r18=r22,PMD_SHIFT // shift pmd index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) - ;; -#endif -(p7) ld8 r17=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? 
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); -(p6) br.cond.spnt page_fault - mov b0=r30 - br.sptk.many b0 // return to continuation point -END(nested_dtlb_miss) - - .org ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(ikey_miss) - DBG_FAULT(6) - FAULT(6) -END(ikey_miss) - - .org ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(dkey_miss) - DBG_FAULT(7) - FAULT(7) -END(dkey_miss) - - .org ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(dirty_bit) - DBG_FAULT(8) - /* - * What we do here is to simply turn on the dirty bit in the PTE. We need to - * update both the page-table and the TLB entry. To efficiently access the PTE, - * we address it through the virtual page table. Most likely, the TLB entry for - * the relevant virtual page table page is still present in the TLB so we can - * normally do this without additional TLB misses. In case the necessary virtual - * page table TLB entry isn't present, we take a nested TLB miss hit where we look - * up the physical address of the L3 PTE and then continue at label 1 below. - */ - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault - mov r31=pr // save pr -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present - ;; - ITC_D(p6, r25, r18) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov pr=r31,-1 // restore pr - RFI -END(dirty_bit) - - .org ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(iaccess_bit) - DBG_FAULT(9) - // Like Entry 8, except for instruction access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - mov r31=pr // save predicates -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. - */ - MOV_FROM_IPSR(p0, r17) - ;; - MOV_FROM_IIP(r18) - tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
- ;; -(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa -#endif /* CONFIG_ITANIUM */ - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the accessed bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page present - ;; - ITC_I(p6, r25, r26) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else /* !CONFIG_SMP */ - ;; -1: ld8 r18=[r17] - ;; - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_I(p0, r18, r16) // install updated PTE -#endif /* !CONFIG_SMP */ - mov pr=r31,-1 - RFI -END(iaccess_bit) - - .org ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(daccess_bit) - DBG_FAULT(10) - // Like Entry 8, except for data access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r31=pr - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the dirty bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page is present - ;; - ITC_D(p6, r25, r26) // install updated PTE - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - ;; - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov b0=r29 // restore b0 - mov pr=r31,-1 - RFI -END(daccess_bit) - - .org ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(break_fault) - /* - * The streamlined system call entry/exit paths only save/restore the initial part - * of pt_regs. This implies that the callers of system-calls must adhere to the - * normal procedure calling conventions. - * - * Registers to be saved & restored: - * CR registers: cr.ipsr, cr.iip, cr.ifs - * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr - * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 - * Registers to be restored only: - * r8-r11: output value from the system call. 
- * - * During system call exit, scratch registers (including r15) are modified/cleared - * to prevent leaking bits from kernel to user level. - */ - DBG_FAULT(11) - mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) - MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) - mov r31=pr // I0 (2 cyc) - - MOV_FROM_IIM(r17) // M2 (2 cyc) - mov.m r27=ar.rsc // M2 (12 cyc) - mov r18=__IA64_BREAK_SYSCALL // A - - mov.m ar.rsc=0 // M2 - mov.m r21=ar.fpsr // M2 (12 cyc) - mov r19=b6 // I0 (2 cyc) - ;; - mov.m r23=ar.bspstore // M2 (12 cyc) - mov.m r24=ar.rnat // M2 (5 cyc) - mov.i r26=ar.pfs // I0 (2 cyc) - - invala // M0|1 - nop.m 0 // M - mov r20=r1 // A save r1 - - nop.m 0 - movl r30=sys_call_table // X - - MOV_FROM_IIP(r28) // M2 (2 cyc) - cmp.eq p0,p7=r18,r17 // I0 is this a system call? -(p7) br.cond.spnt non_syscall // B no -> - // - // From this point on, we are definitely on the syscall-path - // and we can use (non-banked) scratch registers. - // -/////////////////////////////////////////////////////////////////////// - mov r1=r16 // A move task-pointer to "addl"-addressable reg - mov r2=r16 // A setup r2 for ia64_syscall_setup - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags - - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - adds r15=-1024,r15 // A subtract 1024 from syscall number - mov r3=NR_syscalls - 1 - ;; - ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag - ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags - extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr - - shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) - addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS - cmp.leu p6,p7=r15,r3 // A syscall number in range? - ;; - - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS -(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point - tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? - - mov.m ar.bspstore=r22 // M2 switch to kernel RBS - cmp.eq p8,p9=2,r8 // A isr.ei==2? - ;; - -(p8) mov r8=0 // A clear ei to 0 -(p7) movl r30=sys_ni_syscall // X - -(p8) adds r28=16,r28 // A switch cr.iip to next bundle -(p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - ;; - mov b6=r30 // I0 setup syscall handler branch reg early -#else - nop.i 0 - ;; -#endif - - mov.m r25=ar.unat // M2 (5 cyc) - dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr - adds r15=1024,r15 // A restore original syscall number - // - // If any of the above loads miss in L1D, we'll stall here until - // the data arrives. - // -/////////////////////////////////////////////////////////////////////// - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting -#else - mov b6=r30 // I0 setup syscall handler branch reg early -#endif - cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? - - and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit - mov r18=ar.bsp // M2 (12 cyc) -(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS - ;; -.back_from_break_fixup: -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack - cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 
- br.call.sptk.many b7=ia64_syscall_setup // B -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A -(pKStk) br.cond.spnt .skip_accounting // B unlikely skip - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime - ld8 r21=[r17] // M cumulated utime - sub r22=r19,r18 // A stime before leave - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp - sub r18=r30,r19 // A elapsed time in user - ;; - add r20=r20,r22 // A sum stime - add r21=r21,r18 // A sum utime - ;; - st8 [r16]=r20 // M update stime - st8 [r17]=r21 // M update utime - ;; -.skip_accounting: -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - nop 0 - BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 - ;; - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection - // M0 ensure interruption collection is on - movl r3=ia64_ret_from_syscall // X - ;; - mov rp=r3 // I0 set the real return addr -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - SSM_PSR_I(p15, p15, r16) // M2 restore psr.i -(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) - br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic - // NOT REACHED -/////////////////////////////////////////////////////////////////////// - // On entry, we optimistically assumed that we're coming from user-space. - // For the rare cases where a system-call is done from within the kernel, - // we fix things up at this point: -.break_fixup: - add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure - mov ar.rnat=r24 // M2 restore kernel's AR.RNAT - ;; - mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE - br.cond.sptk .back_from_break_fixup -END(break_fault) - - .org ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(interrupt) - /* interrupt handler has become too big to fit this area. */ - br.sptk.many __interrupt -END(interrupt) - - .org ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved - DBG_FAULT(13) - FAULT(13) - - .org ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - DBG_FAULT(14) - FAULT(14) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - * - * ia64_syscall_setup() is a separate subroutine so that it can - * allocate stacked registers so it can safely demine any - * potential NaT values from the input registers. 
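[Editor's note] Before the register-by-register entry/exit contract spelled out below, here is the syscall-number check from the break_fault fast path above in C form: after subtracting the 1024 bias, a single unsigned compare covers both "too small" and "too large", with sys_ni_syscall as the fallback. A sketch with an illustrative three-entry table (names and table size are stand-ins):

#include <stdio.h>

#define SYSCALL_BIAS   1024
#define NR_SYSCALLS_X  3

static long sys_ni_syscall(void) { return -38; /* -ENOSYS */ }
static long sys_a(void) { return 0; }
static long sys_b(void) { return 1; }
static long sys_c(void) { return 2; }

static long (*const table[NR_SYSCALLS_X])(void) = { sys_a, sys_b, sys_c };

static long dispatch(long nr)
{
    unsigned long idx = (unsigned long)(nr - SYSCALL_BIAS);
    /* like cmp.leu above: one unsigned compare handles both bounds */
    return (idx < NR_SYSCALLS_X ? table[idx] : sys_ni_syscall)();
}

int main(void) { printf("%ld\n", dispatch(1025)); return 0; }

The entry and exit conditions for ia64_syscall_setup continue below.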
- * - * On entry: - * - executing on bank 0 or bank 1 register set (doesn't matter) - * - r1: stack pointer - * - r2: current task pointer - * - r3: preserved - * - r11: original contents (saved ar.pfs to be saved) - * - r12: original contents (sp to be saved) - * - r13: original contents (tp to be saved) - * - r15: original contents (syscall # to be saved) - * - r18: saved bsp (after switching to kernel stack) - * - r19: saved b6 - * - r20: saved r1 (gp) - * - r21: saved ar.fpsr - * - r22: kernel's register backing store base (krbs_base) - * - r23: saved ar.bspstore - * - r24: saved ar.rnat - * - r25: saved ar.unat - * - r26: saved ar.pfs - * - r27: saved ar.rsc - * - r28: saved cr.iip - * - r29: saved cr.ipsr - * - r30: ar.itc for accounting (don't touch) - * - r31: saved pr - * - b0: original contents (to be saved) - * On exit: - * - p10: TRUE if syscall is invoked with more than 8 out - * registers or r15's Nat is true - * - r1: kernel's gp - * - r3: preserved (same as on entry) - * - r8: -EINVAL if p10 is true - * - r12: points to kernel stack - * - r13: points to current task - * - r14: preserved (same as on entry) - * - p13: preserved - * - p15: TRUE if interrupts need to be re-enabled - * - ar.fpsr: set to kernel settings - * - b6: preserved (same as on entry) - */ -GLOBAL_ENTRY(ia64_syscall_setup) -#if PT(B6) != 0 -# error This code assumes that b6 is the first field in pt_regs. -#endif - st8 [r1]=r19 // save b6 - add r16=PT(CR_IPSR),r1 // initialize first base pointer - add r17=PT(R11),r1 // initialize second base pointer - ;; - alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable - st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr - tnat.nz p8,p0=in0 - - st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 - tnat.nz p9,p0=in1 -(pKStk) mov r18=r0 // make sure r18 isn't NaT - ;; - - st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs - st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip - mov r28=b0 // save b0 (2 cyc) - ;; - - st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat - dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] -(p8) mov in0=-1 - ;; - - st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs - extr.u r11=r19,7,7 // I0 // get sol of ar.pfs - and r8=0x7f,r19 // A // get sof of ar.pfs - - st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc - tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 -(p9) mov in1=-1 - ;; - -(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 - tnat.nz p10,p0=in2 - add r11=8,r11 - ;; -(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field -(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field - tnat.nz p11,p0=in3 - ;; -(p10) mov in2=-1 - tnat.nz p12,p0=in4 // [I0] -(p11) mov in3=-1 - ;; -(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat -(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore - shl r18=r18,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates - st8 [r17]=r28,PT(R1)-PT(B0) // save b0 - tnat.nz p13,p0=in5 // [I0] - ;; - st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" - st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 -(p12) mov in4=-1 - ;; - -.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 -.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 -(p13) mov in5=-1 - ;; - st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p13,p0=in6 - cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 - ;; - mov r8=1 -(p9) tnat.nz p10,p0=r15 - adds r12=-16,r1 // switch to 
kernel memory stack (with 16 bytes of scratch) - - st8.spill [r17]=r15 // save r15 - tnat.nz p8,p0=in7 - nop.i 0 - - mov r13=r2 // establish `current' - movl r1=__gp // establish kernel global pointer - ;; - st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p13) mov in6=-1 -(p8) mov in7=-1 - - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value -(p10) mov r8=-EINVAL - br.ret.sptk.many b7 -END(ia64_syscall_setup) - - .org ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - DBG_FAULT(15) - FAULT(15) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) - -#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) - /* - * There is no particular reason for this code to be here, other than - * that there happens to be space here that would go unused otherwise. - * If this fault ever gets "unreserved", simply moved the following - * code to a more suitable spot... - * - * account_sys_enter is called from SAVE_MIN* macros if accounting is - * enabled and if the macro is entered from user mode. - */ -GLOBAL_ENTRY(account_sys_enter) - // mov.m r20=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel - ;; - ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r20,r19 // elapsed time in user mode - ;; - add r23=r23,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r23 // update stime - st8 [r17]=r21 // update utime - ;; - br.ret.sptk.many rp -END(account_sys_enter) -#endif - - .org ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - DBG_FAULT(17) - FAULT(17) - - .org ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - DBG_FAULT(18) - FAULT(18) - - .org ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - DBG_FAULT(19) - FAULT(19) - -// -// --- End of long entries, Beginning of short entries -// - - .org ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) -ENTRY(page_not_present) - DBG_FAULT(20) - MOV_FROM_IFA(r16) - RSM_PSR_DT - /* - * The Linux page fault handler doesn't expect non-present pages to be in - * the TLB. Flush the existing entry now, so we meet that expectation. 
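[Editor's note] Stepping back to account_sys_enter above: its bookkeeping splits elapsed time at the last kernel-leave stamp, charging the earlier interval to system time and the later one to user time. A C sketch of the same arithmetic (struct and field names are illustrative mirrors of the TI_AC_STAMP/TI_AC_LEAVE/TI_AC_STIME/TI_AC_UTIME slots):

#include <stdint.h>
#include <stdio.h>

struct thread_ac { uint64_t stamp, leave, stime, utime; };

/* now: the ar.itc reading taken on kernel entry */
static void sys_enter_accounting(struct thread_ac *ti, uint64_t now)
{
    ti->stime += ti->leave - ti->stamp;  /* kernel time up to the last leave */
    ti->utime += now - ti->leave;        /* user time since leaving the kernel */
    ti->stamp  = now;                    /* restart the kernel-time stamp */
}

int main(void)
{
    struct thread_ac ti = { .stamp = 100, .leave = 160 };
    sys_enter_accounting(&ti, 300);
    printf("stime=%llu utime=%llu\n",
           (unsigned long long)ti.stime, (unsigned long long)ti.utime);
    return 0;
}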
- */ - mov r17=PAGE_SHIFT<<2 - ;; - ptc.l r16,r17 - ;; - mov r31=pr - srlz.d - br.sptk.many page_fault -END(page_not_present) - - .org ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) -ENTRY(key_permission) - DBG_FAULT(21) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(key_permission) - - .org ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(iaccess_rights) - DBG_FAULT(22) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(iaccess_rights) - - .org ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(daccess_rights) - DBG_FAULT(23) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(daccess_rights) - - .org ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(general_exception) - DBG_FAULT(24) - MOV_FROM_ISR(r16) - mov r31=pr - ;; - cmp4.eq p6,p0=0,r16 -(p6) br.sptk.many dispatch_illegal_op_fault - ;; - mov r19=24 // fault number - br.sptk.many dispatch_to_fault_handler -END(general_exception) - - .org ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(disabled_fp_reg) - DBG_FAULT(25) - rsm psr.dfh // ensure we can access fph - ;; - srlz.d - mov r31=pr - mov r19=25 - br.sptk.many dispatch_to_fault_handler -END(disabled_fp_reg) - - .org ia64_ivt+0x5600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(nat_consumption) - DBG_FAULT(26) - - MOV_FROM_IPSR(p0, r16) - MOV_FROM_ISR(r17) - mov r31=pr // save PR - ;; - and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} - tbit.z p6,p0=r17,IA64_ISR_NA_BIT - ;; - cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 - dep r16=-1,r16,IA64_PSR_ED_BIT,1 -(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) - ;; - MOV_TO_IPSR(p0, r16, r18) - mov pr=r31,-1 - ;; - RFI - -1: mov pr=r31,-1 - ;; - FAULT(26) -END(nat_consumption) - - .org ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(speculation_vector) - DBG_FAULT(27) - /* - * A [f]chk.[as] instruction needs to take the branch to the recovery code but - * this part of the architecture is not implemented in hardware on some CPUs, such - * as Itanium. Thus, in general we need to emulate the behavior. IIM contains - * the relative target (not yet sign extended). So after sign extending it we - * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, - * i.e., the slot to restart into. 
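[Editor's note] The MOV_FROM_IIM/shl/shr sequence below performs exactly this sign-extend-and-add: shifting the 21-bit immediate up to bit 63 and arithmetic-shifting back down by 39 leaves sext(imm21) * 16, i.e. a byte offset measured in 16-byte bundles. A C rendering (relies on arithmetic >> for signed values, mirroring the ia64 shr instruction; a sketch, not the handler itself):

#include <stdint.h>
#include <stdio.h>

static uint64_t speculation_target(uint64_t iip, uint64_t imm21)
{
    int64_t off = (int64_t)(imm21 << 43) >> 39;   /* sext(imm21) << 4 */
    return iip + (uint64_t)off;
}

int main(void)
{
    /* imm21 = 0x1FFFFF encodes -1 bundle: the target is iip - 16 */
    printf("%#llx\n", (unsigned long long)speculation_target(0x1000, 0x1FFFFF));
    return 0;
}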
- * - * cr.imm contains zero_ext(imm21) - */ - MOV_FROM_IIM(r18) - ;; - MOV_FROM_IIP(r17) - shl r18=r18,43 // put sign bit in position (43=64-21) - ;; - - MOV_FROM_IPSR(p0, r16) - shr r18=r18,39 // sign extend (39=43-4) - ;; - - add r17=r17,r18 // now add the offset - ;; - MOV_TO_IIP(r17, r19) - dep r16=0,r16,41,2 // clear EI - ;; - - MOV_TO_IPSR(p0, r16, r19) - ;; - - RFI -END(speculation_vector) - - .org ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - DBG_FAULT(28) - FAULT(28) - - .org ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(debug_vector) - DBG_FAULT(29) - FAULT(29) -END(debug_vector) - - .org ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(unaligned_access) - DBG_FAULT(30) - mov r31=pr // prepare to save predicates - ;; - br.sptk.many dispatch_unaligned_handler -END(unaligned_access) - - .org ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(unsupported_data_reference) - DBG_FAULT(31) - FAULT(31) -END(unsupported_data_reference) - - .org ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(floating_point_fault) - DBG_FAULT(32) - FAULT(32) -END(floating_point_fault) - - .org ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(floating_point_trap) - DBG_FAULT(33) - FAULT(33) -END(floating_point_trap) - - .org ia64_ivt+0x5e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(lower_privilege_trap) - DBG_FAULT(34) - FAULT(34) -END(lower_privilege_trap) - - .org ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(taken_branch_trap) - DBG_FAULT(35) - FAULT(35) -END(taken_branch_trap) - - .org ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(single_step_trap) - DBG_FAULT(36) - FAULT(36) -END(single_step_trap) - - .org ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Reserved - DBG_FAULT(37) - FAULT(37) - - .org ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - DBG_FAULT(38) - FAULT(38) - - .org ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - DBG_FAULT(39) - FAULT(39) - - .org ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - DBG_FAULT(40) - FAULT(40) - - .org ia64_ivt+0x6500 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - DBG_FAULT(41) - FAULT(41) - - .org ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - DBG_FAULT(42) - FAULT(42) - - .org ia64_ivt+0x6700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - DBG_FAULT(43) - FAULT(43) - - .org ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - DBG_FAULT(44) - FAULT(44) - - .org ia64_ivt+0x6900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(ia32_exception) - DBG_FAULT(45) - FAULT(45) -END(ia32_exception) - - .org ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(ia32_intercept) - DBG_FAULT(46) - FAULT(46) -END(ia32_intercept) - - .org ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(ia32_interrupt) - DBG_FAULT(47) - FAULT(47) -END(ia32_interrupt) - - .org ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - DBG_FAULT(48) - FAULT(48) - - .org ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - DBG_FAULT(49) - FAULT(49) - - .org ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - DBG_FAULT(50) - FAULT(50) - - .org ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - DBG_FAULT(51) - FAULT(51) - - .org ia64_ivt+0x7000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - DBG_FAULT(52) - FAULT(52) - - .org ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - DBG_FAULT(53) - FAULT(53) - - .org ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - DBG_FAULT(54) - FAULT(54) - - .org ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - DBG_FAULT(55) - FAULT(55) - - .org ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - DBG_FAULT(56) - FAULT(56) - - .org ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - DBG_FAULT(57) - FAULT(57) - - .org ia64_ivt+0x7600 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - DBG_FAULT(58) - FAULT(58) - - .org ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - DBG_FAULT(59) - FAULT(59) - - .org ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - DBG_FAULT(60) - FAULT(60) - - .org ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - DBG_FAULT(61) - FAULT(61) - - .org ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - DBG_FAULT(62) - FAULT(62) - - .org ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - DBG_FAULT(63) - FAULT(63) - - .org ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - DBG_FAULT(64) - FAULT(64) - - .org ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - DBG_FAULT(65) - FAULT(65) - - .org ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - DBG_FAULT(66) - FAULT(66) - - .org ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - DBG_FAULT(67) - FAULT(67) - - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - SSM_PSR_DT_AND_SRLZ_I - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 - MOV_FROM_IFA(out0) - MOV_FROM_ISR(out1) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) - adds r3=8,r2 // set up second base pointer - SSM_PSR_I(p15, p15, r14) // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... 
- - alloc r14=ar.pfs,0,0,2,0 - MOV_FROM_IIM(out0) - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r15) // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - -ENTRY(__interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) - // ensure everybody knows psr.ic is back on - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - ;; - MCA_RECOVER_RANGE(interrupt) - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq -END(__interrupt) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - MOV_FROM_IFA(out0) - adds out1=16,sp - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - MOV_FROM_ISR(out1) - MOV_FROM_IFA(out2) - MOV_FROM_IIM(out3) - MOV_FROM_ITIR(out4) - ;; - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) - // guarantee that interruption collection is on - mov out0=r15 - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - - /* - * Squatting in this space ... - * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. 
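[Editor's note] A C rendering of that callback contract (the struct and names here are hypothetical; the real mechanism is asm-only and returns the aggregate through r8-r11, with r8 as the callback):

#include <stdio.h>

struct illegal_op_return {
    long (*callback)(long, long, long);   /* NULL: nothing to run */
    long arg0, arg1, arg2;
};

static long fixup(long a, long b, long c) { return a + b + c; }

static struct illegal_op_return fault_handler(void)
{
    return (struct illegal_op_return){ fixup, 1, 2, 3 };
}

int main(void)
{
    struct illegal_op_return r = fault_handler();
    if (r.callback)                       /* mirrors cmp.ne p6,p0=0,r8 below */
        printf("callback -> %ld\n", r.callback(r.arg0, r.arg1, r.arg2));
    return 0;
}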
- */ -ENTRY(dispatch_illegal_op_fault) - .prologue - .body - SAVE_MIN_WITH_COVER - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec - ;; - SAVE_REST - PT_REGS_UNWIND_INFO(0) - ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S deleted file mode 100644 index 086cfa4999fd2ac29d452d823220513aee3d4f96..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/mca_asm.S +++ /dev/null @@ -1,1123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_asm.S - * Purpose: assembly portion of the IA64 MCA handling - * - * Mods by cfleck to integrate into kernel build - * - * 2000-03-15 David Mosberger-Tang - * Added various stop bits to get a clean compile - * - * 2000-03-29 Chuck Fleckenstein - * Added code to save INIT handoff state in pt_regs format, - * switch to temp kstack, switch modes, jump to C INIT handler - * - * 2002-01-04 J.Hall - * Before entering virtual mode code: - * 1. Check for TLB CPU error - * 2. Restore current thread pointer to kr6 - * 3. Move stack ptr 16 bytes to conform to C calling convention - * - * 2004-11-12 Russ Anderson - * Added per cpu MCA/INIT stack save areas. - * - * 2005-12-08 Keith Owens - * Use per cpu MCA/INIT stacks for all data. - */ -#include - -#include -#include -#include -#include -#include - -#include "entry.h" - -#define GET_IA64_MCA_DATA(reg) \ - GET_THIS_PADDR(reg, ia64_mca_data) \ - ;; \ - ld8 reg=[reg] - - .global ia64_do_tlb_purge - .global ia64_os_mca_dispatch - .global ia64_os_init_on_kdump - .global ia64_os_init_dispatch_monarch - .global ia64_os_init_dispatch_slave - - .text - .align 16 - -//StartMain//////////////////////////////////////////////////////////////////// - -/* - * Just the TLB purge part is moved to a separate function - * so we can re-use the code for cpu hotplug code as well - * Caller should now setup b1, so we can branch once the - * tlb flush is complete. - */ - -ia64_do_tlb_purge: -#define O(member) IA64_CPUINFO_##member##_OFFSET - - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=0 - ;; - adds r20=-1,r20 - ;; -#undef O - -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i // srlz.i implies srlz.d - ;; - - // Now purge addresses formerly mapped by TR registers - // 1. Purge ITR&DTR for kernel. - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - srlz.d - ;; - // 3. Purge ITR for PAL code. 
- GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - // 4. Purge DTR for stack. - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - // Now branch away to caller. - br.sptk.many b1 - ;; - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -ia64_os_mca_dispatch: - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - mov r19=1 // All MCA events are treated as monarch (for now) - br.sptk ia64_state_save // save the state that is not in minstate -1: - - GET_IA64_MCA_DATA(r2) - // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param - ;; - add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 - ;; - ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. - ;; - tbit.nz p6,p7=r18,60 -(p7) br.spnt done_tlb_purge_and_reload - - // The following code purges TC and TR entries. Then reload all TC entries. - // Purge percpu data TC entries. -begin_tlb_purge_and_reload: - movl r18=ia64_reload_tr;; - LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; - mov b1=r18;; - br.sptk.many ia64_do_tlb_purge;; - -ia64_reload_tr: - // Finally reload the TR registers. - // 1. Reload DTR/ITR registers for kernel. - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r19=ip - movl r18=PAGE_KERNEL - ;; - dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT - ;; - or r18=r17,r18 - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - srlz.d - ;; - // 3. Reload ITR for PAL code. - GET_THIS_PADDR(r2, ia64_mca_pal_pte) - ;; - ld8 r18=[r2] // load PAL PTE - ;; - GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] // load PAL vaddr - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r16 - mov r20=IA64_TR_PALCODE - ;; - itr.i itr[r20]=r18 - ;; - srlz.i - ;; - // 4. Reload DTR for stack. - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r18=r19,r16 - movl r20=PAGE_KERNEL - ;; - add r16=r20,r16 - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r18 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r16 - GET_THIS_PADDR(r2, ia64_mca_tr_reload) - mov r18 = 1 - ;; - srlz.d - ;; - st8 [r2] =r18 - ;; - -done_tlb_purge_and_reload: - - // switch to per cpu MCA stack - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_mca_virtual_begin be a global function. 
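[Editor's note] The virtual-mode handler below converts the MCA data pointer with DATA_PA_TO_VA. The trick is that the kernel's identity mapping lives in region 7, so PA<->VA conversion is just setting or clearing the top three address bits. A sketch with an assumed region-7 base value:

#include <stdint.h>
#include <stdio.h>

#define PAGE_OFFSET_X 0xe000000000000000ULL   /* assumed region-7 base */

static uint64_t data_pa_to_va(uint64_t pa) { return pa |  PAGE_OFFSET_X; }
static uint64_t data_va_to_pa(uint64_t va) { return va & ~PAGE_OFFSET_X; }

int main(void)
{
    uint64_t pa = 0x4000000ULL;
    printf("va=%#llx back-to-pa=%#llx\n",
           (unsigned long long)data_pa_to_va(pa),
           (unsigned long long)data_va_to_pa(data_pa_to_va(pa)));
    return 0;
}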
-ENTRY(ia64_os_mca_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_mca_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) -ia64_os_mca_virtual_end: - -END(ia64_os_mca_virtual_begin) - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - mov b0=r12 // SAL_CHECK return address - - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -// -// NOP init handler for kdump. In panic situation, we may receive INIT -// while kernel transition. Since we initialize registers on leave from -// current kernel, no longer monarch/slave handlers of current kernel in -// virtual mode are called safely. -// We can unregister these init handlers from SAL, however then the INIT -// will result in warmboot by SAL and we cannot retrieve the crashdump. -// Therefore register this NOP function to SAL, to prevent entering virtual -// mode and resulting warmboot by SAL. -// -ia64_os_init_on_kdump: - mov r8=r0 // IA64_INIT_RESUME - mov r9=r10 // SAL_GP - mov r22=r17 // *minstate - ;; - mov r10=r0 // return to same context - mov b0=r12 // SAL_CHECK return address - br b0 - -// -// SAL to OS entry point for INIT on all processors. This has been defined for -// registration purposes with SAL as a part of ia64_mca_init. Monarch and -// slave INIT have identical processing, except for the value of the -// sos->monarch flag in r19. -// - -ia64_os_init_dispatch_monarch: - mov r19=1 // Bow, bow, ye lower middle classes! - br.sptk ia64_os_init_dispatch - -ia64_os_init_dispatch_slave: - mov r19=0 // yeth, mathter - -ia64_os_init_dispatch: - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_save // save the state that is not in minstate -1: - - // switch to per cpu INIT stack - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_init_virtual_begin be a global function. 
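[Editor's note] As the dispatch comment above notes, monarch and slave INIT share one path and differ only in the sos->monarch flag carried in r19. A minimal C analogue of that split (struct and names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct sal_os_state { bool monarch; };

static void init_handler(struct sal_os_state *sos)
{
    printf("%s INIT handler\n", sos->monarch ? "monarch" : "slave");
}

static void init_dispatch(bool monarch)   /* common tail of both entry points */
{
    struct sal_os_state sos = { .monarch = monarch };
    init_handler(&sos);
}

int main(void) { init_dispatch(true); init_dispatch(false); return 0; }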
-ENTRY(ia64_os_init_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_init_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4) -ia64_os_init_virtual_end: - -END(ia64_os_init_virtual_begin) - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov b0=r12 // SAL_CHECK return address - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -// common defines for the stubs -#define ms r4 -#define regs r5 -#define temp1 r2 /* careful, it overlaps with input registers */ -#define temp2 r3 /* careful, it overlaps with input registers */ -#define temp3 r7 -#define temp4 r14 - - -//++ -// Name: -// ia64_state_save() -// -// Stub Description: -// -// Save the state that is not in minstate. This is sensitive to the layout of -// struct ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// The OS to SAL section of struct ia64_sal_os_state is set to a default -// value of cold boot (MCA) or warm boot (INIT) and return to the same -// context. ia64_sal_os_state is also used to hold some registers that -// need to be saved and restored across the stack switches. -// -// Most input registers to this stub come from PAL/SAL -// r1 os gp, physical -// r8 pal_proc entry point -// r9 sal_proc entry point -// r10 sal gp -// r11 MCA - rendevzous state, INIT - reason code -// r12 sal return address -// r17 pal min_state -// r18 processor state parameter -// r19 monarch flag, set by the caller of this routine -// -// In addition to the SAL to OS state, this routine saves all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those that are already in the PAL minstate area. This -// results in a partial pt_regs and switch_stack, the C code copies the -// remaining registers from PAL minstate to pt_regs and switch_stack. The -// resulting structures contain all the state of the original process when -// MCA/INIT occurred. 
-// -//-- - -ia64_state_save: - add regs=MCA_SOS_OFFSET, r3 - add ms=MCA_SOS_OFFSET+8, r3 - mov b0=r2 // save return address - cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3 - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack - add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack - ;; - mov regs=temp1 // save the start of sos - st8 [temp1]=r1,16 // os_gp - st8 [temp2]=r8,16 // pal_proc - ;; - st8 [temp1]=r9,16 // sal_proc - st8 [temp2]=r11,16 // rv_rc - mov r11=cr.iipa - ;; - st8 [temp1]=r18 // proc_state_param - st8 [temp2]=r19 // monarch - mov r6=IA64_KR(CURRENT) - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - st8 [temp1]=r12,16 // sal_ra - st8 [temp2]=r10,16 // sal_gp - mov r12=cr.isr - ;; - st8 [temp1]=r17,16 // pal_min_state - st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT - mov r6=IA64_KR(CURRENT_STACK) - ;; - st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK - st8 [temp2]=r0,16 // prev_task, starts off as NULL - mov r6=cr.ifa - ;; - st8 [temp1]=r12,16 // cr.isr - st8 [temp2]=r6,16 // cr.ifa - mov r12=cr.itir - ;; - st8 [temp1]=r12,16 // cr.itir - st8 [temp2]=r11,16 // cr.iipa - mov r12=cr.iim - ;; - st8 [temp1]=r12 // cr.iim -(p1) mov r12=IA64_MCA_COLD_BOOT -(p2) mov r12=IA64_INIT_WARM_BOOT - mov r6=cr.iha - add temp1=SOS(OS_STATUS), regs - ;; - st8 [temp2]=r6 // cr.iha - add temp2=SOS(CONTEXT), regs - st8 [temp1]=r12 // os_status, default is cold boot - mov r6=IA64_MCA_SAME_CONTEXT - ;; - st8 [temp2]=r6 // context, default is same context - - // Save the pt_regs data that is not in minstate. The previous code - // left regs at sos. - add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs - ;; - add temp1=PT(B6), regs - mov temp3=b6 - mov temp4=b7 - add temp2=PT(B7), regs - ;; - st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6 - st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7 - mov temp3=ar.csd - mov temp4=ar.ssd - cover // must be last in group - ;; - st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd - st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd - mov temp3=ar.unat - mov temp4=ar.pfs - ;; - st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat - st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs - mov temp3=ar.rnat - mov temp4=ar.bspstore - ;; - st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat - st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore - mov temp3=ar.bsp - ;; - sub temp3=temp3, temp4 // ar.bsp - ar.bspstore - mov temp4=ar.fpsr - ;; - shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs - st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr - mov temp3=ar.ccv - ;; - st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv - stf.spill [temp2]=f6,PT(F8)-PT(F6) - ;; - stf.spill [temp1]=f7,PT(F9)-PT(F7) - stf.spill [temp2]=f8,PT(F10)-PT(F8) - ;; - stf.spill [temp1]=f9,PT(F11)-PT(F9) - stf.spill [temp2]=f10 - ;; - stf.spill [temp1]=f11 - - // Save the switch_stack data that is not in minstate nor pt_regs. The - // previous code left regs at pt_regs. 
- add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - stf.spill [temp1]=f2,32 - stf.spill [temp2]=f3,32 - ;; - stf.spill [temp1]=f4,32 - stf.spill [temp2]=f5,32 - ;; - stf.spill [temp1]=f12,32 - stf.spill [temp2]=f13,32 - ;; - stf.spill [temp1]=f14,32 - stf.spill [temp2]=f15,32 - ;; - stf.spill [temp1]=f16,32 - stf.spill [temp2]=f17,32 - ;; - stf.spill [temp1]=f18,32 - stf.spill [temp2]=f19,32 - ;; - stf.spill [temp1]=f20,32 - stf.spill [temp2]=f21,32 - ;; - stf.spill [temp1]=f22,32 - stf.spill [temp2]=f23,32 - ;; - stf.spill [temp1]=f24,32 - stf.spill [temp2]=f25,32 - ;; - stf.spill [temp1]=f26,32 - stf.spill [temp2]=f27,32 - ;; - stf.spill [temp1]=f28,32 - stf.spill [temp2]=f29,32 - ;; - stf.spill [temp1]=f30,SW(B2)-SW(F30) - stf.spill [temp2]=f31,SW(B3)-SW(F31) - mov temp3=b2 - mov temp4=b3 - ;; - st8 [temp1]=temp3,16 // save b2 - st8 [temp2]=temp4,16 // save b3 - mov temp3=b4 - mov temp4=b5 - ;; - st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4 - st8 [temp2]=temp4 // save b5 - mov temp3=ar.lc - ;; - st8 [temp1]=temp3 // save ar.lc - - // FIXME: Some proms are incorrectly accessing the minstate area as - // cached data. The C code uses region 6, uncached virtual. Ensure - // that there is no cache data lying around for the first 1K of the - // minstate area. - // Remove this code in September 2006, that gives platforms a year to - // fix their proms and get their customers updated. - - add r1=32*1,r17 - add r2=32*2,r17 - add r3=32*3,r17 - add r4=32*4,r17 - add r5=32*5,r17 - add r6=32*6,r17 - add r7=32*7,r17 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_state_restore() -// -// Stub Description: -// -// Restore the SAL/OS state. This is sensitive to the layout of struct -// ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// In addition to the SAL to OS state, this routine restores all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those in the PAL minstate area. -// -//-- - -ia64_state_restore: - // Restore the switch_stack data that is not in minstate nor pt_regs. 
- add regs=MCA_SWITCH_STACK_OFFSET, r3 - mov b0=r2 // save return address - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add regs=temp2, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - ldf.fill f2=[temp1],32 - ldf.fill f3=[temp2],32 - ;; - ldf.fill f4=[temp1],32 - ldf.fill f5=[temp2],32 - ;; - ldf.fill f12=[temp1],32 - ldf.fill f13=[temp2],32 - ;; - ldf.fill f14=[temp1],32 - ldf.fill f15=[temp2],32 - ;; - ldf.fill f16=[temp1],32 - ldf.fill f17=[temp2],32 - ;; - ldf.fill f18=[temp1],32 - ldf.fill f19=[temp2],32 - ;; - ldf.fill f20=[temp1],32 - ldf.fill f21=[temp2],32 - ;; - ldf.fill f22=[temp1],32 - ldf.fill f23=[temp2],32 - ;; - ldf.fill f24=[temp1],32 - ldf.fill f25=[temp2],32 - ;; - ldf.fill f26=[temp1],32 - ldf.fill f27=[temp2],32 - ;; - ldf.fill f28=[temp1],32 - ldf.fill f29=[temp2],32 - ;; - ldf.fill f30=[temp1],SW(B2)-SW(F30) - ldf.fill f31=[temp2],SW(B3)-SW(F31) - ;; - ld8 temp3=[temp1],16 // restore b2 - ld8 temp4=[temp2],16 // restore b3 - ;; - mov b2=temp3 - mov b3=temp4 - ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4 - ld8 temp4=[temp2] // restore b5 - ;; - mov b4=temp3 - mov b5=temp4 - ld8 temp3=[temp1] // restore ar.lc - ;; - mov ar.lc=temp3 - - // Restore the pt_regs data that is not in minstate. The previous code - // left regs at switch_stack. - add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs - ;; - add temp1=PT(B6), regs - add temp2=PT(B7), regs - ;; - ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6 - ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7 - ;; - mov b6=temp3 - mov b7=temp4 - ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd - ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd - ;; - mov ar.csd=temp3 - mov ar.ssd=temp4 - ld8 temp3=[temp1] // restore ar.unat - add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1 - ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs - ;; - mov ar.unat=temp3 - mov ar.pfs=temp4 - // ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack. - ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv - ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr - ;; - mov ar.ccv=temp3 - mov ar.fpsr=temp4 - ldf.fill f6=[temp1],PT(F8)-PT(F6) - ldf.fill f7=[temp2],PT(F9)-PT(F7) - ;; - ldf.fill f8=[temp1],PT(F10)-PT(F8) - ldf.fill f9=[temp2],PT(F11)-PT(F9) - ;; - ldf.fill f10=[temp1] - ldf.fill f11=[temp2] - - // Restore the SAL to OS state. The previous code left regs at pt_regs. - add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - ld8 r12=[temp1],16 // sal_ra - ld8 r9=[temp2],16 // sal_gp - ;; - ld8 r22=[temp1],16 // pal_min_state, virtual - ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT - ;; - ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK - ld8 r20=[temp2],16 // prev_task - ;; - ld8 temp3=[temp1],16 // cr.isr - ld8 temp4=[temp2],16 // cr.ifa - ;; - mov cr.isr=temp3 - mov cr.ifa=temp4 - ld8 temp3=[temp1],16 // cr.itir - ld8 temp4=[temp2],16 // cr.iipa - ;; - mov cr.itir=temp3 - mov cr.iipa=temp4 - ld8 temp3=[temp1] // cr.iim - ld8 temp4=[temp2] // cr.iha - add temp1=SOS(OS_STATUS), regs - add temp2=SOS(CONTEXT), regs - ;; - mov cr.iim=temp3 - mov cr.iha=temp4 - dep r22=0,r22,62,1 // pal_min_state, physical, uncached - mov IA64_KR(CURRENT)=r13 - ld8 r8=[temp1] // os_status - ld8 r10=[temp2] // context - - /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To - * avoid any dependencies on the algorithm in ia64_switch_to(), just - * purge any existing CURRENT_STACK mapping and insert the new one. 
- * - * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains - * prev_IA64_KR_CURRENT, these values may have been changed by the C - * code. Do not use r8, r9, r10, r22, they contain values ready for - * the return to SAL. - */ - - mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r15=r15,IA64_GRANULE_SHIFT - ;; - dep r15=-1,r15,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r15,r18 - ;; - srlz.d - - extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT - shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? - or r21=r20,r21 // construct PA | page properties -(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d -1: - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_new_stack() -// -// Stub Description: -// -// Switch to the MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry RBS is still on the original stack, this routine switches RBS -// to use the MCA/INIT stack. -// -// On entry, sos->pal_min_state is physical, on exit it is virtual. -// -//-- - -ia64_new_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - invala - ;; - add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack - add regs=regs, temp1 // struct pt_regs on MCA or INIT stack - ;; - // Address of minstate area provided by PAL is physical, uncacheable. - // Convert to Linux virtual address in region 6 for C code. - ld8 ms=[temp2] // pal_min_state, physical - ;; - dep temp1=-1,ms,62,2 // set region 6 - mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET - ;; - st8 [temp2]=temp1 // pal_min_state, virtual - - add temp4=temp3, regs // start of bspstore on new stack - ;; - mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack - ;; - flushrs // must be first in group - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_old_stack() -// -// Stub Description: -// -// Switch to the old stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry, pal_min_state is virtual, on exit it is physical. -// -// On entry RBS is on the MCA/INIT stack, this routine switches RBS -// back to the previous stack. -// -// The psr is set to all zeroes. SAL return requires either all zeroes or -// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this -// code does not perform correctly. -// -// The dirty registers at the time of the event were flushed to the -// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers -// before reverting to the previous bspstore. 
-//-- - -ia64_old_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp2) - LOAD_PHYSICAL(p0,temp1,1f) - ;; - mov cr.ipsr=r0 - mov cr.ifs=r0 - mov cr.iip=temp1 - ;; - invala - rfi -1: - - add regs=regs, temp2 // struct pt_regs on MCA or INIT stack - ;; - add temp1=PT(LOADRS), regs - ;; - ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs - ;; - ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore - mov ar.rsc=temp2 - ;; - loadrs - ld8 temp4=[temp1] // restore ar.rnat - ;; - mov ar.bspstore=temp3 // back to old stack - ;; - mov ar.rnat=temp4 - ;; - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_set_kernel_registers() -// -// Stub Description: -// -// Set the registers that are required by the C code in order to run on an -// MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -//-- - -ia64_set_kernel_registers: - add temp3=MCA_SP_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - ;; - add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack - add r13=temp1, r3 // set current to start of MCA/INIT stack - add r20=temp1, r3 // physical start of MCA/INIT stack - ;; - DATA_PA_TO_VA(r12,temp2) - DATA_PA_TO_VA(r13,temp3) - ;; - mov IA64_KR(CURRENT)=r13 - - /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid - * any dependencies on the algorithm in ia64_switch_to(), just purge - * any existing CURRENT_STACK mapping and insert the new one. - */ - - mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r16=r16,IA64_GRANULE_SHIFT - ;; - dep r16=-1,r16,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r16,r18 - ;; - srlz.d - - shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - or r21=r20,r21 // construct PA | page properties - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - -#undef ms -#undef regs -#undef temp1 -#undef temp2 -#undef temp3 -#undef temp4 - - -// Support function for mca.c, it is here to avoid using inline asm. Given the -// address of an rnat slot, if that address is below the current ar.bspstore -// then return the contents of that slot, otherwise return the contents of -// ar.rnat. -GLOBAL_ENTRY(ia64_get_rnat) - alloc r14=ar.pfs,1,0,0,0 - mov ar.rsc=0 - ;; - mov r14=ar.bspstore - ;; - cmp.lt p6,p7=in0,r14 - ;; -(p6) ld8 r8=[in0] -(p7) mov r8=ar.rnat - mov ar.rsc=3 - br.ret.sptk.many rp -END(ia64_get_rnat) - - -// void ia64_set_psr_mc(void) -// -// Set psr.mc bit to mask MCA/INIT. 
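One aside before the psr.mc routine below: the contract of ia64_get_rnat above is simple enough to restate in C. The two read_ar_*() helpers here are hypothetical stand-ins for RSE register reads that C cannot express, which is exactly why the real routine is assembly:

	extern unsigned long read_ar_bspstore(void);	/* hypothetical helper */
	extern unsigned long read_ar_rnat(void);	/* hypothetical helper */

	unsigned long get_rnat(unsigned long *rnat_slot)
	{
		/* Slots below ar.bspstore have already been spilled to
		 * memory; anything at or above it is still in ar.rnat. */
		if ((unsigned long)rnat_slot < read_ar_bspstore())
			return *rnat_slot;
		return read_ar_rnat();
	}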
-GLOBAL_ENTRY(ia64_set_psr_mc) - rsm psr.i | psr.ic // disable interrupts - ;; - srlz.d - ;; - mov r14 = psr // get psr{36:35,31:0} - movl r15 = 1f - ;; - dep r14 = -1, r14, PSR_MC, 1 // set psr.mc - ;; - dep r14 = -1, r14, PSR_IC, 1 // set psr.ic - ;; - dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use - ;; - mov cr.ipsr = r14 - mov cr.ifs = r0 - mov cr.iip = r15 - ;; - rfi -1: - br.ret.sptk.many rp -END(ia64_set_psr_mc) diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S deleted file mode 100644 index 4428f57bee73565b34b3e043ad5391e31cd783e6..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/mca_drv_asm.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_drv_asm.S - * Purpose: Assembly portion of Generic MCA handling - * - * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) 2004 Hidetoshi Seto - */ -#include - -#include -#include -#include - -GLOBAL_ENTRY(mca_handler_bhhook) - invala // clear RSE ? - cover - ;; - clrrrb - ;; - alloc r16=ar.pfs,0,2,3,0 // make a new frame - mov ar.rsc=0 - mov r13=IA64_KR(CURRENT) // current task pointer - ;; - mov r2=r13 - ;; - addl r22=IA64_RBS_OFFSET,r2 - ;; - mov ar.bspstore=r22 - addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 - ;; - adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - st1 [r2]=r0 // clear current->thread.on_ustack flag - mov loc0=r16 - movl loc1=mca_handler_bh // recovery C function - ;; - mov out0=r8 // poisoned address - mov out1=r9 // iip - mov out2=r10 // psr - mov b6=loc1 - ;; - mov loc1=rp - ssm psr.ic - ;; - srlz.i - ;; - ssm psr.i - br.call.sptk.many rp=b6 // does not return ... - ;; - mov ar.pfs=loc0 - mov rp=loc1 - ;; - mov r8=r0 - br.ret.sptk.many rp -END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S deleted file mode 100644 index d3e22c018b68acd9a8d02e41214e25835ec1c0e8..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/pal.S +++ /dev/null @@ -1,306 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PAL Firmware support - * IA-64 Processor Programmers Reference Vol 2 - * - * Copyright (C) 1999 Don Dugger - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co - * David Mosberger - * Stephane Eranian - * - * 05/22/2000 eranian Added support for stacked register calls - * 05/24/2000 eranian Added support for physical mode static calls - */ - -#include -#include -#include - - .data -pal_entry_point: - data8 ia64_pal_default_handler - .text - -/* - * Set the PAL entry point address. This could be written in C code, but we - * do it here to keep it all in one module (besides, it's so trivial that it's - * not a big deal). - * - * in0 Address of the PAL entry point (text address, NOT a function - * descriptor). - */ -GLOBAL_ENTRY(ia64_pal_handler_init) - alloc r3=ar.pfs,1,0,0,0 - movl r2=pal_entry_point - ;; - st8 [r2]=in0 - br.ret.sptk.many rp -END(ia64_pal_handler_init) - -/* - * Default PAL call handler. This needs to be coded in assembly because it - * uses the static calling convention, i.e., the RSE may not be used and - * calls are done via "br.cond" (not "br.call"). - */ -GLOBAL_ENTRY(ia64_pal_default_handler) - mov r8=-1 - br.cond.sptk.many rp -END(ia64_pal_default_handler) - -/* - * Make a PAL call using the static calling convention. 
- *
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,5,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0
- mov r29 = in1
- mov r8 = ip
- }
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- adds r8 = 1f-1b,r8
- mov loc4=ar.rsc // save RSE configuration
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov loc3 = psr
- mov loc0 = rp
- .body
- mov r30 = in2
-
- mov r31 = in3
- mov b7 = loc2
-
- rsm psr.i
- ;;
- mov rp = r8
- br.cond.sptk.many b7
-1: mov psr.l = loc3
- mov ar.rsc = loc4 // restore RSE configuration
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_static)
-EXPORT_SYMBOL(ia64_pal_call_static)
-
-/*
- * Make a PAL call using the stacked registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,4,4,0
- movl loc2 = pal_entry_point
-
- mov r28 = in0 // Index MUST be copied to r28
- mov out0 = in0 // AND in0 of PAL function
- mov loc0 = rp
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov out1 = in1
- mov out2 = in2
- mov out3 = in3
- mov loc3 = psr
- ;;
- rsm psr.i
- mov b7 = loc2
- ;;
- br.call.sptk.many rp=b7 // now make the call
-.ret0: mov psr.l = loc3
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_stacked)
-EXPORT_SYMBOL(ia64_pal_call_stacked)
-
-/*
- * Make a physical mode PAL call using the static registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- *
- * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
- * So we don't need to clear them.
- */
-#define PAL_PSR_BITS_TO_CLEAR \
- (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\
- IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
- IA64_PSR_DFL | IA64_PSR_DFH)
-
-#define PAL_PSR_BITS_TO_SET \
- (IA64_PSR_BN)
-
-
-GLOBAL_ENTRY(ia64_pal_call_phys_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,7,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov r8 = ip // save ip to compute branch
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov r29 = in1 // first argument
- mov r30 = in2 // copy arg2
- mov r31 = in3 // copy arg3
- ;;
- mov loc3 = psr // save psr
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- tpa r8=r8 // convert rp to physical
- ;;
- mov b7 = loc2 // install target to branch reg
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
- mov rp = r8 // install return address (physical)
- mov loc5 = r19
- mov loc6 = r20
- br.cond.sptk.many b7
-1:
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
- mov psr.l = loc3 // restore init PSR
-
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_static)
-EXPORT_SYMBOL(ia64_pal_call_phys_static)
-
-/*
- * Make a PAL call using the stacked registers in physical mode.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
- alloc loc1 = ar.pfs,5,7,4,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov loc3 = psr // save psr
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- mov b7 = loc2 // install target to branch reg
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
-
- mov out0 = in0 // first argument
- mov out1 = in1 // copy arg2
- mov out2 = in2 // copy arg3
- mov out3 = in3 // copy arg4
- mov loc5 = r19
- mov loc6 = r20
-
- br.call.sptk.many rp=b7 // now make the call
-
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-
- mov psr.l = loc3 // restore init PSR
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_stacked)
-EXPORT_SYMBOL(ia64_pal_call_phys_stacked)
-
-/*
- * Save the scratch fp regs which aren't saved in pt_regs already
- * (fp10-fp15).
- * - * NOTE: We need to do this since firmware (SAL and PAL) may use any of the - * scratch regs fp-low partition. - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_save_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - stf.spill [in0] = f10,32 - stf.spill [r2] = f11,32 - ;; - stf.spill [in0] = f12,32 - stf.spill [r2] = f13,32 - ;; - stf.spill [in0] = f14,32 - stf.spill [r2] = f15,32 - br.ret.sptk.many rp -END(ia64_save_scratch_fpregs) -EXPORT_SYMBOL(ia64_save_scratch_fpregs) - -/* - * Load scratch fp scratch regs (fp10-fp15) - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_load_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - ldf.fill f10 = [in0],32 - ldf.fill f11 = [r2],32 - ;; - ldf.fill f12 = [in0],32 - ldf.fill f13 = [r2],32 - ;; - ldf.fill f14 = [in0],32 - ldf.fill f15 = [r2],32 - br.ret.sptk.many rp -END(ia64_load_scratch_fpregs) -EXPORT_SYMBOL(ia64_load_scratch_fpregs) diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S deleted file mode 100644 index 7124fe7bec7c322ef879829342280dbdf6ff442c..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/relocate_kernel.S +++ /dev/null @@ -1,323 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/ia64/kernel/relocate_kernel.S - * - * Relocate kexec'able kernel and start it - * - * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. - * Copyright (C) 2005 Khalid Aziz - * Copyright (C) 2005 Intel Corp, Zou Nan hai - */ -#include -#include -#include -#include -#include - - /* Must be relocatable PIC code callable as a C function - */ -GLOBAL_ENTRY(relocate_new_kernel) - .prologue - alloc r31=ar.pfs,4,0,0,0 - .body -.reloc_entry: -{ - rsm psr.i| psr.ic - mov r2=ip -} - ;; -{ - flushrs // must be first insn in group - srlz.i -} - ;; - dep r2=0,r2,61,3 //to physical address - ;; - //first switch to physical mode - add r3=1f-.reloc_entry, r2 - movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC - mov ar.rsc=0 // put RSE in enforced lazy mode - ;; - add sp=(memory_stack_end - 16 - .reloc_entry),r2 - add r8=(register_stack - .reloc_entry),r2 - ;; - mov r18=ar.rnat - mov ar.bspstore=r8 - ;; - mov cr.ipsr=r16 - mov cr.iip=r3 - mov cr.ifs=r0 - srlz.i - ;; - mov ar.rnat=r18 - rfi // note: this unmask MCA/INIT (psr.mc) - ;; -1: - //physical mode code begin - mov b6=in1 - dep r28=0,in2,61,3 //to physical address - - // purge all TC entries -#define O(member) IA64_CPUINFO_##member##_OFFSET - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=r0 - ;; - adds r20=-1,r20 - ;; -#undef O -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i - ;; - // purge TR entry for kernel text and data - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - - // purge TR entry for pal code - mov r16=in3 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - - // purge TR entry for stack - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl 
r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - - //copy segments - movl r16=PAGE_MASK - mov r30=in0 // in0 is page_list - br.sptk.few .dest_page - ;; -.loop: - ld8 r30=[in0], 8;; -.dest_page: - tbit.z p0, p6=r30, 0;; // 0x1 dest page -(p6) and r17=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 1;; // 0x2 indirect page -(p6) and in0=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 2;; // 0x4 end flag -(p6) br.cond.sptk.few .end_loop;; - - tbit.z p6, p0=r30, 3;; // 0x8 source page -(p6) br.cond.sptk.few .loop - - and r18=r30, r16 - - // simple copy page, may optimize later - movl r14=PAGE_SIZE/8 - 1;; - mov ar.lc=r14;; -1: - ld8 r14=[r18], 8;; - st8 [r17]=r14;; - fc.i r17 - add r17=8, r17 - br.ctop.sptk.few 1b - br.sptk.few .loop - ;; - -.end_loop: - sync.i // for fc.i - ;; - srlz.i - ;; - srlz.d - ;; - br.call.sptk.many b0=b6;; - -.align 32 -memory_stack: - .fill 8192, 1, 0 -memory_stack_end: -register_stack: - .fill 8192, 1, 0 -register_stack_end: -relocate_new_kernel_end: -END(relocate_new_kernel) - -.global relocate_new_kernel_size -relocate_new_kernel_size: - data8 relocate_new_kernel_end - relocate_new_kernel - -GLOBAL_ENTRY(ia64_dump_cpu_regs) - .prologue - alloc loc0=ar.pfs,1,2,0,0 - .body - mov ar.rsc=0 // put RSE in enforced lazy mode - add loc1=4*8, in0 // save r4 and r5 first - ;; -{ - flushrs // flush dirty regs to backing store - srlz.i -} - st8 [loc1]=r4, 8 - ;; - st8 [loc1]=r5, 8 - ;; - add loc1=32*8, in0 - mov r4=ar.rnat - ;; - st8 [in0]=r0, 8 // r0 - st8 [loc1]=r4, 8 // rnat - mov r5=pr - ;; - st8 [in0]=r1, 8 // r1 - st8 [loc1]=r5, 8 // pr - mov r4=b0 - ;; - st8 [in0]=r2, 8 // r2 - st8 [loc1]=r4, 8 // b0 - mov r5=b1; - ;; - st8 [in0]=r3, 24 // r3 - st8 [loc1]=r5, 8 // b1 - mov r4=b2 - ;; - st8 [in0]=r6, 8 // r6 - st8 [loc1]=r4, 8 // b2 - mov r5=b3 - ;; - st8 [in0]=r7, 8 // r7 - st8 [loc1]=r5, 8 // b3 - mov r4=b4 - ;; - st8 [in0]=r8, 8 // r8 - st8 [loc1]=r4, 8 // b4 - mov r5=b5 - ;; - st8 [in0]=r9, 8 // r9 - st8 [loc1]=r5, 8 // b5 - mov r4=b6 - ;; - st8 [in0]=r10, 8 // r10 - st8 [loc1]=r5, 8 // b6 - mov r5=b7 - ;; - st8 [in0]=r11, 8 // r11 - st8 [loc1]=r5, 8 // b7 - mov r4=b0 - ;; - st8 [in0]=r12, 8 // r12 - st8 [loc1]=r4, 8 // ip - mov r5=loc0 - ;; - st8 [in0]=r13, 8 // r13 - extr.u r5=r5, 0, 38 // ar.pfs.pfm - mov r4=r0 // user mask - ;; - st8 [in0]=r14, 8 // r14 - st8 [loc1]=r5, 8 // cfm - ;; - st8 [in0]=r15, 8 // r15 - st8 [loc1]=r4, 8 // user mask - mov r5=ar.rsc - ;; - st8 [in0]=r16, 8 // r16 - st8 [loc1]=r5, 8 // ar.rsc - mov r4=ar.bsp - ;; - st8 [in0]=r17, 8 // r17 - st8 [loc1]=r4, 8 // ar.bsp - mov r5=ar.bspstore - ;; - st8 [in0]=r18, 8 // r18 - st8 [loc1]=r5, 8 // ar.bspstore - mov r4=ar.rnat - ;; - st8 [in0]=r19, 8 // r19 - st8 [loc1]=r4, 8 // ar.rnat - mov r5=ar.ccv - ;; - st8 [in0]=r20, 8 // r20 - st8 [loc1]=r5, 8 // ar.ccv - mov r4=ar.unat - ;; - st8 [in0]=r21, 8 // r21 - st8 [loc1]=r4, 8 // ar.unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r22, 8 // r22 - st8 [loc1]=r5, 8 // ar.fpsr - mov r4 = ar.unat - ;; - st8 [in0]=r23, 8 // r23 - st8 [loc1]=r4, 8 // unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r24, 8 // r24 - st8 [loc1]=r5, 8 // fpsr - mov r4 = ar.pfs - ;; - st8 [in0]=r25, 8 // r25 - st8 [loc1]=r4, 8 // ar.pfs - mov r5 = ar.lc - ;; - st8 [in0]=r26, 8 // r26 - st8 [loc1]=r5, 8 // ar.lc - mov r4 = ar.ec - ;; - st8 [in0]=r27, 8 // r27 - st8 [loc1]=r4, 8 // ar.ec - mov r5 = ar.csd - ;; - st8 [in0]=r28, 8 // r28 - st8 [loc1]=r5, 8 // ar.csd - mov r4 = ar.ssd - ;; - st8 
[in0]=r29, 8 // r29 - st8 [loc1]=r4, 8 // ar.ssd - ;; - st8 [in0]=r30, 8 // r30 - ;; - st8 [in0]=r31, 8 // r31 - mov ar.pfs=loc0 - ;; - br.ret.sptk.many rp -END(ia64_dump_cpu_regs) - - diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S deleted file mode 100644 index d9d4e21107cdbe8cee21c863ff0e7f585eaa171a..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include - -#include - -OUTPUT_FORMAT("elf64-ia64-little") -OUTPUT_ARCH(ia64) -ENTRY(phys_start) -jiffies = jiffies_64; - -PHDRS { - code PT_LOAD; - percpu PT_LOAD; - data PT_LOAD; - note PT_NOTE; - unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ -} - -SECTIONS { - /* - * unwind exit sections must be discarded before - * the rest of the sections get included. - */ - /DISCARD/ : { - *(.IA_64.unwind.exit.text) - *(.IA_64.unwind_info.exit.text) - *(.comment) - *(.note) - } - - v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ - phys_start = _start - LOAD_OFFSET; - - code : { - } :code - . = KERNEL_START; - - _text = .; - _stext = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) { - __start_ivt_text = .; - *(.text..ivt) - __end_ivt_text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.gnu.linkonce.t*) - } - - .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { - *(.text2) - } - -#ifdef CONFIG_SMP - .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) { - *(.text..lock) - } -#endif - _etext = .; - - /* - * Read-only data - */ - NOTES :code :note /* put .notes in text and mark in PT_NOTE */ - code_continues : { - } : code /* switch back to regular program... */ - - EXCEPTION_TABLE(16) - - /* MCA table */ - . = ALIGN(16); - __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { - __start___mca_table = .; - *(__mca_table) - __stop___mca_table = .; - } - - .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) { - __start___phys_stack_reg_patchlist = .; - *(.data..patch.phys_stack_reg) - __end___phys_stack_reg_patchlist = .; - } - - /* - * Global data - */ - _data = .; - - /* Unwind info & table: */ - . = ALIGN(8); - .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) { - *(.IA_64.unwind_info*) - } - .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) { - __start_unwind = .; - *(.IA_64.unwind*) - __end_unwind = .; - } :code :unwind - code_continues2 : { - } : code - - RODATA - - .opd : AT(ADDR(.opd) - LOAD_OFFSET) { - __start_opd = .; - *(.opd) - __end_opd = .; - } - - /* - * Initialization code and data: - */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) - - .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) { - __start___vtop_patchlist = .; - *(.data..patch.vtop) - __end___vtop_patchlist = .; - } - - .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) { - __start___rse_patchlist = .; - *(.data..patch.rse) - __end___rse_patchlist = .; - } - - .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) { - __start___mckinley_e9_bundles = .; - *(.data..patch.mckinley_e9) - __end___mckinley_e9_bundles = .; - } - -#ifdef CONFIG_SMP - . = ALIGN(PERCPU_PAGE_SIZE); - __cpu0_per_cpu = .; - . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { - PAGE_ALIGNED_DATA(PAGE_SIZE) - . 
= ALIGN(PAGE_SIZE); - __start_gate_section = .; - *(.data..gate) - __stop_gate_section = .; - } - /* - * make sure the gate page doesn't expose - * kernel data - */ - . = ALIGN(PAGE_SIZE); - - /* Per-cpu data: */ - . = ALIGN(PERCPU_PAGE_SIZE); - PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) - __phys_per_cpu_start = __per_cpu_load; - /* - * ensure percpu data fits - * into percpu page size - */ - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; - - data : { - } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) { - _sdata = .; - INIT_TASK_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) - READ_MOSTLY_DATA(SMP_CACHE_BYTES) - DATA_DATA - *(.data1) - *(.gnu.linkonce.d*) - CONSTRUCTORS - } - - BUG_TABLE - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) { - *(.got.plt) - *(.got) - } - __gp = ADDR(.got) + 0x200000; - - /* - * We want the small data sections together, - * so single-instruction offsets can access - * them all, and initialized data all before - * uninitialized, so we can shorten the - * on-disk segment size. - */ - .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { - *(.sdata) - *(.sdata1) - *(.srdata) - } - _edata = .; - - BSS_SECTION(0, 0, 0) - - _end = .; - - code : { - } :code - - STABS_DEBUG - DWARF_DEBUG - - /* Default discards */ - DISCARDS -} diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S deleted file mode 100644 index 1a4a639dc42f05a73fff3c29b5ad26481befd91c..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/carta_random.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include - -#define a r2 -#define m r3 -#define lo r8 -#define hi r9 -#define t0 r16 -#define t1 r17 -#define seed r32 - -GLOBAL_ENTRY(carta_random32) - movl a = (16807 << 16) | 16807 - ;; - pmpyshr2.u t0 = a, seed, 0 - pmpyshr2.u t1 = a, seed, 16 - ;; - unpack2.l t0 = t1, t0 - dep m = -1, r0, 0, 31 - ;; - zxt4 lo = t0 - shr.u hi = t0, 32 - ;; - dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff) - ;; - shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16 - shr t1 = hi, 15 // t1 = (hi >> 15) - ;; - add lo = lo, t0 - ;; - cmp.gtu p6, p0 = lo, m - ;; -(p6) and lo = lo, m - ;; -(p6) add lo = 1, lo - ;; - add lo = lo, t1 - ;; - cmp.gtu p6, p0 = lo, m - ;; -(p6) and lo = lo, m - ;; -(p6) add lo = 1, lo - br.ret.sptk.many rp -END(carta_random32) diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S deleted file mode 100644 index 65b75085c8f46c231586745f22dc14a6aa13c11a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/clear_page.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2002 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - * Copyright (C) 2002 Ken Chen - * - * 1/06/01 davidm Tuned for Itanium. 
- * 2/12/02 kchen Tuned for both Itanium and McKinley - * 3/08/02 davidm Some more tweaking - */ - -#include -#include -#include - -#ifdef CONFIG_ITANIUM -# define L3_LINE_SIZE 64 // Itanium L3 line size -# define PREFETCH_LINES 9 // magic number -#else -# define L3_LINE_SIZE 128 // McKinley L3 line size -# define PREFETCH_LINES 12 // magic number -#endif - -#define saved_lc r2 -#define dst_fetch r3 -#define dst1 r8 -#define dst2 r9 -#define dst3 r10 -#define dst4 r11 - -#define dst_last r31 - -GLOBAL_ENTRY(clear_page) - .prologue - .regstk 1,0,0,0 - mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until - .save ar.lc, saved_lc - mov saved_lc = ar.lc - - .body - mov ar.lc = (PREFETCH_LINES - 1) - mov dst_fetch = in0 - adds dst1 = 16, in0 - adds dst2 = 32, in0 - ;; -.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - adds dst3 = 48, in0 // executing this multiple times is harmless - br.cloop.sptk.few .fetch - ;; - addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch - mov ar.lc = r16 // one L3 line per iteration - adds dst4 = 64, in0 - ;; -#ifdef CONFIG_ITANIUM - // Optimized for Itanium -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - cmp.lt p8,p0=dst_fetch, dst_last - ;; -#else - // Optimized for McKinley -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - stf.spill.nta [dst3] = f0, 64 - stf.spill.nta [dst4] = f0, 128 - cmp.lt p8,p0=dst_fetch, dst_last - ;; - stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 -#endif - stf.spill.nta [dst3] = f0, 64 -(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - br.cloop.sptk.few 1b - ;; - mov ar.lc = saved_lc // restore lc - br.ret.sptk.many rp -END(clear_page) -EXPORT_SYMBOL(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S deleted file mode 100644 index a28f39d349ebeabb784962f18f3df8df0ae220ff..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/clear_user.S +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This routine clears to zero a linear memory buffer in user space. - * - * Inputs: - * in0: address of buffer - * in1: length of buffer in bytes - * Outputs: - * r8: number of bytes that didn't get cleared due to a fault - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - */ - -#include -#include - -// -// arguments -// -#define buf r32 -#define len r33 - -// -// local registers -// -#define cnt r16 -#define buf2 r17 -#define saved_lc r18 -#define saved_pfs r19 -#define tmp r20 -#define len2 r21 -#define len3 r22 - -// -// Theory of operations: -// - we check whether or not the buffer is small, i.e., less than 17 -// in which case we do the byte by byte loop. -// -// - Otherwise we go progressively from 1 byte store to 8byte store in -// the head part, the body is a 16byte store loop and we finish we the -// tail for the last 15 bytes. -// The good point about this breakdown is that the long buffer handling -// contains only 2 branches. -// -// The reason for not using shifting & masking for both the head and the -// tail is to stay semantically correct. This routine is not supposed -// to write bytes outside of the buffer. While most of the time this would -// be ok, we can't tolerate a mistake. A classical example is the case -// of multithreaded code were to the extra bytes touched is actually owned -// by another thread which runs concurrently to ours. 
Another, less likely, -// example is with device drivers where reading an I/O mapped location may -// have side effects (same thing for writing). -// - -GLOBAL_ENTRY(__do_clear_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,0,0,0 - cmp.eq p6,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) - .body - ;; // avoid WAW on CFM - adds tmp=-1,len // br.ctop is repeat/until - mov ret0=len // return value is length at this point -(p6) br.ret.spnt.many rp - ;; - cmp.lt p6,p0=16,len // if len > 16 then long memset - mov ar.lc=tmp // initialize lc for small count -(p6) br.cond.dptk .long_do_clear - ;; // WAR on ar.lc - // - // worst case 16 iterations, avg 8 iterations - // - // We could have played with the predicates to use the extra - // M slot for 2 stores/iteration but the cost the initialization - // the various counters compared to how long the loop is supposed - // to last on average does not make this solution viable. - // -1: - EX( .Lexit1, st1 [buf]=r0,1 ) - adds len=-1,len // countdown length using len - br.cloop.dptk 1b - ;; // avoid RAW on ar.lc - // - // .Lexit4: comes from byte by byte loop - // len contains bytes left -.Lexit1: - mov ret0=len // faster than using ar.lc - mov ar.lc=saved_lc - br.ret.sptk.many rp // end of short clear_user - - - // - // At this point we know we have more than 16 bytes to copy - // so we focus on alignment (no branches required) - // - // The use of len/len2 for countdown of the number of bytes left - // instead of ret0 is due to the fact that the exception code - // changes the values of r8. - // -.long_do_clear: - tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) - ;; - EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned -(p6) adds len=-1,len;; // sync because buf is modified - tbit.nz p6,p0=buf,1 - ;; - EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned -(p6) adds len=-2,len;; - tbit.nz p6,p0=buf,2 - ;; - EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned -(p6) adds len=-4,len;; - tbit.nz p6,p0=buf,3 - ;; - EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned -(p6) adds len=-8,len;; - shr.u cnt=len,4 // number of 128-bit (2x64bit) words - ;; - cmp.eq p6,p0=r0,cnt - adds tmp=-1,cnt -(p6) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds buf2=8,buf // setup second base pointer - mov ar.lc=tmp - ;; - - // - // 16bytes/iteration core loop - // - // The second store can never generate a fault because - // we come into the loop only when we are 16-byte aligned. - // This means that if we cross a page then it will always be - // in the first store and never in the second. - // - // - // We need to keep track of the remaining length. A possible (optimistic) - // way would be to use ar.lc and derive how many byte were left by - // doing : left= 16*ar.lc + 16. this would avoid the addition at - // every iteration. - // However we need to keep the synchronization point. A template - // M;;MB does not exist and thus we can keep the addition at no - // extra cycle cost (use a nop slot anyway). It also simplifies the - // (unlikely) error recovery code - // - -2: EX(.Lexit3, st8 [buf]=r0,16 ) - ;; // needed to get len correct when error - st8 [buf2]=r0,16 - adds len=-16,len - br.cloop.dptk 2b - ;; - mov ar.lc=saved_lc - // - // tail correction based on len only - // - // We alternate the use of len3,len2 to allow parallelism and correct - // error handling. We also reuse p6/p7 to return correct value. 
- // The addition of len2/len3 does not cost anything more compared to - // the regular memset as we had empty slots. - // -.dotail: - mov len2=len // for parallelization of error handling - mov len3=len - tbit.nz p6,p0=len,3 - ;; - EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes -(p6) adds len3=-8,len2 - tbit.nz p7,p6=len,2 - ;; - EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes -(p7) adds len2=-4,len3 - tbit.nz p6,p7=len,1 - ;; - EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes -(p6) adds len3=-2,len2 - tbit.nz p7,p6=len,0 - ;; - EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left - mov ret0=r0 // success - br.ret.sptk.many rp // end of most likely path - - // - // Outlined error handling code - // - - // - // .Lexit3: comes from core loop, need restore pr/lc - // len contains bytes left - // - // - // .Lexit2: - // if p6 -> coming from st8 or st2 : len2 contains what's left - // if p7 -> coming from st4 or st1 : len3 contains what's left - // We must restore lc/pr even though might not have been used. -.Lexit2: - .pred.rel "mutex", p6, p7 -(p6) mov len=len2 -(p7) mov len=len3 - ;; - // - // .Lexit4: comes from head, need not restore pr/lc - // len contains bytes left - // -.Lexit3: - mov ret0=len - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(__do_clear_user) -EXPORT_SYMBOL(__do_clear_user) diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S deleted file mode 100644 index 176f857c522e8a9d620026db1903c0e36206439a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/copy_page.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard copy_page() function - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * - * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. 
- */ -#include -#include -#include - -#define PIPE_DEPTH 3 -#define EPI p[PIPE_DEPTH-1] - -#define lcount r16 -#define saved_pr r17 -#define saved_lc r18 -#define saved_pfs r19 -#define src1 r20 -#define src2 r21 -#define tgt1 r22 -#define tgt2 r23 -#define srcf r24 -#define tgtf r25 -#define tgt_last r26 - -#define Nrot ((8*PIPE_DEPTH+7)&~7) - -GLOBAL_ENTRY(copy_page) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - - .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ - t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - .save ar.lc, saved_lc - mov saved_lc=ar.lc - mov ar.ec=PIPE_DEPTH - - mov lcount=PAGE_SIZE/64-1 - .save pr, saved_pr - mov saved_pr=pr - mov pr.rot=1<<16 - - .body - - mov src1=in1 - adds src2=8,in1 - mov tgt_last = PAGE_SIZE - ;; - adds tgt2=8,in0 - add srcf=512,in1 - mov ar.lc=lcount - mov tgt1=in0 - add tgtf=512,in0 - add tgt_last = tgt_last, in0 - ;; -1: -(p[0]) ld8 t1[0]=[src1],16 -(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 -(p[0]) ld8 t2[0]=[src2],16 -(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 - cmp.ltu p6,p0 = tgtf, tgt_last - ;; -(p[0]) ld8 t3[0]=[src1],16 -(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 -(p[0]) ld8 t4[0]=[src2],16 -(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t5[0]=[src1],16 -(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 -(p[0]) ld8 t6[0]=[src2],16 -(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t7[0]=[src1],16 -(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 -(p[0]) ld8 t8[0]=[src2],16 -(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 - -(p6) lfetch [srcf], 64 -(p6) lfetch [tgtf], 64 - br.ctop.sptk.few 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 // restore predicates - mov ar.pfs=saved_pfs - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S deleted file mode 100644 index d6fd56e4f1c1dcac5965ce4cb03516a7f15d4587..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/copy_page_mck.S +++ /dev/null @@ -1,188 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * McKinley-optimized version of copy_page(). - * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * General idea: - * - use regular loads and stores to prefetch data to avoid consuming M-slot just for - * lfetches => good for in-cache performance - * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single - * cycle - * - * Principle of operation: - * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. - * To avoid secondary misses in L2, we prefetch both source and destination with a line-size - * of 128 bytes. When both of these lines are in the L2 and the first half of the - * source line is in L1, we start copying the remaining words. The second half of the - * source line is prefetched in an earlier iteration, so that by the time we start - * accessing it, it's also present in the L1. - * - * We use a software-pipelined loop to control the overall operation. The pipeline - * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching - * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination - * cache-lines, the last K stages are used to copy the cache-line words not copied by - * the prefetches. 
The four relevant points in the pipelined are called A, B, C, D: - * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line - * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought - * into L1D and p[D] is TRUE if a cacheline needs to be copied. - * - * This all sounds very complicated, but thanks to the modulo-scheduled loop support, - * the resulting code is very regular and quite easy to follow (once you get the idea). - * - * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented - * as the separate .prefetch_loop. Logically, this loop performs exactly like the - * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, - * so that each loop iteration is faster (again, good for cached case). - * - * When reading the code, it helps to keep the following picture in mind: - * - * word 0 word 1 - * +------+------+--- - * | v[x] | t1 | ^ - * | t2 | t3 | | - * | t4 | t5 | | - * | t6 | t7 | | 128 bytes - * | n[y] | t9 | | (L2 cache line) - * | t10 | t11 | | - * | t12 | t13 | | - * | t14 | t15 | v - * +------+------+--- - * - * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] - * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in - * an order that avoids bank conflicts. - */ -#include -#include -#include - -#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) - -#define src0 r2 -#define src1 r3 -#define dst0 r9 -#define dst1 r10 -#define src_pre_mem r11 -#define dst_pre_mem r14 -#define src_pre_l2 r15 -#define dst_pre_l2 r16 -#define t1 r17 -#define t2 r18 -#define t3 r19 -#define t4 r20 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! -#define t13 r21 -#define t15 r22 - -#define saved_lc r23 -#define saved_pr r24 - -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 3) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -GLOBAL_ENTRY(copy_page) - .prologue - alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot - - .rotr v[2*PREFETCH_DIST], n[D-C+1] - .rotp p[N] - - .save ar.lc, saved_lc - mov saved_lc = ar.lc - .save pr, saved_pr - mov saved_pr = pr - .body - - mov src_pre_mem = in1 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = in0 - mov ar.lc = 2*PREFETCH_DIST - 1 - - add src_pre_l2 = 8*8, in1 - add dst_pre_l2 = 8*8, in0 - add src0 = 8, in1 // first t1 src - add src1 = 3*8, in1 // first t3 src - add dst0 = 8, in0 // first t1 dst - add dst1 = 3*8, in0 // first t3 dst - mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 - nop.m 0 - nop.i 0 - ;; - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) - mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! 
- mov ar.ec = N // # of stages in pipeline
- ;;
-.line_copy:
-(p[D]) ld8 t2 = [src0], 3*8 // M0
-(p[D]) ld8 t4 = [src1], 3*8 // M1
-(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
-(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
- ;;
-(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
-(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
-(p[D]) st8 [dst0] = t1, 8 // M2
-(p[D]) st8 [dst1] = t3, 8 // M3
- ;;
-(p[D]) ld8 t5 = [src0], 8
-(p[D]) ld8 t7 = [src1], 3*8
-(p[D]) st8 [dst0] = t2, 3*8
-(p[D]) st8 [dst1] = t4, 3*8
- ;;
-(p[D]) ld8 t6 = [src0], 3*8
-(p[D]) ld8 t10 = [src1], 8
-(p[D]) st8 [dst0] = t5, 8
-(p[D]) st8 [dst1] = t7, 3*8
- ;;
-(p[D]) ld8 t9 = [src0], 3*8
-(p[D]) ld8 t11 = [src1], 3*8
-(p[D]) st8 [dst0] = t6, 3*8
-(p[D]) st8 [dst1] = t10, 8
- ;;
-(p[D]) ld8 t12 = [src0], 8
-(p[D]) ld8 t14 = [src1], 8
-(p[D]) st8 [dst0] = t9, 3*8
-(p[D]) st8 [dst1] = t11, 3*8
- ;;
-(p[D]) ld8 t13 = [src0], 4*8
-(p[D]) ld8 t15 = [src1], 4*8
-(p[D]) st8 [dst0] = t12, 8
-(p[D]) st8 [dst1] = t14, 8
- ;;
-(p[D-1])ld8 t1 = [src0], 8
-(p[D-1])ld8 t3 = [src1], 8
-(p[D]) st8 [dst0] = t13, 4*8
-(p[D]) st8 [dst1] = t15, 4*8
- br.ctop.sptk .line_copy
- ;;
- mov ar.lc = saved_lc
- mov pr = saved_pr, -1
- br.ret.sptk.many rp
-END(copy_page)
-EXPORT_SYMBOL(copy_page)
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
deleted file mode 100644
index f681556c6b86d99525c3674daa071ff4d5c3e6d5..0000000000000000000000000000000000000000
--- a/arch/ia64/lib/copy_user.S
+++ /dev/null
@@ -1,613 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * Optimized version of the copy_user() routine.
- * It is used to copy data across the kernel/user boundary.
- *
- * The source and destination are always on opposite sides of
- * the boundary. When reading from user space we must catch
- * faults on loads. When writing to user space we must catch
- * errors on stores. Note that because of the nature of the copy
- * we don't need to worry about overlapping regions.
- *
- *
- * Inputs:
- * in0 address of destination buffer
- * in1 address of source buffer
- * in2 number of bytes to copy
- *
- * Outputs:
- * ret0 0 in case of success. The number of bytes NOT copied in
- * case of error.
- *
- * Copyright (C) 2000-2001 Hewlett-Packard Co
- * Stephane Eranian
- *
- * Fixme:
- * - handle the case where we have more than 16 bytes and the alignments
- * are different.
- * - more benchmarking
- * - fix extraneous stop bit introduced by the EX() macro.
- */ - -#include -#include - -// -// Tuneable parameters -// -#define COPY_BREAK 16 // we do byte copy below (must be >=16) -#define PIPE_DEPTH 21 // pipe depth - -#define EPI p[PIPE_DEPTH-1] - -// -// arguments -// -#define dst in0 -#define src in1 -#define len in2 - -// -// local registers -// -#define t1 r2 // rshift in bytes -#define t2 r3 // lshift in bytes -#define rshift r14 // right shift in bits -#define lshift r15 // left shift in bits -#define word1 r16 -#define word2 r17 -#define cnt r18 -#define len2 r19 -#define saved_lc r20 -#define saved_pr r21 -#define tmp r22 -#define val r23 -#define src1 r24 -#define dst1 r25 -#define src2 r26 -#define dst2 r27 -#define len1 r28 -#define enddst r29 -#define endsrc r30 -#define saved_pfs r31 - -GLOBAL_ENTRY(__copy_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) - - .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - adds len2=-1,len // br.ctop is repeat/until - mov ret0=r0 - - ;; // RAW of cfm when len=0 - cmp.eq p8,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) -(p8) br.ret.spnt.many rp // empty mempcy() - ;; - add enddst=dst,len // first byte after end of source - add endsrc=src,len // first byte after end of destination - .save pr, saved_pr - mov saved_pr=pr // preserve predicates - - .body - - mov dst1=dst // copy because of rotation - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - - mov src1=src // copy because of rotation - mov ar.lc=len2 // initialize lc for small count - cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy - - xor tmp=src,dst // same alignment test prepare -(p10) br.cond.dptk .long_copy_user - ;; // RAW pr.rot/p16 ? - // - // Now we do the byte by byte loop with software pipeline - // - // p7 is necessarily false by now -1: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 1b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs // restore ar.ec - br.ret.sptk.many rp // end of short memcpy - - // - // Not 8-byte aligned - // -.diff_align_copy_user: - // At this point we know we have more than 16 bytes to copy - // and also that src and dest do _not_ have the same alignment. - and src2=0x7,src1 // src offset - and dst2=0x7,dst1 // dst offset - ;; - // The basic idea is that we copy byte-by-byte at the head so - // that we can reach 8-byte alignment for both src1 and dst1. - // Then copy the body using software pipelined 8-byte copy, - // shifting the two back-to-back words right and left, then copy - // the tail by copying byte-by-byte. - // - // Fault handling. If the byte-by-byte at the head fails on the - // load, then restart and finish the pipleline by copying zeros - // to the dst1. Then copy zeros for the rest of dst1. - // If 8-byte software pipeline fails on the load, do the same as - // failure_in3 does. If the byte-by-byte at the tail fails, it is - // handled simply by failure_in_pipe1. - // - // The case p14 represents the source has more bytes in the - // the first word (by the shifted part), whereas the p15 needs to - // copy some bytes from the 2nd word of the source that has the - // tail of the 1st of the destination. - // - - // - // Optimization. If dst1 is 8-byte aligned (quite common), we don't need - // to copy the head to dst1, to start 8-byte copy software pipeline. 
- // We know src1 is not 8-byte aligned in this case. - // - cmp.eq p14,p15=r0,dst2 -(p15) br.cond.spnt 1f - ;; - sub t1=8,src2 - mov t2=src2 - ;; - shl rshift=t2,3 - sub len1=len,t1 // set len1 - ;; - sub lshift=64,rshift - ;; - br.cond.spnt .word_copy_user - ;; -1: - cmp.leu p14,p15=src2,dst2 - sub t1=dst2,src2 - ;; - .pred.rel "mutex", p14, p15 -(p14) sub word1=8,src2 // (8 - src offset) -(p15) sub t1=r0,t1 // absolute value -(p15) sub word1=8,dst2 // (8 - dst offset) - ;; - // For the case p14, we don't need to copy the shifted part to - // the 1st word of destination. - sub t2=8,t1 -(p14) sub word1=word1,t1 - ;; - sub len1=len,word1 // resulting len -(p15) shl rshift=t1,3 // in bits -(p14) shl rshift=t2,3 - ;; -(p14) sub len1=len1,t1 - adds cnt=-1,word1 - ;; - sub lshift=64,rshift - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=cnt - ;; -2: - EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 2b - ;; - clrrrb - ;; -.word_copy_user: - cmp.gtu p9,p0=16,len1 -(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy - ;; - shr.u cnt=len1,3 // number of 64-bit words - ;; - adds cnt=-1,cnt - ;; - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t2 -(p15) sub src1=src1,t1 - // - // Now both src1 and dst1 point to an 8-byte aligned address. And - // we have more than 8 bytes to copy. - // - mov ar.lc=cnt - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - ;; -3: - // - // The pipleline consists of 3 stages: - // 1 (p16): Load a word from src1 - // 2 (EPI_1): Shift right pair, saving to tmp - // 3 (EPI): Store tmp to dst1 - // - // To make it simple, use at least 2 (p16) loops to set up val1[n] - // because we need 2 back-to-back val1[] to get tmp. - // Note that this implies EPI_2 must be p18 or greater. - // - -#define EPI_1 p[PIPE_DEPTH-2] -#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift -#define CASE(pred, shift) \ - (pred) br.cond.spnt .copy_user_bit##shift -#define BODY(rshift) \ -.copy_user_bit##rshift: \ -1: \ - EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ - EX(3f,(p16) ld8 val1[1]=[src1],8); \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 1b; \ - ;; \ - br.cond.sptk.many .diff_align_do_tail; \ -2: \ -(EPI) st8 [dst1]=tmp,8; \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ -3: \ -(p16) mov val1[1]=r0; \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 2b; \ - ;; \ - br.cond.sptk.many .failure_in2 - - // - // Since the instruction 'shrp' requires a fixed 128-bit value - // specifying the bits to shift, we need to provide 7 cases - // below. - // - SWITCH(p6, 8) - SWITCH(p7, 16) - SWITCH(p8, 24) - SWITCH(p9, 32) - SWITCH(p10, 40) - SWITCH(p11, 48) - SWITCH(p12, 56) - ;; - CASE(p6, 8) - CASE(p7, 16) - CASE(p8, 24) - CASE(p9, 32) - CASE(p10, 40) - CASE(p11, 48) - CASE(p12, 56) - ;; - BODY(8) - BODY(16) - BODY(24) - BODY(32) - BODY(40) - BODY(48) - BODY(56) - ;; -.diff_align_do_tail: - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t1 -(p14) adds dst1=-8,dst1 -(p15) sub dst1=dst1,t1 - ;; -4: - // Tail correction. - // - // The problem with this piplelined loop is that the last word is not - // loaded and thus parf of the last word written is not correct. - // To fix that, we simply copy the tail byte by byte. 
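Setting the tail fix-up aside for a moment, the seven BODY(rshift) cases generated above all perform the same operation. A plain C sketch of that operation, assuming a little-endian layout and an rshift equal to one of the seven values 8..56 dispatched by the SWITCH/CASE table (so the C shifts stay in range); the function name is invented:

#include <stddef.h>
#include <stdint.h>

/* Copy nwords 8-byte words to an aligned destination from an aligned
 * load stream whose useful data starts rshift bits into src[0]: each
 * output word pairs the top of one input word with the bottom of the
 * next, which is what shrp does in a single instruction.  Note the
 * loop reads src[0..nwords], one word beyond the data copied, just
 * as the pipelined loop above loads one word ahead. */
static void shift_pair_copy(uint64_t *dst, const uint64_t *src,
			    size_t nwords, unsigned int rshift)
{
	unsigned int lshift = 64 - rshift;
	uint64_t lo = src[0];
	size_t i;

	for (i = 0; i < nwords; i++) {
		uint64_t hi = src[i + 1];
		dst[i] = (lo >> rshift) | (hi << lshift);	/* shrp */
		lo = hi;
	}
}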
- - sub len1=endsrc,src1,1 - clrrrb - ;; - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=len1 - ;; -5: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 5b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Beginning of long mempcy (i.e. > 16 bytes) - // -.long_copy_user: - tbit.nz p6,p7=src1,0 // odd alignment - and tmp=7,tmp - ;; - cmp.eq p10,p8=r0,tmp - mov len1=len // copy because of rotation -(p8) br.cond.dpnt .diff_align_copy_user - ;; - // At this point we know we have more than 16 bytes to copy - // and also that both src and dest have the same alignment - // which may not be the one we want. So for now we must move - // forward slowly until we reach 16byte alignment: no need to - // worry about reaching the end of buffer. - // - EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned -(p6) adds len1=-1,len1;; - tbit.nz p7,p0=src1,1 - ;; - EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned -(p7) adds len1=-2,len1;; - tbit.nz p8,p0=src1,2 - ;; - // - // Stop bit not required after ld4 because if we fail on ld4 - // we have never executed the ld1, therefore st1 is not executed. - // - EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned - ;; - EX(.failure_out,(p6) st1 [dst1]=val1[0],1) - tbit.nz p9,p0=src1,3 - ;; - // - // Stop bit not required after ld8 because if we fail on ld8 - // we have never executed the ld2, therefore st2 is not executed. - // - EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned - EX(.failure_out,(p7) st2 [dst1]=val1[1],2) -(p8) adds len1=-4,len1 - ;; - EX(.failure_out, (p8) st4 [dst1]=val2[0],4) -(p9) adds len1=-8,len1;; - shr.u cnt=len1,4 // number of 128-bit (2x64bit) words - ;; - EX(.failure_out, (p9) st8 [dst1]=val2[1],8) - tbit.nz p6,p0=len1,3 - cmp.eq p7,p0=r0,cnt - adds tmp=-1,cnt // br.ctop is repeat/until -(p7) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds src2=8,src1 - adds dst2=8,dst1 - mov ar.lc=tmp - ;; - // - // 16bytes/iteration - // -2: - EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) -(p16) ld8 val2[0]=[src2],16 - - EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; // RAW on src1 when fall through from loop - // - // Tail correction based on len only - // - // No matter where we come from (loop or test) the src1 pointer - // is 16 byte aligned AND we have less than 16 bytes to copy. - // -.dotail: - EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes - tbit.nz p7,p0=len1,2 - ;; - EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes - tbit.nz p8,p0=len1,1 - ;; - EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes - tbit.nz p9,p0=len1,0 - ;; - EX(.failure_out, (p6) st8 [dst1]=val1[0],8) - ;; - EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left - mov ar.lc=saved_lc - ;; - EX(.failure_out,(p7) st4 [dst1]=val1[1],4) - mov pr=saved_pr,0xffffffffffff0000 - ;; - EX(.failure_out, (p8) st2 [dst1]=val2[0],2) - mov ar.pfs=saved_pfs - ;; - EX(.failure_out, (p9) st1 [dst1]=val2[1]) - br.ret.sptk.many rp - - - // - // Here we handle the case where the byte by byte copy fails - // on the load. 
- // Several factors make the zeroing of the rest of the buffer kind of - // tricky: - // - the pipeline: loads/stores are not in sync (pipeline) - // - // In the same loop iteration, the dst1 pointer does not directly - // reflect where the faulty load was. - // - // - pipeline effect - // When you get a fault on load, you may have valid data from - // previous loads not yet store in transit. Such data must be - // store normally before moving onto zeroing the rest. - // - // - single/multi dispersal independence. - // - // solution: - // - we don't disrupt the pipeline, i.e. data in transit in - // the software pipeline will be eventually move to memory. - // We simply replace the load with a simple mov and keep the - // pipeline going. We can't really do this inline because - // p16 is always reset to 1 when lc > 0. - // -.failure_in_pipe1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -1: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // This is the case where the byte by byte copy fails on the load - // when we copy the head. We need to finish the pipeline and copy - // zeros for the rest of the destination. Since this happens - // at the top we still need to fill the body and tail. -.failure_in_pipe2: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -2: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 2b - ;; - sub len=enddst,dst1,1 // precompute len - br.cond.dptk.many .failure_in1bis - ;; - - // - // Here we handle the head & tail part when we check for alignment. - // The following code handles only the load failures. The - // main diffculty comes from the fact that loads/stores are - // scheduled. So when you fail on a load, the stores corresponding - // to previous successful loads must be executed. - // - // However some simplifications are possible given the way - // things work. - // - // 1) HEAD - // Theory of operation: - // - // Page A | Page B - // ---------|----- - // 1|8 x - // 1 2|8 x - // 4|8 x - // 1 4|8 x - // 2 4|8 x - // 1 2 4|8 x - // |1 - // |2 x - // |4 x - // - // page_size >= 4k (2^12). (x means 4, 2, 1) - // Here we suppose Page A exists and Page B does not. - // - // As we move towards eight byte alignment we may encounter faults. - // The numbers on each page show the size of the load (current alignment). - // - // Key point: - // - if you fail on 1, 2, 4 then you have never executed any smaller - // size loads, e.g. failing ld4 means no ld1 nor ld2 executed - // before. - // - // This allows us to simplify the cleanup code, because basically you - // only have to worry about "pending" stores in the case of a failing - // ld8(). Given the way the code is written today, this means only - // worry about st2, st4. There we can use the information encapsulated - // into the predicates. - // - // Other key point: - // - if you fail on the ld8 in the head, it means you went straight - // to it, i.e. 8byte alignment within an unexisting page. - // Again this comes from the fact that if you crossed just for the ld8 then - // you are 8byte aligned but also 16byte align, therefore you would - // either go for the 16byte copy loop OR the ld8 in the tail part. - // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible - // because it would mean you had 15bytes to copy in which case you - // would have defaulted to the byte by byte copy. 
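Before the TAIL case, the head crawl itself is small enough to model in C. The helper below is an invention of this sketch; the point it illustrates is that the 1-, 2- and 4-byte copies are tried smallest-first and each access is naturally aligned, so a given load can only fault if it lies entirely in the missing page, which is what the table above encodes.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Crawl to an 8-byte-aligned source with at most one 1-byte, one
 * 2-byte and one 4-byte copy (the ld1/ld2/ld4 sequence above).
 * Returns how many bytes were consumed. */
static size_t head_align_model(const unsigned char **src,
			       unsigned char **dst)
{
	size_t done = 0;
	size_t step;

	for (step = 1; step <= 4; step *= 2) {
		if ((uintptr_t)*src & step) {
			memcpy(*dst, *src, step);
			*src += step;
			*dst += step;
			done += step;
		}
	}
	return done;
}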
- // - // - // 2) TAIL - // Here we now we have less than 16 bytes AND we are either 8 or 16 byte - // aligned. - // - // Key point: - // This means that we either: - // - are right on a page boundary - // OR - // - are at more than 16 bytes from a page boundary with - // at most 15 bytes to copy: no chance of crossing. - // - // This allows us to assume that if we fail on a load we haven't possibly - // executed any of the previous (tail) ones, so we don't need to do - // any stores. For instance, if we fail on ld2, this means we had - // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. - // - // This means that we are in a situation similar the a fault in the - // head part. That's nice! - // -.failure_in1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - sub len=endsrc,src1,1 - // - // we know that ret0 can never be zero at this point - // because we failed why trying to do a load, i.e. there is still - // some work to do. - // The failure_in1bis and length problem is taken care of at the - // calling side. - // - ;; -.failure_in1bis: // from (.failure_in3) - mov ar.lc=len // Continue with a stupid byte store. - ;; -5: - st1 [dst1]=r0,1 - br.cloop.dptk 5b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Here we simply restart the loop but instead - // of doing loads we fill the pipeline with zeroes - // We can't simply store r0 because we may have valid - // data in transit in the pipeline. - // ar.lc and ar.ec are setup correctly at this point - // - // we MUST use src1/endsrc here and not dst1/enddst because - // of the pipeline effect. - // -.failure_in3: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - ;; -2: -(p16) mov val1[0]=r0 -(p16) mov val2[0]=r0 -(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? - sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - -.failure_in2: - sub ret0=endsrc,src1 - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? - sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // handling of failures on stores: that's the easy part - // -.failure_out: - sub ret0=enddst,dst1 - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - - mov ar.pfs=saved_pfs - br.ret.sptk.many rp -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S deleted file mode 100644 index 6004dad2597c3e3842052344306594455c4c06ab..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/do_csum.S +++ /dev/null @@ -1,324 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optmized version of the standard do_csum() function - * - * Return: a 64bit quantity containing the 16bit Internet checksum - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co - * Stephane Eranian - * - * 02/04/22 Ken Chen - * Data locality study on the checksum buffer. - * More optimization cleanup - remove excessive stop bits. - * 02/04/08 David Mosberger - * More cleanup and tuning. 
- * 01/04/18 Jun Nakajima - * Clean up and optimize and the software pipeline, loading two - * back-to-back 8-byte words per loop. Clean up the initialization - * for the loop. Support the cases where load latency = 1 or 2. - * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). - */ - -#include - -// -// Theory of operations: -// The goal is to go as quickly as possible to the point where -// we can checksum 16 bytes/loop. Before reaching that point we must -// take care of incorrect alignment of first byte. -// -// The code hereafter also takes care of the "tail" part of the buffer -// before entering the core loop, if any. The checksum is a sum so it -// allows us to commute operations. So we do the "head" and "tail" -// first to finish at full speed in the body. Once we get the head and -// tail values, we feed them into the pipeline, very handy initialization. -// -// Of course we deal with the special case where the whole buffer fits -// into one 8 byte word. In this case we have only one entry in the pipeline. -// -// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for -// possible load latency and also to accommodate for head and tail. -// -// The end of the function deals with folding the checksum from 64bits -// down to 16bits taking care of the carry. -// -// This version avoids synchronization in the core loop by also using a -// pipeline for the accumulation of the checksum in resultx[] (x=1,2). -// -// wordx[] (x=1,2) -// |---| -// | | 0 : new value loaded in pipeline -// |---| -// | | - : in transit data -// |---| -// | | LOAD_LATENCY : current value to add to checksum -// |---| -// | | LOAD_LATENCY+1 : previous value added to checksum -// |---| (previous iteration) -// -// resultx[] (x=1,2) -// |---| -// | | 0 : initial value -// |---| -// | | LOAD_LATENCY-1 : new checksum -// |---| -// | | LOAD_LATENCY : previous value of checksum -// |---| -// | | LOAD_LATENCY+1 : final checksum when out of the loop -// |---| -// -// -// See RFC1071 "Computing the Internet Checksum" for various techniques for -// calculating the Internet checksum. -// -// NOT YET DONE: -// - Maybe another algorithm which would take care of the folding at the -// end in a different manner -// - Work with people more knowledgeable than me on the network stack -// to figure out if we could not split the function depending on the -// type of packet or alignment we get. Like the ip_fast_csum() routine -// where we know we have at least 20bytes worth of data to checksum. -// - Do a better job of handling small packets. -// - Note on prefetching: it was found that under various load, i.e. ftp read/write, -// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% -// on the data that buffer points to (partly because the checksum is often preceded by -// a copy_from_user()). This finding indiate that lfetch will not be beneficial since -// the data is already in the cache. -// - -#define saved_pfs r11 -#define hmask r16 -#define tmask r17 -#define first1 r18 -#define firstval r19 -#define firstoff r20 -#define last r21 -#define lastval r22 -#define lastoff r23 -#define saved_lc r24 -#define saved_pr r25 -#define tmp1 r26 -#define tmp2 r27 -#define tmp3 r28 -#define carry1 r29 -#define carry2 r30 -#define first2 r31 - -#define buf in0 -#define len in1 - -#define LOAD_LATENCY 2 // XXX fix me - -#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) -# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." 
-#endif - -#define PIPE_DEPTH (LOAD_LATENCY+2) -#define ELD p[LOAD_LATENCY] // end of load -#define ELD_1 p[LOAD_LATENCY+1] // and next stage - -// unsigned long do_csum(unsigned char *buf,long len) - -GLOBAL_ENTRY(do_csum) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,16,0,16 - .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] - .rotp p[PIPE_DEPTH], pC1[2], pC2[2] - mov ret0=r0 // in case we have zero length - cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) - ;; - add tmp1=buf,len // last byte's address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) -(p6) br.ret.spnt.many rp // return if zero or negative length - - mov hmask=-1 // initialize head mask - tbit.nz p15,p0=buf,0 // is buf an odd address? - and first1=-8,buf // 8-byte align down address of first1 element - - and firstoff=7,buf // how many bytes off for first1 element - mov tmask=-1 // initialize tail mask - - ;; - adds tmp2=-1,tmp1 // last-1 - and lastoff=7,tmp1 // how many bytes off for last element - ;; - sub tmp1=8,lastoff // complement to lastoff - and last=-8,tmp2 // address of word containing last byte - ;; - sub tmp3=last,first1 // tmp3=distance from first1 to last - .save ar.lc, saved_lc - mov saved_lc=ar.lc // save lc - cmp.eq p8,p9=last,first1 // everything fits in one word ? - - ld8 firstval=[first1],8 // load, ahead of time, "first1" word - and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 - shl tmp2=firstoff,3 // number of bits - ;; -(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed - shl tmp1=tmp1,3 // number of bits -(p9) adds tmp3=-8,tmp3 // effectively loaded - ;; -(p8) mov lastval=r0 // we don't need lastval if first1==last - shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ - shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] - ;; - .body -#define count tmp3 - -(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only -(p9) and word2[0]=lastval,tmask // mask last it as appropriate - shr.u count=count,3 // how many 8-byte? - ;; - // If count is odd, finish this 8-byte word so that we can - // load two back-to-back 8-byte words per loop thereafter. - and word1[0]=firstval,hmask // and mask it as appropriate - tbit.nz p10,p11=count,0 // if (count is odd) - ;; -(p8) mov result1[0]=word1[0] -(p9) add result1[0]=word1[0],word2[0] - ;; - cmp.ltu p6,p0=result1[0],word1[0] // check the carry - cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte - ;; -(p6) adds result1[0]=1,result1[0] -(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) -(p11) br.cond.dptk .do_csum16 // if (count is even) - - // Here count is odd. - ld8 word1[1]=[first1],8 // load an 8-byte word - cmp.eq p9,p10=1,count // if (count == 1) - adds count=-1,count // loaded an 8-byte word - ;; - add result1[0]=result1[0],word1[1] - ;; - cmp.ltu p6,p0=result1[0],word1[1] - ;; -(p6) adds result1[0]=1,result1[0] -(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit - // Fall through to calculate the checksum, feeding result1[0] as - // the initial value in result1[0]. - // - // Calculate the checksum loading two 8-byte words per loop. - // -.do_csum16: - add first2=8,first1 - shr.u count=count,1 // we do 16 bytes per loop - ;; - adds count=-1,count - mov carry1=r0 - mov carry2=r0 - brp.loop.imp 1f,2f - ;; - mov ar.ec=PIPE_DEPTH - mov ar.lc=count // set lc - mov pr.rot=1<<16 - // result1[0] must be initialized in advance. 
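Just before the unrolled loop, it may help to see the arithmetic it implements written out in plain C: one's-complement addition of 8-byte words with an end-around carry, then folding the 64-bit sum down to 16 bits. The sketch ignores the head/tail masking and the two-lane pipelining above, assumes an 8-byte-aligned buffer whose length is a multiple of 8, and the function name is invented.

#include <stddef.h>
#include <stdint.h>

static unsigned int do_csum_model(const uint64_t *buf, size_t len)
{
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i < len / 8; i++) {
		sum += buf[i];
		if (sum < buf[i])	/* carry out of bit 63 wraps  */
			sum++;		/* around (one's complement)  */
	}
	/* fold 64 -> 32 -> 16 bits, as the .do_csum_exit code does */
	sum = (sum & 0xffffffffUL) + (sum >> 32);
	sum = (sum & 0xffffUL) + (sum >> 16);
	sum = (sum & 0xffffUL) + (sum >> 16);
	sum = (sum & 0xffffUL) + (sum >> 16);
	return (unsigned int)sum;
}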
- mov result2[0]=r0 - ;; - .align 32 -1: -(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] -(pC1[1])adds carry1=1,carry1 -(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] -(pC2[1])adds carry2=1,carry2 -(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] -(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] -2: -(p[0]) ld8 word1[0]=[first1],16 -(p[0]) ld8 word2[0]=[first2],16 - br.ctop.sptk 1b - ;; - // Since len is a 32-bit value, carry cannot be larger than a 64-bit value. -(pC1[1])adds carry1=1,carry1 // since we miss the last one -(pC2[1])adds carry2=1,carry2 - ;; - add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 - add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 - ;; - cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 - cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 - ;; -(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] -(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] - ;; - add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] - ;; - cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] - ;; -(p6) adds result1[0]=1,result1[0] - ;; -.do_csum_exit: - // - // now fold 64 into 16 bits taking care of carry - // that's not very good because it has lots of sequentiality - // - mov tmp3=0xffff - zxt4 tmp1=result1[0] - shr.u tmp2=result1[0],32 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add ret0=tmp1,tmp2 - mov pr=saved_pr,0xffffffffffff0000 - ;; - // if buf was odd then swap bytes - mov ar.pfs=saved_pfs // restore ar.ec -(p15) mux1 ret0=ret0,@rev // reverse word - ;; - mov ar.lc=saved_lc -(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes - br.ret.sptk.many rp - -// I (Jun Nakajima) wrote an equivalent code (see below), but it was -// not much better than the original. So keep the original there so that -// someone else can challenge. -// -// shr.u word1[0]=result1[0],32 -// zxt4 result1[0]=result1[0] -// ;; -// add result1[0]=result1[0],word1[0] -// ;; -// zxt2 result2[0]=result1[0] -// extr.u word1[0]=result1[0],16,16 -// shr.u carry1=result1[0],32 -// ;; -// add result2[0]=result2[0],word1[0] -// ;; -// add result2[0]=result2[0],carry1 -// ;; -// extr.u ret0=result2[0],16,16 -// ;; -// add ret0=ret0,result2[0] -// ;; -// zxt2 ret0=ret0 -// mov ar.pfs=saved_pfs // restore ar.ec -// mov pr=saved_pr,0xffffffffffff0000 -// ;; -// // if buf was odd then swap bytes -// mov ar.lc=saved_lc -//(p15) mux1 ret0=ret0,@rev // reverse word -// ;; -//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes -// br.ret.sptk.many rp - -END(do_csum) diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S deleted file mode 100644 index 8573d59c9ed17098bfd1b38243b9c5a178fdcb8f..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/flush.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Cache flushing routines. - * - * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * - * 05/28/05 Zoltan Menyhart Dynamic stride size - */ - -#include -#include - - - /* - * flush_icache_range(start,end) - * - * Make i-cache(s) coherent with d-caches. 
- * - * Must deal with range from start to end-1 but nothing else (need to - * be careful not to touch addresses that may be unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. - */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(flush_icache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_i_cache_stride_shift - mov r21=1 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=in1,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc.i" = - // "start" rounded down to stride boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop: fc.i r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(flush_icache_range) -EXPORT_SYMBOL_GPL(flush_icache_range) - - /* - * clflush_cache_range(start,size) - * - * Flush cache lines from start to start+size-1. - * - * Must deal with range from start to start+size-1 but nothing else - * (need to be careful not to touch addresses that may be - * unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. - */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(clflush_cache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_cache_stride_shift - mov r21=1 - add r22=in1,in0 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=r22,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc" = - // "start" rounded down to stride - // boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop_fc: - fc r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop_fc - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(clflush_cache_range) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S deleted file mode 100644 index def92b708e6e1f209ff6f26fb3158c20917509a2..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/idiv32.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang - * - * 32-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". 
This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define EXTEND zxt4 -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define EXTEND sxt4 -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) - EXTEND in0 = in0 // in0 = a - EXTEND in1 = in1 // in1 = b - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - setf.exp f7 = r2 // f7 = 2^-34 - frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 - ;; -#ifdef MODULO - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 -(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 - ;; -#ifdef MODULO - setf.sig f7 = in0 -#endif -(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 - ;; - FP_TO_INT(f6, f6) // q = trunc(q2) - ;; -#ifdef MODULO - xma.l f6 = f6, f9, f7 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f6 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S deleted file mode 100644 index a8ba3bd3d4d8cc545eaab2c62584ba4234efe15f..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/idiv64.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2000 Hewlett-Packard Co - * Copyright (C) 1999-2000 David Mosberger-Tang - * - * 64-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. 
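The refinement chain that follows (q0, e0, q1, e1, q2, y1, y2, r, q3) is easier to read in C. In the sketch below frcpa() is faked with an ordinary divide and double stands in for the 82-bit register format, so it illustrates only the structure of the Newton-Raphson iteration; the real sequence relies on the extended precision for trunc(q3) to be the exact quotient.

#include <stdint.h>

static double frcpa(double b)	/* stand-in for the hardware's     */
{				/* ~8-bit reciprocal approximation */
	return 1.0 / b;
}

static int64_t div_model(int64_t a_int, int64_t b_int)
{
	double a = (double)a_int, b = (double)b_int;
	double y0 = frcpa(b);
	double q0 = a * y0;		/* q0 = a*y0	    */
	double e0 = 1.0 - b * y0;	/* e0 = -b*y0 + 1   */
	double q1 = q0 + q0 * e0;	/* q1 = q0*e0 + q0  */
	double e1 = e0 * e0;		/* e1 = e0*e0	    */
	double q2 = q1 + q1 * e1;	/* q2 = q1*e1 + q1  */
	double y1 = y0 + y0 * e0;	/* y1 = y0*e0 + y0  */
	double y2 = y1 + y1 * e1;	/* y2 = y1*e1 + y1  */
	double r  = a - b * q2;		/* r  = -b*q2 + a   */
	double q3 = q2 + r * y2;	/* q3 = r*y2 + q2   */

	return (int64_t)q3;		/* trunc(q3)	    */
}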
- INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 - ;; -(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 -(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 - ;; -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif -(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 -(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 - ;; -(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 -(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a - ;; -#ifdef MODULO - setf.sig f8 = in0 // f8 = a - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 - ;; - FP_TO_INT(f11, f11) // q = trunc(q3) - ;; -#ifdef MODULO - xma.l f11 = f11, f9, f8 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f11 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S deleted file mode 100644 index dc9e6e6fe87695a48160d634890987dac93ebcca..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/ip_fast_csum.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Optmized version of the ip_fast_csum() function - * Used for calculating IP header checksum - * - * Return: 16bit checksum, complemented - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 2002, 2006 Intel Corp. - * Copyright (C) 2002, 2006 Ken Chen - */ - -#include -#include - -/* - * Since we know that most likely this function is called with buf aligned - * on 4-byte boundary and 20 bytes in length, we can execution rather quickly - * versus calling generic version of do_csum, which has lots of overhead in - * handling various alignments and sizes. However, due to lack of constrains - * put on the function input argument, cases with alignment not on 4-byte or - * size not equal to 20 bytes will be handled by the generic do_csum function. - */ - -#define in0 r32 -#define in1 r33 -#define in2 r34 -#define in3 r35 -#define in4 r36 -#define ret0 r8 - -GLOBAL_ENTRY(ip_fast_csum) - .prologue - .body - cmp.ne p6,p7=5,in1 // size other than 20 byte? - and r14=3,in0 // is it aligned on 4-byte? 
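For reference, the 20-byte fast path that this prologue guards reduces to summing five 32-bit words and folding, roughly the following C (the function name is invented; the three fold steps mirror the three shr.u/zxt2 rounds below):

#include <stdint.h>

static uint16_t ip_fast_csum_model(const uint32_t *hdr)	/* ihl == 5 */
{
	uint64_t sum = 0;
	int i;

	for (i = 0; i < 5; i++)
		sum += hdr[i];
	/* sum < 2^35, so three folds are enough to bring it to 16 bits */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;	/* complemented, like andcm ret0=r9,r20 */
}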
- add r15=4,in0 // second source pointer - ;; - cmp.ne.or.andcm p6,p7=r14,r0 - ;; -(p7) ld4 r20=[in0],8 -(p7) ld4 r21=[r15],8 -(p6) br.spnt .generic - ;; - ld4 r22=[in0],8 - ld4 r23=[r15],8 - ;; - ld4 r24=[in0] - add r20=r20,r21 - add r22=r22,r23 - ;; - add r20=r20,r22 - ;; - add r20=r20,r24 - ;; - shr.u ret0=r20,16 // now need to add the carry - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 // add carry again - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 - zxt2 r20=r20 - ;; - add r20=ret0,r20 - mov r9=0xffff - ;; - andcm ret0=r9,r20 - .restore sp // reset frame state - br.ret.sptk.many b0 - ;; - -.generic: - .prologue - .save ar.pfs, r35 - alloc r35=ar.pfs,2,2,2,0 - .save rp, r34 - mov r34=b0 - .body - dep.z out1=in1,2,30 - mov out0=in0 - ;; - br.call.sptk.many b0=do_csum - ;; - andcm ret0=-1,ret0 - mov ar.pfs=r35 - mov b0=r34 - br.ret.sptk.many b0 -END(ip_fast_csum) -EXPORT_SYMBOL(ip_fast_csum) - -GLOBAL_ENTRY(csum_ipv6_magic) - ld4 r20=[in0],4 - ld4 r21=[in1],4 - zxt4 in2=in2 - ;; - ld4 r22=[in0],4 - ld4 r23=[in1],4 - dep r15=in3,in2,32,16 - ;; - ld4 r24=[in0],4 - ld4 r25=[in1],4 - mux1 r15=r15,@rev - add r16=r20,r21 - add r17=r22,r23 - zxt4 in4=in4 - ;; - ld4 r26=[in0],4 - ld4 r27=[in1],4 - shr.u r15=r15,16 - add r18=r24,r25 - add r8=r16,r17 - ;; - add r19=r26,r27 - add r8=r8,r18 - ;; - add r8=r8,r19 - add r15=r15,in4 - ;; - add r8=r8,r15 - ;; - shr.u r10=r8,32 // now fold sum into short - zxt4 r11=r8 - ;; - add r8=r10,r11 - ;; - shr.u r10=r8,16 // yeah, keep it rolling - zxt2 r11=r8 - ;; - add r8=r10,r11 - ;; - shr.u r10=r8,16 // three times lucky - zxt2 r11=r8 - ;; - add r8=r10,r11 - mov r9=0xffff - ;; - andcm r8=r9,r8 - br.ret.sptk.many b0 -END(csum_ipv6_magic) -EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S deleted file mode 100644 index 91a625fddbf0599e78b2419013be61f33ff727b4..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memcpy.S +++ /dev/null @@ -1,304 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard memcpy() function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * no return value - * - * Copyright (C) 2000-2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - */ -#include -#include - -GLOBAL_ENTRY(memcpy) - -# define MEM_LAT 21 /* latency to memory */ - -# define dst r2 -# define src r3 -# define retval r8 -# define saved_pfs r9 -# define saved_lc r10 -# define saved_pr r11 -# define cnt r16 -# define src2 r17 -# define t0 r18 -# define t1 r19 -# define t2 r20 -# define t3 r21 -# define t4 r22 -# define src_end r23 - -# define N (MEM_LAT + 4) -# define Nrot ((N + 7) & ~7) - - /* - * First, check if everything (src, dst, len) is a multiple of eight. If - * so, we handle everything with no taken branches (other than the loop - * itself) and a small icache footprint. Otherwise, we jump off to - * the more general copy routine handling arbitrary - * sizes/alignment etc. - */ - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot - .save ar.lc, saved_lc - mov saved_lc=ar.lc - or t0=in0,in1 - ;; - - or t0=t0,in2 - .save pr, saved_pr - mov saved_pr=pr - - .body - - cmp.eq p6,p0=in2,r0 // zero length? 
- mov retval=in0 // return dst -(p6) br.ret.spnt.many rp // zero length, return immediately - ;; - - mov dst=in0 // copy because of rotation - shr.u cnt=in2,3 // number of 8-byte words to copy - mov pr.rot=1<<16 - ;; - - adds cnt=-1,cnt // br.ctop is repeat/until - cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? - mov ar.ec=N - ;; - - and t0=0x7,t0 - mov ar.lc=cnt - ;; - cmp.ne p6,p0=t0,r0 - - mov src=in1 // copy because of rotation -(p7) br.cond.spnt.few .memcpy_short -(p6) br.cond.spnt.few .memcpy_long - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - .rotr val[N] - .rotp p[N] - .align 32 -1: { .mib -(p[0]) ld8 val[0]=[src],8 - nop.i 0 - brp.loop.imp 1b, 2f -} -2: { .mfb -(p[N-1])st8 [dst]=val[N-1],8 - nop.f 0 - br.ctop.dptk.few 1b -} - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time - * copy loop. This performs relatively poorly on Itanium, but it doesn't - * get used very often (gcc inlines small copies) and due to atomicity - * issues, we want to avoid read-modify-write of entire words. - */ - .align 32 -.memcpy_short: - adds cnt=-1,in2 // br.ctop is repeat/until - mov ar.ec=MEM_LAT - brp.loop.imp 1f, 2f - ;; - mov ar.lc=cnt - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - nop.m 0 - ;; - /* - * It is faster to put a stop bit in the loop here because it makes - * the pipeline shorter (and latency is what matters on short copies). - */ - .align 32 -1: { .mib -(p[0]) ld1 val[0]=[src],1 - nop.i 0 - brp.loop.imp 1b, 2f -} ;; -2: { .mfb -(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 - nop.f 0 - br.ctop.dptk.few 1b -} ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't - * an overriding concern here, but throughput is. We first do - * sub-word copying until the destination is aligned, then we check - * if the source is also aligned. If so, we do a simple load/store-loop - * until there are less than 8 bytes left over and then we do the tail, - * by storing the last few bytes using sub-word copying. If the source - * is not aligned, we branch off to the non-congruent loop. - * - * stage: op: - * 0 ld - * : - * MEM_LAT+3 shrp - * MEM_LAT+4 st - * - * On Itanium, the pipeline itself runs without stalls. However, br.ctop - * seems to introduce an unavoidable bubble in the pipeline so the overall - * latency is 2 cycles/iteration. This gives us a _copy_ throughput - * of 4 byte/cycle. Still not bad. 
- */ -# undef N -# undef Nrot -# define N (MEM_LAT + 5) /* number of stages */ -# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ - -#define LOG_LOOP_SIZE 6 - -.memcpy_long: - alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame - and t0=-8,src // t0 = src & ~7 - and t2=7,src // t2 = src & 7 - ;; - ld8 t0=[t0] // t0 = 1st source word - adds src2=7,src // src2 = (src + 7) - sub t4=r0,dst // t4 = -dst - ;; - and src2=-8,src2 // src2 = (src + 7) & ~7 - shl t2=t2,3 // t2 = 8*(src & 7) - shl t4=t4,3 // t4 = 8*(dst & 7) - ;; - ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise - sub t3=64,t2 // t3 = 64-8*(src & 7) - shr.u t0=t0,t2 - ;; - add src_end=src,in2 - shl t1=t1,t3 - mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) - ;; - or t0=t0,t1 - mov cnt=r0 - adds src_end=-1,src_end - ;; -(p3) st1 [dst]=t0,1 -(p3) shr.u t0=t0,8 -(p3) adds cnt=1,cnt - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 -(p4) adds cnt=2,cnt - ;; -(p5) st4 [dst]=t0,4 -(p5) adds cnt=4,cnt - and src_end=-8,src_end // src_end = last word of source buffer - ;; - - // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: - -1:{ add src=cnt,src // make src point to remainder of source buffer - sub cnt=in2,cnt // cnt = number of bytes left to copy - mov t4=ip - } ;; - and src2=-8,src // align source pointer - adds t4=.memcpy_loops-1b,t4 - mov ar.ec=N - - and t0=7,src // t0 = src & 7 - shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy - shl cnt=cnt,3 // move bits 0-2 to 3-5 - ;; - - .rotr val[N+1], w[2] - .rotp p[N] - - cmp.ne p6,p0=t0,r0 // is src aligned, too? - shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) - adds t2=-1,t2 // br.ctop is repeat/until - ;; - add t4=t0,t4 - mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy - mov ar.lc=t2 - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; -(p6) ld8 val[1]=[src2],8 // prime the pump... 
- mov b6=t4 - br.sptk.few b6 - ;; - -.memcpy_tail: - // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is - // less than 8) and t0 contains the last few bytes of the src buffer: -(p5) st4 [dst]=t0,4 -(p5) shr.u t0=t0,32 - mov ar.lc=saved_lc - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 - mov ar.pfs=saved_pfs - ;; -(p3) st1 [dst]=t0 - mov pr=saved_pr,-1 - br.ret.sptk.many rp - -/////////////////////////////////////////////////////// - .align 64 - -#define COPY(shift,index) \ - 1: { .mib \ - (p[0]) ld8 val[0]=[src2],8; \ - (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ - brp.loop.imp 1b, 2f \ - }; \ - 2: { .mfb \ - (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ - nop.f 0; \ - br.ctop.dptk.few 1b; \ - }; \ - ;; \ - ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ - ;; \ - shrp t0=val[N-1],val[N-index],shift; \ - br .memcpy_tail -.memcpy_loops: - COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ - COPY(8, 0) - COPY(16, 0) - COPY(24, 0) - COPY(32, 0) - COPY(40, 0) - COPY(48, 0) - COPY(56, 0) - -END(memcpy) -EXPORT_SYMBOL(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S deleted file mode 100644 index cc4e6ac914b6c2699749a700b7ff6e97a72ee02a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memcpy_mck.S +++ /dev/null @@ -1,659 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Itanium 2-optimized version of memcpy and copy_user function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * for memcpy: return dest - * for copy_user: return 0 if success, - * or number of byte NOT copied if error occurred. - * - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Ken Chen - */ -#include -#include -#include - -#define EK(y...) EX(y) - -/* McKinley specific optimization */ - -#define retval r8 -#define saved_pfs r31 -#define saved_lc r10 -#define saved_pr r11 -#define saved_in0 r14 -#define saved_in1 r15 -#define saved_in2 r16 - -#define src0 r2 -#define src1 r3 -#define dst0 r17 -#define dst1 r18 -#define cnt r9 - -/* r19-r30 are temp for each code section */ -#define PREFETCH_DIST 8 -#define src_pre_mem r19 -#define dst_pre_mem r20 -#define src_pre_l2 r21 -#define dst_pre_l2 r22 -#define t1 r23 -#define t2 r24 -#define t3 r25 -#define t4 r26 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define n8 r27 -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! 
-#define t13 r28 -#define t15 r29 -#define tmp r30 - -/* defines for long_copy block */ -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 1) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -/* alias */ -#define in0 r32 -#define in1 r33 -#define in2 r34 - -GLOBAL_ENTRY(memcpy) - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f0 - mov retval=in0 - br.cond.sptk .common_code - ;; -END(memcpy) -EXPORT_SYMBOL(memcpy) -GLOBAL_ENTRY(__copy_user) - .prologue -// check dest alignment - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f1 - mov saved_in0=in0 // save dest pointer - mov saved_in1=in1 // save src pointer - mov retval=r0 // initialize return value - ;; -.common_code: - cmp.gt p15,p0=8,in2 // check for small size - cmp.ne p13,p0=0,r28 // check dest alignment - cmp.ne p14,p0=0,r29 // check src alignment - add src0=0,in1 - sub r30=8,r28 // for .align_dest - mov saved_in2=in2 // save len - ;; - add dst0=0,in0 - add dst1=1,in0 // dest odd index - cmp.le p6,p0 = 1,r30 // for .align_dest -(p15) br.cond.dpnt .memcpy_short -(p13) br.cond.dpnt .align_dest -(p14) br.cond.dpnt .unaligned_src - ;; - -// both dest and src are aligned on 8-byte boundary -.aligned_src: - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - .save pr, saved_pr - mov saved_pr=pr - - shr.u cnt=in2,7 // this much cache line - ;; - cmp.lt p6,p0=2*PREFETCH_DIST,cnt - cmp.lt p7,p8=1,cnt - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .body - add cnt=-1,cnt - add src_pre_mem=0,in1 // prefetch src pointer - add dst_pre_mem=0,in0 // prefetch dest pointer - ;; -(p7) mov ar.lc=cnt // prefetch count -(p8) mov ar.lc=r0 -(p6) br.cond.dpnt .long_copy - ;; - -.prefetch: - lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few .prefetch - ;; - -.medium_copy: - and tmp=31,in2 // copy length after iteration - shr.u r29=in2,5 // number of 32-byte iteration - add dst1=8,dst0 // 2nd dest pointer - ;; - add cnt=-1,r29 // ctop iteration adjustment - cmp.eq p10,p0=r29,r0 // do we really need to loop? 
- add src1=8,src0 // 2nd src pointer - cmp.le p6,p0=8,tmp - ;; - cmp.le p7,p0=16,tmp - mov ar.lc=cnt // loop setup - cmp.eq p16,p17 = r0,r0 - mov ar.ec=2 -(p10) br.dpnt.few .aligned_src_tail - ;; - TEXT_ALIGN(32) -1: -EX(.ex_handler, (p16) ld8 r34=[src0],16) -EK(.ex_handler, (p16) ld8 r38=[src1],16) -EX(.ex_handler, (p17) st8 [dst0]=r33,16) -EK(.ex_handler, (p17) st8 [dst1]=r37,16) - ;; -EX(.ex_handler, (p16) ld8 r32=[src0],16) -EK(.ex_handler, (p16) ld8 r36=[src1],16) -EX(.ex_handler, (p16) st8 [dst0]=r34,16) -EK(.ex_handler, (p16) st8 [dst1]=r38,16) - br.ctop.dptk.few 1b - ;; - -.aligned_src_tail: -EX(.ex_handler, (p6) ld8 t1=[src0]) - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs -EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) - cmp.le p8,p0=24,tmp - and r21=-8,tmp - ;; -EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) -EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 - and in2=7,tmp // remaining length -EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 - add src0=src0,r21 // setting up src pointer - add dst0=dst0,r21 // setting up dest pointer - ;; -EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 - mov pr=saved_pr,-1 - br.dptk.many .memcpy_short - ;; - -/* code taken from copy_page_mck */ -.long_copy: - .rotr v[2*PREFETCH_DIST] - .rotp p[N] - - mov src_pre_mem = src0 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = dst0 - - add src_pre_l2 = 8*8, src0 - add dst_pre_l2 = 8*8, dst0 - ;; - add src0 = 8, src_pre_mem // first t1 src - mov ar.lc = 2*PREFETCH_DIST - 1 - shr.u cnt=in2,7 // number of lines - add src1 = 3*8, src_pre_mem // first t3 src - add dst0 = 8, dst_pre_mem // first t1 dst - add dst1 = 3*8, dst_pre_mem // first t3 dst - ;; - and tmp=127,in2 // remaining bytes after this block - add cnt = -(2*PREFETCH_DIST) - 1, cnt - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 -EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 - mov ar.lc = cnt - mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 -EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 -EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory -EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 - ;; -EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory -EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 -EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 -EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 - ;; -EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) - ;; -EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) -EK(.ex_handler, (p[D]) 
ld8 t15 = [src1], 4*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) - ;; -EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) -EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) - br.ctop.sptk .line_copy - ;; - - add dst0=-8,dst0 - add src0=-8,src0 - mov in2=tmp - .restore sp - br.sptk.many .medium_copy - ;; - -#define BLOCK_SIZE 128*32 -#define blocksize r23 -#define curlen r24 - -// dest is on 8-byte boundary, src is not. We need to do -// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. -.unaligned_src: - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,5,0,8 - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .save pr, saved_pr - mov saved_pr=pr - .body -.4k_block: - mov saved_in0=dst0 // need to save all input arguments - mov saved_in2=in2 - mov blocksize=BLOCK_SIZE - ;; - cmp.lt p6,p7=blocksize,in2 - mov saved_in1=src0 - ;; -(p6) mov in2=blocksize - ;; - shr.u r21=in2,7 // this much cache line - shr.u r22=in2,4 // number of 16-byte iteration - and curlen=15,in2 // copy length after iteration - and r30=7,src0 // source alignment - ;; - cmp.lt p7,p8=1,r21 - add cnt=-1,r21 - ;; - - add src_pre_mem=0,src0 // prefetch src pointer - add dst_pre_mem=0,dst0 // prefetch dest pointer - and src0=-8,src0 // 1st src pointer -(p7) mov ar.lc = cnt -(p8) mov ar.lc = r0 - ;; - TEXT_ALIGN(32) -1: lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few 1b - ;; - - shladd dst1=r22,3,dst0 // 2nd dest pointer - shladd src1=r22,3,src0 // 2nd src pointer - cmp.eq p8,p9=r22,r0 // do we really need to loop? - cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? - add cnt=-1,r22 // ctop iteration adjustment - ;; -EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer -EK(.ex_handler, (p9) ld8 r37=[src1],8) -(p8) br.dpnt.few .noloop - ;; - -// The jump address is calculated based on src alignment. The COPYU -// macro below need to confine its size to power of two, so an entry -// can be caulated using shl instead of an expensive multiply. The -// size is then hard coded by the following #define to match the -// actual size. This make it somewhat tedious when COPYU macro gets -// changed and this need to be adjusted to match. -#define LOOP_SIZE 6 -1: - mov r29=ip // jmp_table thread - mov ar.lc=cnt - ;; - add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 - shl r28=r30, LOOP_SIZE // jmp_table thread - mov ar.ec=2 // loop setup - ;; - add r29=r29,r28 // jmp_table thread - cmp.eq p16,p17=r0,r0 - ;; - mov b6=r29 // jmp_table thread - ;; - br.cond.sptk.few b6 - -// for 8-15 byte case -// We will skip the loop, but need to replicate the side effect -// that the loop produces. -.noloop: -EX(.ex_handler, (p6) ld8 r37=[src1],8) - add src0=8,src0 -(p6) shl r25=r30,3 - ;; -EX(.ex_handler, (p6) ld8 r27=[src1]) -(p6) shr.u r28=r37,r25 -(p6) sub r26=64,r25 - ;; -(p6) shl r27=r27,r26 - ;; -(p6) or r21=r28,r27 - -.unaligned_src_tail: -/* check if we have more than blocksize to copy, if so go back */ - cmp.gt p8,p0=saved_in2,blocksize - ;; -(p8) add dst0=saved_in0,blocksize -(p8) add src0=saved_in1,blocksize -(p8) sub in2=saved_in2,blocksize -(p8) br.dpnt .4k_block - ;; - -/* we have up to 15 byte to copy in the tail. - * part of work is already done in the jump table code - * we are at the following state. 
- * src side: - * - * xxxxxx xx <----- r21 has xxxxxxxx already - * -------- -------- -------- - * 0 8 16 - * ^ - * | - * src1 - * - * dst - * -------- -------- -------- - * ^ - * | - * dst1 - */ -EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy -(p6) add curlen=-8,curlen // update length - mov ar.pfs=saved_pfs - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov in2=curlen // remaining length - mov dst0=dst1 // dest pointer - add src0=src1,r30 // forward by src alignment - ;; - -// 7 byte or smaller. -.memcpy_short: - cmp.le p8,p9 = 1,in2 - cmp.le p10,p11 = 2,in2 - cmp.le p12,p13 = 3,in2 - cmp.le p14,p15 = 4,in2 - add src1=1,src0 // second src pointer - add dst1=1,dst0 // second dest pointer - ;; - -EX(.ex_handler_short, (p8) ld1 t1=[src0],2) -EK(.ex_handler_short, (p10) ld1 t2=[src1],2) -(p9) br.ret.dpnt rp // 0 byte copy - ;; - -EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) -EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) -(p11) br.ret.dpnt rp // 1 byte copy - -EX(.ex_handler_short, (p12) ld1 t3=[src0],2) -EK(.ex_handler_short, (p14) ld1 t4=[src1],2) -(p13) br.ret.dpnt rp // 2 byte copy - ;; - - cmp.le p6,p7 = 5,in2 - cmp.le p8,p9 = 6,in2 - cmp.le p10,p11 = 7,in2 - -EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) -EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) -(p15) br.ret.dpnt rp // 3 byte copy - ;; - -EX(.ex_handler_short, (p6) ld1 t5=[src0],2) -EK(.ex_handler_short, (p8) ld1 t6=[src1],2) -(p7) br.ret.dpnt rp // 4 byte copy - ;; - -EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) -EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) -(p9) br.ret.dptk rp // 5 byte copy - -EX(.ex_handler_short, (p10) ld1 t7=[src0],2) -(p11) br.ret.dptk rp // 6 byte copy - ;; - -EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) - br.ret.dptk rp // done all cases - - -/* Align dest to nearest 8-byte boundary. We know we have at - * least 7 bytes to copy, enough to crawl to 8-byte boundary. - * Actual number of byte to crawl depend on the dest alignment. 
- * 7 byte or less is taken care at .memcpy_short - - * src0 - source even index - * src1 - source odd index - * dst0 - dest even index - * dst1 - dest odd index - * r30 - distance to 8-byte boundary - */ - -.align_dest: - add src1=1,in1 // source odd index - cmp.le p7,p0 = 2,r30 // for .align_dest - cmp.le p8,p0 = 3,r30 // for .align_dest -EX(.ex_handler_short, (p6) ld1 t1=[src0],2) - cmp.le p9,p0 = 4,r30 // for .align_dest - cmp.le p10,p0 = 5,r30 - ;; -EX(.ex_handler_short, (p7) ld1 t2=[src1],2) -EK(.ex_handler_short, (p8) ld1 t3=[src0],2) - cmp.le p11,p0 = 6,r30 -EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) - cmp.le p12,p0 = 7,r30 - ;; -EX(.ex_handler_short, (p9) ld1 t4=[src1],2) -EK(.ex_handler_short, (p10) ld1 t5=[src0],2) -EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) -EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) - ;; -EX(.ex_handler_short, (p11) ld1 t6=[src1],2) -EK(.ex_handler_short, (p12) ld1 t7=[src0],2) - cmp.eq p6,p7=r28,r29 -EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) -EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) - sub in2=in2,r30 - ;; -EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) -EK(.ex_handler_short, (p12) st1 [dst0] = t7) - add dst0=in0,r30 // setup arguments - add src0=in1,r30 -(p6) br.cond.dptk .aligned_src -(p7) br.cond.dpnt .unaligned_src - ;; - -/* main loop body in jump table format */ -#define COPYU(shift) \ -1: \ -EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ -EK(.ex_handler, (p16) ld8 r36=[src1],8); \ - (p17) shrp r35=r33,r34,shift;; /* 1 */ \ -EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ - nop.m 0; \ - (p16) shrp r38=r36,r37,shift; \ -EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ -EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ - br.ctop.dptk.few 1b;; \ - (p7) add src1=-8,src1; /* back out for <8 byte case */ \ - shrp r21=r22,r38,shift; /* speculative work */ \ - br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ - ;; - TEXT_ALIGN(32) -.jump_table: - COPYU(8) // unaligned cases -.jmp1: - COPYU(16) - COPYU(24) - COPYU(32) - COPYU(40) - COPYU(48) - COPYU(56) - -#undef A -#undef B -#undef C -#undef D - -/* - * Due to lack of local tag support in gcc 2.x assembler, it is not clear which - * instruction failed in the bundle. The exception algorithm is that we - * first figure out the faulting address, then detect if there is any - * progress made on the copy, if so, redo the copy from last known copied - * location up to the faulting address (exclusive). In the copy_from_user - * case, remaining byte in kernel buffer will be zeroed. - * - * Take copy_from_user as an example, in the code there are multiple loads - * in a bundle and those multiple loads could span over two pages, the - * faulting address is calculated as page_round_down(max(src0, src1)). - * This is based on knowledge that if we can access one byte in a page, we - * can access any byte in that page. 
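Editor's note: the COPYU macro above is the heart of the unaligned path: shrp extracts 64 bits from the 128-bit concatenation of two adjacent aligned words, shifted by the source misalignment. A hedged C analogue for a little-endian machine follows; it assumes mis is 1..7 (the aligned case is dispatched elsewhere, as in the asm), and like the asm it reads from the aligned word containing the first source byte:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the shrp merge: read aligned 8-byte words and combine
     * each adjacent pair with shifts.  Requires mis != 0; reads may
     * start up to 7 bytes before src, mirroring the asm's aligned
     * priming load.  Illustrative only. */
    static void copy_merge(uint64_t *dst, const uint8_t *src, size_t words)
    {
        unsigned mis = (uintptr_t)src & 7;               /* 1..7 assumed */
        const uint64_t *s = (const uint64_t *)(src - mis); /* aligned base */
        unsigned sh = 8 * mis;
        uint64_t lo = *s++;
        for (size_t i = 0; i < words; i++) {
            uint64_t hi = *s++;
            dst[i] = (lo >> sh) | (hi << (64 - sh));     /* shrp-style merge */
            lo = hi;
        }
    }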
- * - * predicate used in the exception handler: - * p6-p7: direction - * p10-p11: src faulting addr calculation - * p12-p13: dst faulting addr calculation - */ - -#define A r19 -#define B r20 -#define C r21 -#define D r22 -#define F r28 - -#define saved_retval loc0 -#define saved_rtlink loc1 -#define saved_pfs_stack loc2 - -.ex_hndlr_s: - add src0=8,src0 - br.sptk .ex_handler - ;; -.ex_hndlr_d: - add dst0=8,dst0 - br.sptk .ex_handler - ;; -.ex_hndlr_lcpy_1: - mov src1=src_pre_mem - mov dst1=dst_pre_mem - cmp.gtu p10,p11=src_pre_mem,saved_in1 - cmp.gtu p12,p13=dst_pre_mem,saved_in0 - ;; -(p10) add src0=8,saved_in1 -(p11) mov src0=saved_in1 -(p12) add dst0=8,saved_in0 -(p13) mov dst0=saved_in0 - br.sptk .ex_handler -.ex_handler_lcpy: - // in line_copy block, the preload addresses should always ahead - // of the other two src/dst pointers. Furthermore, src1/dst1 should - // always ahead of src0/dst0. - mov src1=src_pre_mem - mov dst1=dst_pre_mem -.ex_handler: - mov pr=saved_pr,-1 // first restore pr, lc, and pfs - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - ;; -.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs - cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction - cmp.ltu p10,p11=src0,src1 - cmp.ltu p12,p13=dst0,dst1 - fcmp.eq p8,p0=f6,f0 // is it memcpy? - mov tmp = dst0 - ;; -(p11) mov src1 = src0 // pick the larger of the two -(p13) mov dst0 = dst1 // make dst0 the smaller one -(p13) mov dst1 = tmp // and dst1 the larger one - ;; -(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary -(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary - ;; -(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store -(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load - mov retval=saved_in2 -(p8) ld1 tmp=[src1] // force an oops for memcpy call -(p8) st1 [dst1]=r0 // force an oops for memcpy call -(p14) br.ret.sptk.many rp - -/* - * The remaining byte to copy is calculated as: - * - * A = (faulting_addr - orig_src) -> len to faulting ld address - * or - * (faulting_addr - orig_dst) -> len to faulting st address - * B = (cur_dst - orig_dst) -> len copied so far - * C = A - B -> len need to be copied - * D = orig_len - A -> len need to be left along - */ -(p6) sub A = F, saved_in0 -(p7) sub A = F, saved_in1 - clrrrb - ;; - alloc saved_pfs_stack=ar.pfs,3,3,3,0 - cmp.lt p8,p0=A,r0 - sub B = dst0, saved_in0 // how many byte copied so far - ;; -(p8) mov A = 0; // A shouldn't be negative, cap it - ;; - sub C = A, B - sub D = saved_in2, A - ;; - cmp.gt p8,p0=C,r0 // more than 1 byte? - mov r8=0 - mov saved_retval = D - mov saved_rtlink = b0 - - add out0=saved_in0, B - add out1=saved_in1, B - mov out2=C -(p8) br.call.sptk.few b0=__copy_user // recursive call - ;; - - add saved_retval=saved_retval,r8 // above might return non-zero value - ;; - - mov retval=saved_retval - mov ar.pfs=saved_pfs_stack - mov b0=saved_rtlink - br.ret.sptk.many rp - -/* end of McKinley specific optimization */ -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S deleted file mode 100644 index 07a8b92c64965e2578e3607532325c170ae220c7..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memset.S +++ /dev/null @@ -1,365 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Optimized version of the standard memset() function. 
- - Copyright (c) 2002 Hewlett-Packard Co/CERN - Sverre Jarp - - Return: dest - - Inputs: - in0: dest - in1: value - in2: count - - The algorithm is fairly straightforward: set byte by byte until we - we get to a 16B-aligned address, then loop on 128 B chunks using an - early store as prefetching, then loop on 32B chucks, then clear remaining - words, finally clear remaining bytes. - Since a stf.spill f0 can store 16B in one go, we use this instruction - to get peak speed when value = 0. */ - -#include -#include -#undef ret - -#define dest in0 -#define value in1 -#define cnt in2 - -#define tmp r31 -#define save_lc r30 -#define ptr0 r29 -#define ptr1 r28 -#define ptr2 r27 -#define ptr3 r26 -#define ptr9 r24 -#define loopcnt r23 -#define linecnt r22 -#define bytecnt r21 - -#define fvalue f6 - -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches -#define p_nz p7 -#define p_zr p8 -#define p_unalgn p9 -#define p_y p11 -#define p_n p12 -#define p_yy p13 -#define p_nn p14 - -#define MIN1 15 -#define MIN1P1HALF 8 -#define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount -#define PREF_AHEAD 8 - -GLOBAL_ENTRY(memset) -{ .mmi - .prologue - alloc tmp = ar.pfs, 3, 0, 0, 0 - lfetch.nt1 [dest] // - .save ar.lc, save_lc - mov.i save_lc = ar.lc - .body -} { .mmi - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero - cmp.eq p_scr, p0 = cnt, r0 -;; } -{ .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for correct alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) -} { .mib - mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 -;; } -{ .mib - cmp.ne p_unalgn, p0 = tmp, r0 // -} { .mib - sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) -;; } -{ .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? -;; } -{ .mib -(p_y) add cnt = -8, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? -} { .mib -(p_y) st8 [ptr2] = value,-4 // -(p_n) add ptr2 = 4, ptr2 // -;; } -{ .mib -(p_yy) add cnt = -4, cnt // -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? -} { .mib -(p_yy) st4 [ptr2] = value,-2 // -(p_nn) add ptr2 = 2, ptr2 // -;; } -{ .mmi - mov tmp = LINE_SIZE+1 // for compare -(p_y) add cnt = -2, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? -} { .mmi - setf.sig fvalue=value // transfer value to FLP side -(p_y) st2 [ptr2] = value,-1 // -(p_n) add ptr2 = 1, ptr2 // -;; } - -{ .mmi -(p_yy) st1 [ptr2] = value // - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? 
-} { .mbb -(p_yy) add cnt = -1, cnt // -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few -;; } - -{ .mib - nop.m 0 - shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill -;; } - - TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt // - add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt // -;; } -.pref_l1a: -{ .mib - stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1a -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp // -;; } -.l1ax: - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf8 [ptr2] = fvalue, 24 -(p_scr) stf8 [ptr9] = fvalue, 128 - br.cloop.dptk.few .l1ax -;; } -{ .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 -;; } - - TEXT_ALIGN(32) -.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt -;; } -.pref_l1b: -{ .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1b -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp -;; } -.l1bx: - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf.spill [ptr2] = f0, 32 -(p_scr) stf.spill [ptr9] = f0, 128 - br.cloop.dptk.few .l1bx -;; } -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // -;; } - -.fraction_of_line: -{ .mib - add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 -;; } -{ .mib - cmp.eq p_scr, p0 = loopcnt, r0 - add loopcnt = -1, loopcnt -(p_scr) br.cond.dpnt.many .store_words -;; } -{ .mib - and cnt = 0x1f, cnt // compute the remaining cnt - mov.i ar.lc = loopcnt -;; } - TEXT_ALIGN(32) -.l2: // ------------------------------------ // L2A: store 32B in 2 cycles -{ .mmb - stf8 [ptr1] = fvalue, 8 - stf8 [ptr2] = fvalue, 8 -;; } { .mmb - stf8 [ptr1] = fvalue, 24 - stf8 [ptr2] = fvalue, 24 - br.cloop.dptk.many .l2 -;; } -.store_words: -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch -;; } - -{ .mmi - stf8 [ptr1] = fvalue, 8 // store - cmp.le p_y, p_n = 16, cnt - add cnt = -8, cnt // subtract -;; } -{ .mmi -(p_y) stf8 [ptr1] = fvalue, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract -;; } -{ .mmi // store -(p_yy) stf8 [ptr1] = fvalue, 8 -(p_yy) add cnt = -8, cnt // subtract -;; } - -.move_bytes_from_alignment: -{ .mib - cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? -(p_scr) br.cond.dpnt.few .restore_and_exit -;; } -{ .mib -(p_y) st4 [ptr1] = value,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? -;; } -{ .mib -(p_yy) st2 [ptr1] = value,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? -;; } - -{ .mib -(p_y) st1 [ptr1] = value -;; } -.restore_and_exit: -{ .mib - nop.m 0 - mov.i ar.lc = save_lc - br.ret.sptk.many rp -;; } - -.move_bytes_unaligned: -{ .mmi - .pred.rel "mutex",p_y, p_n - .pred.rel "mutex",p_yy, p_nn -(p_n) cmp.le p_yy, p_nn = 4, cnt -(p_y) cmp.le p_yy, p_nn = 5, cnt -(p_n) add ptr2 = 2, ptr1 -} { .mmi -(p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] -(p_y) add cnt = -1, cnt -;; } -{ .mmi -(p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store - mov.i ar.lc = save_lc -} { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] -(p_yy) add cnt = -4, cnt -;; } -{ .mmi -(p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? -} { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] -(p_y) add cnt = -4, cnt -;; } -{ .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
-} { .mmi -(p_yy) add cnt = -4, cnt -;; } -{ .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) - br.ret.sptk.many rp -} -END(memset) -EXPORT_SYMBOL(memset) diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S deleted file mode 100644 index d66de596697441c8cab0c73589a017ca0fc6c466..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strlen.S +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard strlen() function - * - * - * Inputs: - * in0 address of string - * - * Outputs: - * ret0 the number of characters in the string (0 if empty string) - * does not count the \0 - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * - * 09/24/99 S.Eranian add speculation recovery code - */ - -#include -#include - -// -// -// This is an enhanced version of the basic strlen. it includes a combination -// of compute zero index (czx), parallel comparisons, speculative loads and -// loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we generate a -// kernel panic. Otherwise we return the strlen as usual. -// -// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. 
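Editor's note: czx1.r has no portable equivalent, but the chunked scan and the 0xff head mask described in the comments above map onto the classic "has-zero-byte" bit trick. A self-contained little-endian C sketch, without the speculation/recovery machinery of the IA-64 routine (__builtin_ctzll is a GCC/Clang builtin):

    #include <stddef.h>
    #include <stdint.h>

    #define ONES  0x0101010101010101ULL
    #define HIGHS 0x8080808080808080ULL

    /* Word-at-a-time strlen: stand-in for czx1.r plus the 0xff mask
     * that hides the bytes before an unaligned string start. */
    static size_t strlen_words(const char *s)
    {
        const uint64_t *w = (const uint64_t *)((uintptr_t)s & ~7UL);
        unsigned head = (uintptr_t)s & 7;
        /* OR 0xff over the bytes before the string start so they can
         * never look like a NUL (the asm ORs `mask` into v[1]). */
        uint64_t v = *w++ | (head ? (~0ULL >> (8 * (8 - head))) : 0);

        for (;;) {
            uint64_t zero = (v - ONES) & ~v & HIGHS;  /* any NUL byte? */
            if (zero) {
                unsigned byte = __builtin_ctzll(zero) / 8; /* first NUL */
                return (const char *)(w - 1) + byte - s;
            }
            v = *w++;
        }
    }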
-// -// -// -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(strlen) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8 v[1]=[src],8 // must not speculate: can fail here - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // speculatively load next - cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? - tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // the other test got us out of the loop -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from base - sub tmp=8,val1 // which byte in word - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // adjust - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // IMPORTANT: - // Please note that in the case of strlen() as opposed to strlen_user() - // we don't use the exception mechanism, as this function is not - // supposed to fail. If that happens it means we have a bug and the - // code will cause of kernel fault. - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. 
- // -.recover: - ld8 val=[base],8 // will fail if unrecoverable fault - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: -(p6) ld8 val=[base],8 // will fail if unrecoverable fault - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? -(p6) br.wtop.dptk 2b // loop until p6 == 0 - ;; // (avoid WAW on p63) - sub ret0=base,orig // distance from base - sub tmp=8,val1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code -END(strlen) -EXPORT_SYMBOL(strlen) diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S deleted file mode 100644 index 49eb81b69cd224f77addd7853d87079eac6c7994..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strncpy_from_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Just like strncpy() except that if a fault occurs during copying, - * -EFAULT is returned. - * - * Inputs: - * in0: address of destination buffer - * in1: address of string to be copied - * in2: length of buffer in bytes - * Outputs: - * r8: -EFAULT in case of fault or number of bytes copied if no fault - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * Copyright (C) 1998-2001 David Mosberger-Tang - * - * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by - * by Andreas Schwab ). - */ - -#include -#include - -GLOBAL_ENTRY(__strncpy_from_user) - alloc r2=ar.pfs,3,0,0,0 - mov r8=0 - mov r9=in1 - ;; - add r10=in1,in2 - cmp.eq p6,p0=r0,in2 -(p6) br.ret.spnt.many rp - - // XXX braindead copy loop---this needs to be optimized -.Loop1: - EX(.Lexit, ld1 r8=[in1],1) - ;; - EX(.Lexit, st1 [in0]=r8,1) - cmp.ne p6,p7=r8,r0 - ;; -(p6) cmp.ne.unc p8,p0=in1,r10 -(p8) br.cond.dpnt.few .Loop1 - ;; -(p6) mov r8=in2 // buffer filled up---return buffer length -(p7) sub r8=in1,r9,1 // return string length (excluding NUL character) -[.Lexit:] - br.ret.sptk.many rp -END(__strncpy_from_user) -EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S deleted file mode 100644 index 4b684d4da10644db089c5b84e4a62e835883c06a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strnlen_user.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Returns 0 if exception before NUL or reaching the supplied limit (N), - * a value greater than N if the string is longer than the limit, else - * strlen. 
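Editor's note: the __strncpy_from_user loop deleted above has a three-way contract: -EFAULT on a fault, the buffer length if it fills up before a NUL, else the string length excluding the NUL. A C sketch of that contract; load_user_byte is a hypothetical stand-in for the EX()-protected ld1 (the real code uses exception-table fixups, not a return code):

    #include <stddef.h>

    extern int load_user_byte(const char *uaddr, char *out); /* 0 ok, -1 fault */

    static long strncpy_from_user_sketch(char *dst, const char *src, size_t count)
    {
        for (size_t i = 0; i < count; i++) {
            char c;
            if (load_user_byte(src + i, &c))
                return -14;            /* -EFAULT */
            dst[i] = c;
            if (c == '\0')
                return i;              /* length excluding the NUL */
        }
        return count;                  /* buffer filled, no NUL seen */
    }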
- * - * Inputs: - * in0: address of buffer - * in1: string length limit N - * Outputs: - * r8: 0 in case of fault, strlen(buffer)+1 otherwise - * - * Copyright (C) 1999, 2001 David Mosberger-Tang - */ - -#include -#include - -GLOBAL_ENTRY(__strnlen_user) - .prologue - alloc r2=ar.pfs,2,0,0,0 - .save ar.lc, r16 - mov r16=ar.lc // preserve ar.lc - - .body - - add r3=-1,in1 - ;; - mov ar.lc=r3 - mov r9=0 - ;; - // XXX braindead strlen loop---this needs to be optimized -.Loop1: - EXCLR(.Lexit, ld1 r8=[in0],1) - add r9=1,r9 - ;; - cmp.eq p6,p0=r8,r0 -(p6) br.cond.dpnt .Lexit - br.cloop.dptk.few .Loop1 - - add r9=1,in1 // NUL not found---return N+1 - ;; -.Lexit: - mov r8=r9 - mov ar.lc=r16 // restore ar.lc - br.ret.sptk.many rp -END(__strnlen_user) -EXPORT_SYMBOL(__strnlen_user) diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S deleted file mode 100644 index 5413dafe6b2e0103e418c7fbd8dd7b08c51e9e72..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/xor.S +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/ia64/lib/xor.S - * - * Optimized RAID-5 checksumming functions for IA-64. - */ - -#include -#include - -GLOBAL_ENTRY(xor_ia64_2) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 3, 0, 13, 16 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[6+1])st8.nta [r8] = d[1], 8 - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_2) -EXPORT_SYMBOL(xor_ia64_2) - -GLOBAL_ENTRY(xor_ia64_3) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 4, 0, 20, 24 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] - ;; -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], s3[6] - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_3) -EXPORT_SYMBOL(xor_ia64_3) - -GLOBAL_ENTRY(xor_ia64_4) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 5, 0, 27, 32 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r20 = s3[6], s4[6] - ;; -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r20 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_4) -EXPORT_SYMBOL(xor_ia64_4) - -GLOBAL_ENTRY(xor_ia64_5) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 6, 0, 34, 40 - .save 
ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - mov r20 = in5 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r21 = s3[6], s4[6] - ;; -(p[0]) ld8.nta s5[0] = [r20], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r21 - ;; -(p[6]) xor d[0] = d[0], s5[6] - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_5) -EXPORT_SYMBOL(xor_ia64_5) diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas old mode 100755 new mode 100644 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S deleted file mode 100644 index 010e1d227e5dbeb1fed42fbb67e2d824455f1b6c..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-gas-asm.S +++ /dev/null @@ -1,2 +0,0 @@ -[1:] nop 0 - .xdata4 ".data", 0, 1b-. diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S deleted file mode 100644 index 65d6378adaaaa5fbc791fe32608c3a285d432175..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-segrel.S +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .rodata - data4 @segrel(start) - .data -start: diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S deleted file mode 100644 index 0400c106806cd58d0f47ce02e49c88f9e0c977e4..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-serialize.S +++ /dev/null @@ -1,2 +0,0 @@ - .serialize.data - .serialize.instruction diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S deleted file mode 100644 index 107fa1c88c2e115f5bb497900f2619749e28b520..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-text-align.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .proc foo - .prologue -foo: .save rp, r2 - nop 0 - .align 64 - .endp foo diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags old mode 100755 new mode 100644 diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S deleted file mode 100644 index 259b3661b614168ff8ab377587c66b1478222218..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/entry.S +++ /dev/null @@ -1,244 +0,0 @@ -/* - * entry.S -- non-mmu 68000 interrupt and exception entry points - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file README.legal in the main directory of this archive - * for more details. 
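Editor's note on the xor_ia64_N routines deleted above: each computes the RAID-5 block XOR, accumulating into the first buffer, with rotating registers and br.ctop software-pipelining the loads ahead of the xor/store stages. In plain C the computation reduces to the following sketch (prototype shape is an assumption, modeled on the kernel's generic xor helpers; the pipelining is left to the compiler):

    #include <stddef.h>

    /* Sketch of xor_ia64_2/3: p1[i] ^= p2[i] (^ p3[i] ...) over
     * `bytes` bytes, one 8-byte word at a time (asm: shr in0 = in0, 3). */
    static void xor_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
    {
        size_t words = bytes / sizeof(unsigned long);
        for (size_t i = 0; i < words; i++)
            p1[i] ^= p2[i];
    }

    static void xor_3(unsigned long bytes, unsigned long *p1,
                      unsigned long *p2, unsigned long *p3)
    {
        size_t words = bytes / sizeof(unsigned long);
        for (size_t i = 0; i < words; i++)
            p1[i] ^= p2[i] ^ p3[i];
    }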
- * - * Linux/m68k support by Hamish Macdonald - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -.text - -.globl system_call -.globl resume -.globl ret_from_exception -.globl ret_from_signal -.globl sys_call_table -.globl bad_interrupt -.globl inthandler1 -.globl inthandler2 -.globl inthandler3 -.globl inthandler4 -.globl inthandler5 -.globl inthandler6 -.globl inthandler7 - -badsys: - movel #-ENOSYS,%sp@(PT_OFF_D0) - jra ret_from_exception - -do_trace: - movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/ - subql #4,%sp - SAVE_SWITCH_STACK - jbsr syscall_trace_enter - RESTORE_SWITCH_STACK - addql #4,%sp - movel %sp@(PT_OFF_ORIG_D0),%d1 - movel #-ENOSYS,%d0 - cmpl #NR_syscalls,%d1 - jcc 1f - lsl #2,%d1 - lea sys_call_table, %a0 - jbsr %a0@(%d1) - -1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - jbsr syscall_trace_leave - -ret_from_signal: - RESTORE_SWITCH_STACK - addql #4,%sp - jra ret_from_exception - -ENTRY(system_call) - SAVE_ALL_SYS - - /* save top of frame*/ - pea %sp@ - jbsr set_esp0 - addql #4,%sp - - movel %sp@(PT_OFF_ORIG_D0),%d0 - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 - btst #(TIF_SYSCALL_TRACE%8),%a2@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8) - jne do_trace - cmpl #NR_syscalls,%d0 - jcc badsys - lsl #2,%d0 - lea sys_call_table,%a0 - movel %a0@(%d0), %a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value*/ - -ret_from_exception: - btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/ - jeq Luser_return /* if so, skip resched, signals*/ - -Lkernel_return: - RESTORE_ALL - -Luser_return: - /* only allow interrupts when we are really the last one on the*/ - /* kernel stack, otherwise stack overflow can occur during*/ - /* heavy interrupt load*/ - andw #ALLOWINT,%sr - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 -1: - move %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - jne Lwork_to_do - RESTORE_ALL - -Lwork_to_do: - movel %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - btst #TIF_NEED_RESCHED,%d1 - jne reschedule - -Lsignal_return: - subql #4,%sp /* dummy return address*/ - SAVE_SWITCH_STACK - pea %sp@(SWITCH_STACK_SIZE) - bsrw do_notify_resume - addql #4,%sp - RESTORE_SWITCH_STACK - addql #4,%sp - jra 1b - -/* - * This is the main interrupt handler, responsible for calling process_int() - */ -inthandler1: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #65,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler2: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #66,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler3: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #67,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler4: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #68,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler5: - SAVE_ALL_INT - movew 
%sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #69,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler6: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #70,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler7: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #71,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel %d0,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -/* - * Handler for uninitialized and spurious interrupts. - */ -ENTRY(bad_interrupt) - addql #1,irq_err_count - rte - -/* - * Beware - when entering resume, prev (the current task) is - * in a0, next (the new task) is in a1, so don't change these - * registers until their contents are no longer needed. - */ -ENTRY(resume) - movel %a0,%d1 /* save prev thread in d1 */ - movew %sr,%a0@(TASK_THREAD+THREAD_SR) /* save sr */ - SAVE_SWITCH_STACK - movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack */ - movel %usp,%a3 /* save usp */ - movel %a3,%a0@(TASK_THREAD+THREAD_USP) - - movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore user stack */ - movel %a3,%usp - movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new thread stack */ - RESTORE_SWITCH_STACK - movew %a1@(TASK_THREAD+THREAD_SR),%sr /* restore thread status reg */ - rts - diff --git a/arch/m68k/68000/head.S b/arch/m68k/68000/head.S deleted file mode 100644 index 140220662e33e9932e6d8442f0346ec74b48138c..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/head.S +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * head.S - Common startup code for 68000 core based CPU's - * - * 2012.10.21, Luis Alves , Single head.S file for all - * 68000 core based CPU's. Based on the sources from: - * Coldfire by Greg Ungerer - * 68328 by D. Jeff Dionne , - * Kenneth Albanowski , - * The Silver Hammer Group, Ltd. - * - */ - -#include -#include -#include -#include - - -/***************************************************************************** - * UCSIMM and UCDIMM use CONFIG_MEMORY_RESERVE to reserve some RAM - *****************************************************************************/ -#ifdef CONFIG_MEMORY_RESERVE -#define RAMEND (CONFIG_RAMBASE+CONFIG_RAMSIZE)-(CONFIG_MEMORY_RESERVE*0x100000) -#else -#define RAMEND (CONFIG_RAMBASE+CONFIG_RAMSIZE) -#endif -/*****************************************************************************/ - -.global _start -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend - -#if defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD) -.global bootlogo_bits -#endif - -/* Defining DEBUG_HEAD_CODE, serial port in 68x328 is inited */ -/* #define DEBUG_HEAD_CODE */ -#undef DEBUG_HEAD_CODE - -.data - -/***************************************************************************** - * RAM setup pointers. Used by the kernel to determine RAM location and size. 
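Editor's note: the inthandler1..7 stubs above all follow one pattern: save the interrupt frame, push their own fixed vector number (65 through 71) plus the register frame, and call the common C dispatcher. A sketch of the shared shape; the process_int signature here is illustrative:

    struct pt_regs;
    extern void process_int(unsigned long vec, struct pt_regs *fp);

    /* Common body of inthandlerN: each stub hard-codes its vector
     * (inthandler1 pushes 65, ..., inthandler7 pushes 71). */
    static void inthandler_n(unsigned long vec, struct pt_regs *fp)
    {
        process_int(vec, fp);
    }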
- *****************************************************************************/ - -_rambase: - .long 0 -_ramvec: - .long 0 -_ramstart: - .long 0 -_ramend: - .long 0 - -__HEAD - -/***************************************************************************** - * Entry point, where all begins! - *****************************************************************************/ - -_start: - -/* Pilot need this specific signature at the start of ROM */ -#ifdef CONFIG_PILOT - .byte 0x4e, 0xfa, 0x00, 0x0a /* bra opcode (jmp 10 bytes) */ - .byte 'b', 'o', 'o', 't' - .word 10000 - nop - moveq #0, %d0 - movew %d0, 0xfffff618 /* Watchdog off */ - movel #0x00011f07, 0xfffff114 /* CS A1 Mask */ -#endif /* CONFIG_PILOT */ - - movew #0x2700, %sr /* disable all interrupts */ - -/***************************************************************************** - * Setup PLL and wait for it to settle (in 68x328 cpu's). - * Also, if enabled, init serial port. - *****************************************************************************/ -#if defined(CONFIG_M68328) || \ - defined(CONFIG_M68EZ328) || \ - defined(CONFIG_M68VZ328) - -/* Serial port setup. Should only be needed if debugging this startup code. */ -#ifdef DEBUG_HEAD_CODE - movew #0x0800, 0xfffff906 /* Ignore CTS */ - movew #0x010b, 0xfffff902 /* BAUD to 9600 */ - movew #0xe100, 0xfffff900 /* enable */ -#endif /* DEBUG_HEAD */ - -#ifdef CONFIG_PILOT - movew #0x2410, 0xfffff200 /* PLLCR */ -#else - movew #0x2400, 0xfffff200 /* PLLCR */ -#endif - movew #0x0123, 0xfffff202 /* PLLFSR */ - moveq #0, %d0 - movew #16384, %d0 /* PLL settle wait loop */ -_pll_settle: - subw #1, %d0 - bne _pll_settle -#endif /* CONFIG_M68x328 */ - - -/***************************************************************************** - * If running kernel from ROM some specific initialization has to be done. - * (Assuming that everything is already init'ed when running from RAM) - *****************************************************************************/ -#ifdef CONFIG_ROMKERNEL - -/***************************************************************************** - * Init chip registers (uCsimm specific) - *****************************************************************************/ -#ifdef CONFIG_UCSIMM - moveb #0x00, 0xfffffb0b /* Watchdog off */ - moveb #0x10, 0xfffff000 /* SCR */ - moveb #0x00, 0xfffff40b /* enable chip select */ - moveb #0x00, 0xfffff423 /* enable /DWE */ - moveb #0x08, 0xfffffd0d /* disable hardmap */ - moveb #0x07, 0xfffffd0e /* level 7 interrupt clear */ - movew #0x8600, 0xfffff100 /* FLASH at 0x10c00000 */ - movew #0x018b, 0xfffff110 /* 2Meg, enable, 0ws */ - movew #0x8f00, 0xfffffc00 /* DRAM configuration */ - movew #0x9667, 0xfffffc02 /* DRAM control */ - movew #0x0000, 0xfffff106 /* DRAM at 0x00000000 */ - movew #0x068f, 0xfffff116 /* 8Meg, enable, 0ws */ - moveb #0x40, 0xfffff300 /* IVR */ - movel #0x007FFFFF, %d0 /* IMR */ - movel %d0, 0xfffff304 - moveb 0xfffff42b, %d0 - andb #0xe0, %d0 - moveb %d0, 0xfffff42b -#endif - -/***************************************************************************** - * Init LCD controller. 
- * (Assuming that LCD controller is already init'ed when running from RAM) - *****************************************************************************/ -#ifdef CONFIG_INIT_LCD -#ifdef CONFIG_PILOT - moveb #0, 0xfffffA27 /* LCKCON */ - movel #_start, 0xfffffA00 /* LSSA */ - moveb #0xa, 0xfffffA05 /* LVPW */ - movew #0x9f, 0xFFFFFa08 /* LXMAX */ - movew #0x9f, 0xFFFFFa0a /* LYMAX */ - moveb #9, 0xfffffa29 /* LBAR */ - moveb #0, 0xfffffa25 /* LPXCD */ - moveb #0x04, 0xFFFFFa20 /* LPICF */ - moveb #0x58, 0xfffffA27 /* LCKCON */ - moveb #0x85, 0xfffff429 /* PFDATA */ - moveb #0xd8, 0xfffffA27 /* LCKCON */ - moveb #0xc5, 0xfffff429 /* PFDATA */ - moveb #0xd5, 0xfffff429 /* PFDATA */ - movel #bootlogo_bits, 0xFFFFFA00 /* LSSA */ - moveb #10, 0xFFFFFA05 /* LVPW */ - movew #160, 0xFFFFFA08 /* LXMAX */ - movew #160, 0xFFFFFA0A /* LYMAX */ -#else /* CONFIG_PILOT */ - movel #bootlogo_bits, 0xfffffA00 /* LSSA */ - moveb #0x28, 0xfffffA05 /* LVPW */ - movew #0x280, 0xFFFFFa08 /* LXMAX */ - movew #0x1df, 0xFFFFFa0a /* LYMAX */ - moveb #0, 0xfffffa29 /* LBAR */ - moveb #0, 0xfffffa25 /* LPXCD */ - moveb #0x08, 0xFFFFFa20 /* LPICF */ - moveb #0x01, 0xFFFFFA21 /* -ve pol */ - moveb #0x81, 0xfffffA27 /* LCKCON */ - movew #0xff00, 0xfffff412 /* LCD pins */ -#endif /* CONFIG_PILOT */ -#endif /* CONFIG_INIT_LCD */ - -/***************************************************************************** - * Kernel is running from FLASH/ROM (XIP) - * Copy init text & data to RAM - *****************************************************************************/ - moveal #_etext, %a0 - moveal #_sdata, %a1 - moveal #__bss_start, %a2 -_copy_initmem: - movel %a0@+, %a1@+ - cmpal %a1, %a2 - bhi _copy_initmem -#endif /* CONFIG_ROMKERNEL */ - -/***************************************************************************** - * Setup basic memory information for kernel - *****************************************************************************/ - movel #CONFIG_VECTORBASE,_ramvec /* set vector base location */ - movel #CONFIG_RAMBASE,_rambase /* set the base of RAM */ - movel #RAMEND, _ramend /* set end ram addr */ - lea __bss_stop,%a1 - movel %a1,_ramstart - -/***************************************************************************** - * If the kernel is in RAM, move romfs to right above bss and - * adjust _ramstart to where romfs ends. 
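Editor's note: a rough C rendering of the ROMFS move described here (the asm follows below). The filesystem image sits at __bss_start and must end up just past __bss_stop; the regions overlap, so the copy runs backwards, last word first. The size comes from offset 8 of the ROMFS header (m68k is big-endian, matching the header's byte order):

    #include <stdint.h>

    extern char __bss_start[], __bss_stop[];

    /* Sketch of _copy_romfs: returns the new _ramstart. */
    static char *move_romfs(void)
    {
        uint32_t size = *(uint32_t *)(__bss_start + 8); /* ROMFS size */
        size = (size + 8) & ~3u;             /* allow rounding, whole words */
        uint32_t *src = (uint32_t *)(__bss_start + size);
        uint32_t *dst = (uint32_t *)(__bss_stop + size);
        while ((char *)src > __bss_start)
            *--dst = *--src;                 /* copy dword, end first */
        return (char *)(__bss_stop + size);  /* new start of free RAM */
    }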
- * - * (Do this only if CONFIG_MTD_UCLINUX is true) - *****************************************************************************/ - -#if defined(CONFIG_ROMFS_FS) && defined(CONFIG_RAMKERNEL) && \ - defined(CONFIG_MTD_UCLINUX) - lea __bss_start, %a0 /* get start of bss */ - lea __bss_stop, %a1 /* set up destination */ - movel %a0, %a2 /* copy of bss start */ - - movel 8(%a0), %d0 /* get size of ROMFS */ - addql #8, %d0 /* allow for rounding */ - andl #0xfffffffc, %d0 /* whole words */ - - addl %d0, %a0 /* copy from end */ - addl %d0, %a1 /* copy from end */ - movel %a1, _ramstart /* set start of ram */ -_copy_romfs: - movel -(%a0), -(%a1) /* copy dword */ - cmpl %a0, %a2 /* check if at end */ - bne _copy_romfs -#endif /* CONFIG_ROMFS_FS && CONFIG_RAMKERNEL && CONFIG_MTD_UCLINUX */ - -/***************************************************************************** - * Clear bss region - *****************************************************************************/ - lea __bss_start, %a0 /* get start of bss */ - lea __bss_stop, %a1 /* get end of bss */ -_clear_bss: - movel #0, (%a0)+ /* clear each word */ - cmpl %a0, %a1 /* check if at end */ - bne _clear_bss - -/***************************************************************************** - * Load the current task pointer and stack. - *****************************************************************************/ - lea init_thread_union,%a0 - lea THREAD_SIZE(%a0),%sp - jsr start_kernel /* start Linux kernel */ -_exit: - jmp _exit /* should never get here */ diff --git a/arch/m68k/68000/romvec.S b/arch/m68k/68000/romvec.S deleted file mode 100644 index 15c70cd6453fa2a995334e4b45d21fe62c314b66..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/romvec.S +++ /dev/null @@ -1,35 +0,0 @@ -/* - * romvec.S - Vector table for 68000 cpus - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - * - * Copyright 1996 Roman Zippel - * Copyright 1999 D. Jeff Dionne - * Copyright 2006 Greg Ungerer - */ - -.global _start -.global _buserr -.global trap -.global system_call - -.section .romvec - -e_vectors: -.long CONFIG_RAMBASE+CONFIG_RAMSIZE-4, _start, buserr, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -/* TRAP #0-15 */ -.long system_call, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S deleted file mode 100644 index 52d312d5b4d4f68337aa8f9b711f50b1a1ad9eda..0000000000000000000000000000000000000000 --- a/arch/m68k/coldfire/entry.S +++ /dev/null @@ -1,203 +0,0 @@ -/* - * entry.S -- interrupt and exception processing for ColdFire - * - * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com) - * Copyright (C) 1998 D. Jeff Dionne , - * Kenneth Albanowski , - * Copyright (C) 2000 Lineo Inc. (www.lineo.com) - * Copyright (C) 2004-2006 Macq Electronique SA. (www.macqel.com) - * - * Based on: - * - * linux/arch/m68k/kernel/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file README.legal in the main directory of this archive - * for more details. 
- * - * Linux/m68k support by Hamish Macdonald - * - * 68060 fixes by Jesper Skov - * ColdFire support by Greg Ungerer (gerg@snapgear.com) - * 5307 fixes by David W. Miller - * linux 2.4 support David McCullough - * Bug, speed and maintainability fixes by Philippe De Muyter - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_COLDFIRE_SW_A7 -/* - * Define software copies of the supervisor and user stack pointers. - */ -.bss -sw_ksp: -.long 0 -sw_usp: -.long 0 -#endif /* CONFIG_COLDFIRE_SW_A7 */ - -.text - -.globl system_call -.globl resume -.globl ret_from_exception -.globl ret_from_signal -.globl sys_call_table -.globl inthandler - -enosys: - mov.l #sys_ni_syscall,%d3 - bra 1f - -ENTRY(system_call) - SAVE_ALL_SYS - move #0x2000,%sr /* enable intrs again */ - GET_CURRENT(%d2) - - cmpl #NR_syscalls,%d0 - jcc enosys - lea sys_call_table,%a0 - lsll #2,%d0 /* movel %a0@(%d0:l:4),%d3 */ - movel %a0@(%d0),%d3 - jeq enosys - -1: - movel %sp,%d2 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d2 /* at start of kernel stack */ - movel %d2,%a0 - movel %a0@,%a1 /* save top of frame */ - movel %sp,%a1@(TASK_THREAD+THREAD_ESP0) - btst #(TIF_SYSCALL_TRACE%8),%a0@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8) - bnes 1f - - movel %d3,%a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - jra ret_from_exception -1: - movel #-ENOSYS,%d2 /* strace needs -ENOSYS in PT_OFF_D0 */ - movel %d2,PT_OFF_D0(%sp) /* on syscall entry */ - subql #4,%sp - SAVE_SWITCH_STACK - jbsr syscall_trace_enter - RESTORE_SWITCH_STACK - addql #4,%sp - movel %d3,%a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - jbsr syscall_trace_leave - -ret_from_signal: - RESTORE_SWITCH_STACK - addql #4,%sp - -ret_from_exception: - move #0x2700,%sr /* disable intrs */ - btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel */ - jeq Luser_return /* if so, skip resched, signals */ - -#ifdef CONFIG_PREEMPT - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 /* at base of kernel stack */ - movel %d1,%a0 - movel %a0@(TINFO_FLAGS),%d1 /* get thread_info->flags */ - andl #(1<flags (low 8 bits) */ - jne Lwork_to_do /* still work to do */ - -Lreturn: - RESTORE_USER - -Lwork_to_do: - movel %a0@(TINFO_FLAGS),%d1 /* get thread_info->flags */ - move #0x2000,%sr /* enable intrs again */ - btst #TIF_NEED_RESCHED,%d1 - jne reschedule - -Lsignal_return: - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - pea %sp@(SWITCH_STACK_SIZE) - jsr do_notify_resume - addql #4,%sp - RESTORE_SWITCH_STACK - addql #4,%sp - jmp Luser_return - -/* - * This is the generic interrupt handler (for all hardware interrupt - * sources). Calls up to high level code to do all the work. - */ -ENTRY(inthandler) - SAVE_ALL_INT - GET_CURRENT(%d2) - - movew %sp@(PT_OFF_FORMATVEC),%d0 /* put exception # in d0 */ - andl #0x03fc,%d0 /* mask out vector only */ - - movel %sp,%sp@- /* push regs arg */ - lsrl #2,%d0 /* calculate real vector # */ - movel %d0,%sp@- /* push vector number */ - jbsr do_IRQ /* call high level irq handler */ - lea %sp@(8),%sp /* pop args off stack */ - - bra ret_from_exception - -/* - * Beware - when entering resume, prev (the current task) is - * in a0, next (the new task) is in a1, so don't change these - * registers until their contents are no longer needed. 
- */ -ENTRY(resume) - movew %sr,%d1 /* save current status */ - movew %d1,%a0@(TASK_THREAD+THREAD_SR) - movel %a0,%d1 /* get prev thread in d1 */ - SAVE_SWITCH_STACK - movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack pointer */ - RDUSP /* movel %usp,%a3 */ - movel %a3,%a0@(TASK_THREAD+THREAD_USP) /* save thread user stack */ -#ifdef CONFIG_MMU - movel %a1,%a2 /* set new current */ -#endif - movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore thread user stack */ - WRUSP /* movel %a3,%usp */ - movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new kernel stack */ - movew %a1@(TASK_THREAD+THREAD_SR),%d7 /* restore new status */ - movew %d7,%sr - RESTORE_SWITCH_STACK - rts - diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S deleted file mode 100644 index c6d7fd28c60237f1a05659f3527efe1d81ca2887..0000000000000000000000000000000000000000 --- a/arch/m68k/coldfire/head.S +++ /dev/null @@ -1,299 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/*****************************************************************************/ - -/* - * head.S -- common startup code for ColdFire CPUs. - * - * (C) Copyright 1999-2011, Greg Ungerer . - */ - -/*****************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include - -/*****************************************************************************/ - -/* - * If we don't have a fixed memory size, then lets build in code - * to auto detect the DRAM size. Obviously this is the preferred - * method, and should work for most boards. It won't work for those - * that do not have their RAM starting at address 0, and it only - * works on SDRAM (not boards fitted with SRAM). - */ -#if CONFIG_RAMSIZE != 0 -.macro GET_MEM_SIZE - movel #CONFIG_RAMSIZE,%d0 /* hard coded memory size */ -.endm - -#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \ - defined(CONFIG_M5249) || defined(CONFIG_M525x) || \ - defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ - defined(CONFIG_M5307) || defined(CONFIG_M5407) -/* - * Not all these devices have exactly the same DRAM controller, - * but the DCMR register is virtually identical - give or take - * a couple of bits. The only exception is the 5272 devices, their - * DRAM controller is quite different. 
- */ -.macro GET_MEM_SIZE - movel MCFSIM_DMR0,%d0 /* get mask for 1st bank */ - btst #0,%d0 /* check if region enabled */ - beq 1f - andl #0xfffc0000,%d0 - beq 1f - addl #0x00040000,%d0 /* convert mask to size */ -1: - movel MCFSIM_DMR1,%d1 /* get mask for 2nd bank */ - btst #0,%d1 /* check if region enabled */ - beq 2f - andl #0xfffc0000,%d1 - beq 2f - addl #0x00040000,%d1 - addl %d1,%d0 /* total mem size in d0 */ -2: -.endm - -#elif defined(CONFIG_M5272) -.macro GET_MEM_SIZE - movel MCFSIM_CSOR7,%d0 /* get SDRAM address mask */ - andil #0xfffff000,%d0 /* mask out chip select options */ - negl %d0 /* negate bits */ -.endm - -#elif defined(CONFIG_M520x) -.macro GET_MEM_SIZE - clrl %d0 - movel MCFSIM_SDCS0, %d2 /* Get SDRAM chip select 0 config */ - andl #0x1f, %d2 /* Get only the chip select size */ - beq 3f /* Check if it is enabled */ - addql #1, %d2 /* Form exponent */ - moveql #1, %d0 - lsll %d2, %d0 /* 2 ^ exponent */ -3: - movel MCFSIM_SDCS1, %d2 /* Get SDRAM chip select 1 config */ - andl #0x1f, %d2 /* Get only the chip select size */ - beq 4f /* Check if it is enabled */ - addql #1, %d2 /* Form exponent */ - moveql #1, %d1 - lsll %d2, %d1 /* 2 ^ exponent */ - addl %d1, %d0 /* Total size of SDRAM in d0 */ -4: -.endm - -#else -#error "ERROR: I don't know how to probe your boards memory size?" -#endif - -/*****************************************************************************/ - -/* - * Boards and platforms can do specific early hardware setup if - * they need to. Most don't need this, define away if not required. - */ -#ifndef PLATFORM_SETUP -#define PLATFORM_SETUP -#endif - -/*****************************************************************************/ - -.global _start -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend -#if defined(CONFIG_UBOOT) -.global _init_sp -#endif - -/*****************************************************************************/ - -.data - -/* - * During startup we store away the RAM setup. These are not in the - * bss, since their values are determined and written before the bss - * has been cleared. - */ -_rambase: -.long 0 -_ramvec: -.long 0 -_ramstart: -.long 0 -_ramend: -.long 0 -#if defined(CONFIG_UBOOT) -_init_sp: -.long 0 -#endif - -/*****************************************************************************/ - -__HEAD - -#ifdef CONFIG_MMU -_start0: - jmp _start -.global kernel_pg_dir -.equ kernel_pg_dir,_start0 -.equ .,_start0+0x1000 -#endif - -/* - * This is the codes first entry point. This is where it all - * begins... - */ - -_start: - nop /* filler */ - movew #0x2700, %sr /* no interrupts */ - movel #CACHE_INIT,%d0 /* disable cache */ - movec %d0,%CACR - nop -#if defined(CONFIG_UBOOT) - movel %sp,_init_sp /* save initial stack pointer */ -#endif -#ifdef CONFIG_MBAR - movel #CONFIG_MBAR+1,%d0 /* configured MBAR address */ - movec %d0,%MBAR /* set it */ -#endif - - /* - * Do any platform or board specific setup now. Most boards - * don't need anything. Those exceptions are define this in - * their board specific includes. - */ - PLATFORM_SETUP - - /* - * Create basic memory configuration. Set VBR accordingly, - * and size memory. - */ - movel #CONFIG_VECTORBASE,%a7 - movec %a7,%VBR /* set vectors addr */ - movel %a7,_ramvec - - movel #CONFIG_RAMBASE,%a7 /* mark the base of RAM */ - movel %a7,_rambase - - GET_MEM_SIZE /* macro code determines size */ - addl %a7,%d0 - movel %d0,_ramend /* set end ram addr */ - - /* - * Now that we know what the memory is, lets enable cache - * and get things moving. 
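Editor's note: the DCMR-style GET_MEM_SIZE variant above converts each bank's address-mask register into a size. A C sketch of the per-bank arithmetic; the register values would be read from MCFSIM_DMR0/DMR1, which is omitted here:

    #include <stdint.h>

    /* Sketch of one bank of GET_MEM_SIZE: bit 0 enables the bank,
     * the upper bits are an address mask with 256K granularity;
     * mask + one granule gives the bank size. */
    static uint32_t bank_size(uint32_t dmr)
    {
        if (!(dmr & 1))                     /* region not enabled */
            return 0;
        dmr &= 0xfffc0000;                  /* keep the mask bits */
        return dmr ? dmr + 0x00040000 : 0;  /* convert mask to size */
    }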
This is Coldfire CPU specific. Not - * all version cores have identical cache register setup. But - * it is very similar. Define the exact settings in the headers - * then the code here is the same for all. - */ - movel #ACR0_MODE,%d0 /* set RAM region for caching */ - movec %d0,%ACR0 - movel #ACR1_MODE,%d0 /* anything else to cache? */ - movec %d0,%ACR1 -#ifdef ACR2_MODE - movel #ACR2_MODE,%d0 - movec %d0,%ACR2 - movel #ACR3_MODE,%d0 - movec %d0,%ACR3 -#endif - movel #CACHE_MODE,%d0 /* enable cache */ - movec %d0,%CACR - nop - -#ifdef CONFIG_MMU - /* - * Identity mapping for the kernel region. - */ - movel #(MMUBASE+1),%d0 /* enable MMUBAR registers */ - movec %d0,%MMUBAR - movel #MMUOR_CA,%d0 /* clear TLB entries */ - movel %d0,MMUOR - movel #0,%d0 /* set ASID to 0 */ - movec %d0,%asid - - movel #MMUCR_EN,%d0 /* Enable the identity map */ - movel %d0,MMUCR - nop /* sync i-pipeline */ - - movel #_vstart,%a0 /* jump to "virtual" space */ - jmp %a0@ -_vstart: -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_ROMFS_FS - /* - * Move ROM filesystem above bss :-) - */ - lea __bss_start,%a0 /* get start of bss */ - lea __bss_stop,%a1 /* set up destination */ - movel %a0,%a2 /* copy of bss start */ - - movel 8(%a0),%d0 /* get size of ROMFS */ - addql #8,%d0 /* allow for rounding */ - andl #0xfffffffc, %d0 /* whole words */ - - addl %d0,%a0 /* copy from end */ - addl %d0,%a1 /* copy from end */ - movel %a1,_ramstart /* set start of ram */ - -_copy_romfs: - movel -(%a0),%d0 /* copy dword */ - movel %d0,-(%a1) - cmpl %a0,%a2 /* check if at end */ - bne _copy_romfs - -#else /* CONFIG_ROMFS_FS */ - lea __bss_stop,%a1 - movel %a1,_ramstart -#endif /* CONFIG_ROMFS_FS */ - - - /* - * Zero out the bss region. - */ - lea __bss_start,%a0 /* get start of bss */ - lea __bss_stop,%a1 /* get end of bss */ - clrl %d0 /* set value */ -_clear_bss: - movel %d0,(%a0)+ /* clear each word */ - cmpl %a0,%a1 /* check if at end */ - bne _clear_bss - - /* - * Load the current task pointer and stack. - */ - lea init_thread_union,%a0 - lea THREAD_SIZE(%a0),%sp - -#ifdef CONFIG_MMU -.global m68k_cputype -.global m68k_mmutype -.global m68k_fputype -.global m68k_machtype - movel #CPU_COLDFIRE,%d0 - movel %d0,m68k_cputype /* Mark us as a ColdFire */ - movel #MMU_COLDFIRE,%d0 - movel %d0,m68k_mmutype - movel #FPUTYPE,%d0 - movel %d0,m68k_fputype /* Mark FPU type */ - movel #MACHINE,%d0 - movel %d0,m68k_machtype /* Mark machine type */ - lea init_task,%a2 /* Set "current" init task */ -#endif - - /* - * Assembler start up done, start code proper. - */ - jsr start_kernel /* start Linux kernel */ - -_exit: - jmp _exit /* should never get here */ - -/*****************************************************************************/ diff --git a/arch/m68k/fpsp040/bindec.S b/arch/m68k/fpsp040/bindec.S deleted file mode 100644 index f2e795231046049bd41950bd642178f81065c819..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/bindec.S +++ /dev/null @@ -1,919 +0,0 @@ -| -| bindec.sa 3.4 1/3/91 -| -| bindec -| -| Description: -| Converts an input in extended precision format -| to bcd format. -| -| Input: -| a0 points to the input extended precision value -| value in memory; d0 contains the k-factor sign-extended -| to 32-bits. The input may be either normalized, -| unnormalized, or denormalized. -| -| Output: result in the FP_SCR1 space on the stack. -| -| Saves and Modifies: D2-D7,A2,FP2 -| -| Algorithm: -| -| A1. Set RM and size ext; Set SIGMA = sign of input. -| The k-factor is saved for use in d7. 
Clear the -| BINDEC_FLG for separating normalized/denormalized -| input. If input is unnormalized or denormalized, -| normalize it. -| -| A2. Set X = abs(input). -| -| A3. Compute ILOG. -| ILOG is the log base 10 of the input value. It is -| approximated by adding e + 0.f when the original -| value is viewed as 2^^e * 1.f in extended precision. -| This value is stored in d6. -| -| A4. Clr INEX bit. -| The operation in A3 above may have set INEX2. -| -| A5. Set ICTR = 0; -| ICTR is a flag used in A13. It must be set before the -| loop entry A6. -| -| A6. Calculate LEN. -| LEN is the number of digits to be displayed. The -| k-factor can dictate either the total number of digits, -| if it is a positive number, or the number of digits -| after the decimal point which are to be included as -| significant. See the 68882 manual for examples. -| If LEN is computed to be greater than 17, set OPERR in -| USER_FPSR. LEN is stored in d4. -| -| A7. Calculate SCALE. -| SCALE is equal to 10^ISCALE, where ISCALE is the number -| of decimal places needed to insure LEN integer digits -| in the output before conversion to bcd. LAMBDA is the -| sign of ISCALE, used in A9. Fp1 contains -| 10^^(abs(ISCALE)) using a rounding mode which is a -| function of the original rounding mode and the signs -| of ISCALE and X. A table is given in the code. -| -| A8. Clr INEX; Force RZ. -| The operation in A3 above may have set INEX2. -| RZ mode is forced for the scaling operation to insure -| only one rounding error. The grs bits are collected in -| the INEX flag for use in A10. -| -| A9. Scale X -> Y. -| The mantissa is scaled to the desired number of -| significant digits. The excess digits are collected -| in INEX2. -| -| A10. Or in INEX. -| If INEX is set, round error occurred. This is -| compensated for by 'or-ing' in the INEX2 flag to -| the lsb of Y. -| -| A11. Restore original FPCR; set size ext. -| Perform FINT operation in the user's rounding mode. -| Keep the size to extended. -| -| A12. Calculate YINT = FINT(Y) according to user's rounding -| mode. The FPSP routine sintd0 is used. The output -| is in fp0. -| -| A13. Check for LEN digits. -| If the int operation results in more than LEN digits, -| or less than LEN -1 digits, adjust ILOG and repeat from -| A6. This test occurs only on the first pass. If the -| result is exactly 10^LEN, decrement ILOG and divide -| the mantissa by 10. -| -| A14. Convert the mantissa to bcd. -| The binstr routine is used to convert the LEN digit -| mantissa to bcd in memory. The input to binstr is -| to be a fraction; i.e. (mantissa)/10^LEN and adjusted -| such that the decimal point is to the left of bit 63. -| The bcd digits are stored in the correct position in -| the final string area in memory. -| -| A15. Convert the exponent to bcd. -| As in A14 above, the exp is converted to bcd and the -| digits are stored in the final string. -| Test the length of the final exponent string. If the -| length is 4, set operr. -| -| A16. Write sign bits to final string. -| -| Implementation Notes: -| -| The registers are used as follows: -| -| d0: scratch; LEN input to binstr -| d1: scratch -| d2: upper 32-bits of mantissa for binstr -| d3: scratch;lower 32-bits of mantissa for binstr -| d4: LEN -| d5: LAMBDA/ICTR -| d6: ILOG -| d7: k-factor -| a0: ptr for original operand/final result -| a1: scratch pointer -| a2: pointer to FP_X; abs(original value) in ext -| fp0: scratch -| fp1: scratch -| fp2: scratch -| F_SCR1: -| F_SCR2: -| L_SCR1: -| L_SCR2: - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package - -#include "fpsp.h" - - |section 8 - -| Constants in extended precision -LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 -LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 - -| Constants in single precision -FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000 -FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000 -FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000 -F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000 - -RBDTBL: .byte 0,0,0,0 - .byte 3,3,2,2 - .byte 3,2,2,3 - .byte 2,3,3,2 - - |xref binstr - |xref sintdo - |xref ptenrn,ptenrm,ptenrp - - .global bindec - .global sc_mul -bindec: - moveml %d2-%d7/%a2,-(%a7) - fmovemx %fp0-%fp2,-(%a7) - -| A1. Set RM and size ext. Set SIGMA = sign input; -| The k-factor is saved for use in d7. Clear BINDEC_FLG for -| separating normalized/denormalized input. If the input -| is a denormalized number, set the BINDEC_FLG memory word -| to signal denorm. If the input is unnormalized, normalize -| the input and test for denormalized result. -| - fmovel #rm_mode,%FPCR |set RM and ext - movel (%a0),L_SCR2(%a6) |save exponent for sign check - movel %d0,%d7 |move k-factor to d7 - clrb BINDEC_FLG(%a6) |clr norm/denorm flag - movew STAG(%a6),%d0 |get stag - andiw #0xe000,%d0 |isolate stag bits - beq A2_str |if zero, input is norm -| -| Normalize the denorm -| -un_de_norm: - movew (%a0),%d0 - andiw #0x7fff,%d0 |strip sign of normalized exp - movel 4(%a0),%d1 - movel 8(%a0),%d2 -norm_loop: - subw #1,%d0 - lsll #1,%d2 - roxll #1,%d1 - tstl %d1 - bges norm_loop -| -| Test if the normalized input is denormalized -| - tstw %d0 - bgts pos_exp |if greater than zero, it is a norm - st BINDEC_FLG(%a6) |set flag for denorm -pos_exp: - andiw #0x7fff,%d0 |strip sign of normalized exp - movew %d0,(%a0) - movel %d1,4(%a0) - movel %d2,8(%a0) - -| A2. Set X = abs(input). -| -A2_str: - movel (%a0),FP_SCR2(%a6) | move input to work space - movel 4(%a0),FP_SCR2+4(%a6) | move input to work space - movel 8(%a0),FP_SCR2+8(%a6) | move input to work space - andil #0x7fffffff,FP_SCR2(%a6) |create abs(X) - -| A3. Compute ILOG. -| ILOG is the log base 10 of the input value. It is approx- -| imated by adding e + 0.f when the original value is viewed -| as 2^^e * 1.f in extended precision. This value is stored -| in d6. -| -| Register usage: -| Input/Output -| d0: k-factor/exponent -| d2: x/x -| d3: x/x -| d4: x/x -| d5: x/x -| d6: x/ILOG -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/x -| a2: x/x -| fp0: x/float(ILOG) -| fp1: x/x -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X)/Abs(X) with $3fff exponent -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - - tstb BINDEC_FLG(%a6) |check for denorm - beqs A3_cont |if clr, continue with norm - movel #-4933,%d6 |force ILOG = -4933 - bras A4_str -A3_cont: - movew FP_SCR2(%a6),%d0 |move exp to d0 - movew #0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff - fmovex FP_SCR2(%a6),%fp0 |now fp0 has 1.f - subw #0x3fff,%d0 |strip off bias - faddw %d0,%fp0 |add in exp - fsubs FONE,%fp0 |subtract off 1.0 - fbge pos_res |if pos, branch - fmulx LOG2UP1,%fp0 |if neg, mul by LOG2UP1 - fmovel %fp0,%d6 |put ILOG in d6 as a lword - bras A4_str |go move out ILOG -pos_res: - fmulx LOG2,%fp0 |if pos, mul by LOG2 - fmovel %fp0,%d6 |put ILOG in d6 as a lword - - -| A4. Clr INEX bit. 
-| The operation in A3 above may have set INEX2. - -A4_str: - fmovel #0,%FPSR |zero all of fpsr - nothing needed - - -| A5. Set ICTR = 0; -| ICTR is a flag used in A13. It must be set before the -| loop entry A6. The lower word of d5 is used for ICTR. - - clrw %d5 |clear ICTR - - -| A6. Calculate LEN. -| LEN is the number of digits to be displayed. The k-factor -| can dictate either the total number of digits, if it is -| a positive number, or the number of digits after the -| original decimal point which are to be included as -| significant. See the 68882 manual for examples. -| If LEN is computed to be greater than 17, set OPERR in -| USER_FPSR. LEN is stored in d4. -| -| Register usage: -| Input/Output -| d0: exponent/Unchanged -| d2: x/x/scratch -| d3: x/x -| d4: exc picture/LEN -| d5: ICTR/Unchanged -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/x -| a2: x/x -| fp0: float(ILOG)/Unchanged -| fp1: x/x -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A6_str: - tstl %d7 |branch on sign of k - bles k_neg |if k <= 0, LEN = ILOG + 1 - k - movel %d7,%d4 |if k > 0, LEN = k - bras len_ck |skip to LEN check -k_neg: - movel %d6,%d4 |first load ILOG to d4 - subl %d7,%d4 |subtract off k - addql #1,%d4 |add in the 1 -len_ck: - tstl %d4 |LEN check: branch on sign of LEN - bles LEN_ng |if neg, set LEN = 1 - cmpl #17,%d4 |test if LEN > 17 - bles A7_str |if not, forget it - movel #17,%d4 |set max LEN = 17 - tstl %d7 |if negative, never set OPERR - bles A7_str |if positive, continue - orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR - bras A7_str |finished here -LEN_ng: - moveql #1,%d4 |min LEN is 1 - - -| A7. Calculate SCALE. -| SCALE is equal to 10^ISCALE, where ISCALE is the number -| of decimal places needed to insure LEN integer digits -| in the output before conversion to bcd. LAMBDA is the sign -| of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using -| the rounding mode as given in the following table (see -| Coonen, p. 7.23 as ref.; however, the SCALE variable is -| of opposite sign in bindec.sa from Coonen). 
-| -| Initial USE -| FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] -| ---------------------------------------------- -| RN 00 0 0 00/0 RN -| RN 00 0 1 00/0 RN -| RN 00 1 0 00/0 RN -| RN 00 1 1 00/0 RN -| RZ 01 0 0 11/3 RP -| RZ 01 0 1 11/3 RP -| RZ 01 1 0 10/2 RM -| RZ 01 1 1 10/2 RM -| RM 10 0 0 11/3 RP -| RM 10 0 1 10/2 RM -| RM 10 1 0 10/2 RM -| RM 10 1 1 11/3 RP -| RP 11 0 0 10/2 RM -| RP 11 0 1 11/3 RP -| RP 11 1 0 11/3 RP -| RP 11 1 1 10/2 RM -| -| Register usage: -| Input/Output -| d0: exponent/scratch - final is 0 -| d2: x/0 or 24 for A9 -| d3: x/scratch - offset ptr into PTENRM array -| d4: LEN/Unchanged -| d5: 0/ICTR:LAMBDA -| d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/ptr to PTENRM array -| a2: x/x -| fp0: float(ILOG)/Unchanged -| fp1: x/10^ISCALE -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A7_str: - tstl %d7 |test sign of k - bgts k_pos |if k > 0, skip this - cmpl %d6,%d7 |test k - ILOG - blts k_pos |if ILOG >= k, skip this - movel %d7,%d6 |if ((k<0) & (ILOG < k)) ILOG = k -k_pos: - movel %d6,%d0 |calc ILOG + 1 - LEN in d0 - addql #1,%d0 |add the 1 - subl %d4,%d0 |sub off LEN - swap %d5 |use upper word of d5 for LAMBDA - clrw %d5 |set it zero initially - clrw %d2 |set up d2 for very small case - tstl %d0 |test sign of ISCALE - bges iscale |if pos, skip next inst - addqw #1,%d5 |if neg, set LAMBDA true - cmpl #0xffffecd4,%d0 |test iscale <= -4908 - bgts no_inf |if false, skip rest - addil #24,%d0 |add in 24 to iscale - movel #24,%d2 |put 24 in d2 for A9 -no_inf: - negl %d0 |and take abs of ISCALE -iscale: - fmoves FONE,%fp1 |init fp1 to 1 - bfextu USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits - lslw #1,%d1 |put them in bits 2:1 - addw %d5,%d1 |add in LAMBDA - lslw #1,%d1 |put them in bits 3:1 - tstl L_SCR2(%a6) |test sign of original x - bges x_pos |if pos, don't set bit 0 - addql #1,%d1 |if neg, set bit 0 -x_pos: - leal RBDTBL,%a2 |load rbdtbl base - moveb (%a2,%d1),%d3 |load d3 with new rmode - lsll #4,%d3 |put bits in proper position - fmovel %d3,%fpcr |load bits into fpu - lsrl #4,%d3 |put bits in proper position - tstb %d3 |decode new rmode for pten table - bnes not_rn |if zero, it is RN - leal PTENRN,%a1 |load a1 with RN table base - bras rmode |exit decode -not_rn: - lsrb #1,%d3 |get lsb in carry - bccs not_rp |if carry clear, it is RM - leal PTENRP,%a1 |load a1 with RP table base - bras rmode |exit decode -not_rp: - leal PTENRM,%a1 |load a1 with RM table base -rmode: - clrl %d3 |clr table index -e_loop: - lsrl #1,%d0 |shift next bit into carry - bccs e_next |if zero, skip the mul - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -e_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if ISCALE is zero - bnes e_loop |if not, loop - - -| A8. Clr INEX; Force RZ. -| The operation in A3 above may have set INEX2. -| RZ mode is forced for the scaling operation to insure -| only one rounding error. The grs bits are collected in -| the INEX flag for use in A10. -| -| Register usage: -| Input/Output - - fmovel #0,%FPSR |clr INEX - fmovel #rz_mode,%FPCR |set RZ rounding mode - - -| A9. Scale X -> Y. -| The mantissa is scaled to the desired number of significant -| digits. The excess digits are collected in INEX2. If mul, -| Check d2 for excess 10 exponential value. If not zero, -| the iscale value would have caused the pwrten calculation -| to overflow. Only a negative iscale can cause this, so -| multiply by 10^(d2), which is now only allowed to be 24, -| with a multiply by 10^8 and 10^16, which is exact since -| 10^24 is exact. If the input was denormalized, we must -| create a busy stack frame with the mul command and the -| two operands, and allow the fpu to complete the multiply.
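Restated in C, the A6/A7 bookkeeping above is small: LEN from the k-factor and ILOG, ISCALE from ILOG and LEN, and the scale rounding mode from the sixteen-entry table. A sketch (names are illustrative; the ILOG-clamp special case for negative k is left to the caller):

#include <stdint.h>

/* A6: number of digits to produce.  OPERR is flagged only when a
 * positive k-factor asks for more than 17 digits. */
static int bindec_len(int k, int ilog, int *operr)
{
	int len = (k > 0) ? k : ilog + 1 - k;

	if (len < 1)
		len = 1;
	if (len > 17) {
		len = 17;
		if (k > 0)
			*operr = 1;
	}
	return len;
}

/* A7: decimal shift needed so that LEN integer digits remain. */
static int bindec_iscale(int ilog, int len)
{
	return ilog + 1 - len;
}

/* A7: scale rounding mode chosen from the FPCR mode, LAMBDA (the
 * sign of ISCALE) and sign(X); 0/2/3 select RN/RM/RP as tabulated. */
static const uint8_t rbdtbl[16] = {
	0, 0, 0, 0,
	3, 3, 2, 2,
	3, 2, 2, 3,
	2, 3, 3, 2,
};

static unsigned scale_rmode(unsigned fpcr_rm, int lambda, int x_neg)
{
	return rbdtbl[(fpcr_rm << 2) | (lambda << 1) | !!x_neg];
}

The table index is exactly the bit-packing the assembly builds in %d1: rounding mode in bits 3:2, LAMBDA in bit 1, the operand sign in bit 0.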
-| -| Register usage: -| Input/Output -| d0: FPCR with RZ mode/Unchanged -| d2: 0 or 24/unchanged -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: ptr to PTENRM array/Unchanged -| a2: x/x -| fp0: float(ILOG)/X adjusted for SCALE (Y) -| fp1: 10^ISCALE/Unchanged -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A9_str: - fmovex (%a0),%fp0 |load X from memory - fabsx %fp0 |use abs(X) - tstw %d5 |LAMBDA is in lower word of d5 - bne sc_mul |if neg (LAMBDA = 1), scale by mul - fdivx %fp1,%fp0 |calculate X / SCALE -> Y to fp0 - bras A10_st |branch to A10 - -sc_mul: - tstb BINDEC_FLG(%a6) |check for denorm - beqs A9_norm |if norm, continue with mul - fmovemx %fp1-%fp1,-(%a7) |load ETEMP with 10^ISCALE - movel 8(%a0),-(%a7) |load FPTEMP with input arg - movel 4(%a0),-(%a7) - movel (%a0),-(%a7) - movel #18,%d3 |load count for busy stack -A9_loop: - clrl -(%a7) |clear lword on stack - dbf %d3,A9_loop - moveb VER_TMP(%a6),(%a7) |write current version number - moveb #BUSY_SIZE-4,1(%a7) |write current busy size - moveb #0x10,0x44(%a7) |set fcefpte[15] bit - movew #0x0023,0x40(%a7) |load cmdreg1b with mul command - moveb #0xfe,0x8(%a7) |load all 1s to cu savepc - frestore (%a7)+ |restore frame to fpu for completion - fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 - fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 - bras A10_st -A9_norm: - tstw %d2 |test for small exp case - beqs A9_con |if zero, continue as normal - fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 - fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 -A9_con: - fmulx %fp1,%fp0 |calculate X * SCALE -> Y to fp0 - - -| A10. Or in INEX. -| If INEX is set, round error occurred. This is compensated -| for by 'or-ing' in the INEX2 flag to the lsb of Y. -| -| Register usage: -| Input/Output -| d0: FPCR with RZ mode/FPSR with INEX2 isolated -| d2: x/x -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: ptr to PTENxx array/Unchanged -| a2: x/ptr to FP_SCR2(a6) -| fp0: Y/Y with lsb adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: x/x - -A10_st: - fmovel %FPSR,%d0 |get FPSR - fmovex %fp0,FP_SCR2(%a6) |move Y to memory - leal FP_SCR2(%a6),%a2 |load a2 with ptr to FP_SCR2 - btstl #9,%d0 |check if INEX2 set - beqs A11_st |if clear, skip rest - oril #1,8(%a2) |or in 1 to lsb of mantissa - fmovex FP_SCR2(%a6),%fp0 |write adjusted Y back to fpu - - -| A11. Restore original FPCR; set size ext. -| Perform FINT operation in the user's rounding mode. Keep -| the size to extended. The sintdo entry point in the sint -| routine expects the FPCR value to be in USER_FPCR for -| mode and precision. The original FPCR is saved in L_SCR1. - -A11_st: - movel USER_FPCR(%a6),L_SCR1(%a6) |save it for later - andil #0x00000030,USER_FPCR(%a6) |set size to ext, -| ;block exceptions - - -| A12. Calculate YINT = FINT(Y) according to user's rounding mode. -| The FPSP routine sintd0 is used. The output is in fp0. 
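Stripped of the stack bookkeeping, A11 and A12 amount to a save/set/restore of the rounding mode around one round-to-integer. The portable C equivalent of that round-trip (a sketch only; the FPSP routes this through sintdo on its own frame):

#include <fenv.h>
#include <math.h>

/* A11/A12 sketch: YINT = FINT(Y) in the user's rounding mode,
 * with the previously active mode preserved. */
static double fint_user_mode(double y, int user_mode)
{
	int saved = fegetround();

	fesetround(user_mode);	/* e.g. FE_TONEAREST, FE_DOWNWARD */
	y = nearbyint(y);	/* round to integral, no inexact raised */
	fesetround(saved);
	return y;
}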
-| -| Register usage: -| Input/Output -| d0: FPSR with AINEX cleared/FPCR with size set to ext -| d2: x/x/scratch -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/Unchanged -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/src ptr for sintdo -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| a6: temp pointer to FP_SCR2(a6) - orig value saved and restored -| fp0: Y/YINT -| fp1: 10^ISCALE/Unchanged -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Y adjusted for inex/Y with original exponent -| L_SCR1:x/original USER_FPCR -| L_SCR2:first word of X packed/Unchanged - -A12_st: - moveml %d0-%d1/%a0-%a1,-(%a7) |save regs used by sintd0 - movel L_SCR1(%a6),-(%a7) - movel L_SCR2(%a6),-(%a7) - leal FP_SCR2(%a6),%a0 |a0 is ptr to F_SCR2(a6) - fmovex %fp0,(%a0) |move Y to memory at FP_SCR2(a6) - tstl L_SCR2(%a6) |test sign of original operand - bges do_fint |if pos, use Y - orl #0x80000000,(%a0) |if neg, use -Y -do_fint: - movel USER_FPSR(%a6),-(%a7) - bsr sintdo |sint routine returns int in fp0 - moveb (%a7),USER_FPSR(%a6) - addl #4,%a7 - movel (%a7)+,L_SCR2(%a6) - movel (%a7)+,L_SCR1(%a6) - moveml (%a7)+,%d0-%d1/%a0-%a1 |restore regs used by sint - movel L_SCR2(%a6),FP_SCR2(%a6) |restore original exponent - movel L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR - - -| A13. Check for LEN digits. -| If the int operation results in more than LEN digits, -| or less than LEN -1 digits, adjust ILOG and repeat from -| A6. This test occurs only on the first pass. If the -| result is exactly 10^LEN, decrement ILOG and divide -| the mantissa by 10. The calculation of 10^LEN cannot -| be inexact, since all powers of ten up to 10^27 are exact -| in extended precision, so the use of a previous power-of-ten -| table will introduce no error. -| -| -| Register usage: -| Input/Output -| d0: FPCR with size set to ext/scratch final = 0 -| d2: x/x -| d3: x/scratch final = x -| d4: LEN/LEN adjusted -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG/ILOG adjusted -| d7: k-factor/Unchanged -| a0: pointer into memory for packed bcd string formation -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: int portion of Y/abs(YINT) adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: x/10^LEN -| F_SCR1:x/x -| F_SCR2:Y with original exponent/Unchanged -| L_SCR1:original USER_FPCR/Unchanged -| L_SCR2:first word of X packed/Unchanged - -A13_st: - swap %d5 |put ICTR in lower word of d5 - tstw %d5 |check if ICTR = 0 - bne not_zr |if non-zero, go to second test -| -| Compute 10^(LEN-1) -| - fmoves FONE,%fp2 |init fp2 to 1.0 - movel %d4,%d0 |put LEN in d0 - subql #1,%d0 |d0 = LEN -1 - clrl %d3 |clr table index -l_loop: - lsrl #1,%d0 |shift next bit into carry - bccs l_next |if zero, skip the mul - fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) -l_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if LEN is zero - bnes l_loop |if not, loop -| -| 10^LEN-1 is computed for this test and A14. If the input was -| denormalized, check only the case in which YINT > 10^LEN. 
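The first-pass A13 checks boil down to three comparisons against 10^(LEN-1) and 10^LEN. A sketch, using double purely for illustration (the FPSP does this in extended precision, where the powers of ten involved are exact):

#include <math.h>

/* A13 sketch: force abs(YINT) into [10^(LEN-1), 10^LEN); when ILOG
 * had to move, the caller repeats from A6 with the new ILOG. */
static double fit_to_len(double yint, int len, int *ilog, int *redo)
{
	double lo = pow(10.0, len - 1);
	double hi = lo * 10.0;
	double a = fabs(yint);

	*redo = 0;
	if (a < lo) {			/* too few digits */
		(*ilog)--;
		*redo = 1;
	} else if (a > hi) {		/* too many digits */
		(*ilog)++;
		*redo = 1;
	} else if (a == hi) {		/* exactly 10^LEN */
		a /= 10.0;
		(*ilog)++;
	}
	return a;
}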
-| - tstb BINDEC_FLG(%a6) |check if input was norm - beqs A13_con |if norm, continue with checking - fabsx %fp0 |take abs of YINT - bra test_2 -| -| Compare abs(YINT) to 10^(LEN-1) and 10^LEN -| -A13_con: - fabsx %fp0 |take abs of YINT - fcmpx %fp2,%fp0 |compare abs(YINT) with 10^(LEN-1) - fbge test_2 |if greater, do next test - subql #1,%d6 |subtract 1 from ILOG - movew #1,%d5 |set ICTR - fmovel #rm_mode,%FPCR |set rmode to RM - fmuls FTEN,%fp2 |compute 10^LEN - bra A6_str |return to A6 and recompute YINT -test_2: - fmuls FTEN,%fp2 |compute 10^LEN - fcmpx %fp2,%fp0 |compare abs(YINT) with 10^LEN - fblt A14_st |if less, all is ok, go to A14 - fbgt fix_ex |if greater, fix and redo - fdivs FTEN,%fp0 |if equal, divide by 10 - addql #1,%d6 | and inc ILOG - bras A14_st | and continue elsewhere -fix_ex: - addql #1,%d6 |increment ILOG by 1 - movew #1,%d5 |set ICTR - fmovel #rm_mode,%FPCR |set rmode to RM - bra A6_str |return to A6 and recompute YINT -| -| Since ICTR <> 0, we have already been through one adjustment, -| and shouldn't have another; this is to check if abs(YINT) = 10^LEN -| 10^LEN is again computed using whatever table is in a1 since the -| value calculated cannot be inexact. -| -not_zr: - fmoves FONE,%fp2 |init fp2 to 1.0 - movel %d4,%d0 |put LEN in d0 - clrl %d3 |clr table index -z_loop: - lsrl #1,%d0 |shift next bit into carry - bccs z_next |if zero, skip the mul - fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) -z_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if LEN is zero - bnes z_loop |if not, loop - fabsx %fp0 |get abs(YINT) - fcmpx %fp2,%fp0 |check if abs(YINT) = 10^LEN - fbne A14_st |if not, skip this - fdivs FTEN,%fp0 |divide abs(YINT) by 10 - addql #1,%d6 |and inc ILOG by 1 - addql #1,%d4 | and inc LEN - fmuls FTEN,%fp2 | if LEN++, the get 10^^LEN - - -| A14. Convert the mantissa to bcd. -| The binstr routine is used to convert the LEN digit -| mantissa to bcd in memory. The input to binstr is -| to be a fraction; i.e. (mantissa)/10^LEN and adjusted -| such that the decimal point is to the left of bit 63. -| The bcd digits are stored in the correct position in -| the final string area in memory. 
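The binstr routine named above (described in full with binstr.S below) extracts decimal digits from a binary fraction by repeated multiplication by 10, computed as x*8 + x*2 so that the bits overflowing the top of the word form the digit. The same idea in C, leaning on a compiler-provided 128-bit product and putting the radix point at the very top of the 64-bit word (a mild simplification of binstr's decimal-point-before-bit-63 convention):

#include <stdint.h>

/* sketch: each x10 step pushes the most significant decimal digit
 * out of the top of the word; digits come out msd first. */
static void frac_to_digits(uint64_t frac, int len, uint8_t *digits)
{
	for (int i = 0; i < len; i++) {
		unsigned __int128 p = (unsigned __int128)frac * 10;

		digits[i] = (uint8_t)(p >> 64);	/* overflowed bits */
		frac = (uint64_t)p;		/* remaining fraction */
	}
}

binstr itself additionally packs the digits two per byte, with a leading zero nibble, as it stores them.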
-| -| -| Register usage: -| Input/Output -| d0: x/LEN call to binstr - final is 0 -| d1: x/0 -| d2: x/ms 32-bits of mant of abs(YINT) -| d3: x/ls 32-bits of mant of abs(YINT) -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG -| d7: k-factor/Unchanged -| a0: pointer into memory for packed bcd string formation -| /ptr to first mantissa byte in result string -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: int portion of Y/abs(YINT) adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:x/Work area for final result -| F_SCR2:Y with original exponent/Unchanged -| L_SCR1:original USER_FPCR/Unchanged -| L_SCR2:first word of X packed/Unchanged - -A14_st: - fmovel #rz_mode,%FPCR |force rz for conversion - fdivx %fp2,%fp0 |divide abs(YINT) by 10^LEN - leal FP_SCR1(%a6),%a0 - fmovex %fp0,(%a0) |move abs(YINT)/10^LEN to memory - movel 4(%a0),%d2 |move 2nd word of FP_RES to d2 - movel 8(%a0),%d3 |move 3rd word of FP_RES to d3 - clrl 4(%a0) |zero word 2 of FP_RES - clrl 8(%a0) |zero word 3 of FP_RES - movel (%a0),%d0 |move exponent to d0 - swap %d0 |put exponent in lower word - beqs no_sft |if zero, don't shift - subil #0x3ffd,%d0 |sub bias less 2 to make fract - tstl %d0 |check if > 1 - bgts no_sft |if so, don't shift - negl %d0 |make exp positive -m_loop: - lsrl #1,%d2 |shift d2:d3 right, add 0s - roxrl #1,%d3 |the number of places - dbf %d0,m_loop |given in d0 -no_sft: - tstl %d2 |check for mantissa of zero - bnes no_zr |if not, go on - tstl %d3 |continue zero check - beqs zer_m |if zero, go directly to binstr -no_zr: - clrl %d1 |put zero in d1 for addx - addil #0x00000080,%d3 |inc at bit 7 - addxl %d1,%d2 |continue inc - andil #0xffffff80,%d3 |strip off lsb not used by 882 -zer_m: - movel %d4,%d0 |put LEN in d0 for binstr call - addql #3,%a0 |a0 points to M16 byte in result - bsr binstr |call binstr to convert mant - - -| A15. Convert the exponent to bcd. -| As in A14 above, the exp is converted to bcd and the -| digits are stored in the final string. -| -| Digits are stored in L_SCR1(a6) on return from BINDEC as: -| -| 32 16 15 0 -| ----------------------------------------- -| | 0 | e3 | e2 | e1 | e4 | X | X | X | -| ----------------------------------------- -| -| And are moved into their proper places in FP_SCR1. If digit e4 -| is non-zero, OPERR is signaled. In all cases, all 4 digits are -| written as specified in the 881/882 manual for packed decimal. 
-| -| Register usage: -| Input/Output -| d0: x/LEN call to binstr - final is 0 -| d1: x/scratch (0);shift count for final exponent packing -| d2: x/ms 32-bits of exp fraction/scratch -| d3: x/ls 32-bits of exp fraction -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG -| d7: k-factor/Unchanged -| a0: ptr to result string/ptr to L_SCR1(a6) -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: abs(YINT) adjusted/float(ILOG) -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:Work area for final result/BCD result -| F_SCR2:Y with original exponent/ILOG/10^4 -| L_SCR1:original USER_FPCR/Exponent digits on return from binstr -| L_SCR2:first word of X packed/Unchanged - -A15_st: - tstb BINDEC_FLG(%a6) |check for denorm - beqs not_denorm - ftstx %fp0 |test for zero - fbeq den_zero |if zero, use k-factor or 4933 - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG - bras convrt -den_zero: - tstl %d7 |check sign of the k-factor - blts use_ilog |if negative, use ILOG - fmoves F4933,%fp0 |force exponent to 4933 - bras convrt |do it -use_ilog: - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG - bras convrt -not_denorm: - ftstx %fp0 |test for zero - fbne not_zero |if zero, force exponent - fmoves FONE,%fp0 |force exponent to 1 - bras convrt |do it -not_zero: - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG -convrt: - fdivx 24(%a1),%fp0 |compute ILOG/10^4 - fmovex %fp0,FP_SCR2(%a6) |store fp0 in memory - movel 4(%a2),%d2 |move word 2 to d2 - movel 8(%a2),%d3 |move word 3 to d3 - movew (%a2),%d0 |move exp to d0 - beqs x_loop_fin |if zero, skip the shift - subiw #0x3ffd,%d0 |subtract off bias - negw %d0 |make exp positive -x_loop: - lsrl #1,%d2 |shift d2:d3 right - roxrl #1,%d3 |the number of places - dbf %d0,x_loop |given in d0 -x_loop_fin: - clrl %d1 |put zero in d1 for addx - addil #0x00000080,%d3 |inc at bit 6 - addxl %d1,%d2 |continue inc - andil #0xffffff80,%d3 |strip off lsb not used by 882 - movel #4,%d0 |put 4 in d0 for binstr call - leal L_SCR1(%a6),%a0 |a0 is ptr to L_SCR1 for exp digits - bsr binstr |call binstr to convert exp - movel L_SCR1(%a6),%d0 |load L_SCR1 lword to d0 - movel #12,%d1 |use d1 for shift count - lsrl %d1,%d0 |shift d0 right by 12 - bfins %d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1 - lsrl %d1,%d0 |shift d0 right by 12 - bfins %d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1 - tstb %d0 |check if e4 is zero - beqs A16_st |if zero, skip rest - orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR - - -| A16. Write sign bits to final string. -| Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). 
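A16 itself is only two sign tests and a two-bit insert. In C terms (SM from the mantissa sign into bit 7 of the first result byte, SE from the sign of ILOG into bit 6, matching the packed-decimal format):

/* A16 sketch: write SM:SE into the top two bits of the packed
 * result; the rest of the first nibble is cleared first. */
static void write_sign_bits(unsigned char *result, int mant_neg, int exp_neg)
{
	unsigned bits = (mant_neg ? 2u : 0u) | (exp_neg ? 1u : 0u);

	result[0] = (result[0] & 0x0f) | (bits << 6);
}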
-| -| Register usage: -| Input/Output -| d0: x/scratch - final is x -| d2: x/x -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG/ILOG adjusted -| d7: k-factor/Unchanged -| a0: ptr to L_SCR1(a6)/Unchanged -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: float(ILOG)/Unchanged -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:BCD result with correct signs -| F_SCR2:ILOG/10^4 -| L_SCR1:Exponent digits on return from binstr -| L_SCR2:first word of X packed/Unchanged - -A16_st: - clrl %d0 |clr d0 for collection of signs - andib #0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1 - tstl L_SCR2(%a6) |check sign of original mantissa - bges mant_p |if pos, don't set SM - moveql #2,%d0 |move 2 in to d0 for SM -mant_p: - tstl %d6 |check sign of ILOG - bges wr_sgn |if pos, don't set SE - addql #1,%d0 |set bit 0 in d0 for SE -wr_sgn: - bfins %d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1 - -| Clean up and restore all registers used. - - fmovel #0,%FPSR |clear possible inex2/ainex bits - fmovemx (%a7)+,%fp0-%fp2 - moveml (%a7)+,%d2-%d7/%a2 - rts - - |end diff --git a/arch/m68k/fpsp040/binstr.S b/arch/m68k/fpsp040/binstr.S deleted file mode 100644 index 8a05ba92a8a0f1d493c447044c4d3ef4c7f72c97..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/binstr.S +++ /dev/null @@ -1,139 +0,0 @@ -| -| binstr.sa 3.3 12/19/90 -| -| -| Description: Converts a 64-bit binary integer to bcd. -| -| Input: 64-bit binary integer in d2:d3, desired length (LEN) in -| d0, and a pointer to start in memory for bcd characters -| in d0. (This pointer must point to byte 4 of the first -| lword of the packed decimal memory string.) -| -| Output: LEN bcd digits representing the 64-bit integer. -| -| Algorithm: -| The 64-bit binary is assumed to have a decimal point before -| bit 63. The fraction is multiplied by 10 using a mul by 2 -| shift and a mul by 8 shift. The bits shifted out of the -| msb form a decimal digit. This process is iterated until -| LEN digits are formed. -| -| A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the -| digit formed will be assumed the least significant. This is -| to force the first byte formed to have a 0 in the upper 4 bits. -| -| A2. Beginning of the loop: -| Copy the fraction in d2:d3 to d4:d5. -| -| A3. Multiply the fraction in d2:d3 by 8 using bit-field -| extracts and shifts. The three msbs from d2 will go into -| d1. -| -| A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb -| will be collected by the carry. -| -| A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 -| into d2:d3. D1 will contain the bcd digit formed. -| -| A6. Test d7. If zero, the digit formed is the ms digit. If non- -| zero, it is the ls digit. Put the digit in its place in the -| upper word of d0. If it is the ls digit, write the word -| from d0 to memory. -| -| A7. Decrement d6 (LEN counter) and repeat the loop until zero. -| -| Implementation Notes: -| -| The registers are used as follows: -| -| d0: LEN counter -| d1: temp used to form the digit -| d2: upper 32-bits of fraction for mul by 8 -| d3: lower 32-bits of fraction for mul by 8 -| d4: upper 32-bits of fraction for mul by 2 -| d5: lower 32-bits of fraction for mul by 2 -| d6: temp for bit-field extracts -| d7: byte digit formation word;digit count {0,1} -| a0: pointer into memory for packed bcd string formation -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .global binstr -binstr: - moveml %d0-%d7,-(%a7) -| -| A1: Init d7 -| - moveql #1,%d7 |init d7 for second digit - subql #1,%d0 |for dbf d0 would have LEN+1 passes -| -| A2. Copy d2:d3 to d4:d5. Start loop. -| -loop: - movel %d2,%d4 |copy the fraction before muls - movel %d3,%d5 |to d4:d5 -| -| A3. Multiply d2:d3 by 8; extract msbs into d1. -| - bfextu %d2{#0:#3},%d1 |copy 3 msbs of d2 into d1 - asll #3,%d2 |shift d2 left by 3 places - bfextu %d3{#0:#3},%d6 |copy 3 msbs of d3 into d6 - asll #3,%d3 |shift d3 left by 3 places - orl %d6,%d2 |or in msbs from d3 into d2 -| -| A4. Multiply d4:d5 by 2; add carry out to d1. -| - asll #1,%d5 |mul d5 by 2 - roxll #1,%d4 |mul d4 by 2 - swap %d6 |put 0 in d6 lower word - addxw %d6,%d1 |add in extend from mul by 2 -| -| A5. Add mul by 8 to mul by 2. D1 contains the digit formed. -| - addl %d5,%d3 |add lower 32 bits - nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) - addxl %d4,%d2 |add with extend upper 32 bits - nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) - addxw %d6,%d1 |add in extend from add to d1 - swap %d6 |with d6 = 0; put 0 in upper word -| -| A6. Test d7 and branch. -| - tstw %d7 |if zero, store digit & to loop - beqs first_d |if non-zero, form byte & write -sec_d: - swap %d7 |bring first digit to word d7b - aslw #4,%d7 |first digit in upper 4 bits d7b - addw %d1,%d7 |add in ls digit to d7b - moveb %d7,(%a0)+ |store d7b byte in memory - swap %d7 |put LEN counter in word d7a - clrw %d7 |set d7a to signal no digits done - dbf %d0,loop |do loop some more! - bras end_bstr |finished, so exit -first_d: - swap %d7 |put digit word in d7b - movew %d1,%d7 |put new digit in d7b - swap %d7 |put LEN counter in word d7a - addqw #1,%d7 |set d7a to signal first digit done - dbf %d0,loop |do loop some more! - swap %d7 |put last digit in string - lslw #4,%d7 |move it to upper 4 bits - moveb %d7,(%a0)+ |store it in memory string -| -| Clean up and return with result in fp0. -| -end_bstr: - moveml (%a7)+,%d0-%d7 - rts - |end diff --git a/arch/m68k/fpsp040/bugfix.S b/arch/m68k/fpsp040/bugfix.S deleted file mode 100644 index 3bb9c84bb0582d1d012ff40be4ef7bc6d02f57d0..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/bugfix.S +++ /dev/null @@ -1,495 +0,0 @@ -| -| bugfix.sa 3.2 1/31/91 -| -| -| This file contains workarounds for bugs in the 040 -| relating to the Floating-Point Software Package (FPSP) -| -| Fixes for bugs: 1238 -| -| Bug: 1238 -| -| -| /* The following dirty_bit clear should be left in -| * the handler permanently to improve throughput. -| * The dirty_bits are located at bits [23:16] in -| * longword $08 in the busy frame $4x60. Bit 16 -| * corresponds to FP0, bit 17 corresponds to FP1, -| * and so on. -| */ -| if (E3_exception_just_serviced) { -| dirty_bit[cmdreg3b[9:7]] = 0; -| } -| -| if (fsave_format_version != $40) {goto NOFIX} -| -| if !(E3_exception_just_serviced) {goto NOFIX} -| if (cupc == 0000000) {goto NOFIX} -| if ((cmdreg1b[15:13] != 000) && -| (cmdreg1b[15:10] != 010001)) {goto NOFIX} -| if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) && -| (cmdreg1b[12:10] != cmdreg3b[9:7])) ) && -| ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) && -| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX} -| -| /* Note: for 6d43b or 8d43b, you may want to add the following code -| * to get better coverage. 
(If you do not insert this code, the part -| * won't lock up; it will simply get the wrong answer.) -| * Do NOT insert this code for 10d43b or later parts. -| * -| * if (fpiarcu == integer stack return address) { -| * cupc = 0000000; -| * goto NOFIX; -| * } -| */ -| -| if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2} -| FIX_OPCLASS0: -| if (((cmdreg1b[12:10] == cmdreg2b[9:7]) || -| (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) && -| (cmdreg1b[12:10] != cmdreg3b[9:7]) && -| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */ -| /* We execute the following code if there is an -| xu conflict and NOT an nu conflict */ -| -| /* first save some values on the fsave frame */ -| stag_temp = STAG[fsave_frame]; -| cmdreg1b_temp = CMDREG1B[fsave_frame]; -| dtag_temp = DTAG[fsave_frame]; -| ete15_temp = ETE15[fsave_frame]; -| -| CUPC[fsave_frame] = 0000000; -| FRESTORE -| FSAVE -| -| /* If the xu instruction is exceptional, we punt. -| * Otherwise, we would have to include OVFL/UNFL handler -| * code here to get the correct answer. -| */ -| if (fsave_frame_format == $4060) {goto KILL_PROCESS} -| -| fsave_frame = /* build a long frame of all zeros */ -| fsave_frame_format = $4060; /* label it as long frame */ -| -| /* load it with the temps we saved */ -| STAG[fsave_frame] = stag_temp; -| CMDREG1B[fsave_frame] = cmdreg1b_temp; -| DTAG[fsave_frame] = dtag_temp; -| ETE15[fsave_frame] = ete15_temp; -| -| /* Make sure that the cmdreg3b dest reg is not going to -| * be destroyed by a FMOVEM at the end of all this code. -| * If it is, you should move the current value of the reg -| * onto the stack so that the reg will loaded with that value. -| */ -| -| /* All done. Proceed with the code below */ -| } -| -| etemp = FP_reg_[cmdreg1b[12:10]]; -| ete15 = ~ete14; -| cmdreg1b[15:10] = 010010; -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| -| -| FIX_OPCLASS2: -| if ((cmdreg1b[9:7] == cmdreg2b[9:7]) && -| (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */ -| /* We execute the following code if there is an -| xu conflict and NOT an nu conflict */ -| -| /* first save some values on the fsave frame */ -| stag_temp = STAG[fsave_frame]; -| cmdreg1b_temp = CMDREG1B[fsave_frame]; -| dtag_temp = DTAG[fsave_frame]; -| ete15_temp = ETE15[fsave_frame]; -| etemp_temp = ETEMP[fsave_frame]; -| -| CUPC[fsave_frame] = 0000000; -| FRESTORE -| FSAVE -| -| -| /* If the xu instruction is exceptional, we punt. -| * Otherwise, we would have to include OVFL/UNFL handler -| * code here to get the correct answer. -| */ -| if (fsave_frame_format == $4060) {goto KILL_PROCESS} -| -| fsave_frame = /* build a long frame of all zeros */ -| fsave_frame_format = $4060; /* label it as long frame */ -| -| /* load it with the temps we saved */ -| STAG[fsave_frame] = stag_temp; -| CMDREG1B[fsave_frame] = cmdreg1b_temp; -| DTAG[fsave_frame] = dtag_temp; -| ETE15[fsave_frame] = ete15_temp; -| ETEMP[fsave_frame] = etemp_temp; -| -| /* Make sure that the cmdreg3b dest reg is not going to -| * be destroyed by a FMOVEM at the end of all this code. -| * If it is, you should move the current value of the reg -| * onto the stack so that the reg will loaded with that value. -| */ -| -| /* All done. Proceed with the code below */ -| } -| -| if (etemp_exponent == min_sgl) etemp_exponent = min_dbl; -| if (etemp_exponent == max_sgl) etemp_exponent = max_dbl; -| cmdreg1b[15:10] = 010101; -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| -| -| NOFIX: -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| - - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref fpsp_fmt_error - - .global b1238_fix -b1238_fix: -| -| This code is entered only on completion of the handling of an -| nu-generated ovfl, unfl, or inex exception. If the version -| number of the fsave is not $40, this handler is not necessary. -| Simply branch to fix_done and exit normally. -| - cmpib #VER_40,4(%a7) - bne fix_done -| -| Test for cu_savepc equal to zero. If not, this is not a bug -| #1238 case. -| - moveb CU_SAVEPC(%a6),%d0 - andib #0xFE,%d0 - beq fix_done |if zero, this is not bug #1238 - -| -| Test the register conflict aspect. If opclass0, check for -| cu src equal to xu dest or equal to nu dest. If so, go to -| op0. Else, or if opclass2, check for cu dest equal to -| xu dest or equal to nu dest. If so, go to tst_opcl. Else, -| exit, it is not the bug case. -| -| Check for opclass 0. If not, go and check for opclass 2 and sgl. -| - movew CMDREG1B(%a6),%d0 - andiw #0xE000,%d0 |strip all but opclass - bne op2sgl |not opclass 0, check op2 -| -| Check for cu and nu register conflict. If one exists, this takes -| priority over a cu and xu conflict. -| - bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src - bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest - cmpb %d0,%d1 - beqs op0 |if equal, continue bugfix -| -| Check for cu dest equal to nu dest. If so, go and fix the -| bug condition. Otherwise, exit. -| - bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest - cmpb %d0,%d1 |cmp 1st dest with 3rd dest - beqs op0 |if equal, continue bugfix -| -| Check for cu and xu register conflict. -| - bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest - cmpb %d0,%d1 |cmp 1st dest with 2nd dest - beqs op0_xu |if equal, continue bugfix - bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src - cmpb %d0,%d1 |cmp 1st src with 2nd dest - beq op0_xu - bne fix_done |if the reg checks fail, exit -| -| We have the opclass 0 situation. -| -op0: - bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx %d0,ETEMP(%a6) |load source to ETEMP - - moveb #0x12,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended -| -| Set ETEMP exponent bit 15 as the opposite of ete14 -| - btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 - beq setete15 - bclr #etemp15_bit,STAG(%a6) - bra finish -setete15: - bset #etemp15_bit,STAG(%a6) - bra finish - -| -| We have the case in which a conflict exists between the cu src or -| dest and the dest of the xu. We must clear the instruction in -| the cu and restore the state, allowing the instruction in the -| xu to complete. Remember, the instruction in the nu -| was exceptional, and was completed by the appropriate handler. -| If the result of the xu instruction is not exceptional, we can -| restore the instruction from the cu to the frame and continue -| processing the original exception. If the result is also -| exceptional, we choose to kill the process. -| -| Items saved from the stack: -| -| $3c stag - L_SCR1 -| $40 cmdreg1b - L_SCR2 -| $44 dtag - L_SCR3 -| -| The cu savepc is set to zero, and the frame is restored to the -| fpu. 
-| -op0_xu: - movel STAG(%a6),L_SCR1(%a6) - movel CMDREG1B(%a6),L_SCR2(%a6) - movel DTAG(%a6),L_SCR3(%a6) - andil #0xe0000000,L_SCR3(%a6) - moveb #0,CU_SAVEPC(%a6) - movel (%a7)+,%d1 |save return address from bsr - frestore (%a7)+ - fsave -(%a7) -| -| Check if the instruction which just completed was exceptional. -| - cmpw #0x4060,(%a7) - beq op0_xb -| -| It is necessary to isolate the result of the instruction in the -| xu if it is to fp0 - fp3 and write that value to the USER_FPn -| locations on the stack. The correct destination register is in -| cmdreg2b. -| - bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no - cmpil #3,%d0 - bgts op0_xi - beqs op0_fp3 - cmpil #1,%d0 - blts op0_fp0 - beqs op0_fp1 -op0_fp2: - fmovemx %fp2-%fp2,USER_FP2(%a6) - bras op0_xi -op0_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - bras op0_xi -op0_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - bras op0_xi -op0_fp3: - fmovemx %fp3-%fp3,USER_FP3(%a6) -| -| The frame returned is idle. We must build a busy frame to hold -| the cu state information and setup etemp. -| -op0_xi: - movel #22,%d0 |clear 23 lwords - clrl (%a7) -op0_loop: - clrl -(%a7) - dbf %d0,op0_loop - movel #0x40600000,-(%a7) - movel L_SCR1(%a6),STAG(%a6) - movel L_SCR2(%a6),CMDREG1B(%a6) - movel L_SCR3(%a6),DTAG(%a6) - moveb #0x6,CU_SAVEPC(%a6) - movel %d1,-(%a7) |return bsr return address - bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx %d0,ETEMP(%a6) |load source to ETEMP - - moveb #0x12,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended -| -| Set ETEMP exponent bit 15 as the opposite of ete14 -| - btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 - beq op0_sete15 - bclr #etemp15_bit,STAG(%a6) - bra finish -op0_sete15: - bset #etemp15_bit,STAG(%a6) - bra finish - -| -| The frame returned is busy. It is not possible to reconstruct -| the code sequence to allow completion. We will jump to -| fpsp_fmt_error and allow the kernel to kill the process. -| -op0_xb: - jmp fpsp_fmt_error - -| -| Check for opclass 2 and single size. If not both, exit. -| -op2sgl: - movew CMDREG1B(%a6),%d0 - andiw #0xFC00,%d0 |strip all but opclass and size - cmpiw #0x4400,%d0 |test for opclass 2 and size=sgl - bne fix_done |if not, it is not bug 1238 -| -| Check for cu dest equal to nu dest or equal to xu dest, with -| a cu and nu conflict taking priority an nu conflict. If either, -| go and fix the bug condition. Otherwise, exit. -| - bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest - bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest - cmpb %d0,%d1 |cmp 1st dest with 3rd dest - beq op2_com |if equal, continue bugfix - bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest - cmpb %d0,%d1 |cmp 1st dest with 2nd dest - bne fix_done |if the reg checks fail, exit -| -| We have the case in which a conflict exists between the cu src or -| dest and the dest of the xu. We must clear the instruction in -| the cu and restore the state, allowing the instruction in the -| xu to complete. Remember, the instruction in the nu -| was exceptional, and was completed by the appropriate handler. -| If the result of the xu instruction is not exceptional, we can -| restore the instruction from the cu to the frame and continue -| processing the original exception. If the result is also -| exceptional, we choose to kill the process. -| -| Items saved from the stack: -| -| $3c stag - L_SCR1 -| $40 cmdreg1b - L_SCR2 -| $44 dtag - L_SCR3 -| etemp - FP_SCR2 -| -| The cu savepc is set to zero, and the frame is restored to the -| fpu. 
-| -op2_xu: - movel STAG(%a6),L_SCR1(%a6) - movel CMDREG1B(%a6),L_SCR2(%a6) - movel DTAG(%a6),L_SCR3(%a6) - andil #0xe0000000,L_SCR3(%a6) - moveb #0,CU_SAVEPC(%a6) - movel ETEMP(%a6),FP_SCR2(%a6) - movel ETEMP_HI(%a6),FP_SCR2+4(%a6) - movel ETEMP_LO(%a6),FP_SCR2+8(%a6) - movel (%a7)+,%d1 |save return address from bsr - frestore (%a7)+ - fsave -(%a7) -| -| Check if the instruction which just completed was exceptional. -| - cmpw #0x4060,(%a7) - beq op2_xb -| -| It is necessary to isolate the result of the instruction in the -| xu if it is to fp0 - fp3 and write that value to the USER_FPn -| locations on the stack. The correct destination register is in -| cmdreg2b. -| - bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no - cmpil #3,%d0 - bgts op2_xi - beqs op2_fp3 - cmpil #1,%d0 - blts op2_fp0 - beqs op2_fp1 -op2_fp2: - fmovemx %fp2-%fp2,USER_FP2(%a6) - bras op2_xi -op2_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - bras op2_xi -op2_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - bras op2_xi -op2_fp3: - fmovemx %fp3-%fp3,USER_FP3(%a6) -| -| The frame returned is idle. We must build a busy frame to hold -| the cu state information and fix up etemp. -| -op2_xi: - movel #22,%d0 |clear 23 lwords - clrl (%a7) -op2_loop: - clrl -(%a7) - dbf %d0,op2_loop - movel #0x40600000,-(%a7) - movel L_SCR1(%a6),STAG(%a6) - movel L_SCR2(%a6),CMDREG1B(%a6) - movel L_SCR3(%a6),DTAG(%a6) - moveb #0x6,CU_SAVEPC(%a6) - movel FP_SCR2(%a6),ETEMP(%a6) - movel FP_SCR2+4(%a6),ETEMP_HI(%a6) - movel FP_SCR2+8(%a6),ETEMP_LO(%a6) - movel %d1,-(%a7) - bra op2_com - -| -| We have the opclass 2 single source situation. -| -op2_com: - moveb #0x15,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, double - - cmpw #0x407F,ETEMP_EX(%a6) |single +max - bnes case2 - movew #0x43FF,ETEMP_EX(%a6) |to double +max - bra finish -case2: - cmpw #0xC07F,ETEMP_EX(%a6) |single -max - bnes case3 - movew #0xC3FF,ETEMP_EX(%a6) |to double -max - bra finish -case3: - cmpw #0x3F80,ETEMP_EX(%a6) |single +min - bnes case4 - movew #0x3C00,ETEMP_EX(%a6) |to double +min - bra finish -case4: - cmpw #0xBF80,ETEMP_EX(%a6) |single -min - bne fix_done - movew #0xBC00,ETEMP_EX(%a6) |to double -min - bra finish -| -| The frame returned is busy. It is not possible to reconstruct -| the code sequence to allow completion. fpsp_fmt_error causes -| an fline illegal instruction to be executed. -| -| You should replace the jump to fpsp_fmt_error with a jump -| to the entry point used to kill a process. -| -op2_xb: - jmp fpsp_fmt_error - -| -| Enter here if the case is not of the situations affected by -| bug #1238, or if the fix is completed, and exit. -| -finish: -fix_done: - rts - - |end diff --git a/arch/m68k/fpsp040/decbin.S b/arch/m68k/fpsp040/decbin.S deleted file mode 100644 index 16ed796bad87f375b98e21e5804aa375c4648858..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/decbin.S +++ /dev/null @@ -1,505 +0,0 @@ -| -| decbin.sa 3.3 12/19/90 -| -| Description: Converts normalized packed bcd value pointed to by -| register A6 to extended-precision value in FP0. -| -| Input: Normalized packed bcd value in ETEMP(a6). -| -| Output: Exact floating-point representation of the packed bcd value. -| -| Saves and Modifies: D2-D5 -| -| Speed: The program decbin takes ??? cycles to execute. -| -| Object Size: -| -| External Reference(s): None. -| -| Algorithm: -| Expected is a normal bcd (i.e. non-exceptional; all inf, zero, -| and NaN operands are dispatched without entering this routine) -| value in 68881/882 format at location ETEMP(A6). -| -| A1. 
Convert the bcd exponent to binary by successive adds and muls. -| Set the sign according to SE. Subtract 16 to compensate -| for the mantissa which is to be interpreted as 17 integer -| digits, rather than 1 integer and 16 fraction digits. -| Note: this operation can never overflow. -| -| A2. Convert the bcd mantissa to binary by successive -| adds and muls in FP0. Set the sign according to SM. -| The mantissa digits will be converted with the decimal point -| assumed following the least-significant digit. -| Note: this operation can never overflow. -| -| A3. Count the number of leading/trailing zeros in the -| bcd string. If SE is positive, count the leading zeros; -| if negative, count the trailing zeros. Set the adjusted -| exponent equal to the exponent from A1 and the zero count -| added if SM = 1 and subtracted if SM = 0. Scale the -| mantissa the equivalent of forcing in the bcd value: -| -| SM = 0 a non-zero digit in the integer position -| SM = 1 a non-zero digit in Mant0, lsd of the fraction -| -| this will insure that any value, regardless of its -| representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted -| consistently. -| -| A4. Calculate the factor 10^exp in FP1 using a table of -| 10^(2^n) values. To reduce the error in forming factors -| greater than 10^27, a directed rounding scheme is used with -| tables rounded to RN, RM, and RP, according to the table -| in the comments of the pwrten section. -| -| A5. Form the final binary number by scaling the mantissa by -| the exponent factor. This is done by multiplying the -| mantissa in FP0 by the factor in FP1 if the adjusted -| exponent sign is positive, and dividing FP0 by FP1 if -| it is negative. -| -| Clean up and return. Check if the final mul or div resulted -| in an inex2 exception. If so, set inex1 in the fpsr and -| check if the inex1 exception is enabled. If so, set d7 upper -| word to $0100. This will signal unimp.sa that an enabled inex1 -| exception occurred. Unimp will fix the stack. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -| -| PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded -| to nearest, minus, and plus, respectively. The tables include -| 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding -| is required until the power is greater than 27, however, all -| tables include the first 5 for ease of indexing. -| - |xref PTENRN - |xref PTENRM - |xref PTENRP - -RTABLE: .byte 0,0,0,0 - .byte 2,3,2,3 - .byte 2,3,3,2 - .byte 3,2,2,3 - - .global decbin - .global calc_e - .global pwrten - .global calc_m - .global norm - .global ap_st_z - .global ap_st_n -| - .set FNIBS,7 - .set FSTRT,0 -| - .set ESTRT,4 - .set EDIGITS,2 | -| -| Constants in single precision -FZERO: .long 0x00000000 -FONE: .long 0x3F800000 -FTEN: .long 0x41200000 - - .set TEN,10 - -| -decbin: - | fmovel #0,FPCR ;clr real fpcr - moveml %d2-%d5,-(%a7) -| -| Calculate exponent: -| 1. Copy bcd value in memory for use as a working copy. -| 2. Calculate absolute value of exponent in d1 by mul and add. -| 3. Correct for exponent sign. -| 4. Subtract 16 to compensate for interpreting the mant as all integer digits. -| (i.e., all digits assumed left of the decimal point.) 
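Steps 1 through 4 of the exponent calculation condense to a few lines of C (a sketch; nibble positions follow the 68881/882 packed-decimal layout used above):

#include <stdint.h>

/* calc_e sketch: three BCD exponent digits sit in nibbles 6..4 of
 * the first long word, most significant first; bit 30 is SE. */
static int bcd_exponent(uint32_t word0)
{
	int e = 0;

	for (int shift = 24; shift >= 16; shift -= 4)
		e = e * 10 + (int)((word0 >> shift) & 0xf);
	if (word0 & 0x40000000)		/* SE: exponent sign */
		e = -e;
	return e - 16;	/* mantissa treated as 17 integer digits */
}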
-| -| Register usage: -| -| calc_e: -| (*) d0: temp digit storage -| (*) d1: accumulator for binary exponent -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: first word of bcd -| ( ) a0: pointer to working bcd value -| ( ) a6: pointer to original bcd value -| (*) FP_SCR1: working copy of original bcd value -| (*) L_SCR1: copy of original exponent word -| -calc_e: - movel #EDIGITS,%d2 |# of nibbles (digits) in fraction part - moveql #ESTRT,%d3 |counter to pick up digits - leal FP_SCR1(%a6),%a0 |load tmp bcd storage address - movel ETEMP(%a6),(%a0) |save input bcd value - movel ETEMP_HI(%a6),4(%a0) |save words 2 and 3 - movel ETEMP_LO(%a6),8(%a0) |and work with these - movel (%a0),%d4 |get first word of bcd - clrl %d1 |zero d1 for accumulator -e_gd: - mulul #TEN,%d1 |mul partial product by one digit place - bfextu %d4{%d3:#4},%d0 |get the digit and zero extend into d0 - addl %d0,%d1 |d1 = d1 + d0 - addqb #4,%d3 |advance d3 to the next digit - dbf %d2,e_gd |if we have used all 3 digits, exit loop - btst #30,%d4 |get SE - beqs e_pos |don't negate if pos - negl %d1 |negate before subtracting -e_pos: - subl #16,%d1 |sub to compensate for shift of mant - bges e_save |if still pos, do not neg - negl %d1 |now negative, make pos and set SE - orl #0x40000000,%d4 |set SE in d4, - orl #0x40000000,(%a0) |and in working bcd -e_save: - movel %d1,L_SCR1(%a6) |save exp in memory -| -| -| Calculate mantissa: -| 1. Calculate absolute value of mantissa in fp0 by mul and add. -| 2. Correct for mantissa sign. -| (i.e., all digits assumed left of the decimal point.) -| -| Register usage: -| -| calc_m: -| (*) d0: temp digit storage -| (*) d1: lword counter -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: words 2 and 3 of bcd -| ( ) a0: pointer to working bcd value -| ( ) a6: pointer to original bcd value -| (*) fp0: mantissa accumulator -| ( ) FP_SCR1: working copy of original bcd value -| ( ) L_SCR1: copy of original exponent word -| -calc_m: - moveql #1,%d1 |word counter, init to 1 - fmoves FZERO,%fp0 |accumulator -| -| -| Since the packed number has a long word between the first & second parts, -| get the integer digit then skip down & get the rest of the -| mantissa. We will unroll the loop once. -| - bfextu (%a0){#28:#4},%d0 |integer part is ls digit in long word - faddb %d0,%fp0 |add digit to sum in fp0 -| -| -| Get the rest of the mantissa. -| -loadlw: - movel (%a0,%d1.L*4),%d4 |load mantissa longword into d4 - moveql #FSTRT,%d3 |counter to pick up digits - moveql #FNIBS,%d2 |reset number of digits per a0 ptr -md2b: - fmuls FTEN,%fp0 |fp0 = fp0 * 10 - bfextu %d4{%d3:#4},%d0 |get the digit and zero extend - faddb %d0,%fp0 |fp0 = fp0 + digit -| -| -| If all the digits (8) in that long word have been converted (d2=0), -| then inc d1 (=2) to point to the next long word and reset d3 to 0 -| to initialize the digit offset, and set d2 to 7 for the digit count; -| else continue with this long word. -| - addqb #4,%d3 |advance d3 to the next digit - dbf %d2,md2b |check for last digit in this lw -nextlw: - addql #1,%d1 |inc lw pointer in mantissa - cmpl #2,%d1 |test for last lw - ble loadlw |if not, get last one - -| -| Check the sign of the mant and make the value in fp0 the same sign. 
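With the m_sign step folded in, calc_m is a most-significant-digit-first accumulation of the 17 mantissa digits. A sketch (long double stands in for the 68881 extended format; the accumulation is exact wherever long double carries a 64-bit significand, since 17 decimal digits fit in 57 bits):

#include <stdint.h>

/* calc_m sketch: one integer digit in the low nibble of word 0,
 * sixteen fraction digits msd-first in words 1 and 2; bit 31 is SM. */
static long double bcd_mantissa(const uint32_t w[3])
{
	long double m = (long double)(w[0] & 0xf);

	for (int lw = 1; lw <= 2; lw++)
		for (int shift = 28; shift >= 0; shift -= 4)
			m = m * 10.0L + (long double)((w[lw] >> shift) & 0xf);
	return (w[0] & 0x80000000) ? -m : m;
}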
-| -m_sign: - btst #31,(%a0) |test sign of the mantissa - beq ap_st_z |if clear, go to append/strip zeros - fnegx %fp0 |if set, negate fp0 - -| -| Append/strip zeros: -| -| For adjusted exponents which have an absolute value greater than 27*, -| this routine calculates the amount needed to normalize the mantissa -| for the adjusted exponent. That number is subtracted from the exp -| if the exp was positive, and added if it was negative. The purpose -| of this is to reduce the value of the exponent and the possibility -| of error in calculation of pwrten. -| -| 1. Branch on the sign of the adjusted exponent. -| 2p.(positive exp) -| 2. Check M16 and the digits in lwords 2 and 3 in descending order. -| 3. Add one for each zero encountered until a non-zero digit. -| 4. Subtract the count from the exp. -| 5. Check if the exp has crossed zero in #3 above; make the exp abs -| and set SE. -| 6. Multiply the mantissa by 10**count. -| 2n.(negative exp) -| 2. Check the digits in lwords 3 and 2 in descending order. -| 3. Add one for each zero encountered until a non-zero digit. -| 4. Add the count to the exp. -| 5. Check if the exp has crossed zero in #3 above; clear SE. -| 6. Divide the mantissa by 10**count. -| -| *Why 27? If the adjusted exponent is within -28 < expA < 28, than -| any adjustment due to append/strip zeros will drive the resultant -| exponent towards zero. Since all pwrten constants with a power -| of 27 or less are exact, there is no need to use this routine to -| attempt to lessen the resultant exponent. -| -| Register usage: -| -| ap_st_z: -| (*) d0: temp digit storage -| (*) d1: zero count -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: first word of bcd -| (*) d5: lword counter -| ( ) a0: pointer to working bcd value -| ( ) FP_SCR1: working copy of original bcd value -| ( ) L_SCR1: copy of original exponent word -| -| -| First check the absolute value of the exponent to see if this -| routine is necessary. If so, then check the sign of the exponent -| and do append (+) or strip (-) zeros accordingly. -| This section handles a positive adjusted exponent. -| -ap_st_z: - movel L_SCR1(%a6),%d1 |load expA for range test - cmpl #27,%d1 |test is with 27 - ble pwrten |if abs(expA) <28, skip ap/st zeros - btst #30,(%a0) |check sign of exp - bne ap_st_n |if neg, go to neg side - clrl %d1 |zero count reg - movel (%a0),%d4 |load lword 1 to d4 - bfextu %d4{#28:#4},%d0 |get M16 in d0 - bnes ap_p_fx |if M16 is non-zero, go fix exp - addql #1,%d1 |inc zero count - moveql #1,%d5 |init lword counter - movel (%a0,%d5.L*4),%d4 |get lword 2 to d4 - bnes ap_p_cl |if lw 2 is zero, skip it - addql #8,%d1 |and inc count by 8 - addql #1,%d5 |inc lword counter - movel (%a0,%d5.L*4),%d4 |get lword 3 to d4 -ap_p_cl: - clrl %d3 |init offset reg - moveql #7,%d2 |init digit counter -ap_p_gd: - bfextu %d4{%d3:#4},%d0 |get digit - bnes ap_p_fx |if non-zero, go to fix exp - addql #4,%d3 |point to next digit - addql #1,%d1 |inc digit counter - dbf %d2,ap_p_gd |get next digit -ap_p_fx: - movel %d1,%d0 |copy counter to d2 - movel L_SCR1(%a6),%d1 |get adjusted exp from memory - subl %d0,%d1 |subtract count from exp - bges ap_p_fm |if still pos, go to pwrten - negl %d1 |now its neg; get abs - movel (%a0),%d4 |load lword 1 to d4 - orl #0x40000000,%d4 | and set SE in d4 - orl #0x40000000,(%a0) | and in memory -| -| Calculate the mantissa multiplier to compensate for the striping of -| zeros from the mantissa. 
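For the positive-exponent side just described, the zero count is a plain msd-first scan over the digits (the negative-exponent side at ap_st_n scans from the least significant end instead). A sketch:

#include <stdint.h>

/* ap_st_z sketch: count leading zero digits, starting with the
 * integer digit M16, then msd-first through both fraction words. */
static int leading_zero_digits(const uint32_t w[3])
{
	int count;

	if (w[0] & 0xf)			/* integer digit non-zero */
		return 0;
	count = 1;
	for (int lw = 1; lw <= 2; lw++)
		for (int shift = 28; shift >= 0; shift -= 4) {
			if ((w[lw] >> shift) & 0xf)
				return count;
			count++;
		}
	return count;
}

The count is then subtracted from the adjusted exponent and compensated by scaling the mantissa by 10^count, exactly as ap_p_fx above and ap_p_fm below do.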
-| -ap_p_fm: - movel #PTENRN,%a1 |get address of power-of-ten table - clrl %d3 |init table index - fmoves FONE,%fp1 |init fp1 to 1 - moveql #3,%d2 |init d2 to count bits in counter -ap_p_el: - asrl #1,%d0 |shift lsb into carry - bccs ap_p_en |if 1, mul fp1 by pwrten factor - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -ap_p_en: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes ap_p_el |if not, get next bit - fmulx %fp1,%fp0 |mul mantissa by 10**(no_bits_shifted) - bra pwrten |go calc pwrten -| -| This section handles a negative adjusted exponent. -| -ap_st_n: - clrl %d1 |clr counter - moveql #2,%d5 |set up d5 to point to lword 3 - movel (%a0,%d5.L*4),%d4 |get lword 3 - bnes ap_n_cl |if not zero, check digits - subl #1,%d5 |dec d5 to point to lword 2 - addql #8,%d1 |inc counter by 8 - movel (%a0,%d5.L*4),%d4 |get lword 2 -ap_n_cl: - movel #28,%d3 |point to last digit - moveql #7,%d2 |init digit counter -ap_n_gd: - bfextu %d4{%d3:#4},%d0 |get digit - bnes ap_n_fx |if non-zero, go to exp fix - subql #4,%d3 |point to previous digit - addql #1,%d1 |inc digit counter - dbf %d2,ap_n_gd |get next digit -ap_n_fx: - movel %d1,%d0 |copy counter to d0 - movel L_SCR1(%a6),%d1 |get adjusted exp from memory - subl %d0,%d1 |subtract count from exp - bgts ap_n_fm |if still pos, go fix mantissa - negl %d1 |take abs of exp and clr SE - movel (%a0),%d4 |load lword 1 to d4 - andl #0xbfffffff,%d4 | and clr SE in d4 - andl #0xbfffffff,(%a0) | and in memory -| -| Calculate the mantissa multiplier to compensate for the appending of -| zeros to the mantissa. -| -ap_n_fm: - movel #PTENRN,%a1 |get address of power-of-ten table - clrl %d3 |init table index - fmoves FONE,%fp1 |init fp1 to 1 - moveql #3,%d2 |init d2 to count bits in counter -ap_n_el: - asrl #1,%d0 |shift lsb into carry - bccs ap_n_en |if 1, mul fp1 by pwrten factor - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -ap_n_en: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes ap_n_el |if not, get next bit - fdivx %fp1,%fp0 |div mantissa by 10**(no_bits_shifted) -| -| -| Calculate power-of-ten factor from adjusted and shifted exponent. 
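pwrten, documented next, first remaps the user's rounding mode so that the power-of-ten constant is rounded in the direction that compensates for the later multiply or divide. The table below reduces to a small rule, sketched here in C (sm and se are the sign bits as 0/1; the enum follows the FPCR rounding-mode encoding):

enum rmode { RN, RZ, RM, RP };

static enum rmode pwrten_mode(enum rmode user, int sm, int se)
{
	switch (user) {
	case RN: return RN;			/* RN is never remapped */
	case RP: return (sm ^ se) ? RM : RP;	/* flip when the signs differ */
	case RM: return (sm ^ se) ? RP : RM;
	default: return se ? RP : RM;		/* RZ: neg exp -> RP, pos -> RM */
	}
}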
-| -| Register usage: -| -| pwrten: -| (*) d0: temp -| ( ) d1: exponent -| (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp -| (*) d3: FPCR work copy -| ( ) d4: first word of bcd -| (*) a1: RTABLE pointer -| calc_p: -| (*) d0: temp -| ( ) d1: exponent -| (*) d3: PWRTxx table index -| ( ) a0: pointer to working copy of bcd -| (*) a1: PWRTxx pointer -| (*) fp1: power-of-ten accumulator -| -| Pwrten calculates the exponent factor in the selected rounding mode -| according to the following table: -| -| Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode -| -| ANY ANY RN RN -| -| + + RP RP -| - + RP RM -| + - RP RM -| - - RP RP -| -| + + RM RM -| - + RM RP -| + - RM RP -| - - RM RM -| -| + + RZ RM -| - + RZ RM -| + - RZ RP -| - - RZ RP -| -| -pwrten: - movel USER_FPCR(%a6),%d3 |get user's FPCR - bfextu %d3{#26:#2},%d2 |isolate rounding mode bits - movel (%a0),%d4 |reload 1st bcd word to d4 - asll #2,%d2 |format d2 to be - bfextu %d4{#0:#2},%d0 | {FPCR[6],FPCR[5],SM,SE} - addl %d0,%d2 |in d2 as index into RTABLE - leal RTABLE,%a1 |load rtable base - moveb (%a1,%d2),%d0 |load new rounding bits from table - clrl %d3 |clear d3 to force no exc and extended - bfins %d0,%d3{#26:#2} |stuff new rounding bits in FPCR - fmovel %d3,%FPCR |write new FPCR - asrl #1,%d0 |write correct PTENxx table - bccs not_rp |to a1 - leal PTENRP,%a1 |it is RP - bras calc_p |go to init section -not_rp: - asrl #1,%d0 |keep checking - bccs not_rm - leal PTENRM,%a1 |it is RM - bras calc_p |go to init section -not_rm: - leal PTENRN,%a1 |it is RN -calc_p: - movel %d1,%d0 |copy exp to d0;use d0 - bpls no_neg |if exp is negative, - negl %d0 |invert it - orl #0x40000000,(%a0) |and set SE bit -no_neg: - clrl %d3 |table index - fmoves FONE,%fp1 |init fp1 to 1 -e_loop: - asrl #1,%d0 |shift next bit into carry - bccs e_next |if zero, skip the mul - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -e_next: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes e_loop |not zero, continue shifting -| -| -| Check the sign of the adjusted exp and make the value in fp0 the -| same sign. If the exp was pos then multiply fp1*fp0; -| else divide fp0/fp1. -| -| Register Usage: -| norm: -| ( ) a0: pointer to working bcd value -| (*) fp0: mantissa accumulator -| ( ) fp1: scaling factor - 10**(abs(exp)) -| -norm: - btst #30,(%a0) |test the sign of the exponent - beqs mul |if clear, go to multiply -div: - fdivx %fp1,%fp0 |exp is negative, so divide mant by exp - bras end_dec -mul: - fmulx %fp1,%fp0 |exp is positive, so multiply by exp -| -| -| Clean up and return with result in fp0. -| -| If the final mul/div in decbin incurred an inex exception, -| it will be inex2, but will be reported as inex1 by get_op. -| -end_dec: - fmovel %FPSR,%d0 |get status register - bclrl #inex2_bit+8,%d0 |test for inex2 and clear it - fmovel %d0,%FPSR |return status reg w/o inex2 - beqs no_exc |skip this if no exc - orl #inx1a_mask,USER_FPSR(%a6) |set inex1/ainex -no_exc: - moveml (%a7)+,%d2-%d5 - rts - |end diff --git a/arch/m68k/fpsp040/do_func.S b/arch/m68k/fpsp040/do_func.S deleted file mode 100644 index 3eff99a804138c39232ed25c05dc6ec7cbeb5b56..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/do_func.S +++ /dev/null @@ -1,558 +0,0 @@ -| -| do_func.sa 3.4 2/18/91 -| -| Do_func performs the unimplemented operation. The operation -| to be performed is determined from the lower 7 bits of the -| extension word (except in the case of fmovecr and fsincos). 
-| The opcode and tag bits form an index into a jump table in -| tbldo.sa. Cases of zero, infinity and NaN are handled in -| do_func by forcing the default result. Normalized and -| denormalized (there are no unnormalized numbers at this -| point) are passed onto the emulation code. -| -| CMDREG1B and STAG are extracted from the fsave frame -| and combined to form the table index. The function called -| will start with a0 pointing to the ETEMP operand. Dyadic -| functions can find FPTEMP at -12(a0). -| -| Called functions return their result in fp0. Sincos returns -| sin(x) in fp0 and cos(x) in fp1. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -DO_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref t_dz2 - |xref t_operr - |xref t_inx2 - |xref t_resdnrm - |xref dst_nan - |xref src_nan - |xref nrm_set - |xref sto_cos - - |xref tblpre - |xref slognp1,slogn,slog10,slog2 - |xref slognd,slog10d,slog2d - |xref smod,srem - |xref sscale - |xref smovcr - -PONE: .long 0x3fff0000,0x80000000,0x00000000 |+1 -MONE: .long 0xbfff0000,0x80000000,0x00000000 |-1 -PZERO: .long 0x00000000,0x00000000,0x00000000 |+0 -MZERO: .long 0x80000000,0x00000000,0x00000000 |-0 -PINF: .long 0x7fff0000,0x00000000,0x00000000 |+inf -MINF: .long 0xffff0000,0x00000000,0x00000000 |-inf -QNAN: .long 0x7fff0000,0xffffffff,0xffffffff |non-signaling nan -PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 |+PI/2 -MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 |-PI/2 - - .global do_func -do_func: - clrb CU_ONLY(%a6) -| -| Check for fmovecr. It does not follow the format of fp gen -| unimplemented instructions. The test is on the upper 6 bits; -| if they are $17, the inst is fmovecr. Call entry smovcr -| directly. -| - bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields - cmpil #0x17,%d0 |if op class and size fields are $17, -| ;it is FMOVECR; if not, continue - bnes not_fmovecr - jmp smovcr |fmovecr; jmp directly to emulation - -not_fmovecr: - movew CMDREG1B(%a6),%d0 - andl #0x7F,%d0 - cmpil #0x38,%d0 |if the extension is >= $38, - bge serror |it is illegal - bfextu STAG(%a6){#0:#3},%d1 - lsll #3,%d0 |make room for STAG - addl %d1,%d0 |combine for final index into table - leal tblpre,%a1 |start of monster jump table - movel (%a1,%d0.w*4),%a1 |real target address - leal ETEMP(%a6),%a0 |a0 is pointer to src op - movel USER_FPCR(%a6),%d1 - andl #0xFF,%d1 | discard all but rounding mode/prec - fmovel #0,%fpcr - jmp (%a1) -| -| ERROR -| - .global serror -serror: - st STORE_FLG(%a6) - rts -| -| These routines load forced values into fp0. They are called -| by index into tbldo. 
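The dispatch described at the top of this file (the low extension-word bits combined with the 3-bit source tag into an index into tbldo's jump table) looks like this in C. The handler names and the single shared row are illustrative stand-ins; the real table has one 8-slot row per operation:

#include <stdio.h>

typedef void (*fp_func)(void);
static void emulate(void)       { puts("norm/denorm: emulation code"); }
static void force_default(void) { puts("zero/inf/nan: forced default"); }

/* one 8-slot row, indexed by source tag:
 * norm=0, zero=1, inf=2, nan=3, denorm=4 (slots 5-7 are padding) */
static const fp_func row[8] = {
	emulate, force_default, force_default, force_default,
	emulate, force_default, force_default, force_default,
};

static void dispatch(unsigned short ext_word, unsigned int stag)
{
	unsigned int op = ext_word & 0x7f;	/* low 7 bits: the operation */
	if (op >= 0x38) {			/* illegal extension: serror */
		puts("serror");
		return;
	}
	/* the asm index is (op << 3) + stag into the full table */
	row[stag & 7]();
}

int main(void) { dispatch(0x0a, 0); return 0; }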
-| -| Load a signed zero to fp0 and set inex2/ainex -| - .global snzrinx -snzrinx: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bnes ld_mzinx |if negative, branch - bsr ld_pzero |bsr so we can return and set inx - bra t_inx2 |now, set the inx for the next inst -ld_mzinx: - bsr ld_mzero |if neg, load neg zero, return here - bra t_inx2 |now, set the inx for the next inst -| -| Load a signed zero to fp0; do not set inex2/ainex -| - .global szero -szero: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bne ld_mzero |if neg, load neg zero - bra ld_pzero |load positive zero -| -| Load a signed infinity to fp0; do not set inex2/ainex -| - .global sinf -sinf: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bne ld_minf |if negative branch - bra ld_pinf -| -| Load a signed one to fp0; do not set inex2/ainex -| - .global sone -sone: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mone - bra ld_pone -| -| Load a signed pi/2 to fp0; do not set inex2/ainex -| - .global spi_2 -spi_2: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mpi2 - bra ld_ppi2 -| -| Load either a +0 or +inf for plus/minus operand -| - .global szr_inf -szr_inf: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_pzero - bra ld_pinf -| -| Result is either an operr or +inf for plus/minus operand -| [Used by slogn, slognp1, slog10, and slog2] -| - .global sopr_inf -sopr_inf: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne t_operr - bra ld_pinf -| -| FLOGNP1 -| - .global sslognp1 -sslognp1: - fmovemx (%a0),%fp0-%fp0 - fcmpb #-1,%fp0 - fbgt slognp1 - fbeq t_dz2 |if = -1, divide by zero exception - fmovel #0,%FPSR |clr N flag - bra t_operr |take care of operands < -1 -| -| FETOXM1 -| - .global setoxm1i -setoxm1i: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mone - bra ld_pinf -| -| FLOGN -| -| Test for 1.0 as an input argument, returning +zero. Also check -| the sign and return operr if negative. 
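Stated in C, the check the sslogn code below performs is simply: negative operands are an operand error, an input of exactly +1.0 returns +0, and everything else falls through to the log emulation. A sketch (zeros, infinities and NaNs never reach this point; they were already dispatched by tag):

#include <math.h>

static double flogn_norm(double x)
{
	if (x < 0.0)		/* negative operand -> t_operr (default NaN) */
		return nan("");
	if (x == 1.0)		/* ln(1) is exactly +0, no inex2 */
		return +0.0;
	return log(x);		/* the slogn emulation proper */
}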
-| - .global sslogn -sslogn: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slogn - cmpil #0x80000000,LOCAL_HI(%a0) - bne slogn - tstl LOCAL_LO(%a0) - bne slogn - fmovex PZERO,%fp0 - rts - - .global sslognd -sslognd: - btstb #sign_bit,LOCAL_EX(%a0) - beq slognd - bra t_operr |take care of operands < 0 - -| -| FLOG10 -| - .global sslog10 -sslog10: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slog10 - cmpil #0x80000000,LOCAL_HI(%a0) - bne slog10 - tstl LOCAL_LO(%a0) - bne slog10 - fmovex PZERO,%fp0 - rts - - .global sslog10d -sslog10d: - btstb #sign_bit,LOCAL_EX(%a0) - beq slog10d - bra t_operr |take care of operands < 0 - -| -| FLOG2 -| - .global sslog2 -sslog2: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slog2 - cmpil #0x80000000,LOCAL_HI(%a0) - bne slog2 - tstl LOCAL_LO(%a0) - bne slog2 - fmovex PZERO,%fp0 - rts - - .global sslog2d -sslog2d: - btstb #sign_bit,LOCAL_EX(%a0) - beq slog2d - bra t_operr |take care of operands < 0 - -| -| FMOD -| -pmodt: -| ;$21 fmod -| ;dtag,stag - .long smod | 00,00 norm,norm = normal - .long smod_oper | 00,01 norm,zero = nan with operr - .long smod_fpn | 00,10 norm,inf = fpn - .long smod_snan | 00,11 norm,nan = nan - .long smod_zro | 01,00 zero,norm = +-zero - .long smod_oper | 01,01 zero,zero = nan with operr - .long smod_zro | 01,10 zero,inf = +-zero - .long smod_snan | 01,11 zero,nan = nan - .long smod_oper | 10,00 inf,norm = nan with operr - .long smod_oper | 10,01 inf,zero = nan with operr - .long smod_oper | 10,10 inf,inf = nan with operr - .long smod_snan | 10,11 inf,nan = nan - .long smod_dnan | 11,00 nan,norm = nan - .long smod_dnan | 11,01 nan,zero = nan - .long smod_dnan | 11,10 nan,inf = nan - .long smod_dnan | 11,11 nan,nan = nan - - .global pmod -pmod: - clrb FPSR_QBYTE(%a6) | clear quotient field - bfextu STAG(%a6){#0:#3},%d0 |stag = d0 - bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1 - -| -| Alias extended denorms to norms for the jump table. 
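The pmodt table above is a 4x4 dispatch keyed by the two 2-bit tags; the aliasing code that follows merely folds denorms onto the norm row before indexing. As a C construct (handler names are illustrative), with ((dtag << 2) | stag) reproducing the lslb/orb computation:

enum tag { T_NORM, T_ZERO, T_INF, T_NAN };	/* after denorm aliasing */

typedef int (*fmod_case)(void);
static int do_smod(void)  { return 0; }		/* normal computation */
static int do_operr(void) { return 1; }		/* nan with operand error */
static int do_fpn(void)   { return 2; }		/* return the dest operand */
static int do_nan(void)   { return 3; }		/* propagate the nan */
static int do_zero(void)  { return 4; }		/* signed zero result */

static const fmod_case pmodt_c[16] = {
	/* stag: norm    zero      inf       nan         dtag */
	do_smod,  do_operr, do_fpn,   do_nan,	/* norm */
	do_zero,  do_operr, do_zero,  do_nan,	/* zero */
	do_operr, do_operr, do_operr, do_nan,	/* inf  */
	do_nan,   do_nan,   do_nan,   do_nan,	/* nan  */
};

static int fmod_dispatch(enum tag dtag, enum tag stag)
{
	return pmodt_c[((unsigned)dtag << 2) | (unsigned)stag]();
}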
-|
-	bclrl	#2,%d0
-	bclrl	#2,%d1
-
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;Tag values:
-|	;00 = norm or denorm
-|	;01 = zero
-|	;10 = inf
-|	;11 = nan
-	lea	pmodt,%a1
-	movel	(%a1,%d1.w*4),%a1
-	jmp	(%a1)
-
-smod_snan:
-	bra	src_nan
-smod_dnan:
-	bra	dst_nan
-smod_oper:
-	bra	t_operr
-smod_zro:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	smod_zsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-smod_zsn:
-	btstl	#7,%d0		|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|else neg load -0
-
-smod_fpn:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	smod_fsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-smod_fsn:
-	tstb	DTAG(%a6)	|filter out denormal destination case
-	bpls	smod_nrm	|
-	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
-	bra	t_resdnrm	|force UNFL(but exact) result
-smod_nrm:
-	fmovel	USER_FPCR(%a6),%fpcr	|use user's rmode and precision
-	fmovex	FPTEMP(%a6),%fp0	|return dest to fp0
-	rts
-
-|
-| FREM
-|
-premt:
-|	;$25 frem
-|	;dtag,stag
-	.long	srem		| 00,00 norm,norm = normal
-	.long	srem_oper	| 00,01 norm,zero = nan with operr
-	.long	srem_fpn	| 00,10 norm,inf = fpn
-	.long	srem_snan	| 00,11 norm,nan = nan
-	.long	srem_zro	| 01,00 zero,norm = +-zero
-	.long	srem_oper	| 01,01 zero,zero = nan with operr
-	.long	srem_zro	| 01,10 zero,inf = +-zero
-	.long	srem_snan	| 01,11 zero,nan = nan
-	.long	srem_oper	| 10,00 inf,norm = nan with operr
-	.long	srem_oper	| 10,01 inf,zero = nan with operr
-	.long	srem_oper	| 10,10 inf,inf = nan with operr
-	.long	srem_snan	| 10,11 inf,nan = nan
-	.long	srem_dnan	| 11,00 nan,norm = nan
-	.long	srem_dnan	| 11,01 nan,zero = nan
-	.long	srem_dnan	| 11,10 nan,inf = nan
-	.long	srem_dnan	| 11,11 nan,nan = nan
-
-	.global	prem
-prem:
-	clrb	FPSR_QBYTE(%a6)	|clear quotient field
-	bfextu	STAG(%a6){#0:#3},%d0	|stag = d0
-	bfextu	DTAG(%a6){#0:#3},%d1	|dtag = d1
-|
-| Alias extended denorms to norms for the jump table.
-|
-	bclr	#2,%d0
-	bclr	#2,%d1
-
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;Tag values:
-|	;00 = norm or denorm
-|	;01 = zero
-|	;10 = inf
-|	;11 = nan
-	lea	premt,%a1
-	movel	(%a1,%d1.w*4),%a1
-	jmp	(%a1)
-
-srem_snan:
-	bra	src_nan
-srem_dnan:
-	bra	dst_nan
-srem_oper:
-	bra	t_operr
-srem_zro:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	srem_zsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-srem_zsn:
-	btstl	#7,%d0		|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|else neg load -0
-
-srem_fpn:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	srem_fsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-srem_fsn:
-	tstb	DTAG(%a6)	|filter out denormal destination case
-	bpls	srem_nrm	|
-	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
-	bra	t_resdnrm	|force UNFL(but exact) result
-srem_nrm:
-	fmovel	USER_FPCR(%a6),%fpcr	|use user's rmode and precision
-	fmovex	FPTEMP(%a6),%fp0	|return dest to fp0
-	rts
-|
-| FSCALE
-|
-pscalet:
-|	;$26 fscale
-|	;dtag,stag
-	.long	sscale		| 00,00 norm,norm = result
-	.long	sscale		| 00,01 norm,zero = fpn
-	.long	scl_opr		| 00,10 norm,inf = nan with operr
-	.long	scl_snan	| 00,11 norm,nan = nan
-	.long	scl_zro		| 01,00 zero,norm = +-zero
-	.long	scl_zro		| 01,01 zero,zero = +-zero
-	.long	scl_opr		| 01,10 zero,inf = nan with operr
-	.long	scl_snan	| 01,11 zero,nan = nan
-	.long	scl_inf		| 10,00 inf,norm = +-inf
-	.long	scl_inf		| 10,01 inf,zero = +-inf
-	.long	scl_opr		| 10,10 inf,inf = nan with operr
-	.long	scl_snan	| 10,11 inf,nan = nan
-	.long	scl_dnan	| 11,00 nan,norm = nan
-	.long	scl_dnan	| 11,01 nan,zero = nan
-	.long	scl_dnan	| 11,10 nan,inf = nan
-	.long	scl_dnan	| 11,11 nan,nan = nan
-
-	.global	pscale
-pscale:
-	bfextu	STAG(%a6){#0:#3},%d0	|stag in d0
-	bfextu	DTAG(%a6){#0:#3},%d1	|dtag in d1
-	bclrl	#2,%d0		|alias denorm into norm
-	bclrl	#2,%d1		|alias denorm into norm
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;dtag values	stag values:
-|	;000 = norm	00 = norm
-|	;001 = zero	01 = zero
-|	;010 = inf	10 = inf
-|	;011 = nan	11 = nan
-|	;100 = dnrm
-|
-|
-	leal	pscalet,%a1	|load start of jump table
-	movel	(%a1,%d1.w*4),%a1	|load a1 with label depending on tag
-	jmp	(%a1)		|go to the routine
-
-scl_opr:
-	bra	t_operr
-
-scl_dnan:
-	bra	dst_nan
-
-scl_zro:
-	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|if neg then load -0
-scl_inf:
-	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
-	beq	ld_pinf		|if pos then load +inf
-	bra	ld_minf		|else neg load -inf
-scl_snan:
-	bra	src_nan
-|
-| FSINCOS
-|
-	.global	ssincosz
-ssincosz:
-	btstb	#sign_bit,ETEMP(%a6)	|get sign
-	beqs	sincosp
-	fmovex	MZERO,%fp0
-	bras	sincoscom
-sincosp:
-	fmovex	PZERO,%fp0
-sincoscom:
-	fmovemx	PONE,%fp1-%fp1	|do not allow FPSR to be affected
-	bra	sto_cos		|store cosine result
-
-	.global	ssincosi
-ssincosi:
-	fmovex	QNAN,%fp1	|load NAN
-	bsr	sto_cos		|store cosine result
-	fmovex	QNAN,%fp0	|load NAN
-	bra	t_operr
-
-	.global	ssincosnan
-ssincosnan:
-	movel	ETEMP_EX(%a6),FP_SCR1(%a6)
-	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)
-	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)
-	bsetb	#signan_bit,FP_SCR1+4(%a6)
-	fmovemx	FP_SCR1(%a6),%fp1-%fp1
-	bsr	sto_cos
-	bra	src_nan
-|
-| This code forces default values for the
zero, inf, and nan cases -| in the transcendentals code. The CC bits must be set in the -| stacked FPSR to be correctly reported. -| -|**Returns +PI/2 - .global ld_ppi2 -ld_ppi2: - fmovex PPIBY2,%fp0 |load +pi/2 - bra t_inx2 |set inex2 exc - -|**Returns -PI/2 - .global ld_mpi2 -ld_mpi2: - fmovex MPIBY2,%fp0 |load -pi/2 - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra t_inx2 |set inex2 exc - -|**Returns +inf - .global ld_pinf -ld_pinf: - fmovex PINF,%fp0 |load +inf - orl #inf_mask,USER_FPSR(%a6) |set I bit - rts - -|**Returns -inf - .global ld_minf -ld_minf: - fmovex MINF,%fp0 |load -inf - orl #neg_mask+inf_mask,USER_FPSR(%a6) |set N and I bits - rts - -|**Returns +1 - .global ld_pone -ld_pone: - fmovex PONE,%fp0 |load +1 - rts - -|**Returns -1 - .global ld_mone -ld_mone: - fmovex MONE,%fp0 |load -1 - orl #neg_mask,USER_FPSR(%a6) |set N bit - rts - -|**Returns +0 - .global ld_pzero -ld_pzero: - fmovex PZERO,%fp0 |load +0 - orl #z_mask,USER_FPSR(%a6) |set Z bit - rts - -|**Returns -0 - .global ld_mzero -ld_mzero: - fmovex MZERO,%fp0 |load -0 - orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z bits - rts - - |end diff --git a/arch/m68k/fpsp040/gen_except.S b/arch/m68k/fpsp040/gen_except.S deleted file mode 100644 index 3642cb7e3641748743a799d164dd941be57aff3e..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/gen_except.S +++ /dev/null @@ -1,467 +0,0 @@ -| -| gen_except.sa 3.7 1/16/92 -| -| gen_except --- FPSP routine to detect reportable exceptions -| -| This routine compares the exception enable byte of the -| user_fpcr on the stack with the exception status byte -| of the user_fpsr. -| -| Any routine which may report an exceptions must load -| the stack frame in memory with the exceptional operand(s). -| -| Priority for exceptions is: -| -| Highest: bsun -| snan -| operr -| ovfl -| unfl -| dz -| inex2 -| Lowest: inex1 -| -| Note: The IEEE standard specifies that inex2 is to be -| reported if ovfl occurs and the ovfl enable bit is not -| set but the inex2 enable bit is. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
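The priority scheme above comes down to one operation: AND the FPCR enable byte with the FPSR exception byte and take the most significant surviving bit, which is what the bfffo instruction in the code below does. A C sketch with the byte layout as documented (bit 7 = bsun down to bit 0 = inex1); the example bytes are hypothetical:

#include <stdio.h>

static const char *const exc_name[8] = {
	"inex1", "inex2", "dz", "unfl", "ovfl", "operr", "snan", "bsun",
};

static int first_reportable(unsigned char enable, unsigned char status)
{
	unsigned char hits = enable & status;	/* andb, as in the code below */
	for (int bit = 7; bit >= 0; bit--)	/* bfffo: highest bit wins */
		if (hits & (1u << bit))
			return bit;
	return -1;				/* the no_match path */
}

int main(void)
{
	int b = first_reportable(0x14, 0x0c);	/* hypothetical bytes */
	puts(b < 0 ? "none" : exc_name[b]);	/* prints "dz" */
	return 0;
}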
- -GEN_EXCEPT: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_trace - |xref fpsp_done - |xref fpsp_fmt_error - -exc_tbl: - .long bsun_exc - .long commonE1 - .long commonE1 - .long ovfl_unfl - .long ovfl_unfl - .long commonE1 - .long commonE3 - .long commonE3 - .long no_match - - .global gen_except -gen_except: - cmpib #IDLE_SIZE-4,1(%a7) |test for idle frame - beq do_check |go handle idle frame - cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame - beqs unimp_x |go handle unimp frame - cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame - beqs unimp_x |go handle unimp frame - cmpib #BUSY_SIZE-4,1(%a7) |if size <> $60, fmt error - bnel fpsp_fmt_error - leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h -| ;equates will work -| Fix up the new busy frame with entries from the unimp frame -| - movel ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp - movel ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame - movel ETEMP_LO(%a6),ETEMP_LO(%a1) - movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp - movel CMDREG1B(%a6),%d0 |fix cmd1b to make it - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a1) |in the busy frame -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) - movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits - orl #sx_mask,E_BYTE(%a1) - bra do_clean - -| -| Frame is an unimp frame possible resulting from an fmove ,fp0 -| that caused an exception -| -| a1 is modified to point into the new frame allowing fpsp equates -| to be valid. -| -unimp_x: - cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame - bnes test_rev - leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1 - bras unimp_con -test_rev: - cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if not $28 or $30 - leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1 - -unimp_con: -| -| Fix up the new unimp frame with entries from the old unimp frame -| - movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) - bra do_clean - -| -| Frame is idle, so check for exceptions reported through -| USER_FPSR and set the unimp frame accordingly. -| A7 must be incremented to the point before the -| idle fsave vector to the unimp vector. -| - -do_check: - addl #4,%a7 |point A7 back to unimp frame -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) -| -| On a busy frame, we must clear the nmnexc bits. 
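The CMDREG1B-to-CMDREG3B rewrite performed above (and repeated below wherever a busy frame is built) is easier to follow in C: keep the bits common to both command-word layouts, then relocate two small bit fields into their busy-frame positions. Bit numbering follows the bfextu offsets, which count from the most significant bit of the longword:

static unsigned int cmd1b_to_cmd3b(unsigned int cmd1b)
{
	unsigned int cmd3b = cmd1b & 0x03c30000u;	/* bits shared by both layouts */
	unsigned int bit2  = (cmd1b >> 18) & 0x1;	/* bfextu {13:1}: bit 31-13 */
	unsigned int b345  = (cmd1b >> 19) & 0x7;	/* bfextu {10:3}: bits 21..19 */
	cmd3b |= bit2 << 21;				/* lsll #5 then swap */
	cmd3b |= b345 << 18;				/* lsll #2 then swap */
	return cmd3b;
}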
-| - cmpib #BUSY_SIZE-4,1(%a7) |check frame type - bnes check_fr |if busy, clr nmnexc - clrw NMNEXC(%a6) |clr nmnexc & nmcexc - btstb #5,CMDREG1B(%a6) |test for fmove out - bnes frame_com - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits - orl #sx_mask,E_BYTE(%a6) - bras frame_com -check_fr: - cmpb #UNIMP_40_SIZE-4,1(%a7) - beqs frame_com - clrw NMNEXC(%a6) -frame_com: - moveb FPCR_ENABLE(%a6),%d0 |get fpcr enable byte - andb FPSR_EXCEPT(%a6),%d0 |and in the fpsr exc byte - bfffo %d0{#24:#8},%d1 |test for first set bit - leal exc_tbl,%a0 |load jmp table address - subib #24,%d1 |normalize bit offset to 0-8 - movel (%a0,%d1.w*4),%a0 |load routine address based -| ;based on first enabled exc - jmp (%a0) |jump to routine -| -| Bsun is not possible in unimp or unsupp -| -bsun_exc: - bra do_clean -| -| The typical work to be done to the unimp frame to report an -| exception is to set the E1/E3 byte and clr the U flag. -| commonE1 does this for E1 exceptions, which are snan, -| operr, and dz. commonE3 does this for E3 exceptions, which -| are inex2 and inex1, and also clears the E1 exception bit -| left over from the unimp exception. -| -commonE1: - bsetb #E1,E_BYTE(%a6) |set E1 flag - bra commonE |go clean and exit - -commonE3: - tstb UFLG_TMP(%a6) |test flag for unsup/unimp state - bnes unsE3 -uniE3: - bsetb #E3,E_BYTE(%a6) |set E3 flag - bclrb #E1,E_BYTE(%a6) |clr E1 from unimp - bra commonE - -unsE3: - tstb RES_FLG(%a6) - bnes unsE3_0 -unsE3_1: - bsetb #E3,E_BYTE(%a6) |set E3 flag -unsE3_0: - bclrb #E1,E_BYTE(%a6) |clr E1 flag - movel CMDREG1B(%a6),%d0 - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a6) |in the busy frame - -commonE: - bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp - bra do_clean |go clean and exit -| -| No bits in the enable byte match existing exceptions. Check for -| the case of the ovfl exc without the ovfl enabled, but with -| inex2 enabled. -| -no_match: - btstb #inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case - beqs no_exc |if clear, exit - btstb #ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl - beqs no_exc |if clear, exit - bras ovfl_unfl |go to unfl_ovfl to determine if -| ;it is an unsupp or unimp exc - -| No exceptions are to be reported. If the instruction was -| unimplemented, no FPU restore is necessary. If it was -| unsupported, we must perform the restore. -no_exc: - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beqs uni_no_exc -uns_no_exc: - tstb RES_FLG(%a6) |check if frestore is needed - bne do_clean |if clear, no frestore needed -uni_no_exc: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bra finish_up -| -| Unsupported Data Type Handler: -| Ovfl: -| An fmoveout that results in an overflow is reported this way. -| Unfl: -| An fmoveout that results in an underflow is reported this way. -| -| Unimplemented Instruction Handler: -| Ovfl: -| Only scosh, setox, ssinh, stwotox, and scale can set overflow in -| this manner. -| Unfl: -| Stwotox, setox, and scale can set underflow in this manner. -| Any of the other Library Routines such that f(x)=x in which -| x is an extended denorm can report an underflow exception. -| It is the responsibility of the exception-causing exception -| to make sure that WBTEMP is correct. 
-| -| The exceptional operand is in FP_SCR1. -| -ovfl_unfl: - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beqs ofuf_con -| -| The caller was from an unsupported data type trap. Test if the -| caller set CU_ONLY. If so, the exceptional operand is expected in -| FPTEMP, rather than WBTEMP. -| - tstb CU_ONLY(%a6) |test if inst is cu-only - beq unsE3 -| move.w #$fe,CU_SAVEPC(%a6) - clrb CU_SAVEPC(%a6) - bsetb #E1,E_BYTE(%a6) |set E1 exception flag - movew ETEMP_EX(%a6),FPTEMP_EX(%a6) - movel ETEMP_HI(%a6),FPTEMP_HI(%a6) - movel ETEMP_LO(%a6),FPTEMP_LO(%a6) - bsetb #fptemp15_bit,DTAG(%a6) |set fpte15 - bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp - bra do_clean |go clean and exit - -ofuf_con: - moveb (%a7),VER_TMP(%a6) |save version number - cmpib #BUSY_SIZE-4,1(%a7) |check for busy frame - beqs busy_fr |if unimp, grow to busy - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes try_41 |if not, test for rev frame - moveql #13,%d0 |need to zero 14 lwords - bras ofuf_fin -try_41: - cmpib #VER_41,(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if neither, exit with error - moveql #11,%d0 |need to zero 12 lwords - -ofuf_fin: - clrl (%a7) -loop1: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop1 - moveb VER_TMP(%a6),(%a7) - moveb #BUSY_SIZE-4,1(%a7) |write busy fmt word. -busy_fr: - movel FP_SCR1(%a6),WBTEMP_EX(%a6) |write - movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) |exceptional op to - movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) |wbtemp - bsetb #E3,E_BYTE(%a6) |set E3 flag - bclrb #E1,E_BYTE(%a6) |make sure E1 is clear - bclrb #UFLAG,T_BYTE(%a6) |clr U flag - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - movel CMDREG1B(%a6),%d0 |fix cmd1b to make it - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a6) |in the busy frame - -| -| Check if the frame to be restored is busy or unimp. -|** NOTE *** Bug fix for errata (0d43b #3) -| If the frame is unimp, we must create a busy frame to -| fix the bug with the nmnexc bits in cases in which they -| are set by a previous instruction and not cleared by -| the save. The frame will be unimp only if the final -| instruction in an emulation routine caused the exception -| by doing an fmove ,fp0. The exception operand, in -| internal format, is in fptemp. -| -do_clean: - cmpib #UNIMP_40_SIZE-4,1(%a7) - bnes do_con - moveql #13,%d0 |in orig, need to zero 14 lwords - bras do_build -do_con: - cmpib #UNIMP_41_SIZE-4,1(%a7) - bnes do_restore |frame must be busy - moveql #11,%d0 |in rev, need to zero 12 lwords - -do_build: - moveb (%a7),VER_TMP(%a6) - clrl (%a7) -loop2: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop2 -| -| Use a1 as pointer into new frame. a6 is not correct if an unimp or -| busy frame was created as the result of an exception on the final -| instruction of an emulation routine. -| -| We need to set the nmcexc bits if the exception is E1. Otherwise, -| the exc taken will be inex2. 
-| - leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 for new frame - moveb VER_TMP(%a6),(%a7) |write busy fmt word - moveb #BUSY_SIZE-4,1(%a7) - movel FP_SCR1(%a6),WBTEMP_EX(%a1) |write - movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) |exceptional op to - movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) |wbtemp -| btst.b #E1,E_BYTE(%a1) -| beq.b do_restore - bfextu USER_FPSR(%a6){#17:#4},%d0 |get snan/operr/ovfl/unfl bits - bfins %d0,NMCEXC(%a1){#4:#4} |and insert them in nmcexc - movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits - orl #sx_mask,E_BYTE(%a1) - -do_restore: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - tstb RES_FLG(%a6) |RES_FLG indicates a "continuation" frame - beq cont - bsr bug1384 -cont: - unlk %a6 -| -| If trace mode enabled, then go to trace handler. This handler -| cannot have any fp instructions. If there are fp inst's and an -| exception has been restored into the machine then the exception -| will occur upon execution of the fp inst. This is not desirable -| in the kernel (supervisor mode). See MC68040 manual Section 9.3.8. -| -finish_up: - btstb #7,(%a7) |test T1 in SR - bnes g_trace - btstb #6,(%a7) |test T0 in SR - bnes g_trace - bral fpsp_done -| -| Change integer stack to look like trace stack -| The address of the instruction that caused the -| exception is already in the integer stack (is -| the same as the saved friar) -| -| If the current frame is already a 6-word stack then all -| that needs to be done is to change the vector# to TRACE. -| If the frame is only a 4-word stack (meaning we got here -| on an Unsupported data type exception), then we need to grow -| the stack an extra 2 words and get the FPIAR from the FPU. -| -g_trace: - bftst EXC_VEC-4(%sp){#0:#4} - bne g_easy - - subw #4,%sp | make room - movel 4(%sp),(%sp) - movel 8(%sp),4(%sp) - subw #BUSY_SIZE,%sp - fsave (%sp) - fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp) - frestore (%sp) - addw #BUSY_SIZE,%sp - -g_easy: - movew #TRACE_VEC,EXC_VEC-4(%a7) - bral real_trace -| -| This is a work-around for hardware bug 1384. -| -bug1384: - link %a5,#0 - fsave -(%sp) - cmpib #0x41,(%sp) | check for correct frame - beq frame_41 - bgt nofix | if more advanced mask, do nada - -frame_40: - tstb 1(%sp) | check to see if idle - bne notidle -idle40: - clrl (%sp) | get rid of old fsave frame - movel %d1,USER_D1(%a6) | save d1 - movew #8,%d1 | place unimp frame instead -loop40: clrl -(%sp) - dbra %d1,loop40 - movel USER_D1(%a6),%d1 | restore d1 - movel #0x40280000,-(%sp) - frestore (%sp)+ - unlk %a5 - rts - -frame_41: - tstb 1(%sp) | check to see if idle - bne notidle -idle41: - clrl (%sp) | get rid of old fsave frame - movel %d1,USER_D1(%a6) | save d1 - movew #10,%d1 | place unimp frame instead -loop41: clrl -(%sp) - dbra %d1,loop41 - movel USER_D1(%a6),%d1 | restore d1 - movel #0x41300000,-(%sp) - frestore (%sp)+ - unlk %a5 - rts - -notidle: - bclrb #etemp15_bit,-40(%a5) - frestore (%sp)+ - unlk %a5 - rts - -nofix: - frestore (%sp)+ - unlk %a5 - rts - - |end diff --git a/arch/m68k/fpsp040/get_op.S b/arch/m68k/fpsp040/get_op.S deleted file mode 100644 index 64c36d79ef836ca97a8bbdbe741addcb4c1a452e..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/get_op.S +++ /dev/null @@ -1,675 +0,0 @@ -| -| get_op.sa 3.6 5/19/92 -| -| get_op.sa 3.5 4/26/91 -| -| Description: This routine is called by the unsupported format/data -| type exception handler ('unsupp' - vector 55) and the unimplemented -| instruction exception handler ('unimp' - vector 11). 
'get_op' -| determines the opclass (0, 2, or 3) and branches to the -| opclass handler routine. See 68881/2 User's Manual table 4-11 -| for a description of the opclasses. -| -| For UNSUPPORTED data/format (exception vector 55) and for -| UNIMPLEMENTED instructions (exception vector 11) the following -| applies: -| -| - For unnormalized numbers (opclass 0, 2, or 3) the -| number(s) is normalized and the operand type tag is updated. -| -| - For a packed number (opclass 2) the number is unpacked and the -| operand type tag is updated. -| -| - For denormalized numbers (opclass 0 or 2) the number(s) is not -| changed but passed to the next module. The next module for -| unimp is do_func, the next module for unsupp is res_func. -| -| For UNSUPPORTED data/format (exception vector 55) only the -| following applies: -| -| - If there is a move out with a packed number (opclass 3) the -| number is packed and written to user memory. For the other -| opclasses the number(s) are written back to the fsave stack -| and the instruction is then restored back into the '040. The -| '040 is then able to complete the instruction. -| -| For example: -| fadd.x fpm,fpn where the fpm contains an unnormalized number. -| The '040 takes an unsupported data trap and gets to this -| routine. The number is normalized, put back on the stack and -| then an frestore is done to restore the instruction back into -| the '040. The '040 then re-executes the fadd.x fpm,fpn with -| a normalized number in the source and the instruction is -| successful. -| -| Next consider if in the process of normalizing the un- -| normalized number it becomes a denormalized number. The -| routine which converts the unnorm to a norm (called mk_norm) -| detects this and tags the number as a denorm. The routine -| res_func sees the denorm tag and converts the denorm to a -| norm. The instruction is then restored back into the '040 -| which re_executes the instruction. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
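The mk_norm step described above amounts to the classic normalization loop: shift the significand left until the integer (j) bit is set or the exponent reaches zero, and tag the operand as a denorm if the j-bit never arrives. A C sketch for a 64-bit significand; the field widths and tag values mirror the L_SCR1{7:5} encoding but are otherwise illustrative:

#include <stdint.h>

enum optag { TAG_NORM = 0x0, TAG_DNRM = 0x4 };	/* L_SCR1{7:5} values */

static enum optag mk_norm_sketch(uint64_t *mant, int *exp)
{
	while (!(*mant & (1ULL << 63)) && *exp > 0) {
		*mant <<= 1;		/* shift toward the integer (j) bit */
		(*exp)--;
	}
	return (*mant & (1ULL << 63)) ? TAG_NORM : TAG_DNRM;
}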
- -GET_OP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .global PIRN,PIRZRM,PIRP - .global SMALRN,SMALRZRM,SMALRP - .global BIGRN,BIGRZRM,BIGRP - -PIRN: - .long 0x40000000,0xc90fdaa2,0x2168c235 |pi -PIRZRM: - .long 0x40000000,0xc90fdaa2,0x2168c234 |pi -PIRP: - .long 0x40000000,0xc90fdaa2,0x2168c235 |pi - -|round to nearest -SMALRN: - .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9a |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 -| round to zero;round to negative infinity -SMALRZRM: - .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9a |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bb |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 -| round to positive infinity -SMALRP: - .long 0x3ffd0000,0x9a209a84,0xfbcff799 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9b |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 - -|round to nearest -BIGRN: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRN -PTENRN: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 - .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 -|round to minus infinity -BIGRZRM: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ab |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac16 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRM -PTENRM: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59D |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CDF |10 ^ 128 - .long 0x43510000,0xAA7EEBFB,0x9DF9DE8D |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C6 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979A |10 ^ 4096 -|round to positive infinity -BIGRP: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRP -PTENRP: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D6 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 - .long 
0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C18 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 - - |xref nrm_zero - |xref decbin - |xref round - - .global get_op - .global uns_getop - .global uni_getop -get_op: - clrb DY_MO_FLG(%a6) - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beq uni_getop - -uns_getop: - btstb #direction_bit,CMDREG1B(%a6) - bne opclass3 |branch if a fmove out (any kind) - btstb #6,CMDREG1B(%a6) - beqs uns_notpacked - - bfextu CMDREG1B(%a6){#3:#3},%d0 - cmpb #3,%d0 - beq pack_source |check for a packed src op, branch if so -uns_notpacked: - bsr chk_dy_mo |set the dyadic/monadic flag - tstb DY_MO_FLG(%a6) - beqs src_op_ck |if monadic, go check src op -| ;else, check dst op (fall through) - - btstb #7,DTAG(%a6) - beqs src_op_ck |if dst op is norm, check src op - bras dst_ex_dnrm |else, handle destination unnorm/dnrm - -uni_getop: - bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields - cmpil #0x17,%d0 |if op class and size fields are $17, -| ;it is FMOVECR; if not, continue -| -| If the instruction is fmovecr, exit get_op. It is handled -| in do_func and smovecr.sa. -| - bne not_fmovecr |handle fmovecr as an unimplemented inst - rts - -not_fmovecr: - btstb #E1,E_BYTE(%a6) |if set, there is a packed operand - bne pack_source |check for packed src op, branch if so - -| The following lines of are coded to optimize on normalized operands - moveb STAG(%a6),%d0 - orb DTAG(%a6),%d0 |check if either of STAG/DTAG msb set - bmis dest_op_ck |if so, some op needs to be fixed - rts - -dest_op_ck: - btstb #7,DTAG(%a6) |check for unsupported data types in - beqs src_op_ck |the destination, if not, check src op - bsr chk_dy_mo |set dyadic/monadic flag - tstb DY_MO_FLG(%a6) | - beqs src_op_ck |if monadic, check src op -| -| At this point, destination has an extended denorm or unnorm. -| -dst_ex_dnrm: - movew FPTEMP_EX(%a6),%d0 |get destination exponent - andiw #0x7fff,%d0 |mask sign, check if exp = 0000 - beqs src_op_ck |if denorm then check source op. -| ;denorms are taken care of in res_func -| ;(unsupp) or do_func (unimp) -| ;else unnorm fall through - leal FPTEMP(%a6),%a0 |point a0 to dop - used in mk_norm - bsr mk_norm |go normalize - mk_norm returns: -| ;L_SCR1{7:5} = operand tag -| ; (000 = norm, 100 = denorm) -| ;L_SCR1{4} = fpte15 or ete15 -| ; 0 = exp > $3fff -| ; 1 = exp <= $3fff -| ;and puts the normalized num back -| ;on the fsave stack -| - moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15 -| ;to the fsave stack and fall -| ;through to check source operand -| -src_op_ck: - btstb #7,STAG(%a6) - beq end_getop |check for unsupported data types on the -| ;source operand - btstb #5,STAG(%a6) - bnes src_sd_dnrm |if bit 5 set, handle sgl/dbl denorms -| -| At this point only unnorms or extended denorms are possible. 
-| -src_ex_dnrm: - movew ETEMP_EX(%a6),%d0 |get source exponent - andiw #0x7fff,%d0 |mask sign, check if exp = 0000 - beq end_getop |if denorm then exit, denorms are -| ;handled in do_func - leal ETEMP(%a6),%a0 |point a0 to sop - used in mk_norm - bsr mk_norm |go normalize - mk_norm returns: -| ;L_SCR1{7:5} = operand tag -| ; (000 = norm, 100 = denorm) -| ;L_SCR1{4} = fpte15 or ete15 -| ; 0 = exp > $3fff -| ; 1 = exp <= $3fff -| ;and puts the normalized num back -| ;on the fsave stack -| - moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15 - rts |end_getop - -| -| At this point, only single or double denorms are possible. -| If the inst is not fmove, normalize the source. If it is, -| do nothing to the input. -| -src_sd_dnrm: - btstb #4,CMDREG1B(%a6) |differentiate between sgl/dbl denorm - bnes is_double -is_single: - movew #0x3f81,%d1 |write bias for sgl denorm - bras common |goto the common code -is_double: - movew #0x3c01,%d1 |write the bias for a dbl denorm -common: - btstb #sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa - beqs pos - bset #15,%d1 |set sign bit because it is negative -pos: - movew %d1,ETEMP_EX(%a6) -| ;put exponent on stack - - movew CMDREG1B(%a6),%d1 - andw #0xe3ff,%d1 |clear out source specifier - orw #0x0800,%d1 |set source specifier to extended prec - movew %d1,CMDREG1B(%a6) |write back to the command word in stack -| ;this is needed to fix unsupp data stack - leal ETEMP(%a6),%a0 |point a0 to sop - - bsr mk_norm |convert sgl/dbl denorm to norm - moveb L_SCR1(%a6),STAG(%a6) |put tag into source tag reg - d0 - rts |end_getop -| -| At this point, the source is definitely packed, whether -| instruction is dyadic or monadic is still unknown -| -pack_source: - movel FPTEMP_LO(%a6),ETEMP(%a6) |write ms part of packed -| ;number to etemp slot - bsr chk_dy_mo |set dyadic/monadic flag - bsr unpack - - tstb DY_MO_FLG(%a6) - beqs end_getop |if monadic, exit -| ;else, fix FPTEMP -pack_dya: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 |set up d0 as a dynamic register mask - fmovemx %d0,FPTEMP(%a6) |write to FPTEMP - - btstb #7,DTAG(%a6) |check dest tag for unnorm or denorm - bne dst_ex_dnrm |else, handle the unnorm or ext denorm -| -| Dest is not denormalized. Check for norm, and set fpte15 -| accordingly. -| - moveb DTAG(%a6),%d0 - andib #0xf0,%d0 |strip to only dtag:fpte15 - tstb %d0 |check for normalized value - bnes end_getop |if inf/nan/zero leave get_op - movew FPTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x3fff,%d0 |check if fpte15 needs setting - bges end_getop |if >= $3fff, leave fpte15=0 - orb #0x10,DTAG(%a6) - bras end_getop - -| -| At this point, it is either an fmoveout packed, unnorm or denorm -| -opclass3: - clrb DY_MO_FLG(%a6) |set dyadic/monadic flag to monadic - bfextu CMDREG1B(%a6){#4:#2},%d0 - cmpib #3,%d0 - bne src_ex_dnrm |if not equal, must be unnorm or denorm -| ;else it is a packed move out -| ;exit -end_getop: - rts - -| -| Sets the DY_MO_FLG correctly. This is used only on if it is an -| unsupported data type exception. Set if dyadic. 
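The rule chk_dy_mo implements below: bit 5 of the command word clear means monadic; bit 5 set with bit 4 clear means dyadic; and the lone exception is extension $38, fcmp, which is dyadic despite bit 4 being set. In C:

static int is_dyadic(unsigned short cmdreg1b)
{
	if (!(cmdreg1b & (1u << 5)))
		return 0;			/* bit 5 clear: monadic */
	if (!(cmdreg1b & (1u << 4)))
		return 1;			/* bit 5 set, bit 4 clear: dyadic */
	return (cmdreg1b & 0x7f) == 0x38;	/* fcmp: the dyadic exception */
}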
-| -chk_dy_mo: - movew CMDREG1B(%a6),%d0 - btstl #5,%d0 |testing extension command word - beqs set_mon |if bit 5 = 0 then monadic - btstl #4,%d0 |know that bit 5 = 1 - beqs set_dya |if bit 4 = 0 then dyadic - andiw #0x007f,%d0 |get rid of all but extension bits {6:0} - cmpiw #0x0038,%d0 |if extension = $38 then fcmp (dyadic) - bnes set_mon -set_dya: - st DY_MO_FLG(%a6) |set the inst flag type to dyadic - rts -set_mon: - clrb DY_MO_FLG(%a6) |set the inst flag type to monadic - rts -| -| MK_NORM -| -| Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl -| exception if denorm. -| -| CASE opclass 0x0 unsupp -| mk_norm till msb set -| set tag = norm -| -| CASE opclass 0x0 unimp -| mk_norm till msb set or exp = 0 -| if integer bit = 0 -| tag = denorm -| else -| tag = norm -| -| CASE opclass 011 unsupp -| mk_norm till msb set or exp = 0 -| if integer bit = 0 -| tag = denorm -| set unfl_nmcexe = 1 -| else -| tag = norm -| -| if exp <= $3fff -| set ete15 or fpte15 = 1 -| else set ete15 or fpte15 = 0 - -| input: -| a0 = points to operand to be normalized -| output: -| L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm) -| L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff) -| the normalized operand is placed back on the fsave stack -mk_norm: - clrl L_SCR1(%a6) - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |transform into internal extended format - - cmpib #0x2c,1+EXC_VEC(%a6) |check if unimp - bnes uns_data |branch if unsupp - bsr uni_inst |call if unimp (opclass 0x0) - bras reload -uns_data: - btstb #direction_bit,CMDREG1B(%a6) |check transfer direction - bnes bit_set |branch if set (opclass 011) - bsr uns_opx |call if opclass 0x0 - bras reload -bit_set: - bsr uns_op3 |opclass 011 -reload: - cmpw #0x3fff,LOCAL_EX(%a0) |if exp > $3fff - bgts end_mk | fpte15/ete15 already set to 0 - bsetb #4,L_SCR1(%a6) |else set fpte15/ete15 to 1 -| ;calling routine actually sets the -| ;value on the stack (along with the -| ;tag), since this routine doesn't -| ;know if it should set ete15 or fpte15 -| ;ie, it doesn't know if this is the -| ;src op or dest op. -end_mk: - bfclr LOCAL_SGN(%a0){#0:#8} - beqs end_mk_pos - bsetb #sign_bit,LOCAL_EX(%a0) |convert back to IEEE format -end_mk_pos: - rts -| -| CASE opclass 011 unsupp -| -uns_op3: - bsr nrm_zero |normalize till msb = 1 or exp = zero - btstb #7,LOCAL_HI(%a0) |if msb = 1 - bnes no_unfl |then branch -set_unfl: - orw #dnrm_tag,L_SCR1(%a6) |set denorm tag - bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit -no_unfl: - rts -| -| CASE opclass 0x0 unsupp -| -uns_opx: - bsr nrm_zero |normalize the number - btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set - beqs uns_den |if clear then now have a denorm -uns_nrm: - orb #norm_tag,L_SCR1(%a6) |set tag to norm - rts -uns_den: - orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm - rts -| -| CASE opclass 0x0 unimp -| -uni_inst: - bsr nrm_zero - btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set - beqs uni_den |if clear then now have a denorm -uni_nrm: - orb #norm_tag,L_SCR1(%a6) |set tag to norm - rts -uni_den: - orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm - rts - -| -| Decimal to binary conversion -| -| Special cases of inf and NaNs are completed outside of decbin. -| If the input is an snan, the snan bit is not set. 
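The special-case screen that unpack applies below tests the first word of the packed operand: an exponent field of $FFF together with SE and both y bits set marks inf or NaN, and a non-zero mantissa then selects NaN. A C sketch, with field positions taken from the bfextu offsets:

#include <stdint.h>

enum special { NOT_SPECIAL, IS_INF, IS_NAN };

/* w0: first 16 bits of the packed operand (sign bits + 3 exp digits);
 * hi/lo: the two mantissa longwords */
static enum special packed_special(uint16_t w0, uint32_t hi, uint32_t lo)
{
	unsigned int exp3 = w0 & 0xfff;		/* bfextu {20:12} */
	unsigned int se_y = (w0 >> 12) & 0x7;	/* bfextu {17:3}: SE + y bits */
	if (exp3 != 0xfff || se_y != 0x7)
		return NOT_SPECIAL;		/* falls to the try_zero path */
	return (hi | lo) ? IS_NAN : IS_INF;	/* non-zero mantissa: NaN */
}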
-| -| input: -| ETEMP(a6) - points to packed decimal string in memory -| output: -| fp0 - contains packed string converted to extended precision -| ETEMP - same as fp0 -unpack: - movew CMDREG1B(%a6),%d0 |examine command word, looking for fmove's - andw #0x3b,%d0 - beq move_unpack |special handling for fmove: must set FPSR_CC - - movew ETEMP(%a6),%d0 |get word with inf information - bfextu %d0{#20:#12},%d1 |get exponent into d1 - cmpiw #0x0fff,%d1 |test for inf or NaN - bnes try_zero |if not equal, it is not special - bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 - cmpiw #7,%d1 |SE and y bits must be on for special - bnes try_zero |if not on, it is not special -|input is of the special cases of inf and NaN - tstl ETEMP_HI(%a6) |check ms mantissa - bnes fix_nan |if non-zero, it is a NaN - tstl ETEMP_LO(%a6) |check ls mantissa - bnes fix_nan |if non-zero, it is a NaN - bra finish |special already on stack -fix_nan: - btstb #signan_bit,ETEMP_HI(%a6) |test for snan - bne finish - orl #snaniop_mask,USER_FPSR(%a6) |always set snan if it is so - bra finish -try_zero: - movew ETEMP_EX+2(%a6),%d0 |get word 4 - andiw #0x000f,%d0 |clear all but last ni(y)bble - tstw %d0 |check for zero. - bne not_spec - tstl ETEMP_HI(%a6) |check words 3 and 2 - bne not_spec - tstl ETEMP_LO(%a6) |check words 1 and 0 - bne not_spec - tstl ETEMP(%a6) |test sign of the zero - bges pos_zero - movel #0x80000000,ETEMP(%a6) |write neg zero to etemp - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bra finish -pos_zero: - clrl ETEMP(%a6) - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bra finish - -not_spec: - fmovemx %fp0-%fp1,-(%a7) |save fp0 - decbin returns in it - bsr decbin - fmovex %fp0,ETEMP(%a6) |put the unpacked sop in the fsave stack - fmovemx (%a7)+,%fp0-%fp1 - fmovel #0,%FPSR |clr fpsr from decbin - bra finish - -| -| Special handling for packed move in: Same results as all other -| packed cases, but we must set the FPSR condition codes properly. -| -move_unpack: - movew ETEMP(%a6),%d0 |get word with inf information - bfextu %d0{#20:#12},%d1 |get exponent into d1 - cmpiw #0x0fff,%d1 |test for inf or NaN - bnes mtry_zero |if not equal, it is not special - bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 - cmpiw #7,%d1 |SE and y bits must be on for special - bnes mtry_zero |if not on, it is not special -|input is of the special cases of inf and NaN - tstl ETEMP_HI(%a6) |check ms mantissa - bnes mfix_nan |if non-zero, it is a NaN - tstl ETEMP_LO(%a6) |check ls mantissa - bnes mfix_nan |if non-zero, it is a NaN -|input is inf - orl #inf_mask,USER_FPSR(%a6) |set I bit - tstl ETEMP(%a6) |check sign - bge finish - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra finish |special already on stack -mfix_nan: - orl #nan_mask,USER_FPSR(%a6) |set NaN bit - moveb #nan_tag,STAG(%a6) |set stag to NaN - btstb #signan_bit,ETEMP_HI(%a6) |test for snan - bnes mn_snan - orl #snaniop_mask,USER_FPSR(%a6) |set snan bit - btstb #snan_bit,FPCR_ENABLE(%a6) |test for snan enabled - bnes mn_snan - bsetb #signan_bit,ETEMP_HI(%a6) |force snans to qnans -mn_snan: - tstl ETEMP(%a6) |check for sign - bge finish |if clr, go on - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra finish - -mtry_zero: - movew ETEMP_EX+2(%a6),%d0 |get word 4 - andiw #0x000f,%d0 |clear all but last ni(y)bble - tstw %d0 |check for zero. 
- bnes mnot_spec - tstl ETEMP_HI(%a6) |check words 3 and 2 - bnes mnot_spec - tstl ETEMP_LO(%a6) |check words 1 and 0 - bnes mnot_spec - tstl ETEMP(%a6) |test sign of the zero - bges mpos_zero - orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z - movel #0x80000000,ETEMP(%a6) |write neg zero to etemp - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bras finish -mpos_zero: - orl #z_mask,USER_FPSR(%a6) |set Z - clrl ETEMP(%a6) - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bras finish - -mnot_spec: - fmovemx %fp0-%fp1,-(%a7) |save fp0 ,fp1 - decbin returns in fp0 - bsr decbin - fmovex %fp0,ETEMP(%a6) -| ;put the unpacked sop in the fsave stack - fmovemx (%a7)+,%fp0-%fp1 - -finish: - movew CMDREG1B(%a6),%d0 |get the command word - andw #0xfbff,%d0 |change the source specifier field to -| ;extended (was packed). - movew %d0,CMDREG1B(%a6) |write command word back to fsave stack -| ;we need to do this so the 040 will -| ;re-execute the inst. without taking -| ;another packed trap. - -fix_stag: -|Converted result is now in etemp on fsave stack, now set the source -|tag (stag) -| if (ete =$7fff) then INF or NAN -| if (etemp = $x.0----0) then -| stag = INF -| else -| stag = NAN -| else -| if (ete = $0000) then -| stag = ZERO -| else -| stag = NORM -| -| Note also that the etemp_15 bit (just right of the stag) must -| be set accordingly. -| - movew ETEMP_EX(%a6),%d1 - andiw #0x7fff,%d1 |strip sign - cmpw #0x7fff,%d1 - bnes z_or_nrm - movel ETEMP_HI(%a6),%d1 - bnes is_nan - movel ETEMP_LO(%a6),%d1 - bnes is_nan -is_inf: - moveb #0x40,STAG(%a6) - movel #0x40,%d0 - rts -is_nan: - moveb #0x60,STAG(%a6) - movel #0x60,%d0 - rts -z_or_nrm: - tstw %d1 - bnes is_nrm -is_zro: -| For a zero, set etemp_15 - moveb #0x30,STAG(%a6) - movel #0x20,%d0 - rts -is_nrm: -| For a norm, check if the exp <= $3fff; if so, set etemp_15 - cmpiw #0x3fff,%d1 - bles set_bit15 - moveb #0,STAG(%a6) - bras end_is_nrm -set_bit15: - moveb #0x10,STAG(%a6) -end_is_nrm: - movel #0,%d0 -end_fix: - rts - -end_get: - rts - |end diff --git a/arch/m68k/fpsp040/kernel_ex.S b/arch/m68k/fpsp040/kernel_ex.S deleted file mode 100644 index 45bcf3455d341e6ae9ebdbdd28e36caa76038f94..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/kernel_ex.S +++ /dev/null @@ -1,493 +0,0 @@ -| -| kernel_ex.sa 3.3 12/19/90 -| -| This file contains routines to force exception status in the -| fpu for exceptional cases detected or reported within the -| transcendental functions. Typically, the t_xx routine will -| set the appropriate bits in the USER_FPSR word on the stack. -| The bits are tested in gen_except.sa to determine if an exceptional -| situation needs to be created on return from the FPSP. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
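As an orientation for the t_dz/t_dz2 code that follows: with the DZ trap disabled, the routine hands back a correctly signed infinity and sets the DZ and accrued-DZ (and N) bits; with the trap enabled, it sets only the status bits and flags sto_res not to touch the result register. A C sketch; the bit positions and flag plumbing are illustrative, not the real FPSR layout:

#include <math.h>

struct fpsr_sketch { unsigned int status, accrued, cc; };

static double t_dz_sketch(int neg, int dz_enabled,
			  struct fpsr_sketch *sr, int *store_result)
{
	sr->status  |= 1u << 2;		/* DZ status (illustrative position) */
	sr->accrued |= 1u << 2;		/* accrued DZ */
	if (neg)
		sr->cc |= 1u << 3;	/* N condition code */
	if (dz_enabled) {		/* trap enabled: leave fp0 alone */
		*store_result = 0;	/* STORE_FLG in the asm */
		return 0.0;
	}
	*store_result = 1;
	return neg ? -INFINITY : INFINITY;	/* signed default result */
}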
- -KERNEL_EX: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -mns_inf: .long 0xffff0000,0x00000000,0x00000000 -pls_inf: .long 0x7fff0000,0x00000000,0x00000000 -nan: .long 0x7fff0000,0xffffffff,0xffffffff -huge: .long 0x7ffe0000,0xffffffff,0xffffffff - - |xref ovf_r_k - |xref unf_sub - |xref nrm_set - - .global t_dz - .global t_dz2 - .global t_operr - .global t_unfl - .global t_ovfl - .global t_ovfl2 - .global t_inx2 - .global t_frcinx - .global t_extdnrm - .global t_resdnrm - .global dst_nan - .global src_nan -| -| DZ exception -| -| -| if dz trap disabled -| store properly signed inf (use sign of etemp) into fp0 -| set FPSR exception status dz bit, condition code -| inf bit, and accrued dz bit -| return -| frestore the frame into the machine (done by unimp_hd) -| -| else dz trap enabled -| set exception status bit & accrued bits in FPSR -| set flag to disable sto_res from corrupting fp register -| return -| frestore the frame into the machine (done by unimp_hd) -| -| t_dz2 is used by monadic functions such as flogn (from do_func). -| t_dz is used by monadic functions such as satanh (from the -| transcendental function). -| -t_dz2: - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR - fmovel #0,%FPSR |clr status bits (Z set) - btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled - bnes dz_ena_end - bras m_inf |flogx always returns -inf -t_dz: - fmovel #0,%FPSR |clr status bits (Z set) - btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled - bnes dz_ena -| -| dz disabled -| - btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos - beqs p_inf |branch if pos sign - -m_inf: - fmovemx mns_inf,%fp0-%fp0 |load -inf - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR - bras set_fpsr -p_inf: - fmovemx pls_inf,%fp0-%fp0 |load +inf -set_fpsr: - orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ - rts -| -| dz enabled -| -dz_ena: - btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos - beqs dz_ena_end - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR -dz_ena_end: - orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ - st STORE_FLG(%a6) - rts -| -| OPERR exception -| -| if (operr trap disabled) -| set FPSR exception status operr bit, condition code -| nan bit; Store default NAN into fp0 -| frestore the frame into the machine (done by unimp_hd) -| -| else (operr trap enabled) -| set FPSR exception status operr bit, accrued operr bit -| set flag to disable sto_res from corrupting fp register -| frestore the frame into the machine (done by unimp_hd) -| -t_operr: - orl #opnan_mask,USER_FPSR(%a6) |set NaN, OPERR, AIOP - - btstb #operr_bit,FPCR_ENABLE(%a6) |test FPCR for operr enabled - bnes op_ena - - fmovemx nan,%fp0-%fp0 |load default nan - rts -op_ena: - st STORE_FLG(%a6) |do not corrupt destination - rts - -| -| t_unfl --- UNFL exception -| -| This entry point is used by all routines requiring unfl, inex2, -| aunfl, and ainex to be set on exit. -| -| On entry, a0 points to the exceptional operand. The final exceptional -| operand is built in FP_SCR1 and only the sign from the original operand -| is used. 
-| -t_unfl: - clrl FP_SCR1(%a6) |set exceptional operand to zero - clrl FP_SCR1+4(%a6) - clrl FP_SCR1+8(%a6) - tstb (%a0) |extract sign from caller's exop - bpls unfl_signok - bset #sign_bit,FP_SCR1(%a6) -unfl_signok: - leal FP_SCR1(%a6),%a0 - orl #unfinx_mask,USER_FPSR(%a6) -| ;set UNFL, INEX2, AUNFL, AINEX -unfl_con: - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs unfl_dis - -unfl_ena: - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - bsetb #sticky_bit,STICKY(%a6) |set sticky bit - - bclrb #E1,E_BYTE(%a6) - -unfl_dis: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision - - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - - bsr unf_sub |returns IEEE result at a0 -| ;and sets FPSR_CC accordingly - - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs unfl_fin - - bsetb #sign_bit,LOCAL_EX(%a0) - bsetb #sign_bit,FP_SCR1(%a6) |set sign bit of exc operand - -unfl_fin: - fmovemx (%a0),%fp0-%fp0 |store result in fp0 - rts - - -| -| t_ovfl2 --- OVFL exception (without inex2 returned) -| -| This entry is used by scale to force catastrophic overflow. The -| ovfl, aovfl, and ainex bits are set, but not the inex2 bit. -| -t_ovfl2: - orl #ovfl_inx_mask,USER_FPSR(%a6) - movel ETEMP(%a6),FP_SCR1(%a6) - movel ETEMP_HI(%a6),FP_SCR1+4(%a6) - movel ETEMP_LO(%a6),FP_SCR1+8(%a6) -| -| Check for single or double round precision. If single, check if -| the lower 40 bits of ETEMP are zero; if not, set inex2. If double, -| check if the lower 21 bits are zero; if not, set inex2. -| - moveb FPCR_MODE(%a6),%d0 - andib #0xc0,%d0 - beq t_work |if extended, finish ovfl processing - cmpib #0x40,%d0 |test for single - bnes t_dbl -t_sgl: - tstb ETEMP_LO(%a6) - bnes t_setinx2 - movel ETEMP_HI(%a6),%d0 - andil #0xff,%d0 |look at only lower 8 bits - bnes t_setinx2 - bra t_work -t_dbl: - movel ETEMP_LO(%a6),%d0 - andil #0x7ff,%d0 |look at only lower 11 bits - beq t_work -t_setinx2: - orl #inex2_mask,USER_FPSR(%a6) - bras t_work -| -| t_ovfl --- OVFL exception -| -|** Note: the exc operand is returned in ETEMP. -| -t_ovfl: - orl #ovfinx_mask,USER_FPSR(%a6) -t_work: - btstb #ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled - beqs ovf_dis - -ovf_ena: - clrl FP_SCR1(%a6) |set exceptional operand - clrl FP_SCR1+4(%a6) - clrl FP_SCR1+8(%a6) - - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bclrb #wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15 - bsetb #sticky_bit,STICKY(%a6) |set sticky bit - - bclrb #E1,E_BYTE(%a6) -| ;fall through to disabled case - -| For disabled overflow call 'ovf_r_k'. This routine loads the -| correct result based on the rounding precision, destination -| format, rounding mode and sign. -| -ovf_dis: - bsr ovf_r_k |returns unsigned ETEMP_EX -| ;and sets FPSR_CC accordingly. - bfclr ETEMP_SGN(%a6){#0:#8} |fix sign - beqs ovf_pos - bsetb #sign_bit,ETEMP_EX(%a6) - bsetb #sign_bit,FP_SCR1(%a6) |set exceptional operand sign -ovf_pos: - fmovemx ETEMP(%a6),%fp0-%fp0 |move the result to fp0 - rts - - -| -| INEX2 exception -| -| The inex2 and ainex bits are set. -| -t_inx2: - orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX - rts - -| -| Force Inex2 -| -| This routine is called by the transcendental routines to force -| the inex2 exception bits set in the FPSR. If the underflow bit -| is set, but the underflow trap was not taken, the aunfl bit in -| the FPSR must be set. 
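A note on the t_ovfl2 inexactness test above: extended precision carries 64 mantissa bits, single keeps 24 and double keeps 53, so the code tests the 40 discarded bits for single (all of ETEMP_LO plus the low 8 bits of ETEMP_HI) and the 11 discarded bits for double (the 0x7ff mask; the comment's "21 bits" appears to be a typo for 11). In C, with an invented function name:

    #include <stdint.h>

    /* Nonzero discarded mantissa bits mean the narrower result is inexact. */
    int discarded_bits_nonzero(uint32_t man_hi, uint32_t man_lo, int prec)
    {
        if (prec == 1)                                /* single keeps 24 of 64 bits */
            return ((man_hi & 0xffu) | man_lo) != 0;  /* low 8 of hi + all of lo */
        if (prec == 2)                                /* double keeps 53 of 64 bits */
            return (man_lo & 0x7ffu) != 0;            /* low 11 bits */
        return 0;                                     /* extended: nothing discarded */
    }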
-| -t_frcinx: - orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX - btstb #unfl_bit,FPSR_EXCEPT(%a6) |test for unfl bit set - beqs no_uacc1 |if clear, do not set aunfl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_uacc1: - rts - -| -| DST_NAN -| -| Determine if the destination nan is signalling or non-signalling, -| and set the FPSR bits accordingly. See the MC68040 User's Manual -| section 3.2.2.5 NOT-A-NUMBERS. -| -dst_nan: - btstb #sign_bit,FPTEMP_EX(%a6) |test sign of nan - beqs dst_pos |if clr, it was positive - bsetb #neg_bit,FPSR_CC(%a6) |set N bit -dst_pos: - btstb #signan_bit,FPTEMP_HI(%a6) |check if signalling - beqs dst_snan |branch if signalling - - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex FPTEMP(%a6),%fp0 |return the non-signalling nan -| -| Check the source nan. If it is signalling, snan will be reported. -| - moveb STAG(%a6),%d0 - andib #0xe0,%d0 - cmpib #0x60,%d0 - bnes no_snan - btstb #signan_bit,ETEMP_HI(%a6) |check if signalling - bnes no_snan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP -no_snan: - rts - -dst_snan: - btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled - beqs dst_dis |branch if disabled - - orb #nan_tag,DTAG(%a6) |set up dtag for nan - st STORE_FLG(%a6) |do not store a result - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -dst_dis: - bsetb #signan_bit,FPTEMP_HI(%a6) |set SNAN bit in sop - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex FPTEMP(%a6),%fp0 |load non-sign. nan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -| -| SRC_NAN -| -| Determine if the source nan is signalling or non-signalling, -| and set the FPSR bits accordingly. See the MC68040 User's Manual -| section 3.2.2.5 NOT-A-NUMBERS. -| -src_nan: - btstb #sign_bit,ETEMP_EX(%a6) |test sign of nan - beqs src_pos |if clr, it was positive - bsetb #neg_bit,FPSR_CC(%a6) |set N bit -src_pos: - btstb #signan_bit,ETEMP_HI(%a6) |check if signalling - beqs src_snan |branch if signalling - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex ETEMP(%a6),%fp0 |return the non-signalling nan - rts - -src_snan: - btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled - beqs src_dis |branch if disabled - bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop - orb #norm_tag,DTAG(%a6) |set up dtag for norm - orb #nan_tag,STAG(%a6) |set up stag for nan - st STORE_FLG(%a6) |do not store a result - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -src_dis: - bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex ETEMP(%a6),%fp0 |load non-sign. nan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -| -| For all functions that have a denormalized input and that f(x)=x, -| this is the entry point -| -t_extdnrm: - orl #unfinx_mask,USER_FPSR(%a6) -| ;set UNFL, INEX2, AUNFL, AINEX - bras xdnrm_con -| -| Entry point for scale with extended denorm. The function does -| not set inex2, aunfl, or ainex. -| -t_resdnrm: - orl #unfl_mask,USER_FPSR(%a6) - -xdnrm_con: - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs xdnrm_dis - -| -| If exceptions are enabled, the additional task of setting up WBTEMP -| is needed so that when the underflow exception handler is entered, -| the user perceives no difference between what the 040 provides vs. -| what the FPSP provides. 
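The signalling/quiet distinction used by dst_nan and src_nan above hinges on one mantissa bit: signan_bit is defined in fpsp.h, and from its use against the high mantissa word it corresponds to mantissa bit 62, just below the extended format's explicit integer bit; that mapping is an inference here, not spelled out in this file. A clear bit means signalling, and the disabled-trap paths set it to deliver a quiet NaN:

    #include <stdint.h>

    #define SIGNAN_BIT 30u   /* assumed: mantissa bit 62 within the high 32-bit word */

    int is_signalling(uint32_t man_hi)   /* high mantissa word of the NaN */
    {
        return !(man_hi & (1u << SIGNAN_BIT));   /* clear = signalling */
    }

    uint32_t quieten(uint32_t man_hi)    /* what the trap-disabled path stores */
    {
        return man_hi | (1u << SIGNAN_BIT);
    }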
-| -xdnrm_ena: - movel %a0,-(%a7) - - movel LOCAL_EX(%a0),FP_SCR1(%a6) - movel LOCAL_HI(%a0),FP_SCR1+4(%a6) - movel LOCAL_LO(%a0),FP_SCR1+8(%a6) - - lea FP_SCR1(%a6),%a0 - - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - tstw LOCAL_EX(%a0) |check if input is denorm - beqs xdnrm_dn |if so, skip nrm_set - bsr nrm_set |normalize the result (exponent -| ;will be negative -xdnrm_dn: - bclrb #sign_bit,LOCAL_EX(%a0) |take off false sign - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs xdep - bsetb #sign_bit,LOCAL_EX(%a0) -xdep: - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - bclrb #sticky_bit,STICKY(%a6) |clear sticky bit - bclrb #E1,E_BYTE(%a6) - movel (%a7)+,%a0 -xdnrm_dis: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision - bnes not_ext |if not round extended, store -| ;IEEE defaults -is_ext: - btstb #sign_bit,LOCAL_EX(%a0) - beqs xdnrm_store - - bsetb #neg_bit,FPSR_CC(%a6) |set N bit in FPSR_CC - - bras xdnrm_store - -not_ext: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bsr unf_sub |returns IEEE result pointed by -| ;a0; sets FPSR_CC accordingly - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs xdnrm_store - bsetb #sign_bit,LOCAL_EX(%a0) -xdnrm_store: - fmovemx (%a0),%fp0-%fp0 |store result in fp0 - rts - -| -| This subroutine is used for dyadic operations that use an extended -| denorm within the kernel. The approach used is to capture the frame, -| fix/restore. -| - .global t_avoid_unsupp -t_avoid_unsupp: - link %a2,#-LOCAL_SIZE |so that a2 fpsp.h negative -| ;offsets may be used - fsave -(%a7) - tstb 1(%a7) |check if idle, exit if so - beq idle_end - btstb #E1,E_BYTE(%a2) |check for an E1 exception if -| ;enabled, there is an unsupp - beq end_avun |else, exit - btstb #7,DTAG(%a2) |check for denorm destination - beqs src_den |else, must be a source denorm -| -| handle destination denorm -| - lea FPTEMP(%a2),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bclrb #7,DTAG(%a2) |set DTAG to norm - bsr nrm_set |normalize result, exponent -| ;will become negative - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs ck_src_den |check if source is also denorm - bsetb #sign_bit,LOCAL_EX(%a0) -ck_src_den: - btstb #7,STAG(%a2) - beqs end_avun -src_den: - lea ETEMP(%a2),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bclrb #7,STAG(%a2) |set STAG to norm - bsr nrm_set |normalize result, exponent -| ;will become negative - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs den_com - bsetb #sign_bit,LOCAL_EX(%a0) -den_com: - moveb #0xfe,CU_SAVEPC(%a2) |set continue frame - clrw NMNEXC(%a2) |clear NMNEXC - bclrb #E1,E_BYTE(%a2) -| fmove.l %FPSR,FPSR_SHADOW(%a2) -| bset.b #SFLAG,E_BYTE(%a2) -| bset.b #XFLAG,T_BYTE(%a2) -end_avun: - frestore (%a7)+ - unlk %a2 - rts -idle_end: - addl #4,%a7 - unlk %a2 - rts - |end diff --git a/arch/m68k/fpsp040/res_func.S b/arch/m68k/fpsp040/res_func.S deleted file mode 100644 index d9cdf4383545b7aa21dd8b5d6976f38890611e68..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/res_func.S +++ /dev/null @@ -1,2039 +0,0 @@ -| -| res_func.sa 3.9 7/29/91 -| -| Normalizes denormalized numbers if necessary and updates the -| stack frame. 
The function is then restored back into the
-| machine and the 040 completes the operation. This routine
-| is only used by the unsupported data type/format handler.
-| (Exception vector 55).
-|
-| For packed move out (fmove.p fpm,<ea>) the operation is
-| completed here; data is packed and moved to user memory.
-| The stack is restored to the 040 only in the case of a
-| reportable exception in the conversion.
-|
-|
-| Copyright (C) Motorola, Inc. 1990
-| All Rights Reserved
-|
-| For details on the license for this file, please see the
-| file, README, in this same directory.
-
-RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
-
- |section 8
-
-#include "fpsp.h"
-
-sp_bnds: .short 0x3f81,0x407e
- .short 0x3f6a,0x0000
-dp_bnds: .short 0x3c01,0x43fe
- .short 0x3bcd,0x0000
-
- |xref mem_write
- |xref bindec
- |xref get_fline
- |xref round
- |xref denorm
- |xref dest_ext
- |xref dest_dbl
- |xref dest_sgl
- |xref unf_sub
- |xref nrm_set
- |xref dnrm_lp
- |xref ovf_res
- |xref reg_dest
- |xref t_ovfl
- |xref t_unfl
-
- .global res_func
- .global p_move
-
-res_func:
- clrb DNRM_FLG(%a6)
- clrb RES_FLG(%a6)
- clrb CU_ONLY(%a6)
- tstb DY_MO_FLG(%a6)
- beqs monadic
-dyadic:
- btstb #7,DTAG(%a6) |if dop = norm=000, zero=001,
-| ;inf=010 or nan=011
- beqs monadic |then branch
-| ;else denorm
-| HANDLE DESTINATION DENORM HERE
-| ;set dtag to norm
-| ;write the tag & fpte15 to the fstack
- leal FPTEMP(%a6),%a0
-
- bclrb #sign_bit,LOCAL_EX(%a0)
- sne LOCAL_SGN(%a0)
-
- bsr nrm_set |normalize number (exp will go negative)
- bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign
- bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
- beqs dpos
- bsetb #sign_bit,LOCAL_EX(%a0)
-dpos:
- bfclr DTAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0
- bsetb #4,DTAG(%a6) |set FPTE15
- orb #0x0f,DNRM_FLG(%a6)
-monadic:
- leal ETEMP(%a6),%a0
- btstb #direction_bit,CMDREG1B(%a6) |check direction
- bne opclass3 |it is a mv out
-|
-| At this point, only opclass 0 and 2 possible
-|
- btstb #7,STAG(%a6) |if sop = norm=000, zero=001,
-| ;inf=010 or nan=011
- bne mon_dnrm |else denorm
- tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
- bne normal |require normalization of denorm
-
-| At this point:
-| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
-| fmove = $00 fsmove = $40 fdmove = $44
-| fsqrt = $05* fssqrt = $41 fdsqrt = $45
-| (*fsqrt reencoded to $05)
-|
- movew CMDREG1B(%a6),%d0 |get command register
- andil #0x7f,%d0 |strip to only command word
-|
-| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
-| fdsqrt are possible.
-| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
-| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
-|
- btstl #0,%d0
- bne normal |weed out fsqrt instructions
-|
-| cu_norm handles fmove in instructions with normalized inputs.
-| The routine round is used to correctly round the input for the
-| destination precision and mode.
-|
-cu_norm:
- st CU_ONLY(%a6) |set cu-only inst flag
- movew CMDREG1B(%a6),%d0
- andib #0x3b,%d0 |isolate bits to select inst
- tstb %d0
- beql cu_nmove |if zero, it is an fmove
- cmpib #0x18,%d0
- beql cu_nabs |if $18, it is fabs
- cmpib #0x1a,%d0
- beql cu_nneg |if $1a, it is fneg
-|
-| Inst is ftst. Check the source operand and set the cc's accordingly.
-| No write is done, so simply rts.
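The monadic decode above reads as a small switch: after bit 0 weeds out the three square-root encodings ($05/$41/$45), which need a normalized source, the command bits are masked with $3b, where $00 selects fmove (also covering fsmove/fdmove), $18 fabs, $1a fneg, and anything else ftst. A C sketch with invented enum names:

    enum mon_op { OP_FMOVE, OP_FABS, OP_FNEG, OP_FTST, OP_FSQRT };

    /* cmd is the low 7 bits of CMDREG1B.  fsqrt/fssqrt/fdsqrt all carry
     * bit 0, so they are filtered first; the remaining cu-only
     * instructions are distinguished under the $3b mask. */
    enum mon_op decode_monadic(unsigned cmd)
    {
        if (cmd & 1)
            return OP_FSQRT;            /* must normalize the source first */
        switch (cmd & 0x3b) {
        case 0x00: return OP_FMOVE;     /* fmove/fsmove/fdmove */
        case 0x18: return OP_FABS;
        case 0x1a: return OP_FNEG;
        default:   return OP_FTST;
        }
    }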
-| -cu_ntst: - movew LOCAL_EX(%a0),%d0 - bclrl #15,%d0 - sne LOCAL_SGN(%a0) - beqs cu_ntpo - orl #neg_mask,USER_FPSR(%a6) |set N -cu_ntpo: - cmpiw #0x7fff,%d0 |test for inf/nan - bnes cu_ntcz - tstl LOCAL_HI(%a0) - bnes cu_ntn - tstl LOCAL_LO(%a0) - bnes cu_ntn - orl #inf_mask,USER_FPSR(%a6) - rts -cu_ntn: - orl #nan_mask,USER_FPSR(%a6) - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - - rts -cu_ntcz: - tstl LOCAL_HI(%a0) - bnel cu_ntsx - tstl LOCAL_LO(%a0) - bnel cu_ntsx - orl #z_mask,USER_FPSR(%a6) -cu_ntsx: - rts -| -| Inst is fabs. Execute the absolute value function on the input. -| Branch to the fmove code. If the operand is NaN, do nothing. -| -cu_nabs: - moveb STAG(%a6),%d0 - btstl #5,%d0 |test for NaN or zero - bne wr_etemp |if either, simply write it - bclrb #7,LOCAL_EX(%a0) |do abs - bras cu_nmove |fmove code will finish -| -| Inst is fneg. Execute the negate value function on the input. -| Fall though to the fmove code. If the operand is NaN, do nothing. -| -cu_nneg: - moveb STAG(%a6),%d0 - btstl #5,%d0 |test for NaN or zero - bne wr_etemp |if either, simply write it - bchgb #7,LOCAL_EX(%a0) |do neg -| -| Inst is fmove. This code also handles all result writes. -| If bit 2 is set, round is forced to double. If it is clear, -| and bit 6 is set, round is forced to single. If both are clear, -| the round precision is found in the fpcr. If the rounding precision -| is double or single, round the result before the write. -| -cu_nmove: - moveb STAG(%a6),%d0 - andib #0xe0,%d0 |isolate stag bits - bne wr_etemp |if not norm, simply write it - btstb #2,CMDREG1B+1(%a6) |check for rd - bne cu_nmrd - btstb #6,CMDREG1B+1(%a6) |check for rs - bne cu_nmrs -| -| The move or operation is not with forced precision. Test for -| nan or inf as the input; if so, simply write it to FPn. Use the -| FPCR_MODE byte to get rounding on norms and zeros. -| -cu_nmnr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 - tstb %d0 |check for extended - beq cu_wrexn |if so, just write result - cmpib #1,%d0 |check for single - beq cu_nmrs |fall through to double -| -| The move is fdmove or round precision is double. -| -cu_nmrd: - movel #2,%d0 |set up the size for denorm - movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold - andw #0x7fff,%d1 - cmpw #0x3c01,%d1 - bls cu_nunfl - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - orl #0x00020000,%d1 |or in rprec (double) - clrl %d0 |clear g,r,s for round - bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nmrdc - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nmrdc: - movew LOCAL_EX(%a0),%d1 |check for overflow - andw #0x7fff,%d1 - cmpw #0x43ff,%d1 - bge cu_novfl |take care of overflow case - bra cu_wrexn -| -| The move is fsmove or round precision is single. -| -cu_nmrs: - movel #1,%d0 - movew LOCAL_EX(%a0),%d1 - andw #0x7fff,%d1 - cmpw #0x3f81,%d1 - bls cu_nunfl - bfextu FPCR_MODE(%a6){#2:#2},%d1 - orl #0x00010000,%d1 - clrl %d0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nmrsc - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nmrsc: - movew LOCAL_EX(%a0),%d1 - andw #0x7FFF,%d1 - cmpw #0x407f,%d1 - blt cu_wrexn -| -| The operand is above precision boundaries. Use t_ovfl to -| generate the correct value. -| -cu_novfl: - bsr t_ovfl - bra cu_wrexn -| -| The operand is below precision boundaries. Use denorm to -| generate the correct value. 
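The precision boundaries used by cu_nmrs/cu_nmrd above come straight from sp_bnds/dp_bnds: a single result is a normal only for extended biased exponents in [$3f81, $407e] and a double only in [$3c01, $43fe]; the code routes exponents at or below the low bound to the denorm path and rounded exponents past the high bound to overflow. As a sketch (fits_as is an invented name):

    #include <stdint.h>

    enum fit { FIT_OK, FIT_UNDERFLOW, FIT_OVERFLOW };

    /* Extended-format biased exponent ranges of the narrower normals:
     * single [0x3f81, 0x407e], double [0x3c01, 0x43fe]. */
    enum fit fits_as(int prec_single, uint16_t biased_exp)
    {
        uint16_t lo = prec_single ? 0x3f81 : 0x3c01;
        uint16_t hi = prec_single ? 0x407e : 0x43fe;
        if (biased_exp <= lo) return FIT_UNDERFLOW;  /* denormalize */
        if (biased_exp > hi)  return FIT_OVERFLOW;   /* t_ovfl / ovf_res */
        return FIT_OK;
    }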
-| -cu_nunfl: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsr denorm - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs cu_nucont - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nucont: - bfextu FPCR_MODE(%a6){#2:#2},%d1 - btstb #2,CMDREG1B+1(%a6) |check for rd - bne inst_d - btstb #6,CMDREG1B+1(%a6) |check for rs - bne inst_s - swap %d1 - moveb FPCR_MODE(%a6),%d1 - lsrb #6,%d1 - swap %d1 - bra inst_sd -inst_d: - orl #0x00020000,%d1 - bra inst_sd -inst_s: - orl #0x00010000,%d1 -inst_sd: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nuflp - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nuflp: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs cu_nuninx - orl #aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL -cu_nuninx: - tstl LOCAL_HI(%a0) |test for zero - bnes cu_nunzro - tstl LOCAL_LO(%a0) - bnes cu_nunzro -| -| The mantissa is zero from the denorm loop. Check sign and rmode -| to see if rounding should have occurred which would leave the lsb. -| - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 |isolate rmode - cmpil #0x20,%d0 - blts cu_nzro - bnes cu_nrp -cu_nrm: - tstw LOCAL_EX(%a0) |if positive, set lsb - bges cu_nzro - btstb #7,FPCR_MODE(%a6) |check for double - beqs cu_nincs - bras cu_nincd -cu_nrp: - tstw LOCAL_EX(%a0) |if positive, set lsb - blts cu_nzro - btstb #7,FPCR_MODE(%a6) |check for double - beqs cu_nincs -cu_nincd: - orl #0x800,LOCAL_LO(%a0) |inc for double - bra cu_nunzro -cu_nincs: - orl #0x100,LOCAL_HI(%a0) |inc for single - bra cu_nunzro -cu_nzro: - orl #z_mask,USER_FPSR(%a6) - moveb STAG(%a6),%d0 - andib #0xe0,%d0 - cmpib #0x40,%d0 |check if input was tagged zero - beqs cu_numv -cu_nunzro: - orl #unfl_mask,USER_FPSR(%a6) |set unfl -cu_numv: - movel (%a0),ETEMP(%a6) - movel 4(%a0),ETEMP_HI(%a6) - movel 8(%a0),ETEMP_LO(%a6) -| -| Write the result to memory, setting the fpsr cc bits. NaN and Inf -| bypass cu_wrexn. -| -cu_wrexn: - tstw LOCAL_EX(%a0) |test for zero - beqs cu_wrzero - cmpw #0x8000,LOCAL_EX(%a0) |test for zero - bnes cu_wreon -cu_wrzero: - orl #z_mask,USER_FPSR(%a6) |set Z bit -cu_wreon: - tstw LOCAL_EX(%a0) - bpl wr_etemp - orl #neg_mask,USER_FPSR(%a6) - bra wr_etemp - -| -| HANDLE SOURCE DENORM HERE -| -| ;clear denorm stag to norm -| ;write the new tag & ete15 to the fstack -mon_dnrm: -| -| At this point, check for the cases in which normalizing the -| denorm produces incorrect results. -| - tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would - bnes nrm_src |require normalization of denorm - -| At this point: -| monadic instructions: fabs = $18 fneg = $1a ftst = $3a -| fmove = $00 fsmove = $40 fdmove = $44 -| fsqrt = $05* fssqrt = $41 fdsqrt = $45 -| (*fsqrt reencoded to $05) -| - movew CMDREG1B(%a6),%d0 |get command register - andil #0x7f,%d0 |strip to only command word -| -| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and -| fdsqrt are possible. 
-| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize) -| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) -| - btstl #0,%d0 - bnes nrm_src |weed out fsqrt instructions - st CU_ONLY(%a6) |set cu-only inst flag - bra cu_dnrm |fmove, fabs, fneg, ftst -| ;cases go to cu_dnrm -nrm_src: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsr nrm_set |normalize number (exponent will go -| ; negative) - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign - - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs spos - bsetb #sign_bit,LOCAL_EX(%a0) -spos: - bfclr STAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0 - bsetb #4,STAG(%a6) |set ETE15 - orb #0xf0,DNRM_FLG(%a6) -normal: - tstb DNRM_FLG(%a6) |check if any of the ops were denorms - bne ck_wrap |if so, check if it is a potential -| ;wrap-around case -fix_stk: - moveb #0xfe,CU_SAVEPC(%a6) - bclrb #E1,E_BYTE(%a6) - - clrw NMNEXC(%a6) - - st RES_FLG(%a6) |indicate that a restore is needed - rts - -| -| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and -| ftst) completely in software without an frestore to the 040. -| -cu_dnrm: - st CU_ONLY(%a6) - movew CMDREG1B(%a6),%d0 - andib #0x3b,%d0 |isolate bits to select inst - tstb %d0 - beql cu_dmove |if zero, it is an fmove - cmpib #0x18,%d0 - beql cu_dabs |if $18, it is fabs - cmpib #0x1a,%d0 - beql cu_dneg |if $1a, it is fneg -| -| Inst is ftst. Check the source operand and set the cc's accordingly. -| No write is done, so simply rts. -| -cu_dtst: - movew LOCAL_EX(%a0),%d0 - bclrl #15,%d0 - sne LOCAL_SGN(%a0) - beqs cu_dtpo - orl #neg_mask,USER_FPSR(%a6) |set N -cu_dtpo: - cmpiw #0x7fff,%d0 |test for inf/nan - bnes cu_dtcz - tstl LOCAL_HI(%a0) - bnes cu_dtn - tstl LOCAL_LO(%a0) - bnes cu_dtn - orl #inf_mask,USER_FPSR(%a6) - rts -cu_dtn: - orl #nan_mask,USER_FPSR(%a6) - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - rts -cu_dtcz: - tstl LOCAL_HI(%a0) - bnel cu_dtsx - tstl LOCAL_LO(%a0) - bnel cu_dtsx - orl #z_mask,USER_FPSR(%a6) -cu_dtsx: - rts -| -| Inst is fabs. Execute the absolute value function on the input. -| Branch to the fmove code. -| -cu_dabs: - bclrb #7,LOCAL_EX(%a0) |do abs - bras cu_dmove |fmove code will finish -| -| Inst is fneg. Execute the negate value function on the input. -| Fall though to the fmove code. -| -cu_dneg: - bchgb #7,LOCAL_EX(%a0) |do neg -| -| Inst is fmove. This code also handles all result writes. -| If bit 2 is set, round is forced to double. If it is clear, -| and bit 6 is set, round is forced to single. If both are clear, -| the round precision is found in the fpcr. If the rounding precision -| is double or single, the result is zero, and the mode is checked -| to determine if the lsb of the result should be set. -| -cu_dmove: - btstb #2,CMDREG1B+1(%a6) |check for rd - bne cu_dmrd - btstb #6,CMDREG1B+1(%a6) |check for rs - bne cu_dmrs -| -| The move or operation is not with forced precision. Use the -| FPCR_MODE byte to get rounding. -| -cu_dmnr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 - tstb %d0 |check for extended - beq cu_wrexd |if so, just write result - cmpib #1,%d0 |check for single - beq cu_dmrs |fall through to double -| -| The move is fdmove or round precision is double. Result is zero. -| Check rmode for rp or rm and set lsb accordingly. 
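That zero-versus-lsb choice, used by cu_dmrd here and by cu_dmrs below, depends only on the result's sign and the rounding direction: round-to-plus-infinity on a positive source, or round-to-minus-infinity on a negative one, must deliver the target format's smallest denorm instead of a clean zero. A sketch, with rmode encoded as in FPCR bits 5-4 (0=RN, 1=RZ, 2=RM, 3=RP) and an invented function name:

    /* Magnitude that replaces an underflowed-to-zero result. */
    double underflow_result(int negative, int rmode, double smallest_denorm)
    {
        if (!negative && rmode == 3)   /* RP on a positive source: +lsb */
            return smallest_denorm;
        if (negative && rmode == 2)    /* RM on a negative source: -lsb */
            return -smallest_denorm;
        return negative ? -0.0 : 0.0;  /* otherwise a signed zero */
    }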
-| -cu_dmrd: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - tstw LOCAL_EX(%a0) |check sign - blts cu_dmdn - cmpib #3,%d1 |check for rp - bne cu_dpd |load double pos zero - bra cu_dpdr |load double pos zero w/lsb -cu_dmdn: - cmpib #2,%d1 |check for rm - bne cu_dnd |load double neg zero - bra cu_dndr |load double neg zero w/lsb -| -| The move is fsmove or round precision is single. Result is zero. -| Check for rp or rm and set lsb accordingly. -| -cu_dmrs: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - tstw LOCAL_EX(%a0) |check sign - blts cu_dmsn - cmpib #3,%d1 |check for rp - bne cu_spd |load single pos zero - bra cu_spdr |load single pos zero w/lsb -cu_dmsn: - cmpib #2,%d1 |check for rm - bne cu_snd |load single neg zero - bra cu_sndr |load single neg zero w/lsb -| -| The precision is extended, so the result in etemp is correct. -| Simply set unfl (not inex2 or aunfl) and write the result to -| the correct fp register. -cu_wrexd: - orl #unfl_mask,USER_FPSR(%a6) - tstw LOCAL_EX(%a0) - beq wr_etemp - orl #neg_mask,USER_FPSR(%a6) - bra wr_etemp -| -| These routines write +/- zero in double format. The routines -| cu_dpdr and cu_dndr set the double lsb. -| -cu_dpd: - movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dpdr: - movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - movel #0x800,LOCAL_LO(%a0) |with lsb set - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dnd: - movel #0xbc010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dndr: - movel #0xbc010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - movel #0x800,LOCAL_LO(%a0) |with lsb set - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -| -| These routines write +/- zero in single format. The routines -| cu_dpdr and cu_dndr set the single lsb. -| -cu_spd: - movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_spdr: - movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero - movel #0x100,LOCAL_HI(%a0) |with lsb set - clrl LOCAL_LO(%a0) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_snd: - movel #0xbf810000,LOCAL_EX(%a0) |force pos single zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_sndr: - movel #0xbf810000,LOCAL_EX(%a0) |force pos single zero - movel #0x100,LOCAL_HI(%a0) |with lsb set - clrl LOCAL_LO(%a0) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp - -| -| This code checks for 16-bit overflow conditions on dyadic -| operations which are not restorable into the floating-point -| unit and must be completed in software. Basically, this -| condition exists with a very large norm and a denorm. One -| of the operands must be denormalized to enter this code. 
-| -| Flags used: -| DY_MO_FLG contains 0 for monadic op, $ff for dyadic -| DNRM_FLG contains $00 for neither op denormalized -| $0f for the destination op denormalized -| $f0 for the source op denormalized -| $ff for both ops denormalized -| -| The wrap-around condition occurs for add, sub, div, and cmp -| when -| -| abs(dest_exp - src_exp) >= $8000 -| -| and for mul when -| -| (dest_exp + src_exp) < $0 -| -| we must process the operation here if this case is true. -| -| The rts following the frcfpn routine is the exit from res_func -| for this condition. The restore flag (RES_FLG) is left clear. -| No frestore is done unless an exception is to be reported. -| -| For fadd: -| if(sign_of(dest) != sign_of(src)) -| replace exponent of src with $3fff (keep sign) -| use fpu to perform dest+new_src (user's rmode and X) -| clr sticky -| else -| set sticky -| call round with user's precision and mode -| move result to fpn and wbtemp -| -| For fsub: -| if(sign_of(dest) == sign_of(src)) -| replace exponent of src with $3fff (keep sign) -| use fpu to perform dest+new_src (user's rmode and X) -| clr sticky -| else -| set sticky -| call round with user's precision and mode -| move result to fpn and wbtemp -| -| For fdiv/fsgldiv: -| if(both operands are denorm) -| restore_to_fpu; -| if(dest is norm) -| force_ovf; -| else(dest is denorm) -| force_unf: -| -| For fcmp: -| if(dest is norm) -| N = sign_of(dest); -| else(dest is denorm) -| N = sign_of(src); -| -| For fmul: -| if(both operands are denorm) -| force_unf; -| if((dest_exp + src_exp) < 0) -| force_unf: -| else -| restore_to_fpu; -| -| local equates: - .set addcode,0x22 - .set subcode,0x28 - .set mulcode,0x23 - .set divcode,0x20 - .set cmpcode,0x38 -ck_wrap: - | tstb DY_MO_FLG(%a6) ;check for fsqrt - beq fix_stk |if zero, it is fsqrt - movew CMDREG1B(%a6),%d0 - andiw #0x3b,%d0 |strip to command bits - cmpiw #addcode,%d0 - beq wrap_add - cmpiw #subcode,%d0 - beq wrap_sub - cmpiw #mulcode,%d0 - beq wrap_mul - cmpiw #cmpcode,%d0 - beq wrap_cmp -| -| Inst is fdiv. -| -wrap_div: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and force the result. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes div_srcd -div_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x7fff,%d0 - blt fix_stk |if less, not wrap case - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beq force_unf - st WBTEMP_SGN(%a6) - bra force_unf - -ckinf_ns: - moveb STAG(%a6),%d0 |check source tag for inf or nan - bra ck_in_com -ckinf_nd: - moveb DTAG(%a6),%d0 |check destination tag for inf or nan -ck_in_com: - andib #0x60,%d0 |isolate tag bits - cmpb #0x40,%d0 |is it inf? - beq nan_or_inf |not wrap case - cmpb #0x60,%d0 |is it nan? - beq nan_or_inf |yes, not wrap case? - cmpb #0x20,%d0 |is it a zero? 
- beq nan_or_inf |yes - clrl %d0 - rts |then ; it is either a zero of norm, -| ;check wrap case -nan_or_inf: - moveql #-1,%d0 - rts - - - -div_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beqs force_ovf - st WBTEMP_SGN(%a6) -| -| This code handles the case of the instruction resulting in -| an overflow condition. -| -force_ovf: - bclrb #E1,E_BYTE(%a6) - orl #ovfl_inx_mask,USER_FPSR(%a6) - clrw NMNEXC(%a6) - leal WBTEMP(%a6),%a0 |point a0 to memory location - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcovf_fpcr - btstl #2,%d0 |check for double - bnes frcovf_dbl - movel #0x1,%d0 |inst is forced single - bras frcovf_rnd -frcovf_dbl: - movel #0x2,%d0 |inst is forced double - bras frcovf_rnd -frcovf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec -frcovf_rnd: - -| The 881/882 does not set inex2 for the following case, so the -| line is commented out to be compatible with 881/882 -| tst.b %d0 -| beq.b frcovf_x -| or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2 - -|frcovf_x: - bsrl ovf_res |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly -| ;returns in external format - bfclr WBTEMP_SGN(%a6){#0:#8} - beq frcfpn - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpn -| -| Inst is fadd. -| -wrap_add: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes add_srcd -add_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - bra add_wrap -add_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case -| -| Check the signs of the operands. If they are unlike, the fpu -| can be used to add the norm and 1.0 with the sign of the -| denorm and it will correctly generate the result in extended -| precision. We can then call round with no sticky and the result -| will be correct for the user's rounding mode and precision. If -| the signs are the same, we call round with the sticky bit set -| and the result will be correct for the user's rounding mode and -| precision. -| -add_wrap: - movew ETEMP_EX(%a6),%d0 - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beq add_same -| -| The signs are unlike. -| - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes add_u_srcd - movew FPTEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,FPTEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex ETEMP(%a6),%fp0 - faddx FPTEMP(%a6),%fp0 - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -add_u_srcd: - movew ETEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,ETEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex ETEMP(%a6),%fp0 - faddx FPTEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) |use internal format for round - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -| -| Signs are alike: -| -add_same: - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes add_s_srcd -add_s_destd: - leal ETEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs add_s_dclr - bsetb #sign_bit,ETEMP_EX(%a6) -add_s_dclr: - leal WBTEMP(%a6),%a0 - movel ETEMP(%a6),(%a0) |write result to wbtemp - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - tstw ETEMP_EX(%a6) - bgt add_ckovf - orl #neg_mask,USER_FPSR(%a6) - bra add_ckovf -add_s_srcd: - leal FPTEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,FPTEMP_EX(%a6) - sne FPTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs add_s_sclr - bsetb #sign_bit,FPTEMP_EX(%a6) -add_s_sclr: - leal WBTEMP(%a6),%a0 - movel FPTEMP(%a6),(%a0) |write result to wbtemp - movel FPTEMP_HI(%a6),4(%a0) - movel FPTEMP_LO(%a6),8(%a0) - tstw FPTEMP_EX(%a6) - bgt add_ckovf - orl #neg_mask,USER_FPSR(%a6) -add_ckovf: - movew WBTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x7fff,%d0 - bne frcfpnr -| -| The result has overflowed to $7fff exponent. Set I, ovfl, -| and aovfl, and clr the mantissa (incorrectly set by the -| round routine.) -| - orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) - clrl 4(%a0) - bra frcfpnr -| -| Inst is fsub. -| -wrap_sub: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes sub_srcd -sub_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - bra sub_wrap -sub_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case -| -| Check the signs of the operands. If they are alike, the fpu -| can be used to subtract from the norm 1.0 with the sign of the -| denorm and it will correctly generate the result in extended -| precision. We can then call round with no sticky and the result -| will be correct for the user's rounding mode and precision. If -| the signs are unlike, we call round with the sticky bit set -| and the result will be correct for the user's rounding mode and -| precision. -| -sub_wrap: - movew ETEMP_EX(%a6),%d0 - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - bne sub_diff -| -| The signs are alike. -| - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes sub_u_srcd - movew FPTEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,FPTEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex FPTEMP(%a6),%fp0 - fsubx ETEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -sub_u_srcd: - movew ETEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,ETEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex FPTEMP(%a6),%fp0 - fsubx ETEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -| -| Signs are unlike: -| -sub_diff: - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? - bnes sub_s_srcd -sub_s_destd: - leal ETEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round -| -| Since the dest is the denorm, the sign is the opposite of the -| norm sign. 
-| - eoriw #0x8000,ETEMP_EX(%a6) |flip sign on result - tstw ETEMP_EX(%a6) - bgts sub_s_dwr - orl #neg_mask,USER_FPSR(%a6) -sub_s_dwr: - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs sub_s_dclr - bsetb #sign_bit,ETEMP_EX(%a6) -sub_s_dclr: - leal WBTEMP(%a6),%a0 - movel ETEMP(%a6),(%a0) |write result to wbtemp - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bra sub_ckovf -sub_s_srcd: - leal FPTEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,FPTEMP_EX(%a6) - sne FPTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs sub_s_sclr - bsetb #sign_bit,FPTEMP_EX(%a6) -sub_s_sclr: - leal WBTEMP(%a6),%a0 - movel FPTEMP(%a6),(%a0) |write result to wbtemp - movel FPTEMP_HI(%a6),4(%a0) - movel FPTEMP_LO(%a6),8(%a0) - tstw FPTEMP_EX(%a6) - bgt sub_ckovf - orl #neg_mask,USER_FPSR(%a6) -sub_ckovf: - movew WBTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x7fff,%d0 - bne frcfpnr -| -| The result has overflowed to $7fff exponent. Set I, ovfl, -| and aovfl, and clr the mantissa (incorrectly set by the -| round routine.) -| - orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) - clrl 4(%a0) - bra frcfpnr -| -| Inst is fcmp. -| -wrap_cmp: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes cmp_srcd -cmp_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - tstw ETEMP_EX(%a6) |set N to ~sign_of(src) - bge cmp_setn - rts -cmp_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - tstw FPTEMP_EX(%a6) |set N to sign_of(dest) - blt cmp_setn - rts -cmp_setn: - orl #neg_mask,USER_FPSR(%a6) - rts - -| -| Inst is fmul. -| -wrap_mul: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq force_unf |force an underflow (really!) -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes mul_srcd -mul_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - addl %d1,%d0 |subtract dest from src - bgt fix_stk - bra force_unf -mul_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - addl %d1,%d0 |subtract src from dest - bgt fix_stk - -| -| This code handles the case of the instruction resulting in -| an underflow condition. 
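The wrap-around tests described earlier and the sign selection used by force_ovf/force_unf below reduce to a few lines of integer arithmetic: add/sub/cmp wrap when the exponent difference reaches $8000 (the two divide paths cut at $7fff or $8000 depending on which operand is the denorm), multiply is forced to underflow when the exponent sum is not positive, and the sign is the XOR of the operand signs. A C restatement, with the denorm's exponent sign-extended as the bfexts in the code does (function names invented):

    #include <stdint.h>

    /* One operand is a normalized extended denorm, so its 15-bit exponent
     * is read sign-extended (negative); the normal operand's is read
     * unsigned.  Differences of 0x8000 or more no longer fit the 040
     * frame, so the FPSP finishes those operations itself. */
    int addsub_wraps(int32_t norm_exp, int32_t denorm_exp)
    {
        return (norm_exp - denorm_exp) >= 0x8000;
    }

    int mul_wraps(int32_t norm_exp, int32_t denorm_exp)
    {
        return (norm_exp + denorm_exp) <= 0;   /* forced underflow when true */
    }

    /* force_ovf/force_unf pick the result sign the way mul/div hardware
     * would: the XOR of the operand sign bits. */
    int result_is_negative(uint16_t src_sign_exp, uint16_t dst_sign_exp)
    {
        return ((src_sign_exp ^ dst_sign_exp) & 0x8000) != 0;
    }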
-| -force_unf: - bclrb #E1,E_BYTE(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - clrw NMNEXC(%a6) - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beqs frcunfcont - st WBTEMP_SGN(%a6) -frcunfcont: - lea WBTEMP(%a6),%a0 |point a0 to memory location - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcunf_fpcr - btstl #2,%d0 |check for double - bnes frcunf_dbl - movel #0x1,%d0 |inst is forced single - bras frcunf_rnd -frcunf_dbl: - movel #0x2,%d0 |inst is forced double - bras frcunf_rnd -frcunf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec -frcunf_rnd: - bsrl unf_sub |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs frcfpn - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpn - -| -| Write the result to the user's fpn. All results must be HUGE to be -| written; otherwise the results would have overflowed or underflowed. -| If the rounding precision is single or double, the ovf_res routine -| is needed to correctly supply the max value. -| -frcfpnr: - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcfpn_fpcr - btstl #2,%d0 |check for double - bnes frcfpn_dbl - movel #0x1,%d0 |inst is forced single - bras frcfpn_rnd -frcfpn_dbl: - movel #0x2,%d0 |inst is forced double - bras frcfpn_rnd -frcfpn_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec - tstb %d0 - beqs frcfpn |if extended, write what you got -frcfpn_rnd: - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl ovf_res |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs frcfpn_clr - bsetb #sign_bit,WBTEMP_EX(%a6) -frcfpn_clr: - orl #ovfinx_mask,USER_FPSR(%a6) -| -| Perform the write. -| -frcfpn: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register - cmpib #3,%d0 - bles frc0123 |check if dest is fp0-fp3 - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx WBTEMP(%a6),%d0 - rts -frc0123: - cmpib #0,%d0 - beqs frc0_dst - cmpib #1,%d0 - beqs frc1_dst - cmpib #2,%d0 - beqs frc2_dst -frc3_dst: - movel WBTEMP_EX(%a6),USER_FP3(%a6) - movel WBTEMP_HI(%a6),USER_FP3+4(%a6) - movel WBTEMP_LO(%a6),USER_FP3+8(%a6) - rts -frc2_dst: - movel WBTEMP_EX(%a6),USER_FP2(%a6) - movel WBTEMP_HI(%a6),USER_FP2+4(%a6) - movel WBTEMP_LO(%a6),USER_FP2+8(%a6) - rts -frc1_dst: - movel WBTEMP_EX(%a6),USER_FP1(%a6) - movel WBTEMP_HI(%a6),USER_FP1+4(%a6) - movel WBTEMP_LO(%a6),USER_FP1+8(%a6) - rts -frc0_dst: - movel WBTEMP_EX(%a6),USER_FP0(%a6) - movel WBTEMP_HI(%a6),USER_FP0+4(%a6) - movel WBTEMP_LO(%a6),USER_FP0+8(%a6) - rts - -| -| Write etemp to fpn. -| A check is made on enabled and signalled snan exceptions, -| and the destination is not overwritten if this condition exists. -| This code is designed to make fmoveins of unsupported data types -| faster. 
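frcfpn's register write-back above builds a dynamic fmovem mask for fp4-fp7: with fp0 at mask bit 7 under the addressing mode used, the one-hot bit is 7 minus the register number, while fp0-fp3 are stored directly into the saved-register area of the frame. The mask computation in C:

    #include <stdint.h>

    /* Mirrors "movel #7,%d1; subl %d0,%d1; bsetl %d1,%d0":
     * mask bit = 7 - register number. */
    uint8_t fmovem_mask(unsigned fpreg)
    {
        return (uint8_t)(1u << (7u - (fpreg & 7u)));
    }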
-| -wr_etemp: - btstb #snan_bit,FPSR_EXCEPT(%a6) |if snan is set, and - beqs fmoveinc |enabled, force restore - btstb #snan_bit,FPCR_ENABLE(%a6) |and don't overwrite - beqs fmoveinc |the dest - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - tstb ETEMP(%a6) |check for negative - blts snan_neg - rts -snan_neg: - orl #neg_bit,USER_FPSR(%a6) |snan is negative; set N - rts -fmoveinc: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - moveb STAG(%a6),%d0 |check if stag is inf - andib #0xe0,%d0 - cmpib #0x40,%d0 - bnes fminc_cnan - orl #inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) - bra fminc_con -fminc_cnan: - cmpib #0x60,%d0 |check if stag is NaN - bnes fminc_czero - orl #nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) - bra fminc_con -fminc_czero: - cmpib #0x20,%d0 |check if zero - bnes fminc_con - orl #z_mask,USER_FPSR(%a6) |if zero, set Z - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) -fminc_con: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register - cmpib #3,%d0 - bles fp0123 |check if dest is fp0-fp3 - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx ETEMP(%a6),%d0 - rts - -fp0123: - cmpib #0,%d0 - beqs fp0_dst - cmpib #1,%d0 - beqs fp1_dst - cmpib #2,%d0 - beqs fp2_dst -fp3_dst: - movel ETEMP_EX(%a6),USER_FP3(%a6) - movel ETEMP_HI(%a6),USER_FP3+4(%a6) - movel ETEMP_LO(%a6),USER_FP3+8(%a6) - rts -fp2_dst: - movel ETEMP_EX(%a6),USER_FP2(%a6) - movel ETEMP_HI(%a6),USER_FP2+4(%a6) - movel ETEMP_LO(%a6),USER_FP2+8(%a6) - rts -fp1_dst: - movel ETEMP_EX(%a6),USER_FP1(%a6) - movel ETEMP_HI(%a6),USER_FP1+4(%a6) - movel ETEMP_LO(%a6),USER_FP1+8(%a6) - rts -fp0_dst: - movel ETEMP_EX(%a6),USER_FP0(%a6) - movel ETEMP_HI(%a6),USER_FP0+4(%a6) - movel ETEMP_LO(%a6),USER_FP0+8(%a6) - rts - -opclass3: - st CU_ONLY(%a6) - movew CMDREG1B(%a6),%d0 |check if packed moveout - andiw #0x0c00,%d0 |isolate last 2 bits of size field - cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed - beq pack_out |else it is norm or denorm - bra mv_out - - -| -| MOVE OUT -| - -mv_tbl: - .long li - .long sgp - .long xp - .long mvout_end |should never be taken - .long wi - .long dp - .long bi - .long mvout_end |should never be taken -mv_out: - bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1 - leal mv_tbl,%a0 - movel %a0@(%d1:l:4),%a0 - jmp (%a0) - -| -| This exit is for move-out to memory. The aunfl bit is -| set if the result is inex and unfl is signalled. -| -mvout_end: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs no_aufl - btstb #unfl_bit,FPSR_EXCEPT(%a6) - beqs no_aufl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_aufl: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - fmovel #0,%FPSR |clear any cc bits from res_func -| -| Return ETEMP to extended format from internal extended format so -| that gen_except will have a correctly signed value for ovfl/unfl -| handlers. -| - bfclr ETEMP_SGN(%a6){#0:#8} - beqs mvout_con - bsetb #sign_bit,ETEMP_EX(%a6) -mvout_con: - rts -| -| This exit is for move-out to int register. The aunfl bit is -| not set in any case for this move. 
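The integer move-out routines that follow (li, wi, bi) bracket the operand against the destination's representable range before letting the 040 convert: saturating values raise operr (with no inex2 on integer overflow), while boundary values that merely lose bits raise inexact. A simplified C sketch for the long case; the assembly cuts at the half-ulp points shown in its comments, and the final cast here stands in for the 040's conversion in the user's rounding mode:

    #include <stdint.h>

    #define FPSR_OPERR (1u << 13)   /* exception status bits per the manual */
    #define FPSR_INEX2 (1u << 9)

    int32_t to_int32(long double x, uint32_t *fpsr)
    {
        if (x >= 2147483647.0L) {                    /* at or above INT32_MAX */
            if (x > 2147483647.0L)
                *fpsr |= (x >= 2147483647.5L) ? FPSR_OPERR : FPSR_INEX2;
            return INT32_MAX;
        }
        if (x <= -2147483648.0L) {                   /* at or below INT32_MIN */
            if (x < -2147483648.0L)
                *fpsr |= (x < -2147483648.5L) ? FPSR_OPERR : FPSR_INEX2;
            return INT32_MIN;
        }
        return (int32_t)x;   /* in range: hardware applies the user's rmode */
    }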
-| -mvouti_end: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - fmovel #0,%FPSR |clear any cc bits from res_func -| -| Return ETEMP to extended format from internal extended format so -| that gen_except will have a correctly signed value for ovfl/unfl -| handlers. -| - bfclr ETEMP_SGN(%a6){#0:#8} - beqs mvouti_con - bsetb #sign_bit,ETEMP_EX(%a6) -mvouti_con: - rts -| -| li is used to handle a long integer source specifier -| - -li: - moveql #4,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |if so, branch - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmpd #0x41dfffffffc00000,%fp0 -| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec - fbge lo_plrg - fcmpd #0xc1e0000000000000,%fp0 -| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec - fble lo_nlrg -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - - -lo_plrg: - movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmpd #0x41dfffffffe00000,%fp0 -| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -lo_nlrg: - movel #0x80000000,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmpd #0xc1e0000000100000,%fp0 -| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| wi is used to handle a word integer source specifier -| - -wi: - moveql #2,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |branch if so - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmps #0x46fffe00,%fp0 -| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec - fbge wo_plrg - fcmps #0xc7000000,%fp0 -| c7000000 in sgl prec = c00e00008000000000000000 in ext prec - fble wo_nlrg - -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - -wo_plrg: - movew #0x7fff,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmps #0x46ffff00,%fp0 -| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -wo_nlrg: - movew #0x8000,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmps #0xc7000080,%fp0 -| c7000080 in sgl prec = c00e00008000800000000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| bi is used to handle a byte integer source specifier -| - -bi: - moveql #1,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |branch if so - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmps #0x42fe0000,%fp0 -| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec - fbge by_plrg - fcmps #0xc3000000,%fp0 -| c3000000 in sgl prec = c00600008000000000000000 in ext prec - fble by_nlrg - -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - 
-by_plrg: - moveb #0x7f,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmps #0x42ff0000,%fp0 -| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -by_nlrg: - moveb #0x80,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmps #0xc3008000,%fp0 -| c3008000 in sgl prec = c00600008080000000000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| Common integer routines -| -| int_drnrm---account for possible nonzero result for round up with positive -| operand and round down for negative answer. In the first case (result = 1) -| byte-width (store in d0) of result must be honored. In the second case, -| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out). - -int_dnrm: - movel #0,L_SCR1(%a6) | initialize result to 0 - bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode - cmpb #2,%d1 - bmis int_inx | if RN or RZ, done - bnes int_rp | if RP, continue below - tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative - bpls int_inx | otherwise result is 0 - movel #-1,L_SCR1(%a6) - bras int_inx -int_rp: - tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if -| ; source is greater than 0 - bmis int_inx | otherwise, result is 0 - lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1 - addal %d0,%a1 | offset by destination width -1 - subal #1,%a1 - bsetb #0,(%a1) | set low bit at a1 address -int_inx: - oril #inx2a_mask,USER_FPSR(%a6) - bras int_wrt -int_operr: - fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended -| ;precision source that needs to be -| ;converted to integer this is required -| ;if the operr exception is enabled. -| ;set operr/aiop (no inex2 on int ovfl) - - oril #opaop_mask,USER_FPSR(%a6) -| ;fall through to perform int_wrt -int_wrt: - movel EXC_EA(%a6),%a1 |load destination address - tstl %a1 |check to see if it is a dest register - beqs wrt_dn |write data register - lea L_SCR1(%a6),%a0 |point to supervisor source address - bsrl mem_write - bra mvouti_end - -wrt_dn: - movel %d0,-(%sp) |d0 currently contains the size to write - bsrl get_fline |get_fline returns Dn in d0 - andiw #0x7,%d0 |isolate register - movel (%sp)+,%d1 |get size - cmpil #4,%d1 |most frequent case - beqs sz_long - cmpil #2,%d1 - bnes sz_con - orl #8,%d0 |add 'word' size to register# - bras sz_con -sz_long: - orl #0x10,%d0 |add 'long' size to register# -sz_con: - movel %d0,%d1 |reg_dest expects size:reg in d1 - bsrl reg_dest |load proper data register - bra mvouti_end -xp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - btstb #7,STAG(%a6) |check for extended denorm - bne xdnrm - clrl %d0 - bras do_fp |do normal case -sgp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - btstb #7,STAG(%a6) |check for extended denorm - bne sp_catas |branch if so - movew LOCAL_EX(%a0),%d0 - lea sp_bnds,%a1 - cmpw (%a1),%d0 - blt sp_under - cmpw 2(%a1),%d0 - bgt sp_over - movel #1,%d0 |set destination format to single - bras do_fp |do normal case -dp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - - btstb #7,STAG(%a6) |check for extended denorm - bne dp_catas |branch if so - - movew LOCAL_EX(%a0),%d0 - lea dp_bnds,%a1 - - cmpw (%a1),%d0 - blt dp_under - cmpw 2(%a1),%d0 - bgt dp_over - - movel #2,%d0 |set destination format to double -| ;fall through to do_fp -| -do_fp: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1 - swap %d0 |rnd prec in upper word - addl %d0,%d1 |d1 has PREC/MODE info - - clrl %d0 |clear 
g,r,s - - bsrl round |round - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - - bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format -| ;at this point only the dest -| ;formats sgl, dbl, ext are -| ;possible - cmpb #2,%d1 - bgts ddbl |double=5, extended=2, single=1 - bnes dsgl -| ;fall through to dext -dext: - bsrl dest_ext - bra mvout_end -dsgl: - bsrl dest_sgl - bra mvout_end -ddbl: - bsrl dest_dbl - bra mvout_end - -| -| Handle possible denorm or catastrophic underflow cases here -| -xdnrm: - bsr set_xop |initialize WBTEMP - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - bsrl dest_ext |store to memory - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end - -sp_under: - bsetb #etemp15_bit,STAG(%a6) - - cmpw 4(%a1),%d0 - blts sp_catas |catastrophic underflow case - - movel #1,%d0 |load in round precision - movel #sgl_thresh,%d1 |load in single denorm threshold - bsrl dpspdnrm |expects d1 to have the proper -| ;denorm threshold - bsrl dest_sgl |stores value to destination - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end |exit - -dp_under: - bsetb #etemp15_bit,STAG(%a6) - - cmpw 4(%a1),%d0 - blts dp_catas |catastrophic underflow case - - movel #dbl_thresh,%d1 |load in double precision threshold - movel #2,%d0 - bsrl dpspdnrm |expects d1 to have proper -| ;denorm threshold -| ;expects d0 to have round precision - bsrl dest_dbl |store value to destination - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end |exit - -| -| Handle catastrophic underflow cases here -| -sp_catas: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #1,%d0 |set round precision to sgl - - bsrl unf_sub |a0 points to result - - movel (%a7)+,USER_FPSR(%a6) - - movel #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference between -| ;denorm/norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - - bsrl dest_sgl |store the result - oril #unfinx_mask,USER_FPSR(%a6) - bra mvout_end - -dp_catas: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #2,%d0 |set round precision to dbl - bsrl unf_sub |a0 points to result - - movel (%a7)+,USER_FPSR(%a6) - - movel #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference between -| ;denorm/norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - - bsrl dest_dbl |store the result - oril #unfinx_mask,USER_FPSR(%a6) - bra mvout_end - -| -| Handle catastrophic overflow cases here -| -sp_over: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #1,%d0 - leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result - movel ETEMP_EX(%a6),(%a0) - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bsrl ovf_res - - movel (%a7)+,USER_FPSR(%a6) - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - bsrl dest_sgl - orl #ovfinx_mask,USER_FPSR(%a6) - bra mvout_end - -dp_over: -| Temp fix for z bit set in ovf_res - movel USER_FPSR(%a6),-(%a7) - - movel #2,%d0 - leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result - movel ETEMP_EX(%a6),(%a0) - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bsrl ovf_res - - movel (%a7)+,USER_FPSR(%a6) - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - bsrl dest_dbl - orl #ovfinx_mask,USER_FPSR(%a6) - bra mvout_end - -| -| DPSPDNRM -| -| This subroutine takes an extended normalized number and denormalizes -| it to the given round precision. 
This subroutine also decrements -| the input operand's exponent by 1 to account for the fact that -| dest_sgl or dest_dbl expects a normalized number's bias. -| -| Input: a0 points to a normalized number in internal extended format -| d0 is the round precision (=1 for sgl; =2 for dbl) -| d1 is the single precision or double precision -| denorm threshold -| -| Output: (In the format for dest_sgl or dest_dbl) -| a0 points to the destination -| a1 points to the operand -| -| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits -| -dpspdnrm: - movel %d0,-(%a7) |save round precision - clrl %d0 |clear initial g,r,s - bsrl dnrm_lp |careful with d0, it's needed by round - - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode - swap %d1 - movew 2(%a7),%d1 |set rounding precision - swap %d1 |at this point d1 has PREC/MODE info - bsrl round |round result, sets the inex bit in -| ;USER_FPSR if needed - - movew #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference in denorm -| ;vs norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - addw #4,%a7 |pop stack - rts -| -| SET_XOP initialized WBTEMP with the value pointed to by a0 -| input: a0 points to input operand in the internal extended format -| -set_xop: - movel LOCAL_EX(%a0),WBTEMP_EX(%a6) - movel LOCAL_HI(%a0),WBTEMP_HI(%a6) - movel LOCAL_LO(%a0),WBTEMP_LO(%a6) - bfclr WBTEMP_SGN(%a6){#0:#8} - beqs sxop - bsetb #sign_bit,WBTEMP_EX(%a6) -sxop: - bfclr STAG(%a6){#5:#4} |clear wbtm66,wbtm1,wbtm0,sbit - rts -| -| P_MOVE -| -p_movet: - .long p_move - .long p_movez - .long p_movei - .long p_moven - .long p_move -p_regd: - .long p_dyd0 - .long p_dyd1 - .long p_dyd2 - .long p_dyd3 - .long p_dyd4 - .long p_dyd5 - .long p_dyd6 - .long p_dyd7 - -pack_out: - leal p_movet,%a0 |load jmp table address - movew STAG(%a6),%d0 |get source tag - bfextu %d0{#16:#3},%d0 |isolate source bits - movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag - jmp (%a0) |go to the routine - -p_write: - movel #0x0c,%d0 |get byte count - movel EXC_EA(%a6),%a1 |get the destination address - bsr mem_write |write the user's destination - moveb #0,CU_SAVEPC(%a6) |set the cu save pc to all 0's - -| -| Also note that the dtag must be set to norm here - this is because -| the 040 uses the dtag to execute the correct microcode. -| - bfclr DTAG(%a6){#0:#3} |set dtag to norm - - rts - -| Notes on handling of special case (zero, inf, and nan) inputs: -| 1. Operr is not signalled if the k-factor is greater than 18. -| 2. Per the manual, status bits are not set. 
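pack_out above is a plain tag dispatch: the source tag indexes p_movet and control jumps to the handler. The same structure in C, as a sketch; the enum encoding and the stub bodies are assumptions summarizing the routines that follow:

    /* Source-tag dispatch modelled on p_movet.  The enum mirrors the
     * STAG encoding; the handlers stand in for the assembly routines
     * of the same names below. */
    enum stag { STAG_NORM, STAG_ZERO, STAG_INF, STAG_NAN, STAG_DENORM };

    struct frame; /* exception frame, opaque here */

    static void p_move(struct frame *f)  { (void)f; /* norm/denorm: bindec */ }
    static void p_movez(struct frame *f) { (void)f; /* zero: clear exp/mant */ }
    static void p_movei(struct frame *f) { (void)f; /* inf: clear aiop      */ }
    static void p_moven(struct frame *f) { (void)f; /* nan: pass through    */ }

    static void (*const p_movet[5])(struct frame *) = {
        p_move, p_movez, p_movei, p_moven, p_move,
    };

    static void pack_out(enum stag tag, struct frame *f)
    {
        p_movet[tag](f); /* the indexed 'jmp (%a0)' above */
    }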
-| - -p_move: - movew CMDREG1B(%a6),%d0 - btstl #kfact_bit,%d0 |test for dynamic k-factor - beqs statick |if clear, k-factor is static -dynamick: - bfextu %d0{#25:#3},%d0 |isolate register for dynamic k-factor - lea p_regd,%a0 - movel %a0@(%d0:l:4),%a0 - jmp (%a0) -statick: - andiw #0x007f,%d0 |get k-factor - bfexts %d0{#25:#7},%d0 |sign extend d0 for bindec - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - bsrl bindec |perform the convert; data at a6 - leal FP_SCR1(%a6),%a0 |load a0 with result address - bral p_write -p_movez: - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - clrl 4(%a0) |load second lword of ZERO - clrl 8(%a0) |load third lword of ZERO - bra p_write |go write results -p_movei: - fmovel #0,%FPSR |clear aiop - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - bra p_write |go write the result -p_moven: - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - bra p_write |go write the result - -| -| Routines to read the dynamic k-factor from Dn. -| -p_dyd0: - movel USER_D0(%a6),%d0 - bras statick -p_dyd1: - movel USER_D1(%a6),%d0 - bras statick -p_dyd2: - movel %d2,%d0 - bras statick -p_dyd3: - movel %d3,%d0 - bras statick -p_dyd4: - movel %d4,%d0 - bras statick -p_dyd5: - movel %d5,%d0 - bras statick -p_dyd6: - movel %d6,%d0 - bra statick -p_dyd7: - movel %d7,%d0 - bra statick - - |end diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S deleted file mode 100644 index f84ae0dd435864d660e9054821cc199fa3135602..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/round.S +++ /dev/null @@ -1,648 +0,0 @@ -| -| round.sa 3.4 7/29/91 -| -| handle rounding and normalization tasks -| -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|ROUND idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -| -| round --- round result according to precision/mode -| -| a0 points to the input operand in the internal extended format -| d1(high word) contains rounding precision: -| ext = $0000xxxx -| sgl = $0001xxxx -| dbl = $0002xxxx -| d1(low word) contains rounding mode: -| RN = $xxxx0000 -| RZ = $xxxx0001 -| RM = $xxxx0010 -| RP = $xxxx0011 -| d0{31:29} contains the g,r,s bits (extended) -| -| On return the value pointed to by a0 is correctly rounded, -| a0 is preserved and the g-r-s bits in d0 are cleared. -| The result is not typed - the tag field is invalid. The -| result is still in the internal extended format. -| -| The INEX bit of USER_FPSR will be set if the rounded result was -| inexact (i.e. if any of the g-r-s bits were set). -| - - .global round -round: -| If g=r=s=0 then result is exact and round is done, else set -| the inex flag in status reg and continue. -| - bsrs ext_grs |this subroutine looks at the -| :rounding precision and sets -| ;the appropriate g-r-s bits. - tstl %d0 |if grs are zero, go force - bne rnd_cont |lower bits to zero for size - - swap %d1 |set up d1.w for round prec. - bra truncate - -rnd_cont: -| -| Use rounding mode as an index into a jump table for these modes. -| - orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex - lea mode_tab,%a1 - movel (%a1,%d1.w*4),%a1 - jmp (%a1) -| -| Jump table indexed by rounding mode in d1.w. All following assumes -| grs != 0. 
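The whole mode table that follows reduces to one decision: given the guard, round, and sticky bits, the sign, and the low mantissa bit, does the mantissa's low bit get incremented? A compact C restatement, using the standard round-to-nearest-even formulation rather than the add-then-clear trick rnd_near uses; the enum encoding matches the FPCR mode values documented above:

    #include <stdbool.h>

    enum rmode { RN, RZ, RM, RP }; /* FPCR rounding-mode encoding */

    /* True if rounding bumps the mantissa's low bit. */
    static bool round_increments(enum rmode mode, bool negative,
                                 bool g, bool r, bool s, bool lsb)
    {
        if (!g && !r && !s)       /* exact result: no rounding, no inex */
            return false;
        switch (mode) {
        case RN:                  /* nearest, ties go to even */
            return g && (r || s || lsb);
        case RZ:                  /* always truncate */
            return false;
        case RM:                  /* toward -inf: bump only if negative */
            return negative;
        case RP:                  /* toward +inf: bump only if positive */
            return !negative;
        }
        return false;
    }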
-|
-mode_tab:
- .long rnd_near
- .long rnd_zero
- .long rnd_mnus
- .long rnd_plus
-|
-| ROUND PLUS INFINITY
-|
-| If sign of fp number = 0 (positive), then add 1 to l.
-|
-rnd_plus:
- swap %d1 |set up d1 for round prec.
- tstb LOCAL_SGN(%a0) |check for sign
- bmi truncate |if negative then truncate
- movel #0xffffffff,%d0 |force g,r,s to be all f's
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-|
-| ROUND MINUS INFINITY
-|
-| If sign of fp number = 1 (negative), then add 1 to l.
-|
-rnd_mnus:
- swap %d1 |set up d1 for round prec.
- tstb LOCAL_SGN(%a0) |check for sign
- bpl truncate |if positive then truncate
- movel #0xffffffff,%d0 |force g,r,s to be all f's
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-|
-| ROUND ZERO
-|
-| Always truncate.
-rnd_zero:
- swap %d1 |set up d1 for round prec.
- bra truncate
-|
-|
-| ROUND NEAREST
-|
-| If (g=1), then add 1 to l and if (r=s=0), then clear l
-| Note that this will round to even in case of a tie.
-|
-rnd_near:
- swap %d1 |set up d1 for round prec.
- asll #1,%d0 |shift g-bit to c-bit
- bcc truncate |if (g=0) then truncate
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-
-|
-| ext_grs --- extract guard, round and sticky bits
-|
-| Input: d1 = PREC:ROUND
-| Output: d0{31:29}= guard, round, sticky
-|
-| The ext_grs extracts the guard/round/sticky bits according to the
-| selected rounding precision. It is called by the round subroutine
-| only. All registers except d0 are kept intact. d0 becomes an
-| updated guard,round,sticky in d0{31:29}
-|
-| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
-| prior to usage, and needs to restore d1 to original.
-|
-ext_grs:
- swap %d1 |have d1.w point to round precision
- cmpiw #0,%d1
- bnes sgl_or_dbl
- bras end_ext_grs
-
-sgl_or_dbl:
- moveml %d2/%d3,-(%a7) |make some temp registers
- cmpiw #1,%d1
- bnes grs_dbl
-grs_sgl:
- bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right
- movel #30,%d2 |of the sgl prec. limits
- lsll %d2,%d3 |shift g-r bits to MSB of d3
- movel LOCAL_HI(%a0),%d2 |get word 2 for s-bit test
- andil #0x0000003f,%d2 |s bit is the or of all other
- bnes st_stky |bits to the right of g-r
- tstl LOCAL_LO(%a0) |test lower mantissa
- bnes st_stky |if any are set, set sticky
- tstl %d0 |test original g,r,s
- bnes st_stky |if any are set, set sticky
- bras end_sd |if words 3 and 4 are clr, exit
-grs_dbl:
- bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right
- movel #30,%d2 |of the dbl prec.
limits - lsll %d2,%d3 |shift g-r bits to the MSB of d3 - movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test - andil #0x000001ff,%d2 |s bit is the or-ing of all - bnes st_stky |other bits to the right of g-r - tstl %d0 |test word original g,r,s - bnes st_stky |if any are set, set sticky - bras end_sd |if clear, exit -st_stky: - bset #rnd_stky_bit,%d3 -end_sd: - movel %d3,%d0 |return grs to d0 - moveml (%a7)+,%d2/%d3 |restore scratch registers -end_ext_grs: - swap %d1 |restore d1 to original - rts - -|******************* Local Equates - .set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec - .set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec - - -|Jump table for adding 1 to the l-bit indexed by rnd prec - -add_to_l: - .long add_ext - .long add_sgl - .long add_dbl - .long add_dbl -| -| ADD SINGLE -| -add_sgl: - addl #ad_1_sgl,LOCAL_HI(%a0) - bccs scc_clr |no mantissa overflow - roxrw LOCAL_HI(%a0) |shift v-bit back in - roxrw LOCAL_HI+2(%a0) |shift v-bit back in - addw #0x1,LOCAL_EX(%a0) |and incr exponent -scc_clr: - tstl %d0 |test for rs = 0 - bnes sgl_done - andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit -sgl_done: - andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit - clrl LOCAL_LO(%a0) |clear d2 - rts - -| -| ADD EXTENDED -| -add_ext: - addql #1,LOCAL_LO(%a0) |add 1 to l-bit - bccs xcc_clr |test for carry out - addql #1,LOCAL_HI(%a0) |propagate carry - bccs xcc_clr - roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_LO(%a0) - roxrw LOCAL_LO+2(%a0) - addw #0x1,LOCAL_EX(%a0) |and inc exp -xcc_clr: - tstl %d0 |test rs = 0 - bnes add_ext_done - andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit -add_ext_done: - rts -| -| ADD DOUBLE -| -add_dbl: - addl #ad_1_dbl,LOCAL_LO(%a0) - bccs dcc_clr - addql #1,LOCAL_HI(%a0) |propagate carry - bccs dcc_clr - roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_LO(%a0) - roxrw LOCAL_LO+2(%a0) - addw #0x1,LOCAL_EX(%a0) |incr exponent -dcc_clr: - tstl %d0 |test for rs = 0 - bnes dbl_done - andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit - -dbl_done: - andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit - rts - -error: - rts -| -| Truncate all other bits -| -trunct: - .long end_rnd - .long sgl_done - .long dbl_done - .long dbl_done - -truncate: - lea trunct,%a1 - movel (%a1,%d1.w*4),%a1 - jmp (%a1) - -end_rnd: - rts - -| -| NORMALIZE -| -| These routines (nrm_zero & nrm_set) normalize the unnorm. This -| is done by shifting the mantissa left while decrementing the -| exponent. -| -| NRM_SET shifts and decrements until there is a 1 set in the integer -| bit of the mantissa (msb in d1). -| -| NRM_ZERO shifts and decrements until there is a 1 set in the integer -| bit of the mantissa (msb in d1) unless this would mean the exponent -| would go less than 0. In that case the number becomes a denorm - the -| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not -| normalized. -| -| Note that both routines have been optimized (for the worst case) and -| therefore do not have the easy to follow decrement/shift loop. 
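A C model of the normalization contract just described, assuming a 64-bit mantissa with the integer bit at bit 63 and a non-negative exponent; the compiler's count-leading-zeros builtin plays the role of bfffo:

    #include <stdint.h>

    /* Normalize by shifting until the integer bit is set, decrementing
     * the exponent as we go (nrm_set's job, done with bfffo instead of
     * a shift loop).  If that would drive the exponent below zero,
     * shift only as far as allowed and leave a denorm with exp = 0,
     * which is the extra rule nrm_zero adds. */
    static void normalize(int16_t *exp, uint64_t *mant)
    {
        if (*mant == 0) {             /* no bits set: force exp to 0 */
            *exp = 0;
            return;
        }
        int dist = __builtin_clzll(*mant); /* distance to the first 1 */
        if (dist >= *exp) {           /* X >= Y: becomes a denorm */
            *mant <<= *exp;
            *exp = 0;
        } else {                      /* X < Y: full normalization */
            *mant <<= dist;
            *exp -= dist;
        }
    }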
-|
-| NRM_ZERO
-|
-| Distance to first 1 bit in mantissa = X
-| Distance to 0 from exponent = Y
-| If X < Y
-| Then
-| nrm_set
-| Else
-| shift mantissa by Y
-| set exponent = 0
-|
-|input:
-| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
-|output:
-| L_SCR1{4} = fpte15 or ete15 bit
-|
- .global nrm_zero
-nrm_zero:
- movew LOCAL_EX(%a0),%d0
- cmpw #64,%d0 |see if exp > 64
- bmis d0_less
- bsr nrm_set |exp > 64 so exp won't exceed 0
- rts
-d0_less:
- moveml %d2/%d3/%d5/%d6,-(%a7)
- movel LOCAL_HI(%a0),%d1
- movel LOCAL_LO(%a0),%d2
-
- bfffo %d1{#0:#32},%d3 |get the distance to the first 1
-| ;in ms mant
- beqs ms_clr |branch if no bits were set
- cmpw %d3,%d0 |if X>Y
- bmis greater |then exp will go past 0 (neg) if
-| ;it is just shifted
- bsr nrm_set |else exp won't go past 0
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-greater:
- movel %d2,%d6 |save ls mant in d6
- lsll %d0,%d2 |shift ls mant by count
- lsll %d0,%d1 |shift ms mant by count
- movel #32,%d5
- subl %d0,%d5 |make op a denorm by shifting bits
- lsrl %d5,%d6 |by the number in the exp, then
-| ;set exp = 0.
- orl %d6,%d1 |shift the ls mant bits into the ms mant
- movel #0,%d0 |same as if decremented exp to 0
-| ;while shifting
- movew %d0,LOCAL_EX(%a0)
- movel %d1,LOCAL_HI(%a0)
- movel %d2,LOCAL_LO(%a0)
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-ms_clr:
- bfffo %d2{#0:#32},%d3 |check if any bits set in ls mant
- beqs all_clr |branch if none set
- addw #32,%d3
- cmpw %d3,%d0 |if X>Y
- bmis greater |then branch
- bsr nrm_set |else exp won't go past 0
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-all_clr:
- movew #0,LOCAL_EX(%a0) |no mantissa bits set. Set exp = 0.
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-|
-| NRM_SET
-|
- .global nrm_set
-nrm_set:
- movel %d7,-(%a7)
- bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7
- beqs lower |branch if ms mant is all 0's
-
- movel %d6,-(%a7)
-
- subw %d7,LOCAL_EX(%a0) |sub exponent by count
- movel LOCAL_HI(%a0),%d0 |d0 has ms mant
- movel LOCAL_LO(%a0),%d1 |d1 has ls mant
-
- lsll %d7,%d0 |shift first 1 to j bit position
- movel %d1,%d6 |copy ls mant into d6
- lsll %d7,%d6 |shift ls mant by count
- movel %d6,LOCAL_LO(%a0) |store ls mant into memory
- moveql #32,%d6
- subl %d7,%d6 |continue shift
- lsrl %d6,%d1 |shift off all bits but those that will
-| ;be shifted into ms mant
- orl %d1,%d0 |shift the ls mant bits into the ms mant
- movel %d0,LOCAL_HI(%a0) |store ms mant into memory
- moveml (%a7)+,%d7/%d6 |restore registers
- rts
-
-|
-| We get here if ms mant was = 0, and we assume ls mant has bits
-| set (otherwise this would have been tagged a zero not a denorm).
-|
-lower:
- movew LOCAL_EX(%a0),%d0 |d0 has exponent
- movel LOCAL_LO(%a0),%d1 |d1 has ls mant
- subw #32,%d0 |account for ms mant being all zeros
- bfffo %d1{#0:#32},%d7 |find first 1 in ls mant to d7
- subw %d7,%d0 |subtract shift count from exp
- lsll %d7,%d1 |shift first 1 to integer bit in ms mant
- movew %d0,LOCAL_EX(%a0) |store exp
- movel %d1,LOCAL_HI(%a0) |store ms mant
- clrl LOCAL_LO(%a0) |clear ls mant
- movel (%a7)+,%d7
- rts
-|
-| denorm --- denormalize an intermediate result
-|
-| Used by underflow.
-|
-| Input:
-| a0 points to the operand to be denormalized
-| (in the internal extended format)
-|
-| d0: rounding precision
-| Output:
-| a0 points to the denormalized result
-| (in the internal extended format)
-|
-| d0 is guard,round,sticky
-|
-| d0 comes into this routine with the rounding precision. It
-| is then loaded with the denormalized exponent threshold for the
-| rounding precision.
-|
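The contract just described, as a minimal C sketch: shift right until the exponent reaches the destination's denorm threshold, collecting every shifted-out bit as inexact. The threshold parameter stands in for sgl_thresh/dbl_thresh, and the real dnrm_lp keeps guard, round, and sticky as separate bits rather than one flag:

    #include <stdbool.h>
    #include <stdint.h>

    /* Denormalize: raise the exponent to 'threshold' while shifting
     * the mantissa right to compensate; any bit shifted off the low
     * end makes the result inexact. */
    static uint64_t denormalize(int32_t *exp, uint64_t mant,
                                int32_t threshold, bool *inexact)
    {
        while (*exp < threshold) {
            if (mant & 1)
                *inexact = true;
            mant >>= 1;
            (*exp)++;
        }
        return mant;
    }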
-
- .global denorm
-denorm:
- btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000
- beqs no_sgn_ext
- bsetb #7,LOCAL_EX(%a0) |sign extend if it is so
-no_sgn_ext:
-
- cmpib #0,%d0 |if 0 then extended precision
- bnes not_ext |else branch
-
- clrl %d1 |load d1 with ext threshold
- clrl %d0 |clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check for inex
- beq no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-not_ext:
- cmpil #1,%d0 |if 1 then single precision
- beqs load_sgl |else must be 2, double prec
-
-load_dbl:
- movew #dbl_thresh,%d1 |put copy of threshold in d1
- movel %d1,%d0 |copy d1 into d0
- subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
- cmpw #67,%d0 |if diff > 67 (mant + grs bits)
- bpls chk_stky |then branch (all bits would be
-| ; shifted off in denorm routine)
- clrl %d0 |else clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check flag
- beqs no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-load_sgl:
- movew #sgl_thresh,%d1 |put copy of threshold in d1
- movel %d1,%d0 |copy d1 into d0
- subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
- cmpw #67,%d0 |if diff > 67 (mant + grs bits)
- bpls chk_stky |then branch (all bits would be
-| ; shifted off in denorm routine)
- clrl %d0 |else clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check flag
- beqs no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-chk_stky:
- tstl LOCAL_HI(%a0) |check for any bits set
- bnes set_stky
- tstl LOCAL_LO(%a0) |check for any bits set
- bnes set_stky
- bras clr_mant
-set_stky:
- orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
- movel #0x20000000,%d0 |set sticky bit in return value
-clr_mant:
- movew %d1,LOCAL_EX(%a0) |load exp with threshold
- movel #0,LOCAL_HI(%a0) |set d1 = 0 (ms mantissa)
- movel #0,LOCAL_LO(%a0) |set d2 = 0 (ls mantissa)
- rts
-dnrm_inex:
- orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
-no_inex:
- rts
-
-|
-| dnrm_lp --- denormalize exponent/mantissa to specified threshold
-|
-| Input:
-| a0 points to the operand to be denormalized
-| d0{31:29} initial guard,round,sticky
-| d1{15:0} denormalization threshold
-| Output:
-| a0 points to the denormalized operand
-| d0{31:29} final guard,round,sticky
-| d1.b inexact flag: all ones means inexact result
-|
-| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
-| so that bfext can be used to extract the new low part of the mantissa.
-| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
-| is no LOCAL_GRS scratch word following it on the fsave frame.
-|
- .global dnrm_lp
-dnrm_lp:
- movel %d2,-(%sp) |save d2 for temp use
- btstb #E3,E_BYTE(%a6) |test for type E3 exception
- beqs not_E3 |not type E3 exception
- bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit
- movel #29,%d0
- lsll %d0,%d2 |shift g,r,s to their positions
- movel %d2,%d0
-not_E3:
- movel (%sp)+,%d2 |restore d2
- movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
- movel %d0,FP_SCR2+LOCAL_GRS(%a6)
- movel %d1,%d0 |copy the denorm threshold
- subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent
- bles no_lp |d1 <= 0
- cmpw #32,%d1
- blts case_1 |0 < d1 < 32
- cmpw #64,%d1
- blts case_2 |32 <= d1 < 64
- bra case_3 |d1 >= 64
-|
-| No normalization necessary
-|
-no_lp:
- clrb %d1 |set no inex2 reported
- movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s
- rts
-|
-| case (d1 >= 64): Force the exponent to be the denorm threshold with the
-| correct sign.
-| -case_3: - movew %d0,LOCAL_EX(%a0) - tstw LOCAL_SGN(%a0) - bges c3con -c3neg: - orl #0x80000000,LOCAL_EX(%a0) -c3con: - cmpw #64,%d1 - beqs sixty_four - cmpw #65,%d1 - beqs sixty_five -| -| Shift value is out of range. Set d1 for inex2 flag and -| return a zero with the given threshold. -| - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - movel #0x20000000,%d0 - st %d1 - rts - -sixty_four: - movel LOCAL_HI(%a0),%d0 - bfextu %d0{#2:#30},%d1 - andil #0xc0000000,%d0 - bras c3com - -sixty_five: - movel LOCAL_HI(%a0),%d0 - bfextu %d0{#1:#31},%d1 - andil #0x80000000,%d0 - lsrl #1,%d0 |shift high bit into R bit - -c3com: - tstl %d1 - bnes c3ssticky - tstl LOCAL_LO(%a0) - bnes c3ssticky - tstb FP_SCR2+LOCAL_GRS(%a6) - bnes c3ssticky - clrb %d1 - bras c3end - -c3ssticky: - bsetl #rnd_stky_bit,%d0 - st %d1 -c3end: - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - rts - - |end diff --git a/arch/m68k/fpsp040/sacos.S b/arch/m68k/fpsp040/sacos.S deleted file mode 100644 index 513c7cca7318d004b5e96b395f2333ff4f575847..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sacos.S +++ /dev/null @@ -1,114 +0,0 @@ -| -| sacos.sa 3.3 12/19/90 -| -| Description: The entry point sAcos computes the inverse cosine of -| an input argument; sAcosd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arccos(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sCOS takes approximately 310 cycles. -| -| Algorithm: -| -| ACOS -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate acos(X) by -| z := (1-X) / (1+X) -| acos(X) = 2 * atan( sqrt(z) ). -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SACOS idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 -PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - - |xref t_operr - |xref t_frcinx - |xref satan - - .global sacosd -sacosd: -|--ACOS(X) = PI/2 FOR DENORMALIZED X - fmovel %d1,%fpcr | ...load user's rounding mode/precision - fmovex PIBY2,%fp0 - bra t_frcinx - - .global sacos -sacos: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 | ...pack exponent with upper 16 fraction - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges ACOSBIG - -|--THIS IS THE USUAL CASE, |X| < 1 -|--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) - - fmoves #0x3F800000,%fp1 - faddx %fp0,%fp1 | ...1+X - fnegx %fp0 | ... 
-X - fadds #0x3F800000,%fp0 | ...1-X - fdivx %fp1,%fp0 | ...(1-X)/(1+X) - fsqrtx %fp0 | ...SQRT((1-X)/(1+X)) - fmovemx %fp0-%fp0,(%a0) | ...overwrite input - movel %d1,-(%sp) |save original users fpcr - clrl %d1 - bsr satan | ...ATAN(SQRT([1-X]/[1+X])) - fmovel (%sp)+,%fpcr |restore users exceptions - faddx %fp0,%fp0 | ...2 * ATAN( STUFF ) - bra t_frcinx - -ACOSBIG: - fabsx %fp0 - fcmps #0x3F800000,%fp0 - fbgt t_operr |cause an operr exception - -|--|X| = 1, ACOS(X) = 0 OR PI - movel (%a0),%d0 | ...pack exponent with upper 16 fraction - movew 4(%a0),%d0 - cmpl #0,%d0 |D0 has original exponent+fraction - bgts ACOSP1 - -|--X = -1 -|Returns PI and inexact exception - fmovex PI,%fp0 - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 |cause an inexact exception to be put -| ;into the 040 - will not trap until next -| ;fp inst. - bra t_frcinx - -ACOSP1: - fmovel %d1,%FPCR - fmoves #0x00000000,%fp0 - rts |Facos ; of +1 is exact - - |end diff --git a/arch/m68k/fpsp040/sasin.S b/arch/m68k/fpsp040/sasin.S deleted file mode 100644 index 2a269a58ceaa8ccde03ab59e77c740d34f44c128..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sasin.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| sasin.sa 3.3 12/19/90 -| -| Description: The entry point sAsin computes the inverse sine of -| an input argument; sAsind does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arcsin(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sASIN takes approximately 310 cycles. -| -| Algorithm: -| -| ASIN -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate asin(X) by -| z := sqrt( [1-X][1+X] ) -| asin(X) = atan( x / z ). -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SASIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - - |xref t_operr - |xref t_frcinx - |xref t_extdnrm - |xref satan - - .global sasind -sasind: -|--ASIN(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global sasin -sasin: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges asinbig - -|--THIS IS THE USUAL CASE, |X| < 1 -|--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) - - fmoves #0x3F800000,%fp1 - fsubx %fp0,%fp1 | ...1-X - fmovemx %fp2-%fp2,-(%a7) - fmoves #0x3F800000,%fp2 - faddx %fp0,%fp2 | ...1+X - fmulx %fp2,%fp1 | ...(1+X)(1-X) - fmovemx (%a7)+,%fp2-%fp2 - fsqrtx %fp1 | ...SQRT([1-X][1+X]) - fdivx %fp1,%fp0 | ...X/SQRT([1-X][1+X]) - fmovemx %fp0-%fp0,(%a0) - bsr satan - bra t_frcinx - -asinbig: - fabsx %fp0 | ...|X| - fcmps #0x3F800000,%fp0 - fbgt t_operr |cause an operr exception - -|--|X| = 1, ASIN(X) = +- PI/2. 
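The usual-case path of sasin above, restated against libm for clarity; this is the same reduction the assembly performs before tail-calling satan:

    #include <math.h>

    /* For |x| < 1: asin(x) = atan( x / sqrt((1-x)*(1+x)) ).
     * (1-x)*(1+x) is used instead of 1 - x*x because it loses less
     * accuracy when |x| is close to 1. */
    static double asin_via_atan(double x)
    {
        return atan(x / sqrt((1.0 - x) * (1.0 + x)));
    }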
- - fmovex PIBY2,%fp0 - movel (%a0),%d0 - andil #0x80000000,%d0 | ...SIGN BIT OF X - oril #0x3F800000,%d0 | ...+-1 IN SGL FORMAT - movel %d0,-(%sp) | ...push SIGN(X) IN SGL-FMT - fmovel %d1,%FPCR - fmuls (%sp)+,%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S deleted file mode 100644 index c8a664998f92d659c2a44937657a1b461fa02600..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/satan.S +++ /dev/null @@ -1,477 +0,0 @@ -| -| satan.sa 3.3 12/19/90 -| -| The entry point satan computes the arctangent of an -| input value. satand does the same except the input value is a -| denormalized number. -| -| Input: Double-extended value in memory location pointed to by address -| register a0. -| -| Output: Arctan(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program satan takes approximately 160 cycles for input -| argument X such that 1/16 < |X| < 16. For the other arguments, -| the program will run no worse than 10% slower. -| -| Algorithm: -| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. -| -| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. -| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits -| of X with a bit-1 attached at the 6-th bit position. Define u -| to be u = (X-F) / (1 + X*F). -| -| Step 3. Approximate arctan(u) by a polynomial poly. -| -| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values -| calculated beforehand. Exit. -| -| Step 5. If |X| >= 16, go to Step 7. -| -| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. -| -| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. -| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
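The table technique of Steps 2 through 4 above, transposed to C doubles as a sketch: F copies the sign, exponent, and first five fraction bits of x, with a sixth fraction bit forced to 1. libm's atan stands in for the 128-entry ATANTBL lookup, and Taylor terms stand in for the fitted A1..A3 coefficients, so this shows the structure, not the accuracy:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* atan(x) = atan(F) + atan(u), u = (x-F)/(1+x*F); u is small
     * because F is close to x, so a short polynomial suffices. */
    static double atan_by_table(double x) /* assumes 1/16 <= |x| < 16 */
    {
        uint64_t bits;
        double F, u, v;

        memcpy(&bits, &x, sizeof bits);
        bits &= 0xFFFF800000000000ULL; /* sign, exponent, top 5 fraction bits */
        bits |= 0x0000400000000000ULL; /* force the 6th fraction bit to 1 */
        memcpy(&F, &bits, sizeof F);

        u = (x - F) / (1.0 + x * F);
        v = u * u;
        return atan(F) + u - u * v / 3.0 + u * v * v / 5.0;
    }

In the assembly, the divide producing u runs in parallel with the table indexing, which is the point of the method: the lookup and register shuffling hide under the divide latency.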
- -|satan idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FFB8000,0x4002FFFF - -ONE: .long 0x3F800000 - - .long 0x00000000 - -ATANA3: .long 0xBFF6687E,0x314987D8 -ATANA2: .long 0x4002AC69,0x34A26DB3 - -ATANA1: .long 0xBFC2476F,0x4E1DA28E -ATANB6: .long 0x3FB34444,0x7F876989 - -ATANB5: .long 0xBFB744EE,0x7FAF45DB -ATANB4: .long 0x3FBC71C6,0x46940220 - -ATANB3: .long 0xBFC24924,0x921872F9 -ATANB2: .long 0x3FC99999,0x99998FA9 - -ATANB1: .long 0xBFD55555,0x55555555 -ATANC5: .long 0xBFB70BF3,0x98539E6A - -ATANC4: .long 0x3FBC7187,0x962D1D7D -ATANC3: .long 0xBFC24924,0x827107B8 - -ATANC2: .long 0x3FC99999,0x9996263E -ATANC1: .long 0xBFD55555,0x55555536 - -PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 -NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 -PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 -NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 - -ATANTBL: - .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 - .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 - .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 - .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 - .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 - .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 - .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 - .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 - .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 - .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 - .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 - .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 - .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 - .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 - .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 - .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 - .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 - .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 - .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 - .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 - .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 - .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 - .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 - .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 - .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 - .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 - .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 - .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 - .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 - .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 - .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 - .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 - .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 - .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 - .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 - .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 - .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 - .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 - .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 - .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 - .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 - .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 - .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 - .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 - .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 - .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 - .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 - .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 - .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 - .long 
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 - .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 - .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 - .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 - .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 - .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 - .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 - .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 - .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 - .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 - .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 - .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 - .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 - .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 - .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 - .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 - .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 - .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 - .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 - .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 - .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 - .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 - .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 - .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 - .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 - .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 - .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 - .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 - .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 - .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 - .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 - .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 - .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 - .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 - .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 - .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 - .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 - .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 - .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 - .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 - .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 - .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 - .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 - .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 - .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 - .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 - .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 - .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 - .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 - .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 - .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 - .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 - .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 - .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 - .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 - .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 - .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 - .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 - .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 - .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 - .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 - .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 - .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 - .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 - .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 - .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 - .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 - .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 - .long 
0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 - .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 - .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 - .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 - .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 - .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 - .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 - .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 - .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 - .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 - .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - .set XFRACLO,X+8 - - .set ATANF,FP_SCR2 - .set ATANFHI,ATANF+4 - .set ATANFLO,ATANF+8 - - - | xref t_frcinx - |xref t_extdnrm - - .global satand -satand: -|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT - - bra t_extdnrm - - .global satan -satan: -|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? - bges ATANOK1 - bra ATANSM - -ATANOK1: - cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? - bles ATANMAIN - bra ATANBIG - - -|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE -|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). -|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN -|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE -|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS -|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR -|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO -|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE -|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL -|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE -|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION -|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION -|--WILL INVOLVE A VERY LONG POLYNOMIAL. - -|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS -|--WE CHOSE F TO BE +-2^K * 1.BBBB1 -|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE -|--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE -|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS -|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). - -ATANMAIN: - - movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE - andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS - oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 - movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F - - fmovex %fp0,%fp1 | ...FP1 IS X - fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 - fsubx X(%a6),%fp0 | ...FP0 IS X-F - fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F - fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) - -|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) -|--CREATE ATAN(F) AND STORE IT IN ATANF, AND -|--SAVE REGISTERS FP2. - - movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY - movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X - andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION - andil #0x7FFF0000,%d2 | ...EXPONENT OF F - subil #0x3FFB0000,%d2 | ...K+4 - asrl #1,%d2 - addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F - asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|) - lea ATANTBL,%a1 - addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) - movel (%a1)+,ATANF(%a6) - movel (%a1)+,ATANFHI(%a6) - movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) - movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. 
AGAIN - andil #0x80000000,%d0 | ...SIGN(F) - orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) - movel (%a7)+,%d2 | ...RESTORE d2 - -|--THAT'S ALL I HAVE TO DO FOR NOW, -|--BUT ALAS, THE DIVIDE IS STILL CRANKING! - -|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS -|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U -|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. -|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) -|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. -|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT -|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED - - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 - fmoved ATANA3,%fp2 - faddx %fp1,%fp2 | ...A3+V - fmulx %fp1,%fp2 | ...V*(A3+V) - fmulx %fp0,%fp1 | ...U*V - faddd ATANA2,%fp2 | ...A2+V*(A3+V) - fmuld ATANA1,%fp1 | ...A1*U*V - fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) - - faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED - fmovel %d1,%FPCR |restore users exceptions - faddx ATANF(%a6),%fp0 | ...ATAN(X) - bra t_frcinx - -ATANBORS: -|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. -|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. - cmpil #0x3FFF8000,%d0 - bgt ATANBIG | ...I.E. |X| >= 16 - -ATANSM: -|--|X| <= 1/16 -|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE -|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) -|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) -|--WHERE Y = X*X, AND Z = Y*Y. - - cmpil #0x3FD78000,%d0 - blt ATANTINY -|--COMPUTE POLYNOMIAL - fmulx %fp0,%fp0 | ...FP0 IS Y = X*X - - - movew #0x0000,XDCARE(%a6) - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y - - fmoved ATANB6,%fp2 - fmoved ATANB5,%fp3 - - fmulx %fp1,%fp2 | ...Z*B6 - fmulx %fp1,%fp3 | ...Z*B5 - - faddd ATANB4,%fp2 | ...B4+Z*B6 - faddd ATANB3,%fp3 | ...B3+Z*B5 - - fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) - fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) - - faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) - faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) - - fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) - fmulx X(%a6),%fp0 | ...X*Y - - faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] - - - fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) - - fmovel %d1,%FPCR |restore users exceptions - faddx X(%a6),%fp0 - - bra t_frcinx - -ATANTINY: -|--|X| < 2^(-40), ATAN(X) = X - movew #0x0000,XDCARE(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - - bra t_frcinx - -ATANBIG: -|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, -|--RETURN SIGN(X)*PI/2 + ATAN(-1/X). - cmpil #0x40638000,%d0 - bgt ATANHUGE - -|--APPROXIMATE ATAN(-1/X) BY -|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' -|--THIS CAN BE RE-WRITTEN AS -|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. - - fmoves #0xBF800000,%fp1 | ...LOAD -1 - fdivx %fp0,%fp1 | ...FP1 IS -1/X - - -|--DIVIDE IS STILL CRANKING - - fmovex %fp1,%fp0 | ...FP0 IS X' - fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' - fmovex %fp1,X(%a6) | ...X IS REALLY X' - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y - - fmoved ATANC5,%fp3 - fmoved ATANC4,%fp2 - - fmulx %fp1,%fp3 | ...Z*C5 - fmulx %fp1,%fp2 | ...Z*B4 - - faddd ATANC3,%fp3 | ...C3+Z*C5 - faddd ATANC2,%fp2 | ...C2+Z*C4 - - fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED - fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) - - faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) - fmulx X(%a6),%fp0 | ...X'*Y - - faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] - - - fmulx %fp1,%fp0 | ...X'*Y*([B1+Z*(B3+Z*B5)] -| ... 
+[Y*(B2+Z*(B4+Z*B6))]) - faddx X(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - - btstb #7,(%a0) - beqs pos_big - -neg_big: - faddx NPIBY2,%fp0 - bra t_frcinx - -pos_big: - faddx PPIBY2,%fp0 - bra t_frcinx - -ATANHUGE: -|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY - btstb #7,(%a0) - beqs pos_huge - -neg_huge: - fmovex NPIBY2,%fp0 - fmovel %d1,%fpcr - fsubx NTINY,%fp0 - bra t_frcinx - -pos_huge: - fmovex PPIBY2,%fp0 - fmovel %d1,%fpcr - fsubx PTINY,%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/satanh.S b/arch/m68k/fpsp040/satanh.S deleted file mode 100644 index ba91f77a75716e92edc25f6911bd0d81f4c307b2..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/satanh.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| satanh.sa 3.3 12/19/90 -| -| The entry point satanh computes the inverse -| hyperbolic tangent of -| an input argument; satanhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arctanh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program satanh takes approximately 270 cycles. -| -| Algorithm: -| -| ATANH -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate atanh(X) by -| sgn := sign(X) -| y := |X| -| z := 2y/(1-y) -| atanh(X) := sgn * (1/2) * logp1(z) -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) Generate infinity with an appropriate sign and -| divide-by-zero by -| sgn := sign(X) -| atan(X) := sgn / (+0). -| Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|satanh idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_dz - |xref t_operr - |xref t_frcinx - |xref t_extdnrm - |xref slognp1 - - .global satanhd -satanhd: -|--ATANH(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global satanh -satanh: - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges ATANHBIG - -|--THIS IS THE USUAL CASE, |X| < 1 -|--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). - - fabsx (%a0),%fp0 | ...Y = |X| - fmovex %fp0,%fp1 - fnegx %fp1 | ...-Y - faddx %fp0,%fp0 | ...2Y - fadds #0x3F800000,%fp1 | ...1-Y - fdivx %fp1,%fp0 | ...2Y/(1-Y) - movel (%a0),%d0 - andil #0x80000000,%d0 - oril #0x3F000000,%d0 | ...SIGN(X)*HALF - movel %d0,-(%sp) - - fmovemx %fp0-%fp0,(%a0) | ...overwrite input - movel %d1,-(%sp) - clrl %d1 - bsr slognp1 | ...LOG1P(Z) - fmovel (%sp)+,%fpcr - fmuls (%sp)+,%fp0 - bra t_frcinx - -ATANHBIG: - fabsx (%a0),%fp0 | ...|X| - fcmps #0x3F800000,%fp0 - fbgt t_operr - bra t_dz - - |end diff --git a/arch/m68k/fpsp040/scale.S b/arch/m68k/fpsp040/scale.S deleted file mode 100644 index 04829dd4f1f48172b978a7d582fcd8f57067cb7b..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/scale.S +++ /dev/null @@ -1,370 +0,0 @@ -| -| scale.sa 3.3 7/30/91 -| -| The entry point sSCALE computes the destination operand -| scaled by the source operand. If the absolute value of -| the source operand is (>= 2^14) an overflow or underflow -| is returned. 
-| -| The entry point sscale is called from do_func to emulate -| the fscale unimplemented instruction. -| -| Input: Double-extended destination operand in FPTEMP, -| double-extended source operand in ETEMP. -| -| Output: The function returns scale(X,Y) to fp0. -| -| Modifies: fp0. -| -| Algorithm: -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SCALE idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref t_ovfl2 - |xref t_unfl - |xref round - |xref t_resdnrm - -SRC_BNDS: .short 0x3fff,0x400c - -| -| This entry point is used by the unimplemented instruction exception -| handler. -| -| -| -| FSCALE -| - .global sscale -sscale: - fmovel #0,%fpcr |clr user enabled exc - clrl %d1 - movew FPTEMP(%a6),%d1 |get dest exponent - smi L_SCR1(%a6) |use L_SCR1 to hold sign - andil #0x7fff,%d1 |strip sign - movew ETEMP(%a6),%d0 |check src bounds - andiw #0x7fff,%d0 |clr sign bit - cmp2w SRC_BNDS,%d0 - bccs src_in - cmpiw #0x400c,%d0 |test for too large - bge src_out -| -| The source input is below 1, so we check for denormalized numbers -| and set unfl. -| -src_small: - moveb DTAG(%a6),%d0 - andib #0xe0,%d0 - tstb %d0 - beqs no_denorm - st STORE_FLG(%a6) |dest already contains result - orl #unfl_mask,USER_FPSR(%a6) |set UNFL -den_done: - leal FPTEMP(%a6),%a0 - bra t_resdnrm -no_denorm: - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |simply return dest - rts - - -| -| Source is within 2^14 range. To perform the int operation, -| move it to d0. -| -src_in: - fmovex ETEMP(%a6),%fp0 |move in src for int - fmovel #rz_mode,%fpcr |force rz for src conversion - fmovel %fp0,%d0 |int src to d0 - fmovel #0,%FPSR |clr status from above - tstw ETEMP(%a6) |check src sign - blt src_neg -| -| Source is positive. Add the src to the dest exponent. -| The result can be denormalized, if src = 0, or overflow, -| if the result of the add sets a bit in the upper word. -| -src_pos: - tstw %d1 |check for denorm - beq dst_dnrm - addl %d0,%d1 |add src to dest exp - beqs denorm |if zero, result is denorm - cmpil #0x7fff,%d1 |test for overflow - bges ovfl - tstb L_SCR1(%a6) - beqs spos_pos - orw #0x8000,%d1 -spos_pos: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts -ovfl: - tstb L_SCR1(%a6) - beqs sovl_pos - orw #0x8000,%d1 -sovl_pos: - movew FPTEMP(%a6),ETEMP(%a6) |result in ETEMP - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - bra t_ovfl2 - -denorm: - tstb L_SCR1(%a6) - beqs den_pos - orw #0x8000,%d1 -den_pos: - tstl FPTEMP_HI(%a6) |check j bit - blts nden_exit |if set, not denorm - movew %d1,ETEMP(%a6) |input expected in ETEMP - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - orl #unfl_bit,USER_FPSR(%a6) |set unfl - leal ETEMP(%a6),%a0 - bra t_resdnrm -nden_exit: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts - -| -| Source is negative. Add the src to the dest exponent. -| (The result exponent will be reduced). The result can be -| denormalized. 
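Every branch of sscale above comes down to one step: add the integer source to the destination's 15-bit biased exponent and classify the sum. A minimal C model of that step; the enum names are ours, not the FPSP's:

    #include <stdint.h>

    enum scale_class { SCALE_OK, SCALE_OVERFLOW, SCALE_DENORM };

    /* Core of scale(X, n): adjust the biased exponent by n, the way
     * src_pos/src_neg do, and report which exit path applies. */
    static enum scale_class scale_exp(int32_t *exp, int32_t n)
    {
        *exp += n;
        if (*exp >= 0x7fff)  /* exponent field saturated: overflow path */
            return SCALE_OVERFLOW;
        if (*exp <= 0)       /* at or below zero: needs denormalization */
            return SCALE_DENORM;
        return SCALE_OK;
    }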
-| -src_neg: - addl %d0,%d1 |add src to dest - beqs denorm |if zero, result is denorm - blts fix_dnrm |if negative, result is -| ;needing denormalization - tstb L_SCR1(%a6) - beqs sneg_pos - orw #0x8000,%d1 -sneg_pos: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts - - -| -| The result exponent is below denorm value. Test for catastrophic -| underflow and force zero if true. If not, try to shift the -| mantissa right until a zero exponent exists. -| -fix_dnrm: - cmpiw #0xffc0,%d1 |lower bound for normalization - blt fix_unfl |if lower, catastrophic unfl - movew %d1,%d0 |use d0 for exp - movel %d2,-(%a7) |free d2 for norm - movel FPTEMP_HI(%a6),%d1 - movel FPTEMP_LO(%a6),%d2 - clrl L_SCR2(%a6) -fix_loop: - addw #1,%d0 |drive d0 to 0 - lsrl #1,%d1 |while shifting the - roxrl #1,%d2 |mantissa to the right - bccs no_carry - st L_SCR2(%a6) |use L_SCR2 to capture inex -no_carry: - tstw %d0 |it is finished when - blts fix_loop |d0 is zero or the mantissa - tstb L_SCR2(%a6) - beqs tst_zero - orl #unfl_inx_mask,USER_FPSR(%a6) -| ;set unfl, aunfl, ainex -| -| Test for zero. If zero, simply use fmove to return +/- zero -| to the fpu. -| -tst_zero: - clrw FPTEMP_EX(%a6) - tstb L_SCR1(%a6) |test for sign - beqs tst_con - orw #0x8000,FPTEMP_EX(%a6) |set sign bit -tst_con: - movel %d1,FPTEMP_HI(%a6) - movel %d2,FPTEMP_LO(%a6) - movel (%a7)+,%d2 - tstl %d1 - bnes not_zero - tstl FPTEMP_LO(%a6) - bnes not_zero -| -| Result is zero. Check for rounding mode to set lsb. If the -| mode is rp, and the zero is positive, return smallest denorm. -| If the mode is rm, and the zero is negative, return smallest -| negative denorm. -| - btstb #5,FPCR_MODE(%a6) |test if rm or rp - beqs no_dir - btstb #4,FPCR_MODE(%a6) |check which one - beqs zer_rm -zer_rp: - tstb L_SCR1(%a6) |check sign - bnes no_dir |if set, neg op, no inc - movel #1,FPTEMP_LO(%a6) |set lsb - bras sm_dnrm -zer_rm: - tstb L_SCR1(%a6) |check sign - beqs no_dir |if clr, neg op, no inc - movel #1,FPTEMP_LO(%a6) |set lsb - orl #neg_mask,USER_FPSR(%a6) |set N - bras sm_dnrm -no_dir: - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |use fmove to set cc's - rts - -| -| The rounding mode changed the zero to a smallest denorm. Call -| t_resdnrm with exceptional operand in ETEMP. -| -sm_dnrm: - movel FPTEMP_EX(%a6),ETEMP_EX(%a6) - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - leal ETEMP(%a6),%a0 - bra t_resdnrm - -| -| Result is still denormalized. -| -not_zero: - orl #unfl_mask,USER_FPSR(%a6) |set unfl - tstb L_SCR1(%a6) |check for sign - beqs fix_exit - orl #neg_mask,USER_FPSR(%a6) |set N -fix_exit: - bras sm_dnrm - - -| -| The result has underflowed to zero. Return zero and set -| unfl, aunfl, and ainex. -| -fix_unfl: - orl #unfl_inx_mask,USER_FPSR(%a6) - btstb #5,FPCR_MODE(%a6) |test if rm or rp - beqs no_dir2 - btstb #4,FPCR_MODE(%a6) |check which one - beqs zer_rm2 -zer_rp2: - tstb L_SCR1(%a6) |check sign - bnes no_dir2 |if set, neg op, no inc - clrl FPTEMP_EX(%a6) - clrl FPTEMP_HI(%a6) - movel #1,FPTEMP_LO(%a6) |set lsb - bras sm_dnrm |return smallest denorm -zer_rm2: - tstb L_SCR1(%a6) |check sign - beqs no_dir2 |if clr, neg op, no inc - movew #0x8000,FPTEMP_EX(%a6) - clrl FPTEMP_HI(%a6) - movel #1,FPTEMP_LO(%a6) |set lsb - orl #neg_mask,USER_FPSR(%a6) |set N - bra sm_dnrm |return smallest denorm - -no_dir2: - tstb L_SCR1(%a6) - bges pos_zero -neg_zero: - clrl FP_SCR1(%a6) |clear the exceptional operand - clrl FP_SCR1+4(%a6) |for gen_except. 
- clrl FP_SCR1+8(%a6)
- fmoves #0x80000000,%fp0
- rts
-pos_zero:
- clrl FP_SCR1(%a6) |clear the exceptional operand
- clrl FP_SCR1+4(%a6) |for gen_except.
- clrl FP_SCR1+8(%a6)
- fmoves #0x00000000,%fp0
- rts
-
-|
-| The destination is a denormalized number. It must be handled
-| by first shifting the bits in the mantissa until it is normalized,
-| then adding the remainder of the source to the exponent.
-|
-dst_dnrm:
- moveml %d2/%d3,-(%a7)
- movew FPTEMP_EX(%a6),%d1
- movel FPTEMP_HI(%a6),%d2
- movel FPTEMP_LO(%a6),%d3
-dst_loop:
- tstl %d2 |test for normalized result
- blts dst_norm |exit loop if so
- tstl %d0 |otherwise, test shift count
- beqs dst_fin |if zero, shifting is done
- subil #1,%d0 |dec src
- lsll #1,%d3
- roxll #1,%d2
- bras dst_loop
-|
-| Destination became normalized. Simply add the remaining
-| portion of the src to the exponent.
-|
-dst_norm:
- addw %d0,%d1 |dst is normalized; add src
- tstb L_SCR1(%a6)
- beqs dnrm_pos
- orl #0x8000,%d1
-dnrm_pos:
- movemw %d1,FPTEMP_EX(%a6)
- moveml %d2,FPTEMP_HI(%a6)
- moveml %d3,FPTEMP_LO(%a6)
- fmovel USER_FPCR(%a6),%FPCR
- fmovex FPTEMP(%a6),%fp0
- moveml (%a7)+,%d2/%d3
- rts
-
-|
-| Destination remained denormalized. Call t_resdnrm with
-| exceptional operand in ETEMP.
-|
-dst_fin:
- tstb L_SCR1(%a6) |check for sign
- beqs dst_exit
- orl #neg_mask,USER_FPSR(%a6) |set N
- orl #0x8000,%d1
-dst_exit:
- movemw %d1,ETEMP_EX(%a6)
- moveml %d2,ETEMP_HI(%a6)
- moveml %d3,ETEMP_LO(%a6)
- orl #unfl_mask,USER_FPSR(%a6) |set unfl
- moveml (%a7)+,%d2/%d3
- leal ETEMP(%a6),%a0
- bra t_resdnrm
-
-|
-| Source is outside of 2^14 range. Test the sign and branch
-| to the appropriate exception handler.
-|
-src_out:
- tstb L_SCR1(%a6)
- beqs scro_pos
- orl #0x8000,%d1
-scro_pos:
- movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
- movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
- tstw ETEMP(%a6)
- blts res_neg
-res_pos:
- movew %d1,ETEMP(%a6) |result in ETEMP
- bra t_ovfl2
-res_neg:
- movew %d1,ETEMP(%a6) |result in ETEMP
- leal ETEMP(%a6),%a0
- bra t_unfl
- |end
diff --git a/arch/m68k/fpsp040/scosh.S b/arch/m68k/fpsp040/scosh.S
deleted file mode 100644
index 07d3a4d7c86d5c7fb4c1527c47b18c04c7770125..0000000000000000000000000000000000000000
--- a/arch/m68k/fpsp040/scosh.S
+++ /dev/null
@@ -1,131 +0,0 @@
-|
-| scosh.sa 3.1 12/10/90
-|
-| The entry point sCosh computes the hyperbolic cosine of
-| an input argument; sCoshd does the same except for denormalized
-| input.
-|
-| Input: Double-extended number X in location pointed to
-| by address register a0.
-|
-| Output: The value cosh(X) returned in floating-point register Fp0.
-|
-| Accuracy and Monotonicity: The returned result is within 3 ulps in
-| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
-| result is subsequently rounded to double precision. The
-| result is provably monotonic in double precision.
-|
-| Speed: The program sCOSH takes approximately 250 cycles.
-|
-| Algorithm:
-|
-| COSH
-| 1. If |X| > 16380 log2, go to 3.
-|
-| 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
-| y = |X|, z = exp(y), and
-| cosh(X) = (1/2)*( z + 1/z ).
-| Exit.
-|
-| 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
-|
-| 4. (16380 log2 < |X| <= 16480 log2)
-| cosh(X) = exp(|X|)/2.
-| However, invoking exp(|X|) may cause premature overflow.
-| Thus, we calculate cosh(X) as follows:
-| Y := |X|
-| Fact := 2**(16380)
-| Y' := Y - 16381 log2
-| cosh(X) := Fact * exp(Y').
-| Exit.
-|
-| 5. (|X| > 16480 log2) cosh(X) must overflow.
Return -| Huge*Huge to generate overflow and an infinity with -| the appropriate sign. Huge is the largest finite number in -| extended format. Exit. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_ovfl - |xref t_frcinx - |xref setox - -T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD -T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL - -TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000 - - .global scoshd -scoshd: -|--COSH(X) = 1 FOR DENORMALIZED X - - fmoves #0x3F800000,%fp0 - - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 - bra t_frcinx - - .global scosh -scosh: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x400CB167,%d0 - bgts COSHBIG - -|--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) - - fabsx %fp0 | ...|X| - - movel %d1,-(%sp) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) |pass parameter to setox - bsr setox | ...FP0 IS EXP(|X|) - fmuls #0x3F000000,%fp0 | ...(1/2)EXP(|X|) - movel (%sp)+,%d1 - - fmoves #0x3E800000,%fp1 | ...(1/4) - fdivx %fp0,%fp1 | ...1/(2 EXP(|X|)) - - fmovel %d1,%FPCR - faddx %fp1,%fp0 - - bra t_frcinx - -COSHBIG: - cmpil #0x400CB2B3,%d0 - bgts COSHHUGE - - fabsx %fp0 - fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD) - fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE - - movel %d1,-(%sp) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setox - fmovel (%sp)+,%fpcr - - fmulx TWO16380(%pc),%fp0 - bra t_frcinx - -COSHHUGE: - fmovel #0,%fpsr |clr N bit if set by source - bclrb #7,(%a0) |always return positive value - fmovemx (%a0),%fp0-%fp0 - bra t_ovfl - - |end diff --git a/arch/m68k/fpsp040/setox.S b/arch/m68k/fpsp040/setox.S deleted file mode 100644 index f1acf7e36d6b826c77c167efdefaa5729b140977..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/setox.S +++ /dev/null @@ -1,864 +0,0 @@ -| -| setox.sa 3.1 12/10/90 -| -| The entry point setox computes the exponential of a value. -| setoxd does the same except the input value is a denormalized -| number. setoxm1 computes exp(X)-1, and setoxm1d computes -| exp(X)-1 for denormalized X. -| -| INPUT -| ----- -| Double-extended value in memory location pointed to by address -| register a0. -| -| OUTPUT -| ------ -| exp(X) or exp(X)-1 returned in floating-point register fp0. -| -| ACCURACY and MONOTONICITY -| ------------------------- -| The returned result is within 0.85 ulps in 64 significant bit, i.e. -| within 0.5001 ulp to 53 bits if the result is subsequently rounded -| to double precision. The result is provably monotonic in double -| precision. -| -| SPEED -| ----- -| Two timings are measured, both in the copy-back mode. The -| first one is measured when the function is invoked the first time -| (so the instructions and data are not in cache), and the -| second one is measured when the function is reinvoked at the same -| input argument. -| -| The program setox takes approximately 210/190 cycles for input -| argument X whose magnitude is less than 16380 log2, which -| is the usual situation. For the less common arguments, -| depending on their values, the program may run faster or slower -- -| but no worse than 10% slower even in the extreme cases. -| -| The program setoxm1 takes approximately ??? / ??? cycles for input -| argument X, 0.25 <= |X| < 70log2. 
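The scosh routine above reduces cosh to a single exp call plus an overflow fix-up. A minimal C sketch of the same three-way split, assuming IEEE double, so the cutoffs 700 and 710 below are double-precision stand-ins for 16380 log2 and 16480 log2, not the constants the 040 code compares against:

#include <math.h>

/*
 * Sketch of the scosh strategy in IEEE double.  The cutoffs are
 * double-precision stand-ins for 16380 log2 / 16480 log2.
 */
static double cosh_sketch(double x)
{
        double y = fabs(x);

        if (y <= 700.0) {                       /* step 2: the usual case */
                double z = exp(y);
                return 0.5 * (z + 1.0 / z);     /* (1/2)*(z + 1/z)        */
        }
        if (y <= 710.0)                         /* step 4: exp(y) would   */
                return exp(y - M_LN2);          /* overflow, exp(y)/2 not */
        return HUGE_VAL * HUGE_VAL;             /* step 5: force overflow */
}

In the middle band exp(y) itself would overflow even though cosh(x) is still finite; the assembly handles that by pre-subtracting 16381 log2 and multiplying by 2**16380, which is the extended-precision version of folding the factor 1/2 into the argument as the sketch does.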
For |X| < 0.25, it takes -| approximately ??? / ??? cycles. For the less common arguments, -| depending on their values, the program may run faster or slower -- -| but no worse than 10% slower even in the extreme cases. -| -| ALGORITHM and IMPLEMENTATION NOTES -| ---------------------------------- -| -| setoxd -| ------ -| Step 1. Set ans := 1.0 -| -| Step 2. Return ans := ans + sign(X)*2^(-126). Exit. -| Notes: This will always generate one exception -- inexact. -| -| -| setox -| ----- -| -| Step 1. Filter out extreme cases of input argument. -| 1.1 If |X| >= 2^(-65), go to Step 1.3. -| 1.2 Go to Step 7. -| 1.3 If |X| < 16380 log(2), go to Step 2. -| 1.4 Go to Step 8. -| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. -| To avoid the use of floating-point comparisons, a -| compact representation of |X| is used. This format is a -| 32-bit integer, the upper (more significant) 16 bits are -| the sign and biased exponent field of |X|; the lower 16 -| bits are the 16 most significant fraction (including the -| explicit bit) bits of |X|. Consequently, the comparisons -| in Steps 1.1 and 1.3 can be performed by integer comparison. -| Note also that the constant 16380 log(2) used in Step 1.3 -| is also in the compact form. Thus taking the branch -| to Step 2 guarantees |X| < 16380 log(2). There is no harm -| to have a small number of cases where |X| is less than, -| but close to, 16380 log(2) and the branch to Step 9 is -| taken. -| -| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). -| 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken) -| 2.2 N := round-to-nearest-integer( X * 64/log2 ). -| 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63. -| 2.4 Calculate M = (N - J)/64; so N = 64M + J. -| 2.5 Calculate the address of the stored value of 2^(J/64). -| 2.6 Create the value Scale = 2^M. -| Notes: The calculation in 2.2 is really performed by -| -| Z := X * constant -| N := round-to-nearest-integer(Z) -| -| where -| -| constant := single-precision( 64/log 2 ). -| -| Using a single-precision constant avoids memory access. -| Another effect of using a single-precision "constant" is -| that the calculated value Z is -| -| Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). -| -| This error has to be considered later in Steps 3 and 4. -| -| Step 3. Calculate X - N*log2/64. -| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). -| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). -| Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate -| the value -log2/64 to 88 bits of accuracy. -| b) N*L1 is exact because N is no longer than 22 bits and -| L1 is no longer than 24 bits. -| c) The calculation X+N*L1 is also exact due to cancellation. -| Thus, R is practically X+N(L1+L2) to full 64 bits. -| d) It is important to estimate how large can |R| be after -| Step 3.2. -| -| N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) -| X*64/log2 (1+eps) = N + f, |f| <= 0.5 -| X*64/log2 - N = f - eps*X 64/log2 -| X - N*log2/64 = f*log2/64 - eps*X -| -| -| Now |X| <= 16446 log2, thus -| -| |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 -| <= 0.57 log2/64. -| This bound will be used in Step 4. -| -| Step 4. Approximate exp(R)-1 by a polynomial -| p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: A1 (which is 1/2), A4 and A5 -| are single precision; A2 and A3 are double precision. 
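Steps 2-4, together with the Step 5/6 reconstruction described next, amount to the following double-precision sketch. It is illustrative only: exp2() stands in for the stored table of 2^(J/64) values, a short Taylor series stands in for the A1..A5 coefficients, and none of the filtering of Steps 1 and 7-9 is present.

#include <math.h>

/*
 * Double-precision sketch of setox Steps 2-6.  L1 is -log2/64 rounded
 * to single precision so that n * L1 is exact, as in note b) of Step 3;
 * L2 carries the tail of -log2/64.
 */
static double exp_sketch(double x)
{
        const double L1 = (float)(-M_LN2 / 64.0);       /* short lead   */
        const double L2 = -M_LN2 / 64.0 - L1;           /* tail         */
        double n, r, s, p, t;
        int j, m;

        n = nearbyint(x * (64.0 / M_LN2));      /* Step 2: N            */
        j = (int)n & 63;                        /* J = N mod 64         */
        m = ((int)n - j) / 64;                  /* M = (N - J)/64       */

        r = (x + n * L1) + n * L2;              /* Step 3: R            */

        s = r * r;                              /* Step 4: exp(R)-1     */
        p = r + s * (0.5 + r * (1.0 / 6 + r * (1.0 / 24 + r / 120)));

        t = exp2(j / 64.0);                     /* Step 5: 2^(J/64)     */
        return ldexp(t + t * p, m);             /* Step 6: scale by 2^M */
}

The assembly keeps the table value as a high part T and a low part t and adds them separately in Step 5 so the extra bits survive; collapsing T+t into one exp2() call is the main accuracy shortcut this sketch takes.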
-| b) Even with the restrictions above, -| |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. -| Note that 0.0062 is slightly bigger than 0.57 log2/64. -| c) To fully utilize the pipeline, p is separated into -| two independent pieces of roughly equal complexities -| p = [ R + R*S*(A2 + S*A4) ] + -| [ S*(A1 + S*(A3 + S*A5)) ] -| where S = R*R. -| -| Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by -| ans := T + ( T*p + t) -| where T and t are the stored values for 2^(J/64). -| Notes: 2^(J/64) is stored as T and t where T+t approximates -| 2^(J/64) to roughly 85 bits; T is in extended precision -| and t is in single precision. Note also that T is rounded -| to 62 bits so that the last two bits of T are zero. The -| reason for such a special form is that T-1, T-2, and T-8 -| will all be exact --- a property that will give much -| more accurate computation of the function EXPM1. -| -| Step 6. Reconstruction of exp(X) -| exp(X) = 2^M * 2^(J/64) * exp(R). -| 6.1 If AdjFlag = 0, go to 6.3 -| 6.2 ans := ans * AdjScale -| 6.3 Restore the user FPCR -| 6.4 Return ans := ans * Scale. Exit. -| Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, -| |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will -| neither overflow nor underflow. If AdjFlag = 1, that -| means that -| X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. -| Hence, exp(X) may overflow or underflow or neither. -| When that is the case, AdjScale = 2^(M1) where M1 is -| approximately M. Thus 6.2 will never cause over/underflow. -| Possible exception in 6.4 is overflow or underflow. -| The inexact exception is not generated in 6.4. Although -| one can argue that the inexact flag should always be -| raised, to simulate that exception cost to much than the -| flag is worth in practical uses. -| -| Step 7. Return 1 + X. -| 7.1 ans := X -| 7.2 Restore user FPCR. -| 7.3 Return ans := 1 + ans. Exit -| Notes: For non-zero X, the inexact exception will always be -| raised by 7.3. That is the only exception raised by 7.3. -| Note also that we use the FMOVEM instruction to move X -| in Step 7.1 to avoid unnecessary trapping. (Although -| the FMOVEM may not seem relevant since X is normalized, -| the precaution will be useful in the library version of -| this code where the separate entry for denormalized inputs -| will be done away with.) -| -| Step 8. Handle exp(X) where |X| >= 16380log2. -| 8.1 If |X| > 16480 log2, go to Step 9. -| (mimic 2.2 - 2.6) -| 8.2 N := round-to-integer( X * 64/log2 ) -| 8.3 Calculate J = N mod 64, J = 0,1,...,63 -| 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1. -| 8.5 Calculate the address of the stored value 2^(J/64). -| 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. -| 8.7 Go to Step 3. -| Notes: Refer to notes for 2.2 - 2.6. -| -| Step 9. Handle exp(X), |X| > 16480 log2. -| 9.1 If X < 0, go to 9.3 -| 9.2 ans := Huge, go to 9.4 -| 9.3 ans := Tiny. -| 9.4 Restore user FPCR. -| 9.5 Return ans := ans * ans. Exit. -| Notes: Exp(X) will surely overflow or underflow, depending on -| X's sign. "Huge" and "Tiny" are respectively large/tiny -| extended-precision numbers whose square over/underflow -| with an inexact result. Thus, 9.5 always raises the -| inexact together with either overflow or underflow. -| -| -| setoxm1d -| -------- -| -| Step 1. Set ans := 0 -| -| Step 2. Return ans := X + ans. Exit. -| Notes: This will return X with the appropriate rounding -| precision prescribed by the user FPCR. -| -| setoxm1 -| ------- -| -| Step 1. Check |X| -| 1.1 If |X| >= 1/4, go to Step 1.3. 
-| 1.2 Go to Step 7. -| 1.3 If |X| < 70 log(2), go to Step 2. -| 1.4 Go to Step 10. -| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. -| However, it is conceivable |X| can be small very often -| because EXPM1 is intended to evaluate exp(X)-1 accurately -| when |X| is small. For further details on the comparisons, -| see the notes on Step 1 of setox. -| -| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). -| 2.1 N := round-to-nearest-integer( X * 64/log2 ). -| 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63. -| 2.3 Calculate M = (N - J)/64; so N = 64M + J. -| 2.4 Calculate the address of the stored value of 2^(J/64). -| 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M). -| Notes: See the notes on Step 2 of setox. -| -| Step 3. Calculate X - N*log2/64. -| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). -| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). -| Notes: Applying the analysis of Step 3 of setox in this case -| shows that |R| <= 0.0055 (note that |X| <= 70 log2 in -| this case). -| -| Step 4. Approximate exp(R)-1 by a polynomial -| p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: A1 (which is 1/2), A5 and A6 -| are single precision; A2, A3 and A4 are double precision. -| b) Even with the restriction above, -| |p - (exp(R)-1)| < |R| * 2^(-72.7) -| for all |R| <= 0.0055. -| c) To fully utilize the pipeline, p is separated into -| two independent pieces of roughly equal complexity -| p = [ R*S*(A2 + S*(A4 + S*A6)) ] + -| [ R + S*(A1 + S*(A3 + S*A5)) ] -| where S = R*R. -| -| Step 5. Compute 2^(J/64)*p by -| p := T*p -| where T and t are the stored values for 2^(J/64). -| Notes: 2^(J/64) is stored as T and t where T+t approximates -| 2^(J/64) to roughly 85 bits; T is in extended precision -| and t is in single precision. Note also that T is rounded -| to 62 bits so that the last two bits of T are zero. The -| reason for such a special form is that T-1, T-2, and T-8 -| will all be exact --- a property that will be exploited -| in Step 6 below. The total relative error in p is no -| bigger than 2^(-67.7) compared to the final result. -| -| Step 6. Reconstruction of exp(X)-1 -| exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). -| 6.1 If M <= 63, go to Step 6.3. -| 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 -| 6.3 If M >= -3, go to 6.5. -| 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 -| 6.5 ans := (T + OnebySc) + (p + t). -| 6.6 Restore user FPCR. -| 6.7 Return ans := Sc * ans. Exit. -| Notes: The various arrangements of the expressions give accurate -| evaluations. -| -| Step 7. exp(X)-1 for |X| < 1/4. -| 7.1 If |X| >= 2^(-65), go to Step 9. -| 7.2 Go to Step 8. -| -| Step 8. Calculate exp(X)-1, |X| < 2^(-65). -| 8.1 If |X| < 2^(-16312), goto 8.3 -| 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit. -| 8.3 X := X * 2^(140). -| 8.4 Restore FPCR; ans := ans - 2^(-16382). -| Return ans := ans*2^(140). Exit -| Notes: The idea is to return "X - tiny" under the user -| precision and rounding modes. To avoid unnecessary -| inefficiency, we stay away from denormalized numbers the -| best we can. For |X| >= 2^(-16312), the straightforward -| 8.2 generates the inexact exception as the case warrants. -| -| Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial -| p = X + X*X*(B1 + X*(B2 + ... 
+ X*B12)) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: B1 (which is 1/2), B9 to B12 -| are single precision; B3 to B8 are double precision; and -| B2 is double extended. -| b) Even with the restriction above, -| |p - (exp(X)-1)| < |X| 2^(-70.6) -| for all |X| <= 0.251. -| Note that 0.251 is slightly bigger than 1/4. -| c) To fully preserve accuracy, the polynomial is computed -| as X + ( S*B1 + Q ) where S = X*X and -| Q = X*S*(B2 + X*(B3 + ... + X*B12)) -| d) To fully utilize the pipeline, Q is separated into -| two independent pieces of roughly equal complexity -| Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + -| [ S*S*(B3 + S*(B5 + ... + S*B11)) ] -| -| Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. -| 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical -| purposes. Therefore, go to Step 1 of setox. -| 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes. -| ans := -1 -| Restore user FPCR -| Return ans := ans + 2^(-126). Exit. -| Notes: 10.2 will always create an inexact and return -1 + tiny -| in the user rounding precision and mode. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|setox idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 - -EXPA3: .long 0x3FA55555,0x55554431 -EXPA2: .long 0x3FC55555,0x55554018 - -HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 -TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 - -EM1A4: .long 0x3F811111,0x11174385 -EM1A3: .long 0x3FA55555,0x55554F5A - -EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000 - -EM1B8: .long 0x3EC71DE3,0xA5774682 -EM1B7: .long 0x3EFA01A0,0x19D7CB68 - -EM1B6: .long 0x3F2A01A0,0x1A019DF3 -EM1B5: .long 0x3F56C16C,0x16C170E2 - -EM1B4: .long 0x3F811111,0x11111111 -EM1B3: .long 0x3FA55555,0x55555555 - -EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB - .long 0x00000000 - -TWO140: .long 0x48B00000,0x00000000 -TWON140: .long 0x37300000,0x00000000 - -EXPTBL: - .long 0x3FFF0000,0x80000000,0x00000000,0x00000000 - .long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B - .long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 - .long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 - .long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C - .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F - .long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 - .long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF - .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF - .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA - .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 - .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 - .long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 - .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 - .long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D - .long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 - .long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD - .long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 - .long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 - .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D - .long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 - .long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C - .long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 - .long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 - .long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 - .long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA - .long 
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A - .long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC - .long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC - .long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 - .long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 - .long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A - .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 - .long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 - .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC - .long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 - .long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 - .long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 - .long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 - .long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B - .long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 - .long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E - .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 - .long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D - .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 - .long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C - .long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 - .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 - .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F - .long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F - .long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 - .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 - .long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B - .long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 - .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A - .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 - .long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 - .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B - .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 - .long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 - .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 - .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 - .long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 - .long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A - - .set ADJFLAG,L_SCR2 - .set SCALE,FP_SCR1 - .set ADJSCALE,FP_SCR2 - .set SC,FP_SCR3 - .set ONEBYSC,FP_SCR4 - - | xref t_frcinx - |xref t_extdnrm - |xref t_unfl - |xref t_ovfl - - .global setoxd -setoxd: -|--entry point for EXP(X), X is denormalized - movel (%a0),%d0 - andil #0x80000000,%d0 - oril #0x00800000,%d0 | ...sign(X)*2^(-126) - movel %d0,-(%sp) - fmoves #0x3F800000,%fp0 - fmovel %d1,%fpcr - fadds (%sp)+,%fp0 - bra t_frcinx - - .global setox -setox: -|--entry point for EXP(X), here X is finite, non-zero, and not NaN's - -|--Step 1. - movel (%a0),%d0 | ...load part of input X - andil #0x7FFF0000,%d0 | ...biased expo. of X - cmpil #0x3FBE0000,%d0 | ...2^(-65) - bges EXPC1 | ...normal case - bra EXPSM - -EXPC1: -|--The case |X| >= 2^(-65) - movew 4(%a0),%d0 | ...expo. and partial sig. of |X| - cmpil #0x400CB167,%d0 | ...16380 log2 trunc. 16 bits - blts EXPMAIN | ...normal case - bra EXPBIG - -EXPMAIN: -|--Step 2. -|--This is the normal branch: 2^(-65) <= |X| < 16380 log2. - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - movel #0,ADJFLAG(%a6) - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is M - addiw #0x3FFF,%d0 | ...biased expo. 
of 2^(M) - movew L2,L_SCR1(%a6) | ...prefetch L2, no need in CB - -EXPCONT1: -|--Step 3. -|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -|--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) - fmovex %fp0,%fp2 - fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) - fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 - faddx %fp1,%fp0 | ...X + N*L1 - faddx %fp2,%fp0 | ...fp0 is R, reduced arg. -| MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache - -|--Step 4. -|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...fp1 IS S = R*R - - fmoves #0x3AB60B70,%fp2 | ...fp2 IS A5 -| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache - - fmulx %fp1,%fp2 | ...fp2 IS S*A5 - fmovex %fp1,%fp3 - fmuls #0x3C088895,%fp3 | ...fp3 IS S*A4 - - faddd EXPA3,%fp2 | ...fp2 IS A3+S*A5 - faddd EXPA2,%fp3 | ...fp3 IS A2+S*A4 - - fmulx %fp1,%fp2 | ...fp2 IS S*(A3+S*A5) - movew %d0,SCALE(%a6) | ...SCALE is 2^(M) in extended - clrw SCALE+2(%a6) - movel #0x80000000,SCALE+4(%a6) - clrl SCALE+8(%a6) - - fmulx %fp1,%fp3 | ...fp3 IS S*(A2+S*A4) - - fadds #0x3F000000,%fp2 | ...fp2 IS A1+S*(A3+S*A5) - fmulx %fp0,%fp3 | ...fp3 IS R*S*(A2+S*A4) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A1+S*(A3+S*A5)) - faddx %fp3,%fp0 | ...fp0 IS R+R*S*(A2+S*A4), -| ...fp3 released - - fmovex (%a1)+,%fp1 | ...fp1 is lead. pt. of 2^(J/64) - faddx %fp2,%fp0 | ...fp0 is EXP(R) - 1 -| ...fp2 released - -|--Step 5 -|--final reconstruction process -|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) - - fmulx %fp1,%fp0 | ...2^(J/64)*(Exp(R)-1) - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - fadds (%a1),%fp0 | ...accurate 2^(J/64) - - faddx %fp1,%fp0 | ...2^(J/64) + 2^(J/64)*... - movel ADJFLAG(%a6),%d0 - -|--Step 6 - tstl %d0 - beqs NORMAL -ADJUST: - fmulx ADJSCALE(%a6),%fp0 -NORMAL: - fmovel %d1,%FPCR | ...restore user FPCR - fmulx SCALE(%a6),%fp0 | ...multiply 2^(M) - bra t_frcinx - -EXPSM: -|--Step 7 - fmovemx (%a0),%fp0-%fp0 | ...in case X is denormalized - fmovel %d1,%FPCR - fadds #0x3F800000,%fp0 | ...1+X in user mode - bra t_frcinx - -EXPBIG: -|--Step 8 - cmpil #0x400CB27C,%d0 | ...16480 log2 - bgts EXP2BIG -|--Steps 8.2 -- 8.6 - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - movel #1,ADJFLAG(%a6) - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is K - movel %d0,L_SCR1(%a6) | ...save K temporarily - asrl #1,%d0 | ...D0 is M1 - subl %d0,L_SCR1(%a6) | ...a1 is M - addiw #0x3FFF,%d0 | ...biased expo. of 2^(M1) - movew %d0,ADJSCALE(%a6) | ...ADJSCALE := 2^(M1) - clrw ADJSCALE+2(%a6) - movel #0x80000000,ADJSCALE+4(%a6) - clrl ADJSCALE+8(%a6) - movel L_SCR1(%a6),%d0 | ...D0 is M - addiw #0x3FFF,%d0 | ...biased expo. of 2^(M) - bra EXPCONT1 | ...go back to Step 3 - -EXP2BIG: -|--Step 9 - fmovel %d1,%FPCR - movel (%a0),%d0 - bclrb #sign_bit,(%a0) | ...setox always returns positive - cmpil #0,%d0 - blt t_unfl - bra t_ovfl - - .global setoxm1d -setoxm1d: -|--entry point for EXPM1(X), here X is denormalized -|--Step 0. 
- bra t_extdnrm - - - .global setoxm1 -setoxm1: -|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN - -|--Step 1. -|--Step 1.1 - movel (%a0),%d0 | ...load part of input X - andil #0x7FFF0000,%d0 | ...biased expo. of X - cmpil #0x3FFD0000,%d0 | ...1/4 - bges EM1CON1 | ...|X| >= 1/4 - bra EM1SM - -EM1CON1: -|--Step 1.3 -|--The case |X| >= 1/4 - movew 4(%a0),%d0 | ...expo. and partial sig. of |X| - cmpil #0x4004C215,%d0 | ...70log2 rounded up to 16 bits - bles EM1MAIN | ...1/4 <= |X| <= 70log2 - bra EM1BIG - -EM1MAIN: -|--Step 2. -|--This is the case: 1/4 <= |X| <= 70 log2. - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 -| MOVE.W #$3F81,EM1A4 ...prefetch in CB mode - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is M - movel %d0,L_SCR1(%a6) | ...save a copy of M -| MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode - -|--Step 3. -|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -|--a0 points to 2^(J/64), D0 and a1 both contain M - fmovex %fp0,%fp2 - fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) - fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 - faddx %fp1,%fp0 | ...X + N*L1 - faddx %fp2,%fp0 | ...fp0 is R, reduced arg. -| MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache - addiw #0x3FFF,%d0 | ...D0 is biased expo. of 2^M - -|--Step 4. -|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) -|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...fp1 IS S = R*R - - fmoves #0x3950097B,%fp2 | ...fp2 IS a6 -| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache - - fmulx %fp1,%fp2 | ...fp2 IS S*A6 - fmovex %fp1,%fp3 - fmuls #0x3AB60B6A,%fp3 | ...fp3 IS S*A5 - - faddd EM1A4,%fp2 | ...fp2 IS A4+S*A6 - faddd EM1A3,%fp3 | ...fp3 IS A3+S*A5 - movew %d0,SC(%a6) | ...SC is 2^(M) in extended - clrw SC+2(%a6) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A4+S*A6) - movel L_SCR1(%a6),%d0 | ...D0 is M - negw %d0 | ...D0 is -M - fmulx %fp1,%fp3 | ...fp3 IS S*(A3+S*A5) - addiw #0x3FFF,%d0 | ...biased expo. of 2^(-M) - faddd EM1A2,%fp2 | ...fp2 IS A2+S*(A4+S*A6) - fadds #0x3F000000,%fp3 | ...fp3 IS A1+S*(A3+S*A5) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A2+S*(A4+S*A6)) - oriw #0x8000,%d0 | ...signed/expo. 
of -2^(-M) - movew %d0,ONEBYSC(%a6) | ...OnebySc is -2^(-M) - clrw ONEBYSC+2(%a6) - movel #0x80000000,ONEBYSC+4(%a6) - clrl ONEBYSC+8(%a6) - fmulx %fp3,%fp1 | ...fp1 IS S*(A1+S*(A3+S*A5)) -| ...fp3 released - - fmulx %fp0,%fp2 | ...fp2 IS R*S*(A2+S*(A4+S*A6)) - faddx %fp1,%fp0 | ...fp0 IS R+S*(A1+S*(A3+S*A5)) -| ...fp1 released - - faddx %fp2,%fp0 | ...fp0 IS EXP(R)-1 -| ...fp2 released - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - -|--Step 5 -|--Compute 2^(J/64)*p - - fmulx (%a1),%fp0 | ...2^(J/64)*(Exp(R)-1) - -|--Step 6 -|--Step 6.1 - movel L_SCR1(%a6),%d0 | ...retrieve M - cmpil #63,%d0 - bles MLE63 -|--Step 6.2 M >= 64 - fmoves 12(%a1),%fp1 | ...fp1 is t - faddx ONEBYSC(%a6),%fp1 | ...fp1 is t+OnebySc - faddx %fp1,%fp0 | ...p+(t+OnebySc), fp1 released - faddx (%a1),%fp0 | ...T+(p+(t+OnebySc)) - bras EM1SCALE -MLE63: -|--Step 6.3 M <= 63 - cmpil #-3,%d0 - bges MGEN3 -MLTN3: -|--Step 6.4 M <= -4 - fadds 12(%a1),%fp0 | ...p+t - faddx (%a1),%fp0 | ...T+(p+t) - faddx ONEBYSC(%a6),%fp0 | ...OnebySc + (T+(p+t)) - bras EM1SCALE -MGEN3: -|--Step 6.5 -3 <= M <= 63 - fmovex (%a1)+,%fp1 | ...fp1 is T - fadds (%a1),%fp0 | ...fp0 is p+t - faddx ONEBYSC(%a6),%fp1 | ...fp1 is T+OnebySc - faddx %fp1,%fp0 | ...(T+OnebySc)+(p+t) - -EM1SCALE: -|--Step 6.6 - fmovel %d1,%FPCR - fmulx SC(%a6),%fp0 - - bra t_frcinx - -EM1SM: -|--Step 7 |X| < 1/4. - cmpil #0x3FBE0000,%d0 | ...2^(-65) - bges EM1POLY - -EM1TINY: -|--Step 8 |X| < 2^(-65) - cmpil #0x00330000,%d0 | ...2^(-16312) - blts EM12TINY -|--Step 8.2 - movel #0x80010000,SC(%a6) | ...SC is -2^(-16382) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - fmovex (%a0),%fp0 - fmovel %d1,%FPCR - faddx SC(%a6),%fp0 - - bra t_frcinx - -EM12TINY: -|--Step 8.3 - fmovex (%a0),%fp0 - fmuld TWO140,%fp0 - movel #0x80010000,SC(%a6) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - faddx SC(%a6),%fp0 - fmovel %d1,%FPCR - fmuld TWON140,%fp0 - - bra t_frcinx - -EM1POLY: -|--Step 9 exp(X)-1 by a simple polynomial - fmovex (%a0),%fp0 | ...fp0 is X - fmulx %fp0,%fp0 | ...fp0 is S := X*X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - fmoves #0x2F30CAA8,%fp1 | ...fp1 is B12 - fmulx %fp0,%fp1 | ...fp1 is S*B12 - fmoves #0x310F8290,%fp2 | ...fp2 is B11 - fadds #0x32D73220,%fp1 | ...fp1 is B10+S*B12 - - fmulx %fp0,%fp2 | ...fp2 is S*B11 - fmulx %fp0,%fp1 | ...fp1 is S*(B10 + ... - - fadds #0x3493F281,%fp2 | ...fp2 is B9+S*... - faddd EM1B8,%fp1 | ...fp1 is B8+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B9+... - fmulx %fp0,%fp1 | ...fp1 is S*(B8+... - - faddd EM1B7,%fp2 | ...fp2 is B7+S*... - faddd EM1B6,%fp1 | ...fp1 is B6+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B7+... - fmulx %fp0,%fp1 | ...fp1 is S*(B6+... - - faddd EM1B5,%fp2 | ...fp2 is B5+S*... - faddd EM1B4,%fp1 | ...fp1 is B4+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B5+... - fmulx %fp0,%fp1 | ...fp1 is S*(B4+... - - faddd EM1B3,%fp2 | ...fp2 is B3+S*... - faddx EM1B2,%fp1 | ...fp1 is B2+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B3+... - fmulx %fp0,%fp1 | ...fp1 is S*(B2+... - - fmulx %fp0,%fp2 | ...fp2 is S*S*(B3+...) - fmulx (%a0),%fp1 | ...fp1 is X*S*(B2... 
- - fmuls #0x3F000000,%fp0 | ...fp0 is S*B1 - faddx %fp2,%fp1 | ...fp1 is Q -| ...fp2 released - - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - - faddx %fp1,%fp0 | ...fp0 is S*B1+Q -| ...fp1 released - - fmovel %d1,%FPCR - faddx (%a0),%fp0 - - bra t_frcinx - -EM1BIG: -|--Step 10 |X| > 70 log2 - movel (%a0),%d0 - cmpil #0,%d0 - bgt EXPC1 -|--Step 10.2 - fmoves #0xBF800000,%fp0 | ...fp0 is -1 - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 | ...-1 + 2^(-126) - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/sgetem.S b/arch/m68k/fpsp040/sgetem.S deleted file mode 100644 index d9234f4aed57c8801c2a23b99fe4a2d91bdd2ec5..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sgetem.S +++ /dev/null @@ -1,140 +0,0 @@ -| -| sgetem.sa 3.1 12/10/90 -| -| The entry point sGETEXP returns the exponent portion -| of the input argument. The exponent bias is removed -| and the exponent value is returned as an extended -| precision number in fp0. sGETEXPD handles denormalized -| numbers. -| -| The entry point sGETMAN extracts the mantissa of the -| input argument. The mantissa is converted to an -| extended precision number and returned in fp0. The -| range of the result is [1.0 - 2.0). -| -| -| Input: Double-extended number X in the ETEMP space in -| the floating-point save stack. -| -| Output: The functions return exp(X) or man(X) in fp0. -| -| Modified: fp0. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref nrm_set - -| -| This entry point is used by the unimplemented instruction exception -| handler. It points a0 to the input operand. -| -| -| -| SGETEXP -| - - .global sgetexp -sgetexp: - movew LOCAL_EX(%a0),%d0 |get the exponent - bclrl #15,%d0 |clear the sign bit - subw #0x3fff,%d0 |subtract off the bias - fmovew %d0,%fp0 |move the exp to fp0 - rts - - .global sgetexpd -sgetexpd: - bclrb #sign_bit,LOCAL_EX(%a0) - bsr nrm_set |normalize (exp will go negative) - movew LOCAL_EX(%a0),%d0 |load resulting exponent into d0 - subw #0x3fff,%d0 |subtract off the bias - fmovew %d0,%fp0 |move the exp to fp0 - rts -| -| -| This entry point is used by the unimplemented instruction exception -| handler. It points a0 to the input operand. -| -| -| -| SGETMAN -| -| -| For normalized numbers, leave the mantissa alone, simply load -| with an exponent of +/- $3fff. -| - .global sgetman -sgetman: - movel USER_FPCR(%a6),%d0 - andil #0xffffff00,%d0 |clear rounding precision and mode - fmovel %d0,%fpcr |this fpcr setting is used by the 882 - movew LOCAL_EX(%a0),%d0 |get the exp (really just want sign bit) - orw #0x7fff,%d0 |clear old exp - bclrl #14,%d0 |make it the new exp +-3fff - movew %d0,LOCAL_EX(%a0) |move the sign & exp back to fsave stack - fmovex (%a0),%fp0 |put new value back in fp0 - rts - -| -| For denormalized numbers, shift the mantissa until the j-bit = 1, -| then load the exponent with +/1 $3fff. -| - .global sgetmand -sgetmand: - movel LOCAL_HI(%a0),%d0 |load ms mant in d0 - movel LOCAL_LO(%a0),%d1 |load ls mant in d1 - bsr shft |shift mantissa bits till msbit is set - movel %d0,LOCAL_HI(%a0) |put ms mant back on stack - movel %d1,LOCAL_LO(%a0) |put ls mant back on stack - bras sgetman - -| -| SHFT -| -| Shifts the mantissa bits until msbit is set. 
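Before the shft details, here is what the sgetexp/sgetman pair above computes, sketched on IEEE doubles. The FPSP itself works on 80-bit extended values with an explicit integer bit, and the denormalized entries (sgetexpd/sgetmand) first normalize the input; the sketch assumes a normalized, finite argument.

#include <stdint.h>
#include <string.h>

/*
 * getexp: the unbiased exponent, returned as a floating-point value.
 * getman: the mantissa rescaled into sign(x) * [1.0, 2.0).
 */
static double getexp_sketch(double x)
{
        uint64_t b;

        memcpy(&b, &x, sizeof b);
        return (double)((int)((b >> 52) & 0x7ff) - 1023);
}

static double getman_sketch(double x)
{
        uint64_t b;

        memcpy(&b, &x, sizeof b);
        b &= ~0x7ff0000000000000ull;    /* clear the old exponent     */
        b |=  0x3ff0000000000000ull;    /* force it to the bias (2^0) */
        memcpy(&x, &b, sizeof x);
        return x;
}

The orw #0x7fff / bclrl #14 pair in sgetman is the extended-precision version of the same exponent splice: set all fifteen exponent bits, then clear bit 14, leaving exactly the bias $3fff.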
-| input: -| ms mantissa part in d0 -| ls mantissa part in d1 -| output: -| shifted bits in d0 and d1 -shft: - tstl %d0 |if any bits set in ms mant - bnes upper |then branch -| ;else no bits set in ms mant - tstl %d1 |test if any bits set in ls mant - bnes cont |if set then continue - bras shft_end |else return -cont: - movel %d3,-(%a7) |save d3 - exg %d0,%d1 |shift ls mant to ms mant - bfffo %d0{#0:#32},%d3 |find first 1 in ls mant to d0 - lsll %d3,%d0 |shift first 1 to integer bit in ms mant - movel (%a7)+,%d3 |restore d3 - bras shft_end -upper: - - moveml %d3/%d5/%d6,-(%a7) |save registers - bfffo %d0{#0:#32},%d3 |find first 1 in ls mant to d0 - lsll %d3,%d0 |shift ms mant until j-bit is set - movel %d1,%d6 |save ls mant in d6 - lsll %d3,%d1 |shift ls mant by count - movel #32,%d5 - subl %d3,%d5 |sub 32 from shift for ls mant - lsrl %d5,%d6 |shift off all bits but those that will -| ;be shifted into ms mant - orl %d6,%d0 |shift the ls mant bits into the ms mant - moveml (%a7)+,%d3/%d5/%d6 |restore registers -shft_end: - rts - - |end diff --git a/arch/m68k/fpsp040/sint.S b/arch/m68k/fpsp040/sint.S deleted file mode 100644 index 0e92d4e5d231b4687348f83a79317560288fe218..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sint.S +++ /dev/null @@ -1,246 +0,0 @@ -| -| sint.sa 3.1 12/10/90 -| -| The entry point sINT computes the rounded integer -| equivalent of the input argument, sINTRZ computes -| the integer rounded to zero of the input argument. -| -| Entry points sint and sintrz are called from do_func -| to emulate the fint and fintrz unimplemented instructions, -| respectively. Entry point sintdo is used by bindec. -| -| Input: (Entry points sint and sintrz) Double-extended -| number X in the ETEMP space in the floating-point -| save stack. -| (Entry point sintdo) Double-extended number X in -| location pointed to by the address register a0. -| (Entry point sintd) Double-extended denormalized -| number X in the ETEMP space in the floating-point -| save stack. -| -| Output: The function returns int(X) or intrz(X) in fp0. -| -| Modifies: fp0. -| -| Algorithm: (sint and sintrz) -| -| 1. If exp(X) >= 63, return X. -| If exp(X) < 0, return +/- 0 or +/- 1, according to -| the rounding mode. -| -| 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the -| result to the exponent $403e. -| -| 3. Round the result in the mode given in USER_FPCR. For -| sintrz, force round-to-zero mode. -| -| 4. Normalize the rounded result; store in fp0. -| -| For the denormalized cases, force the correct result -| for the given sign and rounding mode. -| -| Sign(X) -| RMODE + - -| ----- -------- -| RN +0 -0 -| RZ +0 -0 -| RM +0 -1 -| RP +1 -0 -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SINT idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref dnrm_lp - |xref nrm_set - |xref round - |xref t_inx2 - |xref ld_pone - |xref ld_mone - |xref ld_pzero - |xref ld_mzero - |xref snzrinx - -| -| FINT -| - .global sint -sint: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding -| ;implicitly has extend precision -| ;in upper word. - movel %d1,L_SCR1(%a6) |save mode bits - bras sintexc - -| -| FINT with extended denorm inputs. 
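The shft helper above maps directly onto 64-bit integer code; a C sketch of the same normalization, with a plain loop in place of the bfffo-computed shift count:

#include <stdint.h>

/*
 * Sketch of shft: shift the 64-bit mantissa held as two 32-bit halves
 * left until its msbit (the j-bit) is set.  The assembly derives the
 * whole shift count at once with bfffo; the loop is the plain form.
 */
static void shft_sketch(uint32_t *hi, uint32_t *lo)
{
        uint64_t m = ((uint64_t)*hi << 32) | *lo;

        if (m == 0)
                return;                 /* nothing to normalize */
        while (!(m >> 63))
                m <<= 1;                /* drive the j-bit to the top */
        *hi = (uint32_t)(m >> 32);
        *lo = (uint32_t)m;
}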
-| - .global sintd -sintd: - btstb #5,FPCR_MODE(%a6) - beq snzrinx |if round nearest or round zero, +/- 0 - btstb #4,FPCR_MODE(%a6) - beqs rnd_mns -rnd_pls: - btstb #sign_bit,LOCAL_EX(%a0) - bnes sintmz - bsr ld_pone |if round plus inf and pos, answer is +1 - bra t_inx2 -rnd_mns: - btstb #sign_bit,LOCAL_EX(%a0) - beqs sintpz - bsr ld_mone |if round mns inf and neg, answer is -1 - bra t_inx2 -sintpz: - bsr ld_pzero - bra t_inx2 -sintmz: - bsr ld_mzero - bra t_inx2 - -| -| FINTRZ -| - .global sintrz -sintrz: - movel #1,L_SCR1(%a6) |use rz mode for rounding -| ;implicitly has extend precision -| ;in upper word. - bras sintexc -| -| SINTDO -| -| Input: a0 points to an IEEE extended format operand -| Output: fp0 has the result -| -| Exceptions: -| -| If the subroutine results in an inexact operation, the inx2 and -| ainx bits in the USER_FPSR are set. -| -| - .global sintdo -sintdo: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding -| ;implicitly has ext precision -| ;in upper word. - movel %d1,L_SCR1(%a6) |save mode bits -| -| Real work of sint is in sintexc -| -sintexc: - bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal extended -| ;format - sne LOCAL_SGN(%a0) - cmpw #0x403e,LOCAL_EX(%a0) |check if (unbiased) exp > 63 - bgts out_rnge |branch if exp < 63 - cmpw #0x3ffd,LOCAL_EX(%a0) |check if (unbiased) exp < 0 - bgt in_rnge |if 63 >= exp > 0, do calc -| -| Input is less than zero. Restore sign, and check for directed -| rounding modes. L_SCR1 contains the rmode in the lower byte. -| -un_rnge: - btstb #1,L_SCR1+3(%a6) |check for rn and rz - beqs un_rnrz - tstb LOCAL_SGN(%a0) |check for sign - bnes un_rmrp_neg -| -| Sign is +. If rp, load +1.0, if rm, load +0.0 -| - cmpib #3,L_SCR1+3(%a6) |check for rp - beqs un_ldpone |if rp, load +1.0 - bsr ld_pzero |if rm, load +0.0 - bra t_inx2 -un_ldpone: - bsr ld_pone - bra t_inx2 -| -| Sign is -. If rm, load -1.0, if rp, load -0.0 -| -un_rmrp_neg: - cmpib #2,L_SCR1+3(%a6) |check for rm - beqs un_ldmone |if rm, load -1.0 - bsr ld_mzero |if rp, load -0.0 - bra t_inx2 -un_ldmone: - bsr ld_mone - bra t_inx2 -| -| Rmode is rn or rz; return signed zero -| -un_rnrz: - tstb LOCAL_SGN(%a0) |check for sign - bnes un_rnrz_neg - bsr ld_pzero - bra t_inx2 -un_rnrz_neg: - bsr ld_mzero - bra t_inx2 - -| -| Input is greater than 2^63. All bits are significant. Return -| the input. 
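In C99 terms, sint behaves like rint() under the caller's rounding mode and sintrz like trunc(); a sketch, with the fenv.h FE_* macros standing in for the FPCR mode bits (a strict build would also want #pragma STDC FENV_ACCESS ON):

#include <fenv.h>
#include <math.h>

/*
 * fint: round to integer in the user's rounding mode.  rint() raises
 * FE_INEXACT when bits are discarded, matching the inx2/ainx behaviour.
 * fintrz: always round toward zero, whatever the mode.
 */
static double fint_sketch(double x, int fe_mode)
{
        int old = fegetround();
        double r;

        fesetround(fe_mode);
        r = rint(x);            /* |x| >= 2^52 is already integral */
        fesetround(old);
        return r;
}

static double fintrz_sketch(double x)
{
        return trunc(x);
}

The denormalized-input table above falls out of the same rule: a positive denormal rounds to +1 under RP and to +0 otherwise, and a negative one rounds to -1 under RM and to -0 otherwise.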
-| -out_rnge: - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs intps - bsetb #sign_bit,LOCAL_EX(%a0) -intps: - fmovel %fpcr,-(%sp) - fmovel #0,%fpcr - fmovex LOCAL_EX(%a0),%fp0 |if exp > 63 -| ;then return X to the user -| ;there are no fraction bits - fmovel (%sp)+,%fpcr - rts - -in_rnge: -| ;shift off fraction bits - clrl %d0 |clear d0 - initial g,r,s for -| ;dnrm_lp - movel #0x403e,%d1 |set threshold for dnrm_lp -| ;assumes a0 points to operand - bsr dnrm_lp -| ;returns unnormalized number -| ;pointed by a0 -| ;output d0 supplies g,r,s -| ;used by round - movel L_SCR1(%a6),%d1 |use selected rounding mode -| -| - bsr round |round the unnorm based on users -| ;input a0 ptr to ext X -| ; d0 g,r,s bits -| ; d1 PREC/MODE info -| ;output a0 ptr to rounded result -| ;inexact flag set in USER_FPSR -| ;if initial grs set -| -| normalize the rounded result and store value in fp0 -| - bsr nrm_set |normalize the unnorm -| ;Input: a0 points to operand to -| ;be normalized -| ;Output: a0 points to normalized -| ;result - bfclr LOCAL_SGN(%a0){#0:#8} - beqs nrmrndp - bsetb #sign_bit,LOCAL_EX(%a0) |return to IEEE extended format -nrmrndp: - fmovel %fpcr,-(%sp) - fmovel #0,%fpcr - fmovex LOCAL_EX(%a0),%fp0 |move result to fp0 - fmovel (%sp)+,%fpcr - rts - - |end diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S deleted file mode 100644 index a8f41615d94a7283db6638117c59eb91b41f8df8..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/skeleton.S +++ /dev/null @@ -1,513 +0,0 @@ -| -| skeleton.sa 3.2 4/26/91 -| -| This file contains code that is system dependent and will -| need to be modified to install the FPSP. -| -| Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'. -| Put any target system specific handling that must be done immediately -| before the jump instruction. If there no handling necessary, then -| the 'fpsp_xxxx' handler entry point should be placed in the exception -| table so that the 'jmp' can be eliminated. If the FPSP determines that the -| exception is one that must be reported then there will be a -| return from the package by a 'jmp real_xxxx'. At that point -| the machine state will be identical to the state before -| the FPSP was entered. In particular, whatever condition -| that caused the exception will still be pending when the FPSP -| package returns. Thus, there will be system specific code -| to handle the exception. -| -| If the exception was completely handled by the package, then -| the return will be via a 'jmp fpsp_done'. Unless there is -| OS specific work to be done (such as handling a context switch or -| interrupt) the user program can be resumed via 'rte'. -| -| In the following skeleton code, some typical 'real_xxxx' handling -| code is shown. This code may need to be moved to an appropriate -| place in the target system, or rewritten. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -| -| Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk) -| - -#include -#include -#include - -|SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 15 -| -| The following counters are used for standalone testing -| - - |section 8 - -#include "fpsp.h" - - |xref b1238_fix - -| -| Divide by Zero exception -| -| All dz exceptions are 'real', hence no fpsp_dz entry point. 
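Schematically, the entry-point contract described in the header above is a two-way dispatch; in C, with function pointers as stand-ins for the assembly entry points:

/*
 * Schematic version of the skeleton's flow: the vector enters the
 * package, and the package either finishes the job (fpsp_done -> rte)
 * or bounces the still-pending condition back to the system-specific
 * real_xxxx handler with the machine state untouched.
 */
enum fpsp_result { FPSP_DONE, FPSP_REAL };

static void fp_vector_sketch(enum fpsp_result (*fpsp_xxxx)(void),
                             void (*real_xxxx)(void))
{
        if (fpsp_xxxx() == FPSP_REAL)
                real_xxxx();    /* exception still pending; OS acts */
        /* otherwise resume the interrupted program (the rte path)  */
}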
-| - .global dz - .global real_dz -dz: -real_dz: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Inexact exception -| -| All inexact exceptions are real, but the 'real' handler -| will probably want to clear the pending exception. -| The provided code will clear the E3 exception (if pending), -| otherwise clear the E1 exception. The frestore is not really -| necessary for E1 exceptions. -| -| Code following the 'inex' label is to handle bug #1232. In this -| bug, if an E1 snan, ovfl, or unfl occurred, and the process was -| swapped out before taking the exception, the exception taken on -| return was inex, rather than the correct exception. The snan, ovfl, -| and unfl exception to be taken must not have been enabled. The -| fix is to check for E1, and the existence of one of snan, ovfl, -| or unfl bits set in the fpsr. If any of these are set, branch -| to the appropriate handler for the exception in the fpsr. Note -| that this fix is only for d43b parts, and is skipped if the -| version number is not $40. -| -| - .global real_inex - .global inex -inex: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - cmpib #VER_40,(%sp) |test version number - bnes not_fmt40 - fmovel %fpsr,-(%sp) - btstb #E1,E_BYTE(%a6) |test for E1 set - beqs not_b1232 - btstb #snan_bit,2(%sp) |test for snan - beq inex_ckofl - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra snan -inex_ckofl: - btstb #ovfl_bit,2(%sp) |test for ovfl - beq inex_ckufl - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra ovfl -inex_ckufl: - btstb #unfl_bit,2(%sp) |test for unfl - beq not_b1232 - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra unfl - -| -| We do not have the bug 1232 case. Clean up the stack and call -| real_inex. -| -not_b1232: - addl #4,%sp - frestore (%sp)+ - unlk %a6 - -real_inex: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) -not_fmt40: - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - beqs inex_cke1 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. 
-| - moveml %d0/%d1,USER_DA(%a6) - bfextu CMDREG1B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - moveml USER_DA(%a6),%d0/%d1 - bras inex_done -inex_cke1: - bclrb #E1,E_BYTE(%a6) -inex_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Overflow exception -| - |xref fpsp_ovfl - .global real_ovfl - .global ovfl -ovfl: - jmp fpsp_ovfl -real_ovfl: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - bnes ovfl_done - bclrb #E1,E_BYTE(%a6) -ovfl_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Underflow exception -| - |xref fpsp_unfl - .global real_unfl - .global unfl -unfl: - jmp fpsp_unfl -real_unfl: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - bnes unfl_done - bclrb #E1,E_BYTE(%a6) -unfl_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Signalling NAN exception -| - |xref fpsp_snan - .global real_snan - .global snan -snan: - jmp fpsp_snan -real_snan: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |snan is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Operand Error exception -| - |xref fpsp_operr - .global real_operr - .global operr -operr: - jmp fpsp_operr -real_operr: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |operr is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - - -| -| BSUN exception -| -| This sample handler simply clears the nan bit in the FPSR. -| - |xref fpsp_bsun - .global real_bsun - .global bsun -bsun: - jmp fpsp_bsun -real_bsun: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |bsun is always an E1 exception - fmovel %FPSR,-(%sp) - bclrb #nan_bit,(%sp) - fmovel (%sp)+,%FPSR - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| F-line exception -| -| A 'real' F-line exception is one that the FPSP isn't supposed to -| handle. E.g. an instruction with a co-processor ID that is not 1. 
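Restated in C, the bug-1232 test performed on the inex path above is a straightforward priority classification; the arguments are illustrative stand-ins for the frame version, the E1 bit, and the snan/ovfl/unfl bits read from the saved fpsr:

enum fp_exc { EXC_INEX, EXC_SNAN, EXC_OVFL, EXC_UNFL };

/* Sketch of the bug-1232 re-dispatch: same test order as the assembly. */
static enum fp_exc inex_classify_sketch(int ver40, int e1,
                                        int snan, int ovfl, int unfl)
{
        if (ver40 && e1) {              /* only $40-version (d43b) frames */
                if (snan)
                        return EXC_SNAN;
                if (ovfl)
                        return EXC_OVFL;
                if (unfl)
                        return EXC_UNFL;
        }
        return EXC_INEX;                /* a genuine inexact */
}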
-| -| - |xref fpsp_fline - .global real_fline - .global fline -fline: - jmp fpsp_fline -real_fline: - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Unsupported data type exception -| - |xref fpsp_unsupp - .global real_unsupp - .global unsupp -unsupp: - jmp fpsp_unsupp -real_unsupp: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |unsupp is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Trace exception -| - .global real_trace -real_trace: - | - bral trap - -| -| fpsp_fmt_error --- exit point for frame format error -| -| The fpu stack frame does not match the frames existing -| or planned at the time of this writing. The fpsp is -| unable to handle frame sizes not in the following -| version:size pairs: -| -| {4060, 4160} - busy frame -| {4028, 4130} - unimp frame -| {4000, 4100} - idle frame -| -| This entry point simply holds an f-line illegal value. -| Replace this with a call to your kernel panic code or -| code to handle future revisions of the fpu. -| - .global fpsp_fmt_error -fpsp_fmt_error: - - .long 0xf27f0000 |f-line illegal - -| -| fpsp_done --- FPSP exit point -| -| The exception has been handled by the package and we are ready -| to return to user mode, but there may be OS specific code -| to execute before we do. If there is, do it now. -| -| - - .global fpsp_done -fpsp_done: - btst #0x5,%sp@ | supervisor bit set in saved SR? - beq .Lnotkern - rte -.Lnotkern: - SAVE_ALL_INT - GET_CURRENT(%d0) - | deliver signals, reschedule etc.. - jra ret_from_exception - -| -| mem_write --- write to user or supervisor address space -| -| Writes to memory while in supervisor mode. copyout accomplishes -| this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function. -| If you don't have copyout, use the local copy of the function below. -| -| a0 - supervisor source address -| a1 - user destination address -| d0 - number of bytes to write (maximum count is 12) -| -| The supervisor source address is guaranteed to point into the supervisor -| stack. The result is that a UNIX -| process is allowed to sleep as a consequence of a page fault during -| copyout. The probability of a page fault is exceedingly small because -| the 68040 always reads the destination address and thus the page -| faults should have already been handled. -| -| If the EXC_SR shows that the exception was from supervisor space, -| then just do a dumb (and slow) memory move. In a UNIX environment -| there shouldn't be any supervisor mode floating point exceptions. -| - .global mem_write -mem_write: - btstb #5,EXC_SR(%a6) |check for supervisor state - beqs user_write -super_write: - moveb (%a0)+,(%a1)+ - subql #1,%d0 - bnes super_write - rts -user_write: - movel %d1,-(%sp) |preserve d1 just in case - movel %d0,-(%sp) - movel %a1,-(%sp) - movel %a0,-(%sp) - jsr copyout - addw #12,%sp - movel (%sp)+,%d1 - rts -| -| mem_read --- read from user or supervisor address space -| -| Reads from memory while in supervisor mode. copyin accomplishes -| this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function. -| If you don't have copyin, use the local copy of the function below. -| -| The FPSP calls mem_read to read the original F-line instruction in order -| to extract the data register number when the 'Dn' addressing mode is -| used. 
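Both mem_write above and mem_read below fall back to the same bounded byte loop when the access stays in supervisor space; a C sketch of that loop (a Linux port would use copy_to_user()/copy_from_user() for the user-space cases instead of the moves-based copyout/copyin):

#include <stddef.h>

/*
 * The dumb supervisor-to-supervisor fallback copy, and the shape of the
 * local copyout/copyin loops: one byte per iteration, count <= 12.
 */
static void byte_copy_sketch(unsigned char *dst, const unsigned char *src,
                             size_t count)
{
        while (count--)
                *dst++ = *src++;        /* one moveb/movesb per byte */
}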
-| -|Input: -| a0 - user source address -| a1 - supervisor destination address -| d0 - number of bytes to read (maximum count is 12) -| -| Like mem_write, mem_read always reads with a supervisor -| destination address on the supervisor stack. Also like mem_write, -| the EXC_SR is checked and a simple memory copy is done if reading -| from supervisor space is indicated. -| - .global mem_read -mem_read: - btstb #5,EXC_SR(%a6) |check for supervisor state - beqs user_read -super_read: - moveb (%a0)+,(%a1)+ - subql #1,%d0 - bnes super_read - rts -user_read: - movel %d1,-(%sp) |preserve d1 just in case - movel %d0,-(%sp) - movel %a1,-(%sp) - movel %a0,-(%sp) - jsr copyin - addw #12,%sp - movel (%sp)+,%d1 - rts - -| -| Use these routines if your kernel doesn't have copyout/copyin equivalents. -| Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC, -| and copyin overwrites SFC. -| -copyout: - movel 4(%sp),%a0 | source - movel 8(%sp),%a1 | destination - movel 12(%sp),%d0 | count - subl #1,%d0 | dec count by 1 for dbra - movel #1,%d1 - -| DFC is already set -| movec %d1,%DFC | set dfc for user data space -moreout: - moveb (%a0)+,%d1 | fetch supervisor byte -out_ea: - movesb %d1,(%a1)+ | write user byte - dbf %d0,moreout - rts - -copyin: - movel 4(%sp),%a0 | source - movel 8(%sp),%a1 | destination - movel 12(%sp),%d0 | count - subl #1,%d0 | dec count by 1 for dbra - movel #1,%d1 -| SFC is already set -| movec %d1,%SFC | set sfc for user space -morein: -in_ea: - movesb (%a0)+,%d1 | fetch user byte - moveb %d1,(%a1)+ | write supervisor byte - dbf %d0,morein - rts - - .section .fixup,#alloc,#execinstr - .even -1: - jbra fpsp040_die - - .section __ex_table,#alloc - .align 4 - - .long in_ea,1b - .long out_ea,1b - - |end diff --git a/arch/m68k/fpsp040/slog2.S b/arch/m68k/fpsp040/slog2.S deleted file mode 100644 index fac2c738382e87c2031055d4f773593a8e797557..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/slog2.S +++ /dev/null @@ -1,187 +0,0 @@ -| -| slog2.sa 3.1 12/10/90 -| -| The entry point slog10 computes the base-10 -| logarithm of an input argument X. -| slog10d does the same except the input value is a -| denormalized number. -| sLog2 and sLog2d are the base-2 analogues. -| -| INPUT: Double-extended value in memory location pointed to -| by address register a0. -| -| OUTPUT: log_10(X) or log_2(X) returned in floating-point -| register fp0. -| -| ACCURACY and MONOTONICITY: The returned result is within 1.7 -| ulps in 64 significant bit, i.e. within 0.5003 ulp -| to 53 bits if the result is subsequently rounded -| to double precision. The result is provably monotonic -| in double precision. -| -| SPEED: Two timings are measured, both in the copy-back mode. -| The first one is measured when the function is invoked -| the first time (so the instructions and data are not -| in cache), and the second one is measured when the -| function is reinvoked at the same input argument. -| -| ALGORITHM and IMPLEMENTATION NOTES: -| -| slog10d: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call slognd to obtain Y = log(X), the natural log of X. -| Notes: Even if X is denormalized, log(X) is always normalized. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(10)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L10. -| -| -| slog10: -| -| Step 0. 
If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call sLogN to obtain Y = log(X), the natural log of X. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(10)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L10. -| -| -| sLog2d: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call slognd to obtain Y = log(X), the natural log of X. -| Notes: Even if X is denormalized, log(X) is always normalized. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(2)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L2. -| -| -| sLog2: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. If X is not an integer power of two, i.e., X != 2^k, -| go to Step 3. -| -| Step 2. Return k. -| 2.1 Get integer k, X = 2^k. -| 2.2 Restore the user FPCR. -| 2.3 Return ans := convert-to-double-extended(k). -| -| Step 3. Call sLogN to obtain Y = log(X), the natural log of X. -| -| Step 4. Compute log_2(X) = log(X) * (1/log(2)). -| 4.1 Restore the user FPCR -| 4.2 Return ans := Y * INV_L2. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_frcinx - |xref t_operr - |xref slogn - |xref slognd - -INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 - -INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 - - .global slog10d -slog10d: -|--entry point for Log10(X), X is denormalized - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slognd | ...log(X), X denorm. - fmovel (%sp)+,%fpcr - fmulx INV_L10,%fp0 - bra t_frcinx - - .global slog10 -slog10: -|--entry point for Log10(X), X is normalized - - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slogn | ...log(X), X normal. - fmovel (%sp)+,%fpcr - fmulx INV_L10,%fp0 - bra t_frcinx - - - .global slog2d -slog2d: -|--entry point for Log2(X), X is denormalized - - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slognd | ...log(X), X denorm. - fmovel (%sp)+,%fpcr - fmulx INV_L2,%fp0 - bra t_frcinx - - .global slog2 -slog2: -|--entry point for Log2(X), X is normalized - movel (%a0),%d0 - blt invalid - - movel 8(%a0),%d0 - bnes continue | ...X is not 2^k - - movel 4(%a0),%d0 - andl #0x7FFFFFFF,%d0 - tstl %d0 - bnes continue - -|--X = 2^k. - movew (%a0),%d0 - andl #0x00007FFF,%d0 - subl #0x3FFF,%d0 - fmovel %d1,%fpcr - fmovel %d0,%fp0 - bra t_frcinx - -continue: - movel %d1,-(%sp) - clrl %d1 - bsr slogn | ...log(X), X normal. 
- fmovel (%sp)+,%fpcr - fmulx INV_L2,%fp0 - bra t_frcinx - -invalid: - bra t_operr - - |end diff --git a/arch/m68k/fpsp040/slogn.S b/arch/m68k/fpsp040/slogn.S deleted file mode 100644 index d98eaf641ec4c65b6997ba01b309958120aaeb17..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/slogn.S +++ /dev/null @@ -1,591 +0,0 @@ -| -| slogn.sa 3.1 12/10/90 -| -| slogn computes the natural logarithm of an -| input value. slognd does the same except the input value is a -| denormalized number. slognp1 computes log(1+X), and slognp1d -| computes log(1+X) for denormalized X. -| -| Input: Double-extended value in memory location pointed to by address -| register a0. -| -| Output: log(X) or log(1+X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program slogn takes approximately 190 cycles for input -| argument X such that |X-1| >= 1/16, which is the usual -| situation. For those arguments, slognp1 takes approximately -| 210 cycles. For the less common arguments, the program will -| run no worse than 10% slower. -| -| Algorithm: -| LOGN: -| Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in -| u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. -| -| Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven -| significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base -| 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). -| -| Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, -| log(1+u) = poly. -| -| Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) -| by k*log(2) + (log(F) + poly). The values of log(F) are calculated -| beforehand and stored in the program. -| -| lognp1: -| Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in -| u where u = 2X/(2+X). Otherwise, move on to Step 2. -| -| Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 -| of the algorithm for LOGN and compute log(1+X) as -| k*log(2) + log(F) + poly where poly approximates log(1+u), -| u = (Y-F)/F. -| -| Implementation Notes: -| Note 1. There are 64 different possible values for F, thus 64 log(F)'s -| need to be tabulated. Moreover, the values of 1/F are also -| tabulated so that the division in (Y-F)/F can be performed by a -| multiplication. -| -| Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value -| Y-F has to be calculated carefully when 1/2 <= X < 3/2. -| -| Note 3. To fully exploit the pipeline, polynomials are usually separated -| into two parts evaluated independently before being added up. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
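As a cross-check on the LOGN/LOGNP1 header above, here is a hedged C sketch of the same table-driven decomposition, in plain doubles rather than the 68040's double-extended format. `logn_sketch` is a hypothetical name; `log1p()` stands in for the degree-six polynomial, and log(F) and 1/F are computed on the fly where the assembly fetches them from LOGTBL:

```c
#include <math.h>

/* Sketch only: LOGN's Steps 1-4 in C doubles. */
static double logn_sketch(double x)     /* x finite, positive, normalized */
{
    int k;
    double y = 2.0 * frexp(x, &k);      /* 1 <= y < 2 */
    k--;                                /* now x = y * 2**k */

    if (fabs(x - 1.0) < 1.0 / 16.0) {   /* Step 1: X near 1 */
        double u = 2.0 * (x - 1.0) / (x + 1.0);
        /* odd series in u: log(1+u/2) - log(1-u/2) = log(x) */
        return log1p(u / 2.0) - log1p(-u / 2.0);
    }

    /* Step 2: F = 1.xxxxxx1 in binary, the first seven bits of y
     * plus 2**-7, so |y - F| <= 2**-7 */
    double f = floor(y * 64.0) / 64.0 + 1.0 / 128.0;
    double u = (y - f) / f;             /* Step 3: |u| tiny */

    /* Step 4: k*log2 + log(F) + log(1+u); the real code reads log(F)
     * and 1/F from LOGTBL and evaluates a fixed polynomial for log1p */
    return k * M_LN2 + log(f) + log1p(u);
}
```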
- -|slogn idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 -BOUNDS2: .long 0x3FFE8000,0x3FFFC000 - -LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -one: .long 0x3F800000 -zero: .long 0x00000000 -infty: .long 0x7F800000 -negone: .long 0xBF800000 - -LOGA6: .long 0x3FC2499A,0xB5E4040B -LOGA5: .long 0xBFC555B5,0x848CB7DB - -LOGA4: .long 0x3FC99999,0x987D8730 -LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 - -LOGA2: .long 0x3FD55555,0x555555a4 -LOGA1: .long 0xBFE00000,0x00000008 - -LOGB5: .long 0x3F175496,0xADD7DAD6 -LOGB4: .long 0x3F3C71C2,0xFE80C7E0 - -LOGB3: .long 0x3F624924,0x928BCCFF -LOGB2: .long 0x3F899999,0x999995EC - -LOGB1: .long 0x3FB55555,0x55555555 -TWO: .long 0x40000000,0x00000000 - -LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 - -LOGTBL: - .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 - .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 - .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 - .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 - .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 - .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 - .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 - .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 - .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 - .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 - .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 - .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 - .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 - .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 - .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 - .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 - .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 - .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 - .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 - .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 - .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 - .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 - .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 - .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 - .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 - .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 - .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 - .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 - .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 - .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 - .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 - .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 - .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 - .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 - .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 - .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 - .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 - .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 - .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 - .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 - .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 - .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 - .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 - .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 - .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 - .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 - .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 - .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 - .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 - .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 - .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 - .long 
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 - .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 - .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 - .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 - .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 - .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 - .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 - .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 - .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 - .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 - .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 - .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 - .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 - .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 - .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 - .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 - .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 - .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 - .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 - .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 - .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 - .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 - .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 - .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 - .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 - .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 - .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 - .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 - .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 - .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 - .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 - .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 - .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 - .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 - .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 - .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 - .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 - .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 - .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 - .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 - .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 - .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 - .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 - .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 - .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 - .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 - .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 - .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 - .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 - .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 - .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 - .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 - .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 - .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 - .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 - .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 - .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 - .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 - .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 - .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 - .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 - .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 - .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 - .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 - .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 - .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 - .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 - .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 - .long 
0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 - .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 - .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 - .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 - .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 - .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 - .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 - .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 - .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 - - .set ADJK,L_SCR1 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set F,FP_SCR2 - .set FFRAC,F+4 - - .set KLOG2,FP_SCR3 - - .set SAVEU,FP_SCR4 - - | xref t_frcinx - |xref t_extdnrm - |xref t_operr - |xref t_dz - - .global slognd -slognd: -|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT - - movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 - -|----normalize the input value by left shifting k bits (k to be determined -|----below), adjusting exponent and storing -k to ADJK -|----the value TWOTO100 is no longer needed. -|----Note that this code assumes the denormalized input is NON-ZERO. - - moveml %d2-%d7,-(%a7) | ...save some registers - movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # - movel 4(%a0),%d4 - movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) - clrl %d2 | ...D2 used for holding K - - tstl %d4 - bnes HiX_not0 - -HiX_0: - movel %d5,%d4 - clrl %d5 - movel #32,%d2 - clrl %d6 - bfffo %d4{#0:#32},%d6 - lsll %d6,%d4 - addl %d6,%d2 | ...(D3,D4,D5) is normalized - - movel %d3,X(%a6) - movel %d4,XFRAC(%a6) - movel %d5,XFRAC+4(%a6) - negl %d2 - movel %d2,ADJK(%a6) - fmovex X(%a6),%fp0 - moveml (%a7)+,%d2-%d7 | ...restore registers - lea X(%a6),%a0 - bras LOGBGN | ...begin regular log(X) - - -HiX_not0: - clrl %d6 - bfffo %d4{#0:#32},%d6 | ...find first 1 - movel %d6,%d2 | ...get k - lsll %d6,%d4 - movel %d5,%d7 | ...a copy of D5 - lsll %d6,%d5 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d4 | ...(D3,D4,D5) normalized - - movel %d3,X(%a6) - movel %d4,XFRAC(%a6) - movel %d5,XFRAC+4(%a6) - negl %d2 - movel %d2,ADJK(%a6) - fmovex X(%a6),%fp0 - moveml (%a7)+,%d2-%d7 | ...restore registers - lea X(%a6),%a0 - bras LOGBGN | ...begin regular log(X) - - - .global slogn -slogn: -|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - movel #0x00000000,ADJK(%a6) - -LOGBGN: -|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS -|--A FINITE, NON-ZERO, NORMALIZED NUMBER. - - movel (%a0),%d0 - movew 4(%a0),%d0 - - movel (%a0),X(%a6) - movel 4(%a0),X+4(%a6) - movel 8(%a0),X+8(%a6) - - cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE - blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID - cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 - bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16] - -LOGMAIN: -|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 - -|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. -|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. -|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) -|-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). -|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING -|--LOG(1+U) CAN BE VERY EFFICIENT. -|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO -|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. - -|--GET K, Y, F, AND ADDRESS OF 1/F. - asrl #8,%d0 - asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X - subil #0x3FFF,%d0 | ...THIS IS K - addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. 
- lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) - fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT - -|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F - movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X - movel XFRAC(%a6),FFRAC(%a6) - andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y - oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT - movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT - addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F - - fmovex X(%a6),%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...Y-F - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY -|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K -|--REGISTERS SAVED: FPCR, FP1, FP2 - -LP1CONT1: -|--AN RE-ENTRY POINT FOR LOGNP1 - fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F - fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY - fmovex %fp0,%fp2 - fmulx %fp2,%fp2 | ...FP2 IS V=U*U - fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 - -|--LOG(1+U) IS APPROXIMATED BY -|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS -|--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] - - fmovex %fp2,%fp3 - fmovex %fp2,%fp1 - - fmuld LOGA6,%fp1 | ...V*A6 - fmuld LOGA5,%fp2 | ...V*A5 - - faddd LOGA4,%fp1 | ...A4+V*A6 - faddd LOGA3,%fp2 | ...A3+V*A5 - - fmulx %fp3,%fp1 | ...V*(A4+V*A6) - fmulx %fp3,%fp2 | ...V*(A3+V*A5) - - faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) - faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) - - fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) - addal #16,%a0 | ...ADDRESS OF LOG(F) - fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED - - fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) - faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED - - faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) - fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 - faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) - - fmovel %d1,%fpcr - faddx KLOG2(%a6),%fp0 | ...FINAL ADD - bra t_frcinx - - -LOGNEAR1: -|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. - fmovex %fp0,%fp1 - fsubs one,%fp1 | ...FP1 IS X-1 - fadds one,%fp0 | ...FP0 IS X+1 - faddx %fp1,%fp1 | ...FP1 IS 2(X-1) -|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL -|--IN U, U = 2(X-1)/(X+1) = FP1/FP0 - -LP1CONT2: -|--THIS IS AN RE-ENTRY POINT FOR LOGNP1 - fdivx %fp0,%fp1 | ...FP1 IS U - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 -|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 -|--LET V=U*U, W=V*V, CALCULATE -|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY -|--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) - fmovex %fp1,%fp0 - fmulx %fp0,%fp0 | ...FP0 IS V - fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS W - - fmoved LOGB5,%fp3 - fmoved LOGB4,%fp2 - - fmulx %fp1,%fp3 | ...W*B5 - fmulx %fp1,%fp2 | ...W*B4 - - faddd LOGB3,%fp3 | ...B3+W*B5 - faddd LOGB2,%fp2 | ...B2+W*B4 - - fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED - - fmulx %fp0,%fp2 | ...V*(B2+W*B4) - - faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) - fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V - - faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED - fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED - - fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - fmovel %d1,%fpcr - faddx SAVEU(%a6),%fp0 - bra t_frcinx - rts - -LOGNEG: -|--REGISTERS SAVED FPCR. 
LOG(-VE) IS INVALID - bra t_operr - - .global slognp1d -slognp1d: -|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT -| Simply return the denorm - - bra t_extdnrm - - .global slognp1 -slognp1: -|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - fabsx %fp0 |test magnitude - fcmpx LTHOLD,%fp0 |compare with min threshold - fbgt LP1REAL |if greater, continue - fmovel #0,%fpsr |clr N flag from compare - fmovel %d1,%fpcr - fmovex (%a0),%fp0 |return signed argument - bra t_frcinx - -LP1REAL: - fmovex (%a0),%fp0 | ...LOAD INPUT - movel #0x00000000,ADJK(%a6) - fmovex %fp0,%fp1 | ...FP1 IS INPUT Z - fadds one,%fp0 | ...X := ROUND(1+Z) - fmovex %fp0,X(%a6) - movew XFRAC(%a6),XDCARE(%a6) - movel X(%a6),%d0 - cmpil #0,%d0 - ble LP1NEG0 | ...LOG OF ZERO OR -VE - cmp2l BOUNDS2,%d0 - bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2] -|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, -|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, -|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). - -LP1NEAR1: -|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) - cmp2l BOUNDS1,%d0 - bcss LP1CARE - -LP1ONE16: -|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) -|--WHERE U = 2Z/(2+Z) = 2Z/(1+X). - faddx %fp1,%fp1 | ...FP1 IS 2Z - fadds one,%fp0 | ...FP0 IS 1+X -|--U = FP1/FP0 - bra LP1CONT2 - -LP1CARE: -|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE -|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST -|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], -|--THERE ARE ONLY TWO CASES. -|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z -|--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z -|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF -|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. - - movel XFRAC(%a6),FFRAC(%a6) - andil #0xFE000000,FFRAC(%a6) - oril #0x01000000,FFRAC(%a6) | ...F OBTAINED - cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 - bges KISZERO - -KISNEG1: - fmoves TWO,%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...2-F - movel FFRAC(%a6),%d0 - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F - faddx %fp1,%fp1 | ...GET 2Z - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 - faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z - lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F - addal %d0,%a0 - fmoves negone,%fp1 | ...FP1 IS K = -1 - bra LP1CONT1 - -KISZERO: - fmoves one,%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...1-F - movel FFRAC(%a6),%d0 - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 - faddx %fp1,%fp0 | ...FP0 IS Y-F - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED - lea LOGTBL,%a0 - addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F - fmoves zero,%fp1 | ...FP1 IS K = 0 - bra LP1CONT1 - -LP1NEG0: -|--FPCR SAVED. D0 IS X IN COMPACT FORM. - cmpil #0,%d0 - blts LP1NEG -LP1ZERO: - fmoves negone,%fp0 - - fmovel %d1,%fpcr - bra t_dz - -LP1NEG: - fmoves zero,%fp0 - - fmovel %d1,%fpcr - bra t_operr - - |end diff --git a/arch/m68k/fpsp040/smovecr.S b/arch/m68k/fpsp040/smovecr.S deleted file mode 100644 index 73c36512081b1c66b56cb99f18fed73e8c736012..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/smovecr.S +++ /dev/null @@ -1,161 +0,0 @@ -| -| smovecr.sa 3.1 12/10/90 -| -| The entry point sMOVECR returns the constant at the -| offset given in the instruction field. -| -| Input: An offset in the instruction word. -| -| Output: The constant rounded to the user's rounding -| mode unchecked for overflow. 
-| -| Modified: fp0. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref nrm_set - |xref round - |xref PIRN - |xref PIRZRM - |xref PIRP - |xref SMALRN - |xref SMALRZRM - |xref SMALRP - |xref BIGRN - |xref BIGRZRM - |xref BIGRP - -FZERO: .long 00000000 -| -| FMOVECR -| - .global smovcr -smovcr: - bfextu CMDREG1B(%a6){#9:#7},%d0 |get offset - bfextu USER_FPCR(%a6){#26:#2},%d1 |get rmode -| -| check range of offset -| - tstb %d0 |if zero, offset is to pi - beqs PI_TBL |it is pi - cmpib #0x0a,%d0 |check range $01 - $0a - bles Z_VAL |if in this range, return zero - cmpib #0x0e,%d0 |check range $0b - $0e - bles SM_TBL |valid constants in this range - cmpib #0x2f,%d0 |check range $10 - $2f - bles Z_VAL |if in this range, return zero - cmpib #0x3f,%d0 |check range $30 - $3f - ble BG_TBL |valid constants in this range -Z_VAL: - fmoves FZERO,%fp0 - rts -PI_TBL: - tstb %d1 |offset is zero, check for rmode - beqs PI_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs PI_RP |if 3, rp mode -PI_RZRM: - leal PIRZRM,%a0 |rmode is rz or rm, load PIRZRM in a0 - bra set_finx -PI_RN: - leal PIRN,%a0 |rmode is rn, load PIRN in a0 - bra set_finx -PI_RP: - leal PIRP,%a0 |rmode is rp, load PIRP in a0 - bra set_finx -SM_TBL: - subil #0xb,%d0 |make offset in 0 - 4 range - tstb %d1 |check for rmode - beqs SM_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs SM_RP |if 3, rp mode -SM_RZRM: - leal SMALRZRM,%a0 |rmode is rz or rm, load SMRZRM in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -SM_RN: - leal SMALRN,%a0 |rmode is rn, load SMRN in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -SM_RP: - leal SMALRP,%a0 |rmode is rp, load SMRP in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -BG_TBL: - subil #0x30,%d0 |make offset in 0 - f range - tstb %d1 |check for rmode - beqs BG_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs BG_RP |if 3, rp mode -BG_RZRM: - leal BIGRZRM,%a0 |rmode is rz or rm, load BGRZRM in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact - bra set_finx |if 8 - f, it is inexact -BG_RN: - leal BIGRN,%a0 |rmode is rn, load BGRN in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact - bra set_finx |if 8 - f, it is inexact -BG_RP: - leal BIGRP,%a0 |rmode is rp, load SMRP in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact -| bra set_finx ;if 8 - f, it is inexact -set_finx: - orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex -no_finx: - mulul #12,%d0 |use offset to point into tables - movel %d1,L_SCR1(%a6) |load mode for round call - bfextu USER_FPCR(%a6){#24:#2},%d1 |get precision - tstl %d1 |check if extended precision -| -| Precision is extended -| - bnes not_ext |if extended, do not call round - fmovemx (%a0,%d0),%fp0-%fp0 |return result in fp0 - rts -| -| Precision is single or double -| -not_ext: - swap %d1 |rnd prec in upper word of d1 
- addl L_SCR1(%a6),%d1 |merge rmode in low word of d1 - movel (%a0,%d0),FP_SCR1(%a6) |load first word to temp storage - movel 4(%a0,%d0),FP_SCR1+4(%a6) |load second word - movel 8(%a0,%d0),FP_SCR1+8(%a6) |load third word - clrl %d0 |clear g,r,s - lea FP_SCR1(%a6),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext. format - - bsr round |go round the mantissa - - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs fin_fcr - bsetb #sign_bit,LOCAL_EX(%a0) -fin_fcr: - fmovemx (%a0),%fp0-%fp0 - rts - - |end diff --git a/arch/m68k/fpsp040/srem_mod.S b/arch/m68k/fpsp040/srem_mod.S deleted file mode 100644 index a27e70c9a0eb3608b21cbebc9c86770736697596..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/srem_mod.S +++ /dev/null @@ -1,421 +0,0 @@ -| -| srem_mod.sa 3.1 12/10/90 -| -| The entry point sMOD computes the floating point MOD of the -| input values X and Y. The entry point sREM computes the floating -| point (IEEE) REM of the input values X and Y. -| -| INPUT -| ----- -| Double-extended value Y is pointed to by address in register -| A0. Double-extended value X is located in -12(A0). The values -| of X and Y are both nonzero and finite; although either or both -| of them can be denormalized. The special cases of zeros, NaNs, -| and infinities are handled elsewhere. -| -| OUTPUT -| ------ -| FREM(X,Y) or FMOD(X,Y), depending on entry point. -| -| ALGORITHM -| --------- -| -| Step 1. Save and strip signs of X and Y: signX := sign(X), -| signY := sign(Y), X := |X|, Y := |Y|, -| signQ := signX EOR signY. Record whether MOD or REM -| is requested. -| -| Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. -| If (L < 0) then -| R := X, go to Step 4. -| else -| R := 2^(-L)X, j := L. -| endif -| -| Step 3. Perform MOD(X,Y) -| 3.1 If R = Y, go to Step 9. -| 3.2 If R > Y, then { R := R - Y, Q := Q + 1} -| 3.3 If j = 0, go to Step 4. -| 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to -| Step 3.1. -| -| Step 4. At this point, R = X - QY = MOD(X,Y). Set -| Last_Subtract := false (used in Step 7 below). If -| MOD is requested, go to Step 6. -| -| Step 5. R = MOD(X,Y), but REM(X,Y) is requested. -| 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to -| Step 6. -| 5.2 If R > Y/2, then { set Last_Subtract := true, -| Q := Q + 1, Y := signY*Y }. Go to Step 6. -| 5.3 This is the tricky case of R = Y/2. If Q is odd, -| then { Q := Q + 1, signX := -signX }. -| -| Step 6. R := signX*R. -| -| Step 7. If Last_Subtract = true, R := R - Y. -| -| Step 8. Return signQ, last 7 bits of Q, and R as required. -| -| Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, -| X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), -| R := 0. Return signQ, last 7 bits of Q, and R. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
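Steps 2 and 3 of the REM/MOD algorithm above amount to restoring division on the mantissas. A minimal C sketch follows, assuming finite positive inputs and using doubles where the assembly works on the raw 64-bit mantissas with an explicit carry bit; `fmod_sketch` and the `q` out-parameter are hypothetical names, per Step 8 the real code keeps only the low seven bits of Q, and the REM variant would add Step 5's comparison of R against Y/2:

```c
#include <math.h>

/* Sketch only: the shift-and-subtract loop of Steps 2-3. */
static double fmod_sketch(double x, double y, unsigned *q)
{
    int ex, ey;
    frexp(x, &ex);                   /* expo(X), frexp convention */
    frexp(y, &ey);
    int l = ex - ey;                 /* Step 2: L := expo(X)-expo(Y) */

    *q = 0;
    if (l < 0)
        return x;                    /* R := X, already reduced */

    double r = ldexp(x, -l);         /* R := 2**(-L) * X */
    for (int j = l; ; j--) {
        if (r >= y) {                /* Steps 3.1/3.2: R >= Y */
            r -= y;
            *q += 1;
        }
        if (j == 0)                  /* Step 3.3 */
            break;
        *q *= 2;                     /* Step 3.4: Q := 2Q, R := 2R */
        r *= 2.0;
    }
    return r;                        /* Step 4: R = X - QY = MOD(X,Y) */
}
```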
- -SREM_MOD: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .set Mod_Flag,L_SCR3 - .set SignY,FP_SCR3+4 - .set SignX,FP_SCR3+8 - .set SignQ,FP_SCR3+12 - .set Sc_Flag,FP_SCR4 - - .set Y,FP_SCR1 - .set Y_Hi,Y+4 - .set Y_Lo,Y+8 - - .set R,FP_SCR2 - .set R_Hi,R+4 - .set R_Lo,R+8 - - -Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000 - - |xref t_avoid_unsupp - - .global smod -smod: - - movel #0,Mod_Flag(%a6) - bras Mod_Rem - - .global srem -srem: - - movel #1,Mod_Flag(%a6) - -Mod_Rem: -|..Save sign of X and Y - moveml %d2-%d7,-(%a7) | ...save data registers - movew (%a0),%d3 - movew %d3,SignY(%a6) - andil #0x00007FFF,%d3 | ...Y := |Y| - -| - movel 4(%a0),%d4 - movel 8(%a0),%d5 | ...(D3,D4,D5) is |Y| - - tstl %d3 - bnes Y_Normal - - movel #0x00003FFE,%d3 | ...$3FFD + 1 - tstl %d4 - bnes HiY_not0 - -HiY_0: - movel %d5,%d4 - clrl %d5 - subil #32,%d3 - clrl %d6 - bfffo %d4{#0:#32},%d6 - lsll %d6,%d4 - subl %d6,%d3 | ...(D3,D4,D5) is normalized -| ...with bias $7FFD - bras Chk_X - -HiY_not0: - clrl %d6 - bfffo %d4{#0:#32},%d6 - subl %d6,%d3 - lsll %d6,%d4 - movel %d5,%d7 | ...a copy of D5 - lsll %d6,%d5 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d4 | ...(D3,D4,D5) normalized -| ...with bias $7FFD - bras Chk_X - -Y_Normal: - addil #0x00003FFE,%d3 | ...(D3,D4,D5) normalized -| ...with bias $7FFD - -Chk_X: - movew -12(%a0),%d0 - movew %d0,SignX(%a6) - movew SignY(%a6),%d1 - eorl %d0,%d1 - andil #0x00008000,%d1 - movew %d1,SignQ(%a6) | ...sign(Q) obtained - andil #0x00007FFF,%d0 - movel -8(%a0),%d1 - movel -4(%a0),%d2 | ...(D0,D1,D2) is |X| - tstl %d0 - bnes X_Normal - movel #0x00003FFE,%d0 - tstl %d1 - bnes HiX_not0 - -HiX_0: - movel %d2,%d1 - clrl %d2 - subil #32,%d0 - clrl %d6 - bfffo %d1{#0:#32},%d6 - lsll %d6,%d1 - subl %d6,%d0 | ...(D0,D1,D2) is normalized -| ...with bias $7FFD - bras Init - -HiX_not0: - clrl %d6 - bfffo %d1{#0:#32},%d6 - subl %d6,%d0 - lsll %d6,%d1 - movel %d2,%d7 | ...a copy of D2 - lsll %d6,%d2 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d1 | ...(D0,D1,D2) normalized -| ...with bias $7FFD - bras Init - -X_Normal: - addil #0x00003FFE,%d0 | ...(D0,D1,D2) normalized -| ...with bias $7FFD - -Init: -| - movel %d3,L_SCR1(%a6) | ...save biased expo(Y) - movel %d0,L_SCR2(%a6) |save d0 - subl %d3,%d0 | ...L := expo(X)-expo(Y) -| Move.L D0,L ...D0 is j - clrl %d6 | ...D6 := carry <- 0 - clrl %d3 | ...D3 is Q - moveal #0,%a1 | ...A1 is k; j+k=L, Q=0 - -|..(Carry,D1,D2) is R - tstl %d0 - bges Mod_Loop - -|..expo(X) < expo(Y). Thus X = mod(X,Y) -| - movel L_SCR2(%a6),%d0 |restore d0 - bra Get_Mod - -|..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L - - -Mod_Loop: - tstl %d6 | ...test carry bit - bgts R_GT_Y - -|..At this point carry = 0, R = (D1,D2), Y = (D4,D5) - cmpl %d4,%d1 | ...compare hi(R) and hi(Y) - bnes R_NE_Y - cmpl %d5,%d2 | ...compare lo(R) and lo(Y) - bnes R_NE_Y - -|..At this point, R = Y - bra Rem_is_0 - -R_NE_Y: -|..use the borrow of the previous compare - bcss R_LT_Y | ...borrow is set iff R < Y - -R_GT_Y: -|..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 -|..and Y < (D1,D2) < 2Y. Either way, perform R - Y - subl %d5,%d2 | ...lo(R) - lo(Y) - subxl %d4,%d1 | ...hi(R) - hi(Y) - clrl %d6 | ...clear carry - addql #1,%d3 | ...Q := Q + 1 - -R_LT_Y: -|..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. - tstl %d0 | ...see if j = 0. 
- beqs PostLoop - - addl %d3,%d3 | ...Q := 2Q - addl %d2,%d2 | ...lo(R) = 2lo(R) - roxll #1,%d1 | ...hi(R) = 2hi(R) + carry - scs %d6 | ...set Carry if 2(R) overflows - addql #1,%a1 | ...k := k+1 - subql #1,%d0 | ...j := j - 1 -|..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. - - bras Mod_Loop - -PostLoop: -|..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. - -|..normalize R. - movel L_SCR1(%a6),%d0 | ...new biased expo of R - tstl %d1 - bnes HiR_not0 - -HiR_0: - movel %d2,%d1 - clrl %d2 - subil #32,%d0 - clrl %d6 - bfffo %d1{#0:#32},%d6 - lsll %d6,%d1 - subl %d6,%d0 | ...(D0,D1,D2) is normalized -| ...with bias $7FFD - bras Get_Mod - -HiR_not0: - clrl %d6 - bfffo %d1{#0:#32},%d6 - bmis Get_Mod | ...already normalized - subl %d6,%d0 - lsll %d6,%d1 - movel %d2,%d7 | ...a copy of D2 - lsll %d6,%d2 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d1 | ...(D0,D1,D2) normalized - -| -Get_Mod: - cmpil #0x000041FE,%d0 - bges No_Scale -Do_Scale: - movew %d0,R(%a6) - clrw R+2(%a6) - movel %d1,R_Hi(%a6) - movel %d2,R_Lo(%a6) - movel L_SCR1(%a6),%d6 - movew %d6,Y(%a6) - clrw Y+2(%a6) - movel %d4,Y_Hi(%a6) - movel %d5,Y_Lo(%a6) - fmovex R(%a6),%fp0 | ...no exception - movel #1,Sc_Flag(%a6) - bras ModOrRem -No_Scale: - movel %d1,R_Hi(%a6) - movel %d2,R_Lo(%a6) - subil #0x3FFE,%d0 - movew %d0,R(%a6) - clrw R+2(%a6) - movel L_SCR1(%a6),%d6 - subil #0x3FFE,%d6 - movel %d6,L_SCR1(%a6) - fmovex R(%a6),%fp0 - movew %d6,Y(%a6) - movel %d4,Y_Hi(%a6) - movel %d5,Y_Lo(%a6) - movel #0,Sc_Flag(%a6) - -| - - -ModOrRem: - movel Mod_Flag(%a6),%d6 - beqs Fix_Sign - - movel L_SCR1(%a6),%d6 | ...new biased expo(Y) - subql #1,%d6 | ...biased expo(Y/2) - cmpl %d6,%d0 - blts Fix_Sign - bgts Last_Sub - - cmpl %d4,%d1 - bnes Not_EQ - cmpl %d5,%d2 - bnes Not_EQ - bra Tie_Case - -Not_EQ: - bcss Fix_Sign - -Last_Sub: -| - fsubx Y(%a6),%fp0 | ...no exceptions - addql #1,%d3 | ...Q := Q + 1 - -| - -Fix_Sign: -|..Get sign of X - movew SignX(%a6),%d6 - bges Get_Q - fnegx %fp0 - -|..Get Q -| -Get_Q: - clrl %d6 - movew SignQ(%a6),%d6 | ...D6 is sign(Q) - movel #8,%d7 - lsrl %d7,%d6 - andil #0x0000007F,%d3 | ...7 bits of Q - orl %d6,%d3 | ...sign and bits of Q - swap %d3 - fmovel %fpsr,%d6 - andil #0xFF00FFFF,%d6 - orl %d3,%d6 - fmovel %d6,%fpsr | ...put Q in fpsr - -| -Restore: - moveml (%a7)+,%d2-%d7 - fmovel USER_FPCR(%a6),%fpcr - movel Sc_Flag(%a6),%d0 - beqs Finish - fmulx Scale(%pc),%fp0 | ...may cause underflow - bra t_avoid_unsupp |check for denorm as a -| ;result of the scaling - -Finish: - fmovex %fp0,%fp0 |capture exceptions & round - rts - -Rem_is_0: -|..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) - addql #1,%d3 - cmpil #8,%d0 | ...D0 is j - bges Q_Big - - lsll %d0,%d3 - bras Set_R_0 - -Q_Big: - clrl %d3 - -Set_R_0: - fmoves #0x00000000,%fp0 - movel #0,Sc_Flag(%a6) - bra Fix_Sign - -Tie_Case: -|..Check parity of Q - movel %d3,%d6 - andil #0x00000001,%d6 - tstl %d6 - beq Fix_Sign | ...Q is even - -|..Q is odd, Q := Q + 1, signX := -signX - addql #1,%d3 - movew SignX(%a6),%d6 - eoril #0x00008000,%d6 - movew %d6,SignX(%a6) - bra Fix_Sign - - |end diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S deleted file mode 100644 index a1ef8e01bf06703bd732abcd89053e7cec214226..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/ssin.S +++ /dev/null @@ -1,745 +0,0 @@ -| -| ssin.sa 3.3 7/29/91 -| -| The entry point sSIN computes the sine of an input argument -| sCOS computes the cosine, and sSINCOS computes both. 
The -| corresponding entry points with a "d" computes the same -| corresponding function values for denormalized inputs. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or -| COS is requested. Otherwise, for SINCOS, sin(X) is returned -| in Fp0, and cos(X) is returned in Fp1. -| -| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. -| -| Accuracy and Monotonicity: The returned result is within 1 ulp in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The programs sSIN and sCOS take approximately 150 cycles for -| input argument X such that |X| < 15Pi, which is the usual -| situation. The speed for sSINCOS is approximately 190 cycles. -| -| Algorithm: -| -| SIN and COS: -| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. -| -| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. -| -| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let -| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite -| k by k := k + AdjN. -| -| 4. If k is even, go to 6. -| -| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) -| where cos(r) is approximated by an even polynomial in r, -| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. -| Exit. -| -| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) -| where sin(r) is approximated by an odd polynomial in r -| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. -| Exit. -| -| 7. If |X| > 1, go to 9. -| -| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. -| -| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. -| -| SINCOS: -| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. -| -| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let -| k = N mod 4, so in particular, k = 0,1,2,or 3. -| -| 3. If k is even, go to 5. -| -| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. -| j1 exclusive or with the l.s.b. of k. -| sgn1 := (-1)**j1, sgn2 := (-1)**j2. -| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where -| sin(r) and cos(r) are computed as odd and even polynomials -| in r, respectively. Exit -| -| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. -| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where -| sin(r) and cos(r) are computed as odd and even polynomials -| in r, respectively. Exit -| -| 6. If |X| > 1, go to 8. -| -| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. -| -| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
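For orientation, here is a C sketch of the SIN/COS steps above. `ssin_sketch` is a hypothetical name; libm's sin()/cos() stand in for the fixed odd/even polynomials, and the one-line reduction stands in for the PITBL lookup, which the assembly does in two pieces (Y1, Y2) precisely because a single multiply-and-subtract would lose bits:

```c
#include <math.h>

/* Sketch only: argument reduction and quadrant selection. */
static double ssin_sketch(double x, int adjn)   /* adjn: 0 = SIN, 1 = COS */
{
    if (fabs(x) < 0x1p-40)                  /* Step 8: tiny argument */
        return adjn ? 1.0 : x;

    double n = nearbyint(x * M_2_PI);       /* Step 3: X = N(pi/2) + r */
    double r = x - n * M_PI_2;              /* |r| <= pi/4, one piece here */

    switch (((int)n + adjn) & 3) {          /* k := (N mod 4) + AdjN */
    case 0: return  sin(r);                 /* Step 6, j = 0, sgn = +1 */
    case 1: return  cos(r);                 /* Step 5, j = 0, sgn = +1 */
    case 2: return -sin(r);                 /* Step 6, j = 1, sgn = -1 */
    case 3: return -cos(r);                 /* Step 5, j = 1, sgn = -1 */
    }
    return 0.0;                             /* unreachable */
}
```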
- -|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FD78000,0x4004BC7E -TWOBYPI: .long 0x3FE45F30,0x6DC9C883 - -SINA7: .long 0xBD6AAA77,0xCCC994F5 -SINA6: .long 0x3DE61209,0x7AAE8DA1 - -SINA5: .long 0xBE5AE645,0x2A118AE4 -SINA4: .long 0x3EC71DE3,0xA5341531 - -SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 - -SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000 - -SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 - -COSB8: .long 0x3D2AC4D0,0xD6011EE3 -COSB7: .long 0xBDA9396F,0x9F45AC19 - -COSB6: .long 0x3E21EED9,0x0612C972 -COSB5: .long 0xBE927E4F,0xB79D9FCF - -COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 - -COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 - -COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E -COSB1: .long 0xBF000000 - -INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A - -TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - - |xref PITBL - - .set INARG,FP_SCR4 - - .set X,FP_SCR5 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set RPRIME,FP_SCR1 - .set SPRIME,FP_SCR2 - - .set POSNEG1,L_SCR1 - .set TWOTO63,L_SCR1 - - .set ENDFLAG,L_SCR2 - .set N,L_SCR2 - - .set ADJN,L_SCR3 - - | xref t_frcinx - |xref t_extdnrm - |xref sto_cos - - .global ssind -ssind: -|--SIN(X) = X FOR DENORMALIZED X - bra t_extdnrm - - .global scosd -scosd: -|--COS(X) = 1 FOR DENORMALIZED X - - fmoves #0x3F800000,%fp0 -| -| 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits -| - fmovel #0,%fpsr -| - bra t_frcinx - - .global ssin -ssin: -|--SET ADJN TO 0 - movel #0,ADJN(%a6) - bras SINBGN - - .global scos -scos: -|--SET ADJN TO 1 - movel #1,ADJN(%a6) - -SINBGN: -|--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges SOK1 - bra SINSM - -SOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts SINMAIN - bra REDUCEX - -SINMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT THREE INSTRUCTIONS - lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - - -|--FP1 IS NOW READY - fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER - - movel N(%a6),%d0 - asll #4,%d0 - addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2 -| ...WHICH IS IN TWO PIECES Y1 & Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 -|--HIDE THE NEXT ONE - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - -SINCONT: -|--continuation from REDUCEX - -|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED - movel N(%a6),%d0 - addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN - rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE - cmpil #0,%d0 - blt COSPOLY - -SINPOLY: -|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY -|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE -|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS -|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) -|--WHERE T=S*S. -|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION -|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. 
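Before the code that implements it, the two-chain split described in the comment block above can be written out in C (a sketch; the `a[]` array flattens the mixed double/double-extended coefficient storage to plain doubles):

```c
/* Sketch of the SINPOLY split: with s = r*r and t = s*s, the odd- and
 * even-indexed coefficients form two independent Horner chains that a
 * pipelined FPU can interleave, as the alternating fmul/fadd pairs
 * below do.  a[0..6] hold A1..A7. */
static double sin_poly_sketch(double r, const double a[7])
{
    double s = r * r, t = s * s;
    double p = a[0] + t * (a[2] + t * (a[4] + t * a[6])); /* A1+T(A3+T(A5+TA7)) */
    double q = s * (a[1] + t * (a[3] + t * a[5]));        /* S(A2+T(A4+TA6)) */
    return r + r * s * (p + q);
}
```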
- fmovex %fp0,X(%a6) | ...X IS R - fmulx %fp0,%fp0 | ...FP0 IS S -|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 - fmoved SINA7,%fp3 - fmoved SINA6,%fp2 -|--FP0 IS NOW READY - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS T -|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 - - rorl #1,%d0 - andil #0x80000000,%d0 -| ...LEAST SIG. BIT OF D0 IN SIGN POSITION - eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R - - fmulx %fp1,%fp3 | ...TA7 - fmulx %fp1,%fp2 | ...TA6 - - faddd SINA5,%fp3 | ...A5+TA7 - faddd SINA4,%fp2 | ...A4+TA6 - - fmulx %fp1,%fp3 | ...T(A5+TA7) - fmulx %fp1,%fp2 | ...T(A4+TA6) - - faddd SINA3,%fp3 | ...A3+T(A5+TA7) - faddx SINA2,%fp2 | ...A2+T(A4+TA6) - - fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7)) - - fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6)) - faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7)) - fmulx X(%a6),%fp0 | ...R'*S - - faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] -|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING -|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING - - - fmulx %fp1,%fp0 | ...SIN(R')-R' -|--FP1 RELEASED. - - fmovel %d1,%FPCR |restore users exceptions - faddx X(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -COSPOLY: -|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY -|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE -|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS -|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) -|--WHERE T=S*S. -|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION -|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 -|--AND IS THEREFORE STORED AS SINGLE PRECISION. - - fmulx %fp0,%fp0 | ...FP0 IS S -|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 - fmoved COSB8,%fp2 - fmoved COSB7,%fp3 -|--FP0 IS NOW READY - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS T -|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 - fmovex %fp0,X(%a6) | ...X IS S - rorl #1,%d0 - andil #0x80000000,%d0 -| ...LEAST SIG. BIT OF D0 IN SIGN POSITION - - fmulx %fp1,%fp2 | ...TB8 -|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU - eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S - andil #0x80000000,%d0 - - fmulx %fp1,%fp3 | ...TB7 -|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU - oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE - movel %d0,POSNEG1(%a6) - - faddd COSB6,%fp2 | ...B6+TB8 - faddd COSB5,%fp3 | ...B5+TB7 - - fmulx %fp1,%fp2 | ...T(B6+TB8) - fmulx %fp1,%fp3 | ...T(B5+TB7) - - faddd COSB4,%fp2 | ...B4+T(B6+TB8) - faddx COSB3,%fp3 | ...B3+T(B5+TB7) - - fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8)) - fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7)) - - faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8)) - fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7)) - - fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8))) -|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING -|--FP2 RELEASED. - - - faddx %fp1,%fp0 -|--FP1 RELEASED - - fmulx X(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - fadds POSNEG1(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -SINBORS: -|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -|--IF |X| < 2**(-40), RETURN X OR 1. 
- cmpil #0x3FFF8000,%d0 - bgts REDUCEX - - -SINSM: - movel ADJN(%a6),%d0 - cmpil #0,%d0 - bgts COSTINY - -SINTINY: - movew #0x0000,XDCARE(%a6) | ...JUST IN CASE - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -COSTINY: - fmoves #0x3F800000,%fp0 - - fmovel %d1,%FPCR |restore users exceptions - fsubs #0x00800000,%fp0 |last inst - possible exception set - bra t_frcinx - - -REDUCEX: -|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. -|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING -|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. - - fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 - movel %d2,-(%a7) - fmoves #0x00000000,%fp1 -|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that -|--there is a danger of unwanted overflow in first LOOP iteration. In this -|--case, reduce argument by one remainder step to make subsequent reduction -|--safe. - cmpil #0x7ffeffff,%d0 |is argument dangerously large? - bnes LOOP - movel #0x7ffe0000,FP_SCR2(%a6) |yes -| ;create 2**16383*PI/2 - movel #0xc90fdaa2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) - ftstx %fp0 |test sign of argument - movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* -| ;PI/2 at FP_SCR3 - movel #0x85a308d3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) - fblt red_neg - orw #0x8000,FP_SCR2(%a6) |positive arg - orw #0x8000,FP_SCR3(%a6) -red_neg: - faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact - fmovex %fp0,%fp1 |save high result in fp1 - faddx FP_SCR3(%a6),%fp0 |low part of reduction - fsubx %fp0,%fp1 |determine low component of result - faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. - -|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. -|--integer quotient will be stored in N -|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) - -LOOP: - fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 - movew INARG(%a6),%d0 - movel %d0,%a1 | ...save a copy of D0 - andil #0x00007FFF,%d0 - subil #0x00003FFF,%d0 | ...D0 IS K - cmpil #28,%d0 - bles LASTLOOP -CONTLOOP: - subil #27,%d0 | ...D0 IS L := K-27 - movel #0,ENDFLAG(%a6) - bras WORK -LASTLOOP: - clrl %d0 | ...D0 IS L := 0 - movel #1,ENDFLAG(%a6) - -WORK: -|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN -|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. - -|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), -|--2**L * (PIby2_1), 2**L * (PIby2_2) - - movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI - subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) - - movel #0xA2F9836E,FP_SCR1+4(%a6) - movel #0x4E44152A,FP_SCR1+8(%a6) - movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) - - fmovex %fp0,%fp2 - fmulx FP_SCR1(%a6),%fp2 -|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN -|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N -|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT -|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE -|--US THE DESIRED VALUE IN FLOATING POINT. 
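The add/subtract trick in the comment above, restated as a C sketch. The constant is 2**52 here because a C double carries a 53-bit mantissa; the assembly adds sign(INARG)*2**63 against the extended format's 64-bit mantissa, and the choice of L keeps the quotient below 2**29 so the addition cannot overflow:

```c
#include <math.h>

/* Sketch: in round-to-nearest, adding a constant so large that the
 * fraction bits fall off the end forces rounding to an integer;
 * subtracting it back leaves that integer as a float, with no
 * int<->float conversion.  Valid for |x| < 2**51 or so. */
static double round_to_int_sketch(double x)
{
    volatile double big = copysign(0x1p52, x); /* volatile: keep both ops */
    return (x + big) - big;
}
```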
- -|--HIDE SIX CYCLES OF INSTRUCTION - movel %a1,%d2 - swap %d2 - andil #0x80000000,%d2 - oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL - movel %d2,TWOTO63(%a6) - - movel %d0,%d2 - addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) - -|--FP2 IS READY - fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED - -|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - movew %d2,FP_SCR2(%a6) - clrw FP_SCR2+2(%a6) - movel #0xC90FDAA2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 - -|--FP2 IS READY - fsubs TWOTO63(%a6),%fp2 | ...FP2 is N - - addil #0x00003FDD,%d0 - movew %d0,FP_SCR3(%a6) - clrw FP_SCR3+2(%a6) - movel #0x85A308D3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 - - movel ENDFLAG(%a6),%d0 - -|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -|--P2 = 2**(L) * Piby2_2 - fmovex %fp2,%fp4 - fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 - fmovex %fp2,%fp5 - fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 - fmovex %fp4,%fp3 -|--we want P+p = W+w but |p| <= half ulp of P -|--Then, we need to compute A := R-P and a := r-p - faddx %fp5,%fp3 | ...FP3 is P - fsubx %fp3,%fp4 | ...W-P - - fsubx %fp3,%fp0 | ...FP0 is A := R - P - faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w - - fmovex %fp0,%fp3 | ...FP3 A - fsubx %fp4,%fp1 | ...FP1 is a := r - p - -|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -|--|r| <= half ulp of R. - faddx %fp1,%fp0 | ...FP0 is R := A+a -|--No need to calculate r if this is the last loop - cmpil #0,%d0 - bgt RESTORE - -|--Need to calculate r - fsubx %fp0,%fp3 | ...A-R - faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a - bra LOOP - -RESTORE: - fmovel %fp2,N(%a6) - movel (%a7)+,%d2 - fmovemx (%a7)+,%fp2-%fp5 - - - movel ADJN(%a6),%d0 - cmpil #4,%d0 - - blt SINCONT - bras SCCONT - - .global ssincosd -ssincosd: -|--SIN AND COS OF X FOR DENORMALIZED X - - fmoves #0x3F800000,%fp1 - bsr sto_cos |store cosine result - bra t_extdnrm - - .global ssincos -ssincos: -|--SET ADJN TO 4 - movel #4,ADJN(%a6) - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges SCOK1 - bra SCSM - -SCOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts SCMAIN - bra REDUCEX - - -SCMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT THREE INSTRUCTIONS - lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - - -|--FP1 IS NOW READY - fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER - - movel N(%a6),%d0 - asll #4,%d0 - addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - -SCCONT: -|--continuation point from REDUCEX - -|--HIDE THE NEXT TWO - movel N(%a6),%d0 - rorl #1,%d0 - - cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD - bge NEVEN - -NODD: -|--REGISTERS SAVED SO FAR: D0, A0, FP2. 
- - fmovex %fp0,RPRIME(%a6) - fmulx %fp0,%fp0 | ...FP0 IS S = R*R - fmoved SINA7,%fp1 | ...A7 - fmoved COSB8,%fp2 | ...B8 - fmulx %fp0,%fp1 | ...SA7 - movel %d2,-(%a7) - movel %d0,%d2 - fmulx %fp0,%fp2 | ...SB8 - rorl #1,%d2 - andil #0x80000000,%d2 - - faddd SINA6,%fp1 | ...A6+SA7 - eorl %d0,%d2 - andil #0x80000000,%d2 - faddd COSB7,%fp2 | ...B7+SB8 - - fmulx %fp0,%fp1 | ...S(A6+SA7) - eorl %d2,RPRIME(%a6) - movel (%a7)+,%d2 - fmulx %fp0,%fp2 | ...S(B7+SB8) - rorl #1,%d0 - andil #0x80000000,%d0 - - faddd SINA5,%fp1 | ...A5+S(A6+SA7) - movel #0x3F800000,POSNEG1(%a6) - eorl %d0,POSNEG1(%a6) - faddd COSB6,%fp2 | ...B6+S(B7+SB8) - - fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7)) - fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8)) - fmovex %fp0,SPRIME(%a6) - - faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7)) - eorl %d0,SPRIME(%a6) - faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8)) - - fmulx %fp0,%fp1 | ...S(A4+...) - fmulx %fp0,%fp2 | ...S(B5+...) - - faddd SINA3,%fp1 | ...A3+S(A4+...) - faddd COSB4,%fp2 | ...B4+S(B5+...) - - fmulx %fp0,%fp1 | ...S(A3+...) - fmulx %fp0,%fp2 | ...S(B4+...) - - faddx SINA2,%fp1 | ...A2+S(A3+...) - faddx COSB3,%fp2 | ...B3+S(B4+...) - - fmulx %fp0,%fp1 | ...S(A2+...) - fmulx %fp0,%fp2 | ...S(B3+...) - - faddx SINA1,%fp1 | ...A1+S(A2+...) - faddx COSB2,%fp2 | ...B2+S(B3+...) - - fmulx %fp0,%fp1 | ...S(A1+...) - fmulx %fp2,%fp0 | ...S(B2+...) - - - - fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...) - fadds COSB1,%fp0 | ...B1+S(B2...) - fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...)) - - movel %d1,-(%sp) |restore users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - faddx RPRIME(%a6),%fp1 | ...COS(X) - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - fadds POSNEG1(%a6),%fp0 | ...SIN(X) - - bra t_frcinx - - -NEVEN: -|--REGISTERS SAVED SO FAR: FP2. - - fmovex %fp0,RPRIME(%a6) - fmulx %fp0,%fp0 | ...FP0 IS S = R*R - fmoved COSB8,%fp1 | ...B8 - fmoved SINA7,%fp2 | ...A7 - fmulx %fp0,%fp1 | ...SB8 - fmovex %fp0,SPRIME(%a6) - fmulx %fp0,%fp2 | ...SA7 - rorl #1,%d0 - andil #0x80000000,%d0 - faddd COSB7,%fp1 | ...B7+SB8 - faddd SINA6,%fp2 | ...A6+SA7 - eorl %d0,RPRIME(%a6) - eorl %d0,SPRIME(%a6) - fmulx %fp0,%fp1 | ...S(B7+SB8) - oril #0x3F800000,%d0 - movel %d0,POSNEG1(%a6) - fmulx %fp0,%fp2 | ...S(A6+SA7) - - faddd COSB6,%fp1 | ...B6+S(B7+SB8) - faddd SINA5,%fp2 | ...A5+S(A6+SA7) - - fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8)) - fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7)) - - faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8)) - faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7)) - - fmulx %fp0,%fp1 | ...S(B5+...) - fmulx %fp0,%fp2 | ...S(A4+...) - - faddd COSB4,%fp1 | ...B4+S(B5+...) - faddd SINA3,%fp2 | ...A3+S(A4+...) - - fmulx %fp0,%fp1 | ...S(B4+...) - fmulx %fp0,%fp2 | ...S(A3+...) - - faddx COSB3,%fp1 | ...B3+S(B4+...) - faddx SINA2,%fp2 | ...A2+S(A3+...) - - fmulx %fp0,%fp1 | ...S(B3+...) - fmulx %fp0,%fp2 | ...S(A2+...) - - faddx COSB2,%fp1 | ...B2+S(B3+...) - faddx SINA1,%fp2 | ...A1+S(A2+...) - - fmulx %fp0,%fp1 | ...S(B2+...) - fmulx %fp2,%fp0 | ...s(a1+...) - - - - fadds COSB1,%fp1 | ...B1+S(B2...) - fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...) 
- fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...)) - - movel %d1,-(%sp) |save users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - fadds POSNEG1(%a6),%fp1 | ...COS(X) - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - faddx RPRIME(%a6),%fp0 | ...SIN(X) - - bra t_frcinx - -SCBORS: - cmpil #0x3FFF8000,%d0 - bgt REDUCEX - - -SCSM: - movew #0x0000,XDCARE(%a6) - fmoves #0x3F800000,%fp1 - - movel %d1,-(%sp) |save users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - fsubs #0x00800000,%fp1 - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/ssinh.S b/arch/m68k/fpsp040/ssinh.S deleted file mode 100644 index 8a560edc7653e903580c74490b0f4d69a2f43f2a..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/ssinh.S +++ /dev/null @@ -1,134 +0,0 @@ -| -| ssinh.sa 3.1 12/10/90 -| -| The entry point sSinh computes the hyperbolic sine of -| an input argument; sSinhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value sinh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sSINH takes approximately 280 cycles. -| -| Algorithm: -| -| SINH -| 1. If |X| > 16380 log2, go to 3. -| -| 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae -| y = |X|, sgn = sign(X), and z = expm1(Y), -| sinh(X) = sgn*(1/2)*( z + z/(1+z) ). -| Exit. -| -| 3. If |X| > 16480 log2, go to 5. -| -| 4. (16380 log2 < |X| <= 16480 log2) -| sinh(X) = sign(X) * exp(|X|)/2. -| However, invoking exp(|X|) may cause premature overflow. -| Thus, we calculate sinh(X) as follows: -| Y := |X| -| sgn := sign(X) -| sgnFact := sgn * 2**(16380) -| Y' := Y - 16381 log2 -| sinh(X) := sgnFact * exp(Y'). -| Exit. -| -| 5. (|X| > 16480 log2) sinh(X) must overflow. Return -| sign(X)*Huge*Huge to generate overflow and an infinity with -| the appropriate sign. Huge is the largest finite number in -| extended format. Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SSINH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD -T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 
16381 LOG2 TRAIL
-
-	|xref t_frcinx
-	|xref t_ovfl
-	|xref t_extdnrm
-	|xref setox
-	|xref setoxm1
-
-	.global ssinhd
-ssinhd:
-|--SINH(X) = X FOR DENORMALIZED X
-
-	bra t_extdnrm
-
-	.global ssinh
-ssinh:
-	fmovex (%a0),%fp0 | ...LOAD INPUT
-
-	movel (%a0),%d0
-	movew 4(%a0),%d0
-	movel %d0,%a1 | save a copy of original (compacted) operand
-	andl #0x7FFFFFFF,%d0
-	cmpl #0x400CB167,%d0
-	bgts SINHBIG
-
-|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
-|--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
-
-	fabsx %fp0 | ...Y = |X|
-
-	moveml %a1/%d1,-(%sp)
-	fmovemx %fp0-%fp0,(%a0)
-	clrl %d1
-	bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
-	fmovel #0,%fpcr
-	moveml (%sp)+,%a1/%d1
-
-	fmovex %fp0,%fp1
-	fadds #0x3F800000,%fp1 | ...1+Z
-	fmovex %fp0,-(%sp)
-	fdivx %fp1,%fp0 | ...Z/(1+Z)
-	movel %a1,%d0
-	andl #0x80000000,%d0
-	orl #0x3F000000,%d0
-	faddx (%sp)+,%fp0
-	movel %d0,-(%sp)
-
-	fmovel %d1,%fpcr
-	fmuls (%sp)+,%fp0 |last fp inst - possible exceptions set
-
-	bra t_frcinx
-
-SINHBIG:
-	cmpl #0x400CB2B3,%d0
-	bgt t_ovfl
-	fabsx %fp0
-	fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
-	movel #0,-(%sp)
-	movel #0x80000000,-(%sp)
-	movel %a1,%d0
-	andl #0x80000000,%d0
-	orl #0x7FFB0000,%d0
-	movel %d0,-(%sp) | ...EXTENDED FMT
-	fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE
-
-	movel %d1,-(%sp)
-	clrl %d1
-	fmovemx %fp0-%fp0,(%a0)
-	bsr setox
-	fmovel (%sp)+,%fpcr
-
-	fmulx (%sp)+,%fp0 |possible exception
-	bra t_frcinx
-
-	|end
diff --git a/arch/m68k/fpsp040/stan.S b/arch/m68k/fpsp040/stan.S
deleted file mode 100644
index f8553aaececbc64d76be7908e57b43869089558e..0000000000000000000000000000000000000000
--- a/arch/m68k/fpsp040/stan.S
+++ /dev/null
@@ -1,454 +0,0 @@
-|
-|	stan.sa 3.3 7/29/91
-|
-|	The entry point stan computes the tangent of
-|	an input argument;
-|	stand does the same except for denormalized input.
-|
-|	Input: Double-extended number X in location pointed to
-|		by address register a0.
-|
-|	Output: The value tan(X) returned in floating-point register Fp0.
-|
-|	Accuracy and Monotonicity: The returned result is within 3 ulp in
-|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
-|		result is subsequently rounded to double precision. The
-|		result is provably monotonic in double precision.
-|
-|	Speed: The program sTAN takes approximately 170 cycles for
-|		input argument X such that |X| < 15Pi, which is the usual
-|		situation.
-|
-|	Algorithm:
-|
-|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
-|
-|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
-|		k = N mod 2, so in particular, k = 0 or 1.
-|
-|	3. If k is odd, go to 5.
-|
-|	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
-|		rational function U/V where
-|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
-|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
-|		Exit.
-|
-|	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
-|		rational function U/V where
-|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
-|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
-|		-Cot(r) = -V/U. Exit.
-|
-|	6. If |X| > 1, go to 8.
-|
-|	7. (|X|<2**(-40)) Tan(X) = X. Exit.
-|
-|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
-|
-
-|	Copyright (C) Motorola, Inc. 1990
-|	All Rights Reserved
-|
-|	For details on the license for this file, please see the
-|	file, README, in this same directory.
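A short C sketch of the selection in the TAN algorithm above; `stan_sketch` is a hypothetical name, libm's tan() stands in for the rational U/V, and the one-piece reduction stands in for the two-piece PITBL lookup the assembly performs:

```c
#include <math.h>

/* Sketch only: Steps 2-5 and 7 of the TAN algorithm. */
static double stan_sketch(double x)
{
    if (fabs(x) < 0x1p-40)              /* Step 7: tan(X) ~ X */
        return x;

    double n = nearbyint(x * M_2_PI);   /* Step 2: X = N(pi/2) + r */
    double r = x - n * M_PI_2;          /* |r| <= pi/4 */
    double t = tan(r);                  /* U/V in the real code */

    return ((int)n & 1) ? -1.0 / t : t; /* Step 4, or Step 5: -cot(r) = -V/U */
}
```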
- -|STAN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FD78000,0x4004BC7E -TWOBYPI: .long 0x3FE45F30,0x6DC9C883 - -TANQ4: .long 0x3EA0B759,0xF50F8688 -TANP3: .long 0xBEF2BAA5,0xA8924F04 - -TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 - -TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 - -TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 - -TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 - -TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 - -INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 - -TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - -|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING -|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT -|--MOST 69 BITS LONG. - .global PITBL -PITBL: - .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 - .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 - .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 - .long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 - .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 - .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 - .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 - .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 - .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 - .long 0xC0040000,0x90836524,0x88034B96,0x20B00000 - .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 - .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 - .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 - .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 - .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 - .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 - .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 - .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 - .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 - .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 - .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 - .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 - .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 - .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 - .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 - .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 - .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 - .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 - .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 - .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 - .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 - .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 - .long 0x00000000,0x00000000,0x00000000,0x00000000 - .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 - .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 - .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 - .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 - .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 - .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 - .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 - .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 - .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 - .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 - .long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 - .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 - .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 - .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 - .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 - .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 - .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 - .long 
0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 - .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 - .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 - .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 - .long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 - .long 0x40040000,0x90836524,0x88034B96,0xA0B00000 - .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 - .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 - .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 - .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 - .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 - .long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 - .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 - .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 - .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 - - .set INARG,FP_SCR4 - - .set TWOTO63,L_SCR1 - .set ENDFLAG,L_SCR2 - .set N,L_SCR3 - - | xref t_frcinx - |xref t_extdnrm - - .global stand -stand: -|--TAN(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global stan -stan: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges TANOK1 - bra TANSM -TANOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts TANMAIN - bra REDUCEX - - -TANMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT TWO INSTRUCTIONS - leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - -|--FP1 IS NOW READY - fmovel %fp1,%d0 | ...CONVERT TO INTEGER - - asll #4,%d0 - addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 -|--HIDE THE NEXT ONE - - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - - rorl #5,%d0 - andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0 - -TANCONT: - - cmpil #0,%d0 - blt NODD - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...S = R*R - - fmoved TANQ4,%fp3 - fmoved TANP3,%fp2 - - fmulx %fp1,%fp3 | ...SQ4 - fmulx %fp1,%fp2 | ...SP3 - - faddd TANQ3,%fp3 | ...Q3+SQ4 - faddx TANP2,%fp2 | ...P2+SP3 - - fmulx %fp1,%fp3 | ...S(Q3+SQ4) - fmulx %fp1,%fp2 | ...S(P2+SP3) - - faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) - faddx TANP1,%fp2 | ...P1+S(P2+SP3) - - fmulx %fp1,%fp3 | ...S(Q2+S(Q3+SQ4)) - fmulx %fp1,%fp2 | ...S(P1+S(P2+SP3)) - - faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) - fmulx %fp0,%fp2 | ...RS(P1+S(P2+SP3)) - - fmulx %fp3,%fp1 | ...S(Q1+S(Q2+S(Q3+SQ4))) - - - faddx %fp2,%fp0 | ...R+RS(P1+S(P2+SP3)) - - - fadds #0x3F800000,%fp1 | ...1+S(Q1+...) - - fmovel %d1,%fpcr |restore users exceptions - fdivx %fp1,%fp0 |last inst - possible exception set - - bra t_frcinx - -NODD: - fmovex %fp0,%fp1 - fmulx %fp0,%fp0 | ...S = R*R - - fmoved TANQ4,%fp3 - fmoved TANP3,%fp2 - - fmulx %fp0,%fp3 | ...SQ4 - fmulx %fp0,%fp2 | ...SP3 - - faddd TANQ3,%fp3 | ...Q3+SQ4 - faddx TANP2,%fp2 | ...P2+SP3 - - fmulx %fp0,%fp3 | ...S(Q3+SQ4) - fmulx %fp0,%fp2 | ...S(P2+SP3) - - faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) - faddx TANP1,%fp2 | ...P1+S(P2+SP3) - - fmulx %fp0,%fp3 | ...S(Q2+S(Q3+SQ4)) - fmulx %fp0,%fp2 | ...S(P1+S(P2+SP3)) - - faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) - fmulx %fp1,%fp2 | ...RS(P1+S(P2+SP3)) - - fmulx %fp3,%fp0 | ...S(Q1+S(Q2+S(Q3+SQ4))) - - - faddx %fp2,%fp1 | ...R+RS(P1+S(P2+SP3)) - fadds #0x3F800000,%fp0 | ...1+S(Q1+...) - - - fmovex %fp1,-(%sp) - eoril #0x80000000,(%sp) - - fmovel %d1,%fpcr |restore users exceptions - fdivx (%sp)+,%fp0 |last inst - possible exception set - - bra t_frcinx - -TANBORS: -|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 
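TANMAIN above fetches Y1 (extended) and Y2 (single) from PITBL and forms r = (X - Y1) - Y2, the classic lead/trail reduction that the naive single subtraction in the previous sketch loses bits to. A double-precision C sketch of that step; the hi/lo split of pi/2 below is the standard fdlibm-style double constant pair, not a value taken from PITBL, and the subtraction stays accurate only for modest N.

#include <math.h>

static const double PIO2_HI = 1.57079632679489655800e+00;  /* lead  */
static const double PIO2_LO = 6.12323399573676603587e-17;  /* trail */

/* r = (X - Y1) - Y2: the lead subtraction cancels almost exactly,
 * and the trail subtraction restores the pi/2 bits the lead part
 * had to drop. */
double reduce_sketch(double x, int *odd)
{
    int n = (int)nearbyint(x * M_2_PI);
    double r = (x - n * PIO2_HI) - n * PIO2_LO;
    *odd = n & 1;
    return r;
}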
-|--IF |X| < 2**(-40), RETURN X OR 1. - cmpil #0x3FFF8000,%d0 - bgts REDUCEX - -TANSM: - - fmovex %fp0,-(%sp) - fmovel %d1,%fpcr |restore users exceptions - fmovex (%sp)+,%fp0 |last inst - possible exception set - - bra t_frcinx - - -REDUCEX: -|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. -|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING -|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. - - fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 - movel %d2,-(%a7) - fmoves #0x00000000,%fp1 - -|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that -|--there is a danger of unwanted overflow in first LOOP iteration. In this -|--case, reduce argument by one remainder step to make subsequent reduction -|--safe. - cmpil #0x7ffeffff,%d0 |is argument dangerously large? - bnes LOOP - movel #0x7ffe0000,FP_SCR2(%a6) |yes -| ;create 2**16383*PI/2 - movel #0xc90fdaa2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) - ftstx %fp0 |test sign of argument - movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* -| ;PI/2 at FP_SCR3 - movel #0x85a308d3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) - fblt red_neg - orw #0x8000,FP_SCR2(%a6) |positive arg - orw #0x8000,FP_SCR3(%a6) -red_neg: - faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact - fmovex %fp0,%fp1 |save high result in fp1 - faddx FP_SCR3(%a6),%fp0 |low part of reduction - fsubx %fp0,%fp1 |determine low component of result - faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. - -|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. -|--integer quotient will be stored in N -|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) - -LOOP: - fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 - movew INARG(%a6),%d0 - movel %d0,%a1 | ...save a copy of D0 - andil #0x00007FFF,%d0 - subil #0x00003FFF,%d0 | ...D0 IS K - cmpil #28,%d0 - bles LASTLOOP -CONTLOOP: - subil #27,%d0 | ...D0 IS L := K-27 - movel #0,ENDFLAG(%a6) - bras WORK -LASTLOOP: - clrl %d0 | ...D0 IS L := 0 - movel #1,ENDFLAG(%a6) - -WORK: -|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN -|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. - -|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), -|--2**L * (PIby2_1), 2**L * (PIby2_2) - - movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI - subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) - - movel #0xA2F9836E,FP_SCR1+4(%a6) - movel #0x4E44152A,FP_SCR1+8(%a6) - movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) - - fmovex %fp0,%fp2 - fmulx FP_SCR1(%a6),%fp2 -|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN -|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N -|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT -|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE -|--US THE DESIRED VALUE IN FLOATING POINT. 
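The comment block above describes the standard add/subtract trick for rounding to an integer without leaving the FPU. A double-precision sketch of the same idea; the FPSP adds sign(arg)*2^63 because extended precision carries a 64-bit mantissa, and 2^52 is the corresponding double constant (this assumes |x| is comfortably below 2^51 and round-to-nearest mode).

#include <math.h>

/* At magnitude 2^52 a double's ulp is 1, so adding sign(x)*2^52
 * forces the FPU to round the fraction bits away; subtracting it
 * back leaves the nearest integer. */
double round_to_int_sketch(double x)
{
    double big = copysign(4503599627370496.0, x);  /* sign(x)*2^52   */
    volatile double t = x + big;   /* volatile: stop the compiler    */
    return t - big;                /* from folding the pair away     */
}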
- -|--HIDE SIX CYCLES OF INSTRUCTION - movel %a1,%d2 - swap %d2 - andil #0x80000000,%d2 - oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL - movel %d2,TWOTO63(%a6) - - movel %d0,%d2 - addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) - -|--FP2 IS READY - fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED - -|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - movew %d2,FP_SCR2(%a6) - clrw FP_SCR2+2(%a6) - movel #0xC90FDAA2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 - -|--FP2 IS READY - fsubs TWOTO63(%a6),%fp2 | ...FP2 is N - - addil #0x00003FDD,%d0 - movew %d0,FP_SCR3(%a6) - clrw FP_SCR3+2(%a6) - movel #0x85A308D3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 - - movel ENDFLAG(%a6),%d0 - -|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -|--P2 = 2**(L) * Piby2_2 - fmovex %fp2,%fp4 - fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 - fmovex %fp2,%fp5 - fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 - fmovex %fp4,%fp3 -|--we want P+p = W+w but |p| <= half ulp of P -|--Then, we need to compute A := R-P and a := r-p - faddx %fp5,%fp3 | ...FP3 is P - fsubx %fp3,%fp4 | ...W-P - - fsubx %fp3,%fp0 | ...FP0 is A := R - P - faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w - - fmovex %fp0,%fp3 | ...FP3 A - fsubx %fp4,%fp1 | ...FP1 is a := r - p - -|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -|--|r| <= half ulp of R. - faddx %fp1,%fp0 | ...FP0 is R := A+a -|--No need to calculate r if this is the last loop - cmpil #0,%d0 - bgt RESTORE - -|--Need to calculate r - fsubx %fp0,%fp3 | ...A-R - faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a - bra LOOP - -RESTORE: - fmovel %fp2,N(%a6) - movel (%a7)+,%d2 - fmovemx (%a7)+,%fp2-%fp5 - - - movel N(%a6),%d0 - rorl #1,%d0 - - - bra TANCONT - - |end diff --git a/arch/m68k/fpsp040/stanh.S b/arch/m68k/fpsp040/stanh.S deleted file mode 100644 index 7e12e59ee8c7cc02f163df53fe157aba7b7777e8..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/stanh.S +++ /dev/null @@ -1,184 +0,0 @@ -| -| stanh.sa 3.1 12/10/90 -| -| The entry point sTanh computes the hyperbolic tangent of -| an input argument; sTanhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value tanh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program stanh takes approximately 270 cycles. -| -| Algorithm: -| -| TANH -| 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. -| -| 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by -| sgn := sign(X), y := 2|X|, z := expm1(Y), and -| tanh(X) = sgn*( z/(2+z) ). -| Exit. -| -| 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, -| go to 7. -| -| 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. -| -| 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by -| sgn := sign(X), y := 2|X|, z := exp(Y), -| tanh(X) = sgn - [ sgn*2/(1+z) ]. -| Exit. -| -| 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we -| calculate Tanh(X) by -| sgn := sign(X), Tiny := 2**(-126), -| tanh(X) := sgn - sgn*Tiny. -| Exit. -| -| 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|STANH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .set X,FP_SCR5 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set SGN,L_SCR3 - - .set V,FP_SCR6 - -BOUNDS1: .long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2 - - |xref t_frcinx - |xref t_extdnrm - |xref setox - |xref setoxm1 - - .global stanhd -stanhd: -|--TANH(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global stanh -stanh: - fmovex (%a0),%fp0 | ...LOAD INPUT - - fmovex %fp0,X(%a6) - movel (%a0),%d0 - movew 4(%a0),%d0 - movel %d0,X(%a6) - andl #0x7FFFFFFF,%d0 - cmp2l BOUNDS1(%pc),%d0 | ...2**(-40) < |X| < (5/2)LOG2 ? - bcss TANHBORS - -|--THIS IS THE USUAL CASE -|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). - - movel X(%a6),%d0 - movel %d0,SGN(%a6) - andl #0x7FFF0000,%d0 - addl #0x00010000,%d0 | ...EXPONENT OF 2|X| - movel %d0,X(%a6) - andl #0x80000000,SGN(%a6) - fmovex X(%a6),%fp0 | ...FP0 IS Y = 2|X| - - movel %d1,-(%a7) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setoxm1 | ...FP0 IS Z = EXPM1(Y) - movel (%a7)+,%d1 - - fmovex %fp0,%fp1 - fadds #0x40000000,%fp1 | ...Z+2 - movel SGN(%a6),%d0 - fmovex %fp1,V(%a6) - eorl %d0,V(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fdivx V(%a6),%fp0 - bra t_frcinx - -TANHBORS: - cmpl #0x3FFF8000,%d0 - blt TANHSM - - cmpl #0x40048AA1,%d0 - bgt TANHHUGE - -|-- (5/2) LOG2 < |X| < 50 LOG2, -|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), -|--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. - - movel X(%a6),%d0 - movel %d0,SGN(%a6) - andl #0x7FFF0000,%d0 - addl #0x00010000,%d0 | ...EXPO OF 2|X| - movel %d0,X(%a6) | ...Y = 2|X| - andl #0x80000000,SGN(%a6) - movel SGN(%a6),%d0 - fmovex X(%a6),%fp0 | ...Y = 2|X| - - movel %d1,-(%a7) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setox | ...FP0 IS EXP(Y) - movel (%a7)+,%d1 - movel SGN(%a6),%d0 - fadds #0x3F800000,%fp0 | ...EXP(Y)+1 - - eorl #0xC0000000,%d0 | ...-SIGN(X)*2 - fmoves %d0,%fp1 | ...-SIGN(X)*2 IN SGL FMT - fdivx %fp0,%fp1 | ...-SIGN(X)2 / [EXP(Y)+1 ] - - movel SGN(%a6),%d0 - orl #0x3F800000,%d0 | ...SGN - fmoves %d0,%fp0 | ...SGN IN SGL FMT - - fmovel %d1,%FPCR |restore users exceptions - faddx %fp1,%fp0 - - bra t_frcinx - -TANHSM: - movew #0x0000,XDCARE(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - - bra t_frcinx - -TANHHUGE: -|---RETURN SGN(X) - SGN(X)EPS - movel X(%a6),%d0 - andl #0x80000000,%d0 - orl #0x3F800000,%d0 - fmoves %d0,%fp0 - andl #0x80000000,%d0 - eorl #0x80800000,%d0 | ...-SIGN(X)*EPS - - fmovel %d1,%FPCR |restore users exceptions - fadds %d0,%fp0 - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/sto_res.S b/arch/m68k/fpsp040/sto_res.S deleted file mode 100644 index 484b47d4eaad3038a29a3094743f9ad7e5a0bd29..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sto_res.S +++ /dev/null @@ -1,97 +0,0 @@ -| -| sto_res.sa 3.1 12/10/90 -| -| Takes the result and puts it in where the user expects it. -| Library functions return result in fp0. If fp0 is not the -| users destination register then fp0 is moved to the -| correct floating-point destination register. fp0 and fp1 -| are then restored to the original contents. -| -| Input: result in fp0,fp1 -| -| d2 & a0 should be kept unmodified -| -| Output: moves the result to the true destination reg or mem -| -| Modifies: destination floating point register -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -STO_RES: |idnt 2,1 | Motorola 040 Floating Point Software Package - - - |section 8 - -#include "fpsp.h" - - .global sto_cos -sto_cos: - bfextu CMDREG1B(%a6){#13:#3},%d0 |extract cos destination - cmpib #3,%d0 |check for fp0/fp1 cases - bles c_fp0123 - fmovemx %fp1-%fp1,-(%a7) - moveql #7,%d1 - subl %d0,%d1 |d1 = 7- (dest. reg. no.) - clrl %d0 - bsetl %d1,%d0 |d0 is dynamic register mask - fmovemx (%a7)+,%d0 - rts -c_fp0123: - cmpib #0,%d0 - beqs c_is_fp0 - cmpib #1,%d0 - beqs c_is_fp1 - cmpib #2,%d0 - beqs c_is_fp2 -c_is_fp3: - fmovemx %fp1-%fp1,USER_FP3(%a6) - rts -c_is_fp2: - fmovemx %fp1-%fp1,USER_FP2(%a6) - rts -c_is_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - rts -c_is_fp0: - fmovemx %fp1-%fp1,USER_FP0(%a6) - rts - - - .global sto_res -sto_res: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract destination register - cmpib #3,%d0 |check for fp0/fp1 cases - bles fp0123 - fmovemx %fp0-%fp0,-(%a7) - moveql #7,%d1 - subl %d0,%d1 |d1 = 7- (dest. reg. no.) - clrl %d0 - bsetl %d1,%d0 |d0 is dynamic register mask - fmovemx (%a7)+,%d0 - rts -fp0123: - cmpib #0,%d0 - beqs is_fp0 - cmpib #1,%d0 - beqs is_fp1 - cmpib #2,%d0 - beqs is_fp2 -is_fp3: - fmovemx %fp0-%fp0,USER_FP3(%a6) - rts -is_fp2: - fmovemx %fp0-%fp0,USER_FP2(%a6) - rts -is_fp1: - fmovemx %fp0-%fp0,USER_FP1(%a6) - rts -is_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - rts - - |end diff --git a/arch/m68k/fpsp040/stwotox.S b/arch/m68k/fpsp040/stwotox.S deleted file mode 100644 index 0d5e6a1436a638c59f0fc123974f43c418169d69..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/stwotox.S +++ /dev/null @@ -1,426 +0,0 @@ -| -| stwotox.sa 3.1 12/10/90 -| -| stwotox --- 2**X -| stwotoxd --- 2**X for denormalized X -| stentox --- 10**X -| stentoxd --- 10**X for denormalized X -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The function values are returned in Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program stwotox takes approximately 190 cycles and the -| program stentox takes approximately 200 cycles. -| -| Algorithm: -| -| twotox -| 1. If |X| > 16480, go to ExpBig. -| -| 2. If |X| < 2**(-70), go to ExpSm. -| -| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore -| decompose N as -| N = 64(M + M') + j, j = 0,1,2,...,63. -| -| 4. Overwrite r := r * log2. Then -| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). -| Go to expr to compute that expression. -| -| tentox -| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. -| -| 2. If |X| < 2**(-70), go to ExpSm. -| -| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set -| N := round-to-int(y). Decompose N as -| N = 64(M + M') + j, j = 0,1,2,...,63. -| -| 4. Define r as -| r := ((X - N*L1)-N*L2) * L10 -| where L1, L2 are the leading and trailing parts of log_10(2)/64 -| and L10 is the natural log of 10. Then -| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). -| Go to expr to compute that expression. -| -| expr -| 1. Fetch 2**(j/64) from table as Fact1 and Fact2. -| -| 2. Overwrite Fact1 and Fact2 by -| Fact1 := 2**(M) * Fact1 -| Fact2 := 2**(M) * Fact2 -| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). -| -| 3. 
Calculate P where 1 + P approximates exp(r): -| P = r + r*r*(A1+r*(A2+...+r*A5)). -| -| 4. Let AdjFact := 2**(M'). Return -| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). -| Exit. -| -| ExpBig -| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate -| underflow by Tiny * Tiny. -| -| ExpSm -| 1. Return 1 + X. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480 -BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10 - -L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2 -L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10 - -L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 - -LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 - -LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -EXPA5: .long 0x3F56C16D,0x6F7BD0B2 -EXPA4: .long 0x3F811112,0x302C712C -EXPA3: .long 0x3FA55555,0x55554CC1 -EXPA2: .long 0x3FC55555,0x55554A54 -EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000 - -HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 -TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 - -EXPTBL: - .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 - .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA - .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 - .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 - .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA - .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C - .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 - .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA - .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 - .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 - .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 - .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 - .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D - .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 - .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B - .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 - .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A - .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B - .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF - .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA - .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD - .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E - .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B - .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB - .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB - .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 - .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C - .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 - .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 - .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 - .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F - .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C - .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB - .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB - .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C - .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA - .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD - .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 - .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A - .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 - .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB - .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 - .long 
0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C - .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 - .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 - .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE - .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 - .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 - .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A - .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 - .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 - .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 - .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 - .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE - .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 - .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F - .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A - .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A - .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC - .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F - .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A - .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 - .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B - .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 - - .set N,L_SCR1 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set ADJFACT,FP_SCR2 - - .set FACT1,FP_SCR3 - .set FACT1HI,FACT1+4 - .set FACT1LOW,FACT1+8 - - .set FACT2,FP_SCR4 - .set FACT2HI,FACT2+4 - .set FACT2LOW,FACT2+8 - - | xref t_unfl - |xref t_ovfl - |xref t_frcinx - - .global stwotoxd -stwotoxd: -|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT - - fmovel %d1,%fpcr | ...set user's rounding mode/precision - fmoves #0x3F800000,%fp0 | ...RETURN 1 + X - movel (%a0),%d0 - orl #0x00800001,%d0 - fadds %d0,%fp0 - bra t_frcinx - - .global stwotox -stwotox: -|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? - bges TWOOK1 - bra EXPBORS - -TWOOK1: - cmpil #0x400D80C0,%d0 | ...|X| > 16480? - bles TWOMAIN - bra EXPBORS - - -TWOMAIN: -|--USUAL CASE, 2^(-70) <= |X| <= 16480 - - fmovex %fp0,%fp1 - fmuls #0x42800000,%fp1 | ...64 * X - - fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X) - movel %d2,-(%sp) - lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) - fmovel N(%a6),%fp1 | ...N --> FLOATING FMT - movel N(%a6),%d0 - movel %d0,%d2 - andil #0x3F,%d0 | ...D0 IS J - asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) - addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) - asrl #6,%d2 | ...d2 IS L, N = 64L + J - movel %d2,%d0 - asrl #1,%d0 | ...D0 IS M - subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J - addil #0x3FFF,%d2 - movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') - movel (%sp)+,%d2 -|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -|--ADJFACT = 2^(M'). -|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
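TWOMAIN above implements step 3 of the twotox algorithm: N = round(64X) is split as 64M + J, 2^(J/64) comes from EXPTBL as a lead/trail pair, and r = (X - N/64)*log2 feeds the exp polynomial. A double-precision C sketch of the decomposition; exp2() computes 2^(J/64) on the fly where the table stores it, exp() stands in for the 1+P polynomial, and ldexp() absorbs both M and M' (the FPSP splits the scale factor in two only to avoid intermediate over/underflow).

#include <math.h>

double two_to_x_sketch(double x)
{
    int n = (int)nearbyint(64.0 * x);     /* N = round-to-int(64 X)  */
    int j = n & 63;                       /* J = table index, 0..63  */
    int m = (n - j) / 64;                 /* N = 64*M + J, exactly   */

    double r    = (x - n / 64.0) * M_LN2; /* |r| <= (1/128) log2     */
    double fact = exp2(j / 64.0);         /* 2^(J/64), cf. EXPTBL    */

    return ldexp(fact * exp(r), m);       /* 2^(M+J/64) * exp(r)     */
}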
- - fmuls #0x3C800000,%fp1 | ...(1/64)*N - movel (%a1)+,FACT1(%a6) - movel (%a1)+,FACT1HI(%a6) - movel (%a1)+,FACT1LOW(%a6) - movew (%a1)+,FACT2(%a6) - clrw FACT2+2(%a6) - - fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X) - - movew (%a1)+,FACT2HI(%a6) - clrw FACT2HI+2(%a6) - clrl FACT2LOW(%a6) - addw %d0,FACT1(%a6) - - fmulx LOG2,%fp0 | ...FP0 IS R - addw %d0,FACT2(%a6) - - bra expr - -EXPBORS: -|--FPCR, D0 SAVED - cmpil #0x3FFF8000,%d0 - bgts EXPBIG - -EXPSM: -|--|X| IS SMALL, RETURN 1 + X - - fmovel %d1,%FPCR |restore users exceptions - fadds #0x3F800000,%fp0 | ...RETURN 1 + X - - bra t_frcinx - -EXPBIG: -|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW -|--REGISTERS SAVE SO FAR ARE FPCR AND D0 - movel X(%a6),%d0 - cmpil #0,%d0 - blts EXPNEG - - bclrb #7,(%a0) |t_ovfl expects positive value - bra t_ovfl - -EXPNEG: - bclrb #7,(%a0) |t_unfl expects positive value - bra t_unfl - - .global stentoxd -stentoxd: -|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT - - fmovel %d1,%fpcr | ...set user's rounding mode/precision - fmoves #0x3F800000,%fp0 | ...RETURN 1 + X - movel (%a0),%d0 - orl #0x00800001,%d0 - fadds %d0,%fp0 - bra t_frcinx - - .global stentox -stentox: -|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? - bges TENOK1 - bra EXPBORS - -TENOK1: - cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ? - bles TENMAIN - bra EXPBORS - -TENMAIN: -|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 - - fmovex %fp0,%fp1 - fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2 - - fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2) - movel %d2,-(%sp) - lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) - fmovel N(%a6),%fp1 | ...N --> FLOATING FMT - movel N(%a6),%d0 - movel %d0,%d2 - andil #0x3F,%d0 | ...D0 IS J - asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) - addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) - asrl #6,%d2 | ...d2 IS L, N = 64L + J - movel %d2,%d0 - asrl #1,%d0 | ...D0 IS M - subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J - addil #0x3FFF,%d2 - movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') - movel (%sp)+,%d2 - -|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -|--ADJFACT = 2^(M'). -|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. - - fmovex %fp1,%fp2 - - fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD - movel (%a1)+,FACT1(%a6) - - fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL - - movel (%a1)+,FACT1HI(%a6) - movel (%a1)+,FACT1LOW(%a6) - fsubx %fp1,%fp0 | ...X - N L_LEAD - movew (%a1)+,FACT2(%a6) - - fsubx %fp2,%fp0 | ...X - N L_TRAIL - - clrw FACT2+2(%a6) - movew (%a1)+,FACT2HI(%a6) - clrw FACT2HI+2(%a6) - clrl FACT2LOW(%a6) - - fmulx LOG10,%fp0 | ...FP0 IS R - - addw %d0,FACT1(%a6) - addw %d0,FACT2(%a6) - -expr: -|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. -|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). -|--FP0 IS R. 
THE FOLLOWING CODE COMPUTES -|-- 2**(M'+M) * 2**(J/64) * EXP(R) - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS S = R*R - - fmoved EXPA5,%fp2 | ...FP2 IS A5 - fmoved EXPA4,%fp3 | ...FP3 IS A4 - - fmulx %fp1,%fp2 | ...FP2 IS S*A5 - fmulx %fp1,%fp3 | ...FP3 IS S*A4 - - faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5 - faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4 - - fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5) - fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4) - - faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5) - fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4) - - fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5)) - faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4) - - faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1 - - -|--FINAL RECONSTRUCTION PROCESS -|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) - - fmulx FACT1(%a6),%fp0 - faddx FACT2(%a6),%fp0 - faddx FACT1(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - clrw ADJFACT+2(%a6) - movel #0x80000000,ADJFACT+4(%a6) - clrl ADJFACT+8(%a6) - fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/tbldo.S b/arch/m68k/fpsp040/tbldo.S deleted file mode 100644 index fd5c37a5a2b97cc24de7bfc0892ca79e54226d04..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/tbldo.S +++ /dev/null @@ -1,553 +0,0 @@ -| -| tbldo.sa 3.1 12/10/90 -| -| Modified: -| 8/16/90 chinds The table was constructed to use only one level -| of indirection in do_func for monadic -| functions. Dyadic functions require two -| levels, and the tables are still contained -| in do_func. The table is arranged for -| index with a 10-bit index, with the first -| 7 bits the opcode, and the remaining 3 -| the stag. For dyadic functions, all -| valid addresses are to the generic entry -| point. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
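The header above describes a flat jump table indexed by ten bits: seven opcode bits from the instruction's extension word and three stag bits classifying the source operand. A C sketch of that dispatch shape, not FPSP code; the handler names mirror the fsinh row ($02) of tblpre below, and the stag ordering follows the norm/zero/inf/nan/denorm convention of the table comments.

typedef void (*fp_handler)(void);

enum stag { NORM, ZERO, INF, NAN_OP, DENORM };  /* tags 5..7: error */

extern void ssinh(void), szero(void), sinf(void),
            src_nan(void), ssinhd(void), serror(void);

/* One 8-entry row per 7-bit opcode; row $02 is fsinh, as below. */
static const fp_handler tbl[0x38 << 3] = {
    [(0x02 << 3) | NORM]   = ssinh,
    [(0x02 << 3) | ZERO]   = szero,
    [(0x02 << 3) | INF]    = sinf,
    [(0x02 << 3) | NAN_OP] = src_nan,
    [(0x02 << 3) | DENORM] = ssinhd,
    /* ...remaining rows; empty slots fall back to serror... */
};

static void dispatch(unsigned opcode, enum stag tag)
{
    fp_handler h = tbl[(opcode << 3) | tag];    /* one indirection */
    (h ? h : serror)();
}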
- -|TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref ld_pinf,ld_pone,ld_ppi2 - |xref t_dz2,t_operr - |xref serror,sone,szero,sinf,snzrinx - |xref sopr_inf,spi_2,src_nan,szr_inf - - |xref smovcr - |xref pmod,prem,pscale - |xref satanh,satanhd - |xref sacos,sacosd,sasin,sasind,satan,satand - |xref setox,setoxd,setoxm1,setoxm1d,setoxm1i - |xref sgetexp,sgetexpd,sgetman,sgetmand - |xref sint,sintd,sintrz - |xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz - |xref scos,scosd,ssin,ssind,stan,stand - |xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd - |xref sslog10,sslog2,sslogn,sslognp1 - |xref sslog10d,sslog2d,sslognd,slognp1d - |xref stentox,stentoxd,stwotox,stwotoxd - -| instruction ;opcode-stag Notes - .global tblpre -tblpre: - .long smovcr |$00-0 fmovecr all - .long smovcr |$00-1 fmovecr all - .long smovcr |$00-2 fmovecr all - .long smovcr |$00-3 fmovecr all - .long smovcr |$00-4 fmovecr all - .long smovcr |$00-5 fmovecr all - .long smovcr |$00-6 fmovecr all - .long smovcr |$00-7 fmovecr all - - .long sint |$01-0 fint norm - .long szero |$01-1 fint zero - .long sinf |$01-2 fint inf - .long src_nan |$01-3 fint nan - .long sintd |$01-4 fint denorm inx - .long serror |$01-5 fint ERROR - .long serror |$01-6 fint ERROR - .long serror |$01-7 fint ERROR - - .long ssinh |$02-0 fsinh norm - .long szero |$02-1 fsinh zero - .long sinf |$02-2 fsinh inf - .long src_nan |$02-3 fsinh nan - .long ssinhd |$02-4 fsinh denorm - .long serror |$02-5 fsinh ERROR - .long serror |$02-6 fsinh ERROR - .long serror |$02-7 fsinh ERROR - - .long sintrz |$03-0 fintrz norm - .long szero |$03-1 fintrz zero - .long sinf |$03-2 fintrz inf - .long src_nan |$03-3 fintrz nan - .long snzrinx |$03-4 fintrz denorm inx - .long serror |$03-5 fintrz ERROR - .long serror |$03-6 fintrz ERROR - .long serror |$03-7 fintrz ERROR - - .long serror |$04-0 ERROR - illegal extension - .long serror |$04-1 ERROR - illegal extension - .long serror |$04-2 ERROR - illegal extension - .long serror |$04-3 ERROR - illegal extension - .long serror |$04-4 ERROR - illegal extension - .long serror |$04-5 ERROR - illegal extension - .long serror |$04-6 ERROR - illegal extension - .long serror |$04-7 ERROR - illegal extension - - .long serror |$05-0 ERROR - illegal extension - .long serror |$05-1 ERROR - illegal extension - .long serror |$05-2 ERROR - illegal extension - .long serror |$05-3 ERROR - illegal extension - .long serror |$05-4 ERROR - illegal extension - .long serror |$05-5 ERROR - illegal extension - .long serror |$05-6 ERROR - illegal extension - .long serror |$05-7 ERROR - illegal extension - - .long sslognp1 |$06-0 flognp1 norm - .long szero |$06-1 flognp1 zero - .long sopr_inf |$06-2 flognp1 inf - .long src_nan |$06-3 flognp1 nan - .long slognp1d |$06-4 flognp1 denorm - .long serror |$06-5 flognp1 ERROR - .long serror |$06-6 flognp1 ERROR - .long serror |$06-7 flognp1 ERROR - - .long serror |$07-0 ERROR - illegal extension - .long serror |$07-1 ERROR - illegal extension - .long serror |$07-2 ERROR - illegal extension - .long serror |$07-3 ERROR - illegal extension - .long serror |$07-4 ERROR - illegal extension - .long serror |$07-5 ERROR - illegal extension - .long serror |$07-6 ERROR - illegal extension - .long serror |$07-7 ERROR - illegal extension - - .long setoxm1 |$08-0 fetoxm1 norm - .long szero |$08-1 fetoxm1 zero - .long setoxm1i |$08-2 fetoxm1 inf - .long src_nan |$08-3 fetoxm1 nan - .long setoxm1d |$08-4 fetoxm1 denorm - .long serror |$08-5 fetoxm1 ERROR - .long serror |$08-6 fetoxm1 ERROR - 
.long serror |$08-7 fetoxm1 ERROR - - .long stanh |$09-0 ftanh norm - .long szero |$09-1 ftanh zero - .long sone |$09-2 ftanh inf - .long src_nan |$09-3 ftanh nan - .long stanhd |$09-4 ftanh denorm - .long serror |$09-5 ftanh ERROR - .long serror |$09-6 ftanh ERROR - .long serror |$09-7 ftanh ERROR - - .long satan |$0a-0 fatan norm - .long szero |$0a-1 fatan zero - .long spi_2 |$0a-2 fatan inf - .long src_nan |$0a-3 fatan nan - .long satand |$0a-4 fatan denorm - .long serror |$0a-5 fatan ERROR - .long serror |$0a-6 fatan ERROR - .long serror |$0a-7 fatan ERROR - - .long serror |$0b-0 ERROR - illegal extension - .long serror |$0b-1 ERROR - illegal extension - .long serror |$0b-2 ERROR - illegal extension - .long serror |$0b-3 ERROR - illegal extension - .long serror |$0b-4 ERROR - illegal extension - .long serror |$0b-5 ERROR - illegal extension - .long serror |$0b-6 ERROR - illegal extension - .long serror |$0b-7 ERROR - illegal extension - - .long sasin |$0c-0 fasin norm - .long szero |$0c-1 fasin zero - .long t_operr |$0c-2 fasin inf - .long src_nan |$0c-3 fasin nan - .long sasind |$0c-4 fasin denorm - .long serror |$0c-5 fasin ERROR - .long serror |$0c-6 fasin ERROR - .long serror |$0c-7 fasin ERROR - - .long satanh |$0d-0 fatanh norm - .long szero |$0d-1 fatanh zero - .long t_operr |$0d-2 fatanh inf - .long src_nan |$0d-3 fatanh nan - .long satanhd |$0d-4 fatanh denorm - .long serror |$0d-5 fatanh ERROR - .long serror |$0d-6 fatanh ERROR - .long serror |$0d-7 fatanh ERROR - - .long ssin |$0e-0 fsin norm - .long szero |$0e-1 fsin zero - .long t_operr |$0e-2 fsin inf - .long src_nan |$0e-3 fsin nan - .long ssind |$0e-4 fsin denorm - .long serror |$0e-5 fsin ERROR - .long serror |$0e-6 fsin ERROR - .long serror |$0e-7 fsin ERROR - - .long stan |$0f-0 ftan norm - .long szero |$0f-1 ftan zero - .long t_operr |$0f-2 ftan inf - .long src_nan |$0f-3 ftan nan - .long stand |$0f-4 ftan denorm - .long serror |$0f-5 ftan ERROR - .long serror |$0f-6 ftan ERROR - .long serror |$0f-7 ftan ERROR - - .long setox |$10-0 fetox norm - .long ld_pone |$10-1 fetox zero - .long szr_inf |$10-2 fetox inf - .long src_nan |$10-3 fetox nan - .long setoxd |$10-4 fetox denorm - .long serror |$10-5 fetox ERROR - .long serror |$10-6 fetox ERROR - .long serror |$10-7 fetox ERROR - - .long stwotox |$11-0 ftwotox norm - .long ld_pone |$11-1 ftwotox zero - .long szr_inf |$11-2 ftwotox inf - .long src_nan |$11-3 ftwotox nan - .long stwotoxd |$11-4 ftwotox denorm - .long serror |$11-5 ftwotox ERROR - .long serror |$11-6 ftwotox ERROR - .long serror |$11-7 ftwotox ERROR - - .long stentox |$12-0 ftentox norm - .long ld_pone |$12-1 ftentox zero - .long szr_inf |$12-2 ftentox inf - .long src_nan |$12-3 ftentox nan - .long stentoxd |$12-4 ftentox denorm - .long serror |$12-5 ftentox ERROR - .long serror |$12-6 ftentox ERROR - .long serror |$12-7 ftentox ERROR - - .long serror |$13-0 ERROR - illegal extension - .long serror |$13-1 ERROR - illegal extension - .long serror |$13-2 ERROR - illegal extension - .long serror |$13-3 ERROR - illegal extension - .long serror |$13-4 ERROR - illegal extension - .long serror |$13-5 ERROR - illegal extension - .long serror |$13-6 ERROR - illegal extension - .long serror |$13-7 ERROR - illegal extension - - .long sslogn |$14-0 flogn norm - .long t_dz2 |$14-1 flogn zero - .long sopr_inf |$14-2 flogn inf - .long src_nan |$14-3 flogn nan - .long sslognd |$14-4 flogn denorm - .long serror |$14-5 flogn ERROR - .long serror |$14-6 flogn ERROR - .long serror |$14-7 flogn ERROR - - .long sslog10 |$15-0 
flog10 norm - .long t_dz2 |$15-1 flog10 zero - .long sopr_inf |$15-2 flog10 inf - .long src_nan |$15-3 flog10 nan - .long sslog10d |$15-4 flog10 denorm - .long serror |$15-5 flog10 ERROR - .long serror |$15-6 flog10 ERROR - .long serror |$15-7 flog10 ERROR - - .long sslog2 |$16-0 flog2 norm - .long t_dz2 |$16-1 flog2 zero - .long sopr_inf |$16-2 flog2 inf - .long src_nan |$16-3 flog2 nan - .long sslog2d |$16-4 flog2 denorm - .long serror |$16-5 flog2 ERROR - .long serror |$16-6 flog2 ERROR - .long serror |$16-7 flog2 ERROR - - .long serror |$17-0 ERROR - illegal extension - .long serror |$17-1 ERROR - illegal extension - .long serror |$17-2 ERROR - illegal extension - .long serror |$17-3 ERROR - illegal extension - .long serror |$17-4 ERROR - illegal extension - .long serror |$17-5 ERROR - illegal extension - .long serror |$17-6 ERROR - illegal extension - .long serror |$17-7 ERROR - illegal extension - - .long serror |$18-0 ERROR - illegal extension - .long serror |$18-1 ERROR - illegal extension - .long serror |$18-2 ERROR - illegal extension - .long serror |$18-3 ERROR - illegal extension - .long serror |$18-4 ERROR - illegal extension - .long serror |$18-5 ERROR - illegal extension - .long serror |$18-6 ERROR - illegal extension - .long serror |$18-7 ERROR - illegal extension - - .long scosh |$19-0 fcosh norm - .long ld_pone |$19-1 fcosh zero - .long ld_pinf |$19-2 fcosh inf - .long src_nan |$19-3 fcosh nan - .long scoshd |$19-4 fcosh denorm - .long serror |$19-5 fcosh ERROR - .long serror |$19-6 fcosh ERROR - .long serror |$19-7 fcosh ERROR - - .long serror |$1a-0 ERROR - illegal extension - .long serror |$1a-1 ERROR - illegal extension - .long serror |$1a-2 ERROR - illegal extension - .long serror |$1a-3 ERROR - illegal extension - .long serror |$1a-4 ERROR - illegal extension - .long serror |$1a-5 ERROR - illegal extension - .long serror |$1a-6 ERROR - illegal extension - .long serror |$1a-7 ERROR - illegal extension - - .long serror |$1b-0 ERROR - illegal extension - .long serror |$1b-1 ERROR - illegal extension - .long serror |$1b-2 ERROR - illegal extension - .long serror |$1b-3 ERROR - illegal extension - .long serror |$1b-4 ERROR - illegal extension - .long serror |$1b-5 ERROR - illegal extension - .long serror |$1b-6 ERROR - illegal extension - .long serror |$1b-7 ERROR - illegal extension - - .long sacos |$1c-0 facos norm - .long ld_ppi2 |$1c-1 facos zero - .long t_operr |$1c-2 facos inf - .long src_nan |$1c-3 facos nan - .long sacosd |$1c-4 facos denorm - .long serror |$1c-5 facos ERROR - .long serror |$1c-6 facos ERROR - .long serror |$1c-7 facos ERROR - - .long scos |$1d-0 fcos norm - .long ld_pone |$1d-1 fcos zero - .long t_operr |$1d-2 fcos inf - .long src_nan |$1d-3 fcos nan - .long scosd |$1d-4 fcos denorm - .long serror |$1d-5 fcos ERROR - .long serror |$1d-6 fcos ERROR - .long serror |$1d-7 fcos ERROR - - .long sgetexp |$1e-0 fgetexp norm - .long szero |$1e-1 fgetexp zero - .long t_operr |$1e-2 fgetexp inf - .long src_nan |$1e-3 fgetexp nan - .long sgetexpd |$1e-4 fgetexp denorm - .long serror |$1e-5 fgetexp ERROR - .long serror |$1e-6 fgetexp ERROR - .long serror |$1e-7 fgetexp ERROR - - .long sgetman |$1f-0 fgetman norm - .long szero |$1f-1 fgetman zero - .long t_operr |$1f-2 fgetman inf - .long src_nan |$1f-3 fgetman nan - .long sgetmand |$1f-4 fgetman denorm - .long serror |$1f-5 fgetman ERROR - .long serror |$1f-6 fgetman ERROR - .long serror |$1f-7 fgetman ERROR - - .long serror |$20-0 ERROR - illegal extension - .long serror |$20-1 ERROR - illegal extension - 
.long serror |$20-2 ERROR - illegal extension - .long serror |$20-3 ERROR - illegal extension - .long serror |$20-4 ERROR - illegal extension - .long serror |$20-5 ERROR - illegal extension - .long serror |$20-6 ERROR - illegal extension - .long serror |$20-7 ERROR - illegal extension - - .long pmod |$21-0 fmod all - .long pmod |$21-1 fmod all - .long pmod |$21-2 fmod all - .long pmod |$21-3 fmod all - .long pmod |$21-4 fmod all - .long serror |$21-5 fmod ERROR - .long serror |$21-6 fmod ERROR - .long serror |$21-7 fmod ERROR - - .long serror |$22-0 ERROR - illegal extension - .long serror |$22-1 ERROR - illegal extension - .long serror |$22-2 ERROR - illegal extension - .long serror |$22-3 ERROR - illegal extension - .long serror |$22-4 ERROR - illegal extension - .long serror |$22-5 ERROR - illegal extension - .long serror |$22-6 ERROR - illegal extension - .long serror |$22-7 ERROR - illegal extension - - .long serror |$23-0 ERROR - illegal extension - .long serror |$23-1 ERROR - illegal extension - .long serror |$23-2 ERROR - illegal extension - .long serror |$23-3 ERROR - illegal extension - .long serror |$23-4 ERROR - illegal extension - .long serror |$23-5 ERROR - illegal extension - .long serror |$23-6 ERROR - illegal extension - .long serror |$23-7 ERROR - illegal extension - - .long serror |$24-0 ERROR - illegal extension - .long serror |$24-1 ERROR - illegal extension - .long serror |$24-2 ERROR - illegal extension - .long serror |$24-3 ERROR - illegal extension - .long serror |$24-4 ERROR - illegal extension - .long serror |$24-5 ERROR - illegal extension - .long serror |$24-6 ERROR - illegal extension - .long serror |$24-7 ERROR - illegal extension - - .long prem |$25-0 frem all - .long prem |$25-1 frem all - .long prem |$25-2 frem all - .long prem |$25-3 frem all - .long prem |$25-4 frem all - .long serror |$25-5 frem ERROR - .long serror |$25-6 frem ERROR - .long serror |$25-7 frem ERROR - - .long pscale |$26-0 fscale all - .long pscale |$26-1 fscale all - .long pscale |$26-2 fscale all - .long pscale |$26-3 fscale all - .long pscale |$26-4 fscale all - .long serror |$26-5 fscale ERROR - .long serror |$26-6 fscale ERROR - .long serror |$26-7 fscale ERROR - - .long serror |$27-0 ERROR - illegal extension - .long serror |$27-1 ERROR - illegal extension - .long serror |$27-2 ERROR - illegal extension - .long serror |$27-3 ERROR - illegal extension - .long serror |$27-4 ERROR - illegal extension - .long serror |$27-5 ERROR - illegal extension - .long serror |$27-6 ERROR - illegal extension - .long serror |$27-7 ERROR - illegal extension - - .long serror |$28-0 ERROR - illegal extension - .long serror |$28-1 ERROR - illegal extension - .long serror |$28-2 ERROR - illegal extension - .long serror |$28-3 ERROR - illegal extension - .long serror |$28-4 ERROR - illegal extension - .long serror |$28-5 ERROR - illegal extension - .long serror |$28-6 ERROR - illegal extension - .long serror |$28-7 ERROR - illegal extension - - .long serror |$29-0 ERROR - illegal extension - .long serror |$29-1 ERROR - illegal extension - .long serror |$29-2 ERROR - illegal extension - .long serror |$29-3 ERROR - illegal extension - .long serror |$29-4 ERROR - illegal extension - .long serror |$29-5 ERROR - illegal extension - .long serror |$29-6 ERROR - illegal extension - .long serror |$29-7 ERROR - illegal extension - - .long serror |$2a-0 ERROR - illegal extension - .long serror |$2a-1 ERROR - illegal extension - .long serror |$2a-2 ERROR - illegal extension - .long serror |$2a-3 ERROR - illegal 
extension - .long serror |$2a-4 ERROR - illegal extension - .long serror |$2a-5 ERROR - illegal extension - .long serror |$2a-6 ERROR - illegal extension - .long serror |$2a-7 ERROR - illegal extension - - .long serror |$2b-0 ERROR - illegal extension - .long serror |$2b-1 ERROR - illegal extension - .long serror |$2b-2 ERROR - illegal extension - .long serror |$2b-3 ERROR - illegal extension - .long serror |$2b-4 ERROR - illegal extension - .long serror |$2b-5 ERROR - illegal extension - .long serror |$2b-6 ERROR - illegal extension - .long serror |$2b-7 ERROR - illegal extension - - .long serror |$2c-0 ERROR - illegal extension - .long serror |$2c-1 ERROR - illegal extension - .long serror |$2c-2 ERROR - illegal extension - .long serror |$2c-3 ERROR - illegal extension - .long serror |$2c-4 ERROR - illegal extension - .long serror |$2c-5 ERROR - illegal extension - .long serror |$2c-6 ERROR - illegal extension - .long serror |$2c-7 ERROR - illegal extension - - .long serror |$2d-0 ERROR - illegal extension - .long serror |$2d-1 ERROR - illegal extension - .long serror |$2d-2 ERROR - illegal extension - .long serror |$2d-3 ERROR - illegal extension - .long serror |$2d-4 ERROR - illegal extension - .long serror |$2d-5 ERROR - illegal extension - .long serror |$2d-6 ERROR - illegal extension - .long serror |$2d-7 ERROR - illegal extension - - .long serror |$2e-0 ERROR - illegal extension - .long serror |$2e-1 ERROR - illegal extension - .long serror |$2e-2 ERROR - illegal extension - .long serror |$2e-3 ERROR - illegal extension - .long serror |$2e-4 ERROR - illegal extension - .long serror |$2e-5 ERROR - illegal extension - .long serror |$2e-6 ERROR - illegal extension - .long serror |$2e-7 ERROR - illegal extension - - .long serror |$2f-0 ERROR - illegal extension - .long serror |$2f-1 ERROR - illegal extension - .long serror |$2f-2 ERROR - illegal extension - .long serror |$2f-3 ERROR - illegal extension - .long serror |$2f-4 ERROR - illegal extension - .long serror |$2f-5 ERROR - illegal extension - .long serror |$2f-6 ERROR - illegal extension - .long serror |$2f-7 ERROR - illegal extension - - .long ssincos |$30-0 fsincos norm - .long ssincosz |$30-1 fsincos zero - .long ssincosi |$30-2 fsincos inf - .long ssincosnan |$30-3 fsincos nan - .long ssincosd |$30-4 fsincos denorm - .long serror |$30-5 fsincos ERROR - .long serror |$30-6 fsincos ERROR - .long serror |$30-7 fsincos ERROR - - .long ssincos |$31-0 fsincos norm - .long ssincosz |$31-1 fsincos zero - .long ssincosi |$31-2 fsincos inf - .long ssincosnan |$31-3 fsincos nan - .long ssincosd |$31-4 fsincos denorm - .long serror |$31-5 fsincos ERROR - .long serror |$31-6 fsincos ERROR - .long serror |$31-7 fsincos ERROR - - .long ssincos |$32-0 fsincos norm - .long ssincosz |$32-1 fsincos zero - .long ssincosi |$32-2 fsincos inf - .long ssincosnan |$32-3 fsincos nan - .long ssincosd |$32-4 fsincos denorm - .long serror |$32-5 fsincos ERROR - .long serror |$32-6 fsincos ERROR - .long serror |$32-7 fsincos ERROR - - .long ssincos |$33-0 fsincos norm - .long ssincosz |$33-1 fsincos zero - .long ssincosi |$33-2 fsincos inf - .long ssincosnan |$33-3 fsincos nan - .long ssincosd |$33-4 fsincos denorm - .long serror |$33-5 fsincos ERROR - .long serror |$33-6 fsincos ERROR - .long serror |$33-7 fsincos ERROR - - .long ssincos |$34-0 fsincos norm - .long ssincosz |$34-1 fsincos zero - .long ssincosi |$34-2 fsincos inf - .long ssincosnan |$34-3 fsincos nan - .long ssincosd |$34-4 fsincos denorm - .long serror |$34-5 fsincos ERROR - .long serror 
|$34-6 fsincos ERROR - .long serror |$34-7 fsincos ERROR - - .long ssincos |$35-0 fsincos norm - .long ssincosz |$35-1 fsincos zero - .long ssincosi |$35-2 fsincos inf - .long ssincosnan |$35-3 fsincos nan - .long ssincosd |$35-4 fsincos denorm - .long serror |$35-5 fsincos ERROR - .long serror |$35-6 fsincos ERROR - .long serror |$35-7 fsincos ERROR - - .long ssincos |$36-0 fsincos norm - .long ssincosz |$36-1 fsincos zero - .long ssincosi |$36-2 fsincos inf - .long ssincosnan |$36-3 fsincos nan - .long ssincosd |$36-4 fsincos denorm - .long serror |$36-5 fsincos ERROR - .long serror |$36-6 fsincos ERROR - .long serror |$36-7 fsincos ERROR - - .long ssincos |$37-0 fsincos norm - .long ssincosz |$37-1 fsincos zero - .long ssincosi |$37-2 fsincos inf - .long ssincosnan |$37-3 fsincos nan - .long ssincosd |$37-4 fsincos denorm - .long serror |$37-5 fsincos ERROR - .long serror |$37-6 fsincos ERROR - .long serror |$37-7 fsincos ERROR - - |end diff --git a/arch/m68k/fpsp040/util.S b/arch/m68k/fpsp040/util.S deleted file mode 100644 index 65b26fa88c60a4918368a2990396ecb1d19b44c6..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/util.S +++ /dev/null @@ -1,747 +0,0 @@ -| -| util.sa 3.7 7/29/91 -| -| This file contains routines used by other programs. -| -| ovf_res: used by overflow to force the correct -| result. ovf_r_k, ovf_r_x2, ovf_r_x3 are -| derivatives of this routine. -| get_fline: get user's opcode word -| g_dfmtou: returns the destination format. -| g_opcls: returns the opclass of the float instruction. -| g_rndpr: returns the rounding precision. -| reg_dest: write byte, word, or long data to Dn -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|UTIL idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref mem_read - - .global g_dfmtou - .global g_opcls - .global g_rndpr - .global get_fline - .global reg_dest - -| -| Final result table for ovf_res. Note that the negative counterparts -| are unnecessary as ovf_res always returns the sign separately from -| the exponent. -| ;+inf -EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000 -| ;largest +ext -EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 -| ;largest magnitude +sgl in ext -SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000 -| ;largest magnitude +dbl in ext -DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 -| ;largest -ext - -tblovfl: - .long EXT_RN - .long EXT_RZ - .long EXT_RM - .long EXT_RP - .long SGL_RN - .long SGL_RZ - .long SGL_RM - .long SGL_RP - .long DBL_RN - .long DBL_RZ - .long DBL_RM - .long DBL_RP - .long error - .long error - .long error - .long error - - -| -| ovf_r_k --- overflow result calculation -| -| This entry point is used by kernel_ex. -| -| This forces the destination precision to be extended -| -| Input: operand in ETEMP -| Output: a result is in ETEMP (internal extended format) -| - .global ovf_r_k -ovf_r_k: - lea ETEMP(%a6),%a0 |a0 points to source operand - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) |convert to internal IEEE format - -| -| ovf_r_x2 --- overflow result calculation -| -| This entry point used by x_ovfl. 
(opclass 0 and 2) -| -| Input a0 points to an operand in the internal extended format -| Output a0 points to the result in the internal extended format -| -| This sets the round precision according to the user's FPCR unless the -| instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul, -| fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg. -| If the instruction is fsgldiv of fsglmul, the rounding precision must be -| extended. If the instruction is not fsgldiv or fsglmul but a force- -| precision instruction, the rounding precision is then set to the force -| precision. - - .global ovf_r_x2 -ovf_r_x2: - btstb #E3,E_BYTE(%a6) |check for nu exception - beql ovf_e1_exc |it is cu exception -ovf_e3_exc: - movew CMDREG3B(%a6),%d0 |get the command word - andiw #0x00000060,%d0 |clear all bits except 6 and 5 - cmpil #0x00000040,%d0 - beql ovff_sgl |force precision is single - cmpil #0x00000060,%d0 - beql ovff_dbl |force precision is double - movew CMDREG3B(%a6),%d0 |get the command word again - andil #0x7f,%d0 |clear all except operation - cmpil #0x33,%d0 - beql ovf_fsgl |fsglmul or fsgldiv - cmpil #0x30,%d0 - beql ovf_fsgl - bra ovf_fpcr |instruction is none of the above -| ;use FPCR -ovf_e1_exc: - movew CMDREG1B(%a6),%d0 |get command word - andil #0x00000044,%d0 |clear all bits except 6 and 2 - cmpil #0x00000040,%d0 - beql ovff_sgl |the instruction is force single - cmpil #0x00000044,%d0 - beql ovff_dbl |the instruction is force double - movew CMDREG1B(%a6),%d0 |again get the command word - andil #0x0000007f,%d0 |clear all except the op code - cmpil #0x00000027,%d0 - beql ovf_fsgl |fsglmul - cmpil #0x00000024,%d0 - beql ovf_fsgl |fsgldiv - bra ovf_fpcr |none of the above, use FPCR -| -| -| Inst is either fsgldiv or fsglmul. Force extended precision. -| -ovf_fsgl: - clrl %d0 - bra ovf_res - -ovff_sgl: - movel #0x00000001,%d0 |set single - bra ovf_res -ovff_dbl: - movel #0x00000002,%d0 |set double - bra ovf_res -| -| The precision is in the fpcr. -| -ovf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |set round precision - bra ovf_res - -| -| -| ovf_r_x3 --- overflow result calculation -| -| This entry point used by x_ovfl. (opclass 3 only) -| -| Input a0 points to an operand in the internal extended format -| Output a0 points to the result in the internal extended format -| -| This sets the round precision according to the destination size. 
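The tblovfl table above and the EXT_*/DBL_*/SGL_* routines that follow encode the IEEE default-result rule for overflow: round-to-nearest delivers infinity, round-to-zero the largest finite number, and the two directed modes pick between them by sign. A C sketch of one row of that rule in double precision; the RN/RZ/RM/RP encoding follows FPCR_MODE.

#include <float.h>
#include <math.h>

enum rnd { RN, RZ, RM, RP };   /* rounding mode, as in FPCR_MODE */

double ovf_res_sketch(int negative, enum rnd mode)
{
    double mag = HUGE_VAL;                           /* +infinity      */

    switch (mode) {
    case RN: mag = HUGE_VAL; break;                  /* +/- infinity   */
    case RZ: mag = DBL_MAX;  break;                  /* largest finite */
    case RM: mag = negative ? HUGE_VAL : DBL_MAX; break;
    case RP: mag = negative ? DBL_MAX  : HUGE_VAL; break;
    }
    return negative ? -mag : mag;  /* sign applied last, cf. set_sign */
}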
-| - .global ovf_r_x3 -ovf_r_x3: - bsr g_dfmtou |get dest fmt in d0{1:0} -| ;for fmovout, the destination format -| ;is the rounding precision - -| -| ovf_res --- overflow result calculation -| -| Input: -| a0 points to operand in internal extended format -| Output: -| a0 points to result in internal extended format -| - .global ovf_res -ovf_res: - lsll #2,%d0 |move round precision to d0{3:2} - bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode - orl %d1,%d0 |index is fmt:mode in d0{3:0} - leal tblovfl,%a1 |load a1 with table address - movel %a1@(%d0:l:4),%a1 |use d0 as index to the table - jmp (%a1) |go to the correct routine -| -|case DEST_FMT = EXT -| -EXT_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra set_sign |now go set the sign -EXT_RZ: - leal EXT_PLRG,%a1 |answer is +/- large number - bra set_sign |now go set the sign -EXT_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs e_rm_pos -e_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bra end_ovfr -e_rm_pos: - leal EXT_PLRG,%a1 |answer is large positive number - bra end_ovfr -EXT_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs e_rp_pos -e_rp_neg: - leal EXT_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bra end_ovfr -e_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra end_ovfr -| -|case DEST_FMT = DBL -| -DBL_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra set_sign -DBL_RZ: - leal DBL_PLRG,%a1 |answer is +/- large number - bra set_sign |now go set the sign -DBL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs d_rm_pos -d_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bra end_ovfr |inf is same for all precisions (ext,dbl,sgl) -d_rm_pos: - leal DBL_PLRG,%a1 |answer is large positive number - bra end_ovfr -DBL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs d_rp_pos -d_rp_neg: - leal DBL_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bra end_ovfr -d_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra end_ovfr -| -|case DEST_FMT = SGL -| -SGL_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bras set_sign -SGL_RZ: - leal SGL_PLRG,%a1 |answer is +/- large number - bras set_sign -SGL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs s_rm_pos -s_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bras end_ovfr -s_rm_pos: - leal SGL_PLRG,%a1 |answer is large positive number - bras end_ovfr -SGL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs s_rp_pos -s_rp_neg: - leal SGL_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bras end_ovfr -s_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bras end_ovfr - -set_sign: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs end_ovfr -neg_sign: - bsetb #neg_bit,FPSR_CC(%a6) - -end_ovfr: - movew LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign - movel LOCAL_HI(%a1),LOCAL_HI(%a0) - movel LOCAL_LO(%a1),LOCAL_LO(%a0) - rts - - -| -| ERROR -| -error: - rts -| -| get_fline --- get f-line opcode of interrupted instruction -| -| Returns opcode in the low word of d0. 
-| -get_fline: - movel USER_FPIAR(%a6),%a0 |opcode address - movel #0,-(%a7) |reserve a word on the stack - leal 2(%a7),%a1 |point to low word of temporary - movel #2,%d0 |count - bsrl mem_read - movel (%a7)+,%d0 - rts -| -| g_rndpr --- put rounding precision in d0{1:0} -| -| valid return codes are: -| 00 - extended -| 01 - single -| 10 - double -| -| begin -| get rounding precision (cmdreg3b{6:5}) -| begin -| case opclass = 011 (move out) -| get destination format - this is the also the rounding precision -| -| case opclass = 0x0 -| if E3 -| *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL -| *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL -| case RndPr(from cmdreg3b{6:5} = 00 | 01 -| use precision from FPCR{7:6} -| case 00 then RND_PREC = EXT -| case 01 then RND_PREC = SGL -| case 10 then RND_PREC = DBL -| else E1 -| use precision in FPCR{7:6} -| case 00 then RND_PREC = EXT -| case 01 then RND_PREC = SGL -| case 10 then RND_PREC = DBL -| end -| -g_rndpr: - bsr g_opcls |get opclass in d0{2:0} - cmpw #0x0003,%d0 |check for opclass 011 - bnes op_0x0 - -| -| For move out instructions (opclass 011) the destination format -| is the same as the rounding precision. Pass results from g_dfmtou. -| - bsr g_dfmtou - rts -op_0x0: - btstb #E3,E_BYTE(%a6) - beql unf_e1_exc |branch to e1 underflow -unf_e3_exc: - movel CMDREG3B(%a6),%d0 |rounding precision in d0{10:9} - bfextu %d0{#9:#2},%d0 |move the rounding prec bits to d0{1:0} - cmpil #0x2,%d0 - beql unff_sgl |force precision is single - cmpil #0x3,%d0 |force precision is double - beql unff_dbl - movew CMDREG3B(%a6),%d0 |get the command word again - andil #0x7f,%d0 |clear all except operation - cmpil #0x33,%d0 - beql unf_fsgl |fsglmul or fsgldiv - cmpil #0x30,%d0 - beql unf_fsgl |fsgldiv or fsglmul - bra unf_fpcr -unf_e1_exc: - movel CMDREG1B(%a6),%d0 |get 32 bits off the stack, 1st 16 bits -| ;are the command word - andil #0x00440000,%d0 |clear all bits except bits 6 and 2 - cmpil #0x00400000,%d0 - beql unff_sgl |force single - cmpil #0x00440000,%d0 |force double - beql unff_dbl - movel CMDREG1B(%a6),%d0 |get the command word again - andil #0x007f0000,%d0 |clear all bits except the operation - cmpil #0x00270000,%d0 - beql unf_fsgl |fsglmul - cmpil #0x00240000,%d0 - beql unf_fsgl |fsgldiv - bra unf_fpcr - -| -| Convert to return format. The values from cmdreg3b and the return -| values are: -| cmdreg3b return precision -| -------- ------ --------- -| 00,01 0 ext -| 10 1 sgl -| 11 2 dbl -| Force single -| -unff_sgl: - movel #1,%d0 |return 1 - rts -| -| Force double -| -unff_dbl: - movel #2,%d0 |return 2 - rts -| -| Force extended -| -unf_fsgl: - movel #0,%d0 - rts -| -| Get rounding precision set in FPCR{7:6}. -| -unf_fpcr: - movel USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6} - bfextu %d0{#24:#2},%d0 |move the rounding prec bits to d0{1:0} - rts -| -| g_opcls --- put opclass in d0{2:0} -| -g_opcls: - btstb #E3,E_BYTE(%a6) - beqs opc_1b |if set, go to cmdreg1b -opc_3b: - clrl %d0 |if E3, only opclass 0x0 is possible - rts -opc_1b: - movel CMDREG1B(%a6),%d0 - bfextu %d0{#0:#3},%d0 |shift opclass bits d0{31:29} to d0{2:0} - rts -| -| g_dfmtou --- put destination format in d0{1:0} -| -| If E1, the format is from cmdreg1b{12:10} -| If E3, the format is extended. -| -| Dest. Fmt. 
-| extended 010 -> 00 -| single 001 -> 01 -| double 101 -> 10 -| -g_dfmtou: - btstb #E3,E_BYTE(%a6) - beqs op011 - clrl %d0 |if E1, size is always ext - rts -op011: - movel CMDREG1B(%a6),%d0 - bfextu %d0{#3:#3},%d0 |dest fmt from cmdreg1b{12:10} - cmpb #1,%d0 |check for single - bnes not_sgl - movel #1,%d0 - rts -not_sgl: - cmpb #5,%d0 |check for double - bnes not_dbl - movel #2,%d0 - rts -not_dbl: - clrl %d0 |must be extended - rts - -| -| -| Final result table for unf_sub. Note that the negative counterparts -| are unnecessary as unf_sub always returns the sign separately from -| the exponent. -| ;+zero -EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000 -| ;+zero -SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000 -| ;+zero -DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000 -| ;smallest +ext denorm -EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000 -| ;smallest +sgl denorm -SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000 -| ;smallest +dbl denorm -DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000 -| -| UNF_SUB --- underflow result calculation -| -| Input: -| d0 contains round precision -| a0 points to input operand in the internal extended format -| -| Output: -| a0 points to correct internal extended precision result. -| - -tblunf: - .long uEXT_RN - .long uEXT_RZ - .long uEXT_RM - .long uEXT_RP - .long uSGL_RN - .long uSGL_RZ - .long uSGL_RM - .long uSGL_RP - .long uDBL_RN - .long uDBL_RZ - .long uDBL_RM - .long uDBL_RP - .long uDBL_RN - .long uDBL_RZ - .long uDBL_RM - .long uDBL_RP - - .global unf_sub -unf_sub: - lsll #2,%d0 |move round precision to d0{3:2} - bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode - orl %d1,%d0 |index is fmt:mode in d0{3:0} - leal tblunf,%a1 |load a1 with table address - movel %a1@(%d0:l:4),%a1 |use d0 as index to the table - jmp (%a1) |go to the correct routine -| -|case DEST_FMT = EXT -| -uEXT_RN: - leal EXT_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uEXT_RZ: - leal EXT_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uEXT_RM: - tstb LOCAL_SGN(%a0) |if negative underflow - beqs ue_rm_pos -ue_rm_neg: - leal EXT_PSML,%a1 |answer is negative smallest denorm - bsetb #neg_bit,FPSR_CC(%a6) - bra end_unfr -ue_rm_pos: - leal EXT_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bra end_unfr -uEXT_RP: - tstb LOCAL_SGN(%a0) |if negative underflow - beqs ue_rp_pos -ue_rp_neg: - leal EXT_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bra end_unfr -ue_rp_pos: - leal EXT_PSML,%a1 |answer is positive smallest denorm - bra end_unfr -| -|case DEST_FMT = DBL -| -uDBL_RN: - leal DBL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign -uDBL_RZ: - leal DBL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uDBL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs ud_rm_pos -ud_rm_neg: - leal DBL_PSML,%a1 |answer is smallest denormalized negative - bsetb #neg_bit,FPSR_CC(%a6) - bra end_unfr -ud_rm_pos: - leal DBL_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bra end_unfr -uDBL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs ud_rp_pos -ud_rp_neg: - leal DBL_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bra end_unfr -ud_rp_pos: - leal DBL_PSML,%a1 |answer is smallest denormalized negative - bra end_unfr -| -|case DEST_FMT = SGL -| -uSGL_RN: - leal SGL_PZRO,%a1 |answer is +/- 
zero - bsetb #z_bit,FPSR_CC(%a6) - bras uset_sign -uSGL_RZ: - leal SGL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bras uset_sign -uSGL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs us_rm_pos -us_rm_neg: - leal SGL_PSML,%a1 |answer is smallest denormalized negative - bsetb #neg_bit,FPSR_CC(%a6) - bras end_unfr -us_rm_pos: - leal SGL_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bras end_unfr -uSGL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs us_rp_pos -us_rp_neg: - leal SGL_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bras end_unfr -us_rp_pos: - leal SGL_PSML,%a1 |answer is smallest denormalized positive - bras end_unfr - -uset_sign: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs end_unfr -uneg_sign: - bsetb #neg_bit,FPSR_CC(%a6) - -end_unfr: - movew LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign - movel LOCAL_HI(%a1),LOCAL_HI(%a0) - movel LOCAL_LO(%a1),LOCAL_LO(%a0) - rts -| -| reg_dest --- write byte, word, or long data to Dn -| -| -| Input: -| L_SCR1: Data -| d1: data size and dest register number formatted as: -| -| 32 5 4 3 2 1 0 -| ----------------------------------------------- -| | 0 | Size | Dest Reg # | -| ----------------------------------------------- -| -| Size is: -| 0 - Byte -| 1 - Word -| 2 - Long/Single -| -pregdst: - .long byte_d0 - .long byte_d1 - .long byte_d2 - .long byte_d3 - .long byte_d4 - .long byte_d5 - .long byte_d6 - .long byte_d7 - .long word_d0 - .long word_d1 - .long word_d2 - .long word_d3 - .long word_d4 - .long word_d5 - .long word_d6 - .long word_d7 - .long long_d0 - .long long_d1 - .long long_d2 - .long long_d3 - .long long_d4 - .long long_d5 - .long long_d6 - .long long_d7 - -reg_dest: - leal pregdst,%a0 - movel %a0@(%d1:l:4),%a0 - jmp (%a0) - -byte_d0: - moveb L_SCR1(%a6),USER_D0+3(%a6) - rts -byte_d1: - moveb L_SCR1(%a6),USER_D1+3(%a6) - rts -byte_d2: - moveb L_SCR1(%a6),%d2 - rts -byte_d3: - moveb L_SCR1(%a6),%d3 - rts -byte_d4: - moveb L_SCR1(%a6),%d4 - rts -byte_d5: - moveb L_SCR1(%a6),%d5 - rts -byte_d6: - moveb L_SCR1(%a6),%d6 - rts -byte_d7: - moveb L_SCR1(%a6),%d7 - rts -word_d0: - movew L_SCR1(%a6),USER_D0+2(%a6) - rts -word_d1: - movew L_SCR1(%a6),USER_D1+2(%a6) - rts -word_d2: - movew L_SCR1(%a6),%d2 - rts -word_d3: - movew L_SCR1(%a6),%d3 - rts -word_d4: - movew L_SCR1(%a6),%d4 - rts -word_d5: - movew L_SCR1(%a6),%d5 - rts -word_d6: - movew L_SCR1(%a6),%d6 - rts -word_d7: - movew L_SCR1(%a6),%d7 - rts -long_d0: - movel L_SCR1(%a6),USER_D0(%a6) - rts -long_d1: - movel L_SCR1(%a6),USER_D1(%a6) - rts -long_d2: - movel L_SCR1(%a6),%d2 - rts -long_d3: - movel L_SCR1(%a6),%d3 - rts -long_d4: - movel L_SCR1(%a6),%d4 - rts -long_d5: - movel L_SCR1(%a6),%d5 - rts -long_d6: - movel L_SCR1(%a6),%d6 - rts -long_d7: - movel L_SCR1(%a6),%d7 - rts - |end diff --git a/arch/m68k/fpsp040/x_bsun.S b/arch/m68k/fpsp040/x_bsun.S deleted file mode 100644 index d5a576bfac79447b12d7117119c854fb5757a39d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_bsun.S +++ /dev/null @@ -1,46 +0,0 @@ -| -| x_bsun.sa 3.3 7/1/91 -| -| fpsp_bsun --- FPSP handler for branch/set on unordered exception -| -| Copy the PC to FPIAR to maintain 881/882 compatibility -| -| The real_bsun handler will need to perform further corrective -| measures as outlined in the 040 User's Manual on pages -| 9-41f, section 9.8.3. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_BSUN: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_bsun - - .global fpsp_bsun -fpsp_bsun: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| - movel EXC_PC(%a6),USER_FPIAR(%a6) -| - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_bsun -| - |end diff --git a/arch/m68k/fpsp040/x_fline.S b/arch/m68k/fpsp040/x_fline.S deleted file mode 100644 index 264e126d1db7bc19ffd2d6151b9abb6f5f30e7c2..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_fline.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| x_fline.sa 3.3 1/10/91 -| -| fpsp_fline --- FPSP handler for fline exception -| -| First determine if the exception is one of the unimplemented -| floating point instructions. If so, let fpsp_unimp handle it. -| Next, determine if the instruction is an fmovecr with a non-zero -| <ea> field. If so, handle here and return. Otherwise, it -| must be a real F-line exception. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_FLINE: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_fline - |xref fpsp_unimp - |xref uni_2 - |xref mem_read - |xref fpsp_fmt_error - - .global fpsp_fline -fpsp_fline: -| -| check for unimplemented vector first. Use EXC_VEC-4 because -| the equate is valid only after a 'link a6' has pushed one more -| long onto the stack.
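The triage fpsp_fline performs can be summarized in C. Everything here is a hedged sketch: the enum, function, and the boolean argument standing in for the UNIMP_VEC stack-frame comparison are invented, while the two bit-field extractions mirror the bfextu instructions in the code that follows.

    enum fline_kind { FLINE_UNIMP, FLINE_FMOVECR, FLINE_REAL };

    /* inst holds the F-line word in its upper 16 bits and the command
     * word in its lower 16 bits, as assembled by the mem_read call. */
    static enum fline_kind classify_fline(int on_unimp_vector,
                                          unsigned long inst)
    {
        if (on_unimp_vector)
            return FLINE_UNIMP;              /* hand off to fpsp_unimp */
        if (((inst >> 25) & 0x7) == 1 &&     /* coprocessor id must be 1 */
            ((inst >> 10) & 0x3f) == 0x17)   /* FMOVECR bit pattern */
            return FLINE_FMOVECR;            /* emulated in this handler */
        return FLINE_REAL;                   /* genuine F-line illegal */
    }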
-| - cmpw #UNIMP_VEC,EXC_VEC-4(%a7) - beql fpsp_unimp - -| -| fmovecr with non-zero handling here -| - subl #4,%a7 |4 accounts for 2-word difference -| ;between six word frame (unimp) and -| ;four word frame - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - moveal EXC_PC+4(%a6),%a0 |get address of fline instruction - leal L_SCR1(%a6),%a1 |use L_SCR1 as scratch - movel #4,%d0 - addl #4,%a6 |to offset the sub.l #4,a7 above so that -| ;a6 can point correctly to the stack frame -| ;before branching to mem_read - bsrl mem_read - subl #4,%a6 - movel L_SCR1(%a6),%d0 |d0 contains the fline and command word - bfextu %d0{#4:#3},%d1 |extract coprocessor id - cmpib #1,%d1 |check if cpid=1 - bne not_mvcr |exit if not - bfextu %d0{#16:#6},%d1 - cmpib #0x17,%d1 |check if it is an FMOVECR encoding - bne not_mvcr -| ;if an FMOVECR instruction, fix stack -| ;and go to FPSP_UNIMP -fix_stack: - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev - subl #UNIMP_40_SIZE-4,%a7 |emulate an orig fsave - moveb #VER_40,(%a7) - moveb #UNIMP_40_SIZE-4,1(%a7) - clrw 2(%a7) - bras fix_con -ck_rev: - cmpib #VER_41,(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if not $40 or $41, exit with error - subl #UNIMP_41_SIZE-4,%a7 |emulate a rev fsave - moveb #VER_41,(%a7) - moveb #UNIMP_41_SIZE-4,1(%a7) - clrw 2(%a7) -fix_con: - movew EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position - movel EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position - fmovel EXC_PC(%a6),%FPIAR |point FPIAR to fline inst - movel #4,%d1 - addl %d1,EXC_PC(%a6) |increment stacked pc value to next inst - movew #0x202c,EXC_VEC(%a6) |reformat vector to unimp - clrl EXC_EA(%a6) |clear the EXC_EA field - movew %d0,CMDREG1B(%a6) |move the lower word into CMDREG1B - clrl E_BYTE(%a6) - bsetb #UFLAG,T_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers - bral uni_2 - -not_mvcr: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers - frestore (%a7)+ - unlk %a6 - addl #4,%a7 - bral real_fline - - |end diff --git a/arch/m68k/fpsp040/x_operr.S b/arch/m68k/fpsp040/x_operr.S deleted file mode 100644 index e2c371c3a45dbf084f79076a9058fe6a87dbd087..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_operr.S +++ /dev/null @@ -1,355 +0,0 @@ -| -| x_operr.sa 3.5 7/1/91 -| -| fpsp_operr --- FPSP handler for operand error exception -| -| See 68040 User's Manual pp. 9-44f -| -| Note 1: For trap disabled 040 does the following: -| If the dest is a fp reg, then an extended precision non_signaling -| NAN is stored in the dest reg. If the dest format is b, w, or l and -| the source op is a NAN, then garbage is stored as the result (actually -| the upper 32 bits of the mantissa are sent to the integer unit). If -| the dest format is integer (b, w, l) and the operr is caused by -| integer overflow, or the source op is inf, then the result stored is -| garbage. -| There are three cases in which operr is incorrectly signaled on the -| 040. This occurs for move_out of format b, w, or l for the largest -| negative integer (-2^7 for b, -2^15 for w, -2^31 for l). 
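In other words, the three spurious cases are exactly the most negative representable integers for each destination size, values that are perfectly valid results. A short C restatement (the function name is illustrative):

    #include <stdint.h>

    /* The 040 raises operr on a move-out of these valid values. */
    static int operr_wrongly_signalled(int size_bytes, int64_t v)
    {
        switch (size_bytes) {
        case 1: return v == INT8_MIN;    /* -2^7,  byte */
        case 2: return v == INT16_MIN;   /* -2^15, word */
        case 4: return v == INT32_MIN;   /* -2^31, long */
        }
        return 0;
    }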
-| -| On opclass = 011 fmove.(b,w,l) that causes a conversion -| overflow -> OPERR, the exponent in wbte (and fpte) is: -| byte 56 - (62 - exp) -| word 48 - (62 - exp) -| long 32 - (62 - exp) -| -| where exp = (true exp) - 1 -| -| So, wbtemp and fptemp will contain the following on erroneously -| signalled operr: -| fpts = 1 -| fpte = $4000 (15 bit externally) -| byte fptm = $ffffffff ffffff80 -| word fptm = $ffffffff ffff8000 -| long fptm = $ffffffff 80000000 -| -| Note 2: For trap enabled 040 does the following: -| If the inst is move_out, then same as Note 1. -| If the inst is not move_out, the dest is not modified. -| The exceptional operand is not defined for integer overflow -| during a move_out. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_OPERR: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref mem_write - |xref real_operr - |xref real_inex - |xref get_fline - |xref fpsp_done - |xref reg_dest - - .global fpsp_operr -fpsp_operr: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| Check if this is an opclass 3 instruction. -| If so, fall through, else branch to operr_end -| - btstb #TFLAG,T_BYTE(%a6) - beqs operr_end - -| -| If the destination size is B,W,or L, the operr must be -| handled here. -| - movel CMDREG1B(%a6),%d0 - bfextu %d0{#3:#3},%d0 |0=long, 4=word, 6=byte - cmpib #0,%d0 |determine size; check long - beq operr_long - cmpib #4,%d0 |check word - beq operr_word - cmpib #6,%d0 |check byte - beq operr_byte - -| -| The size is not B,W,or L, so the operr is handled by the -| kernel handler. Set the operr bits and clean up, leaving -| only the integer exception frame on the stack, and the -| fpu in the original exceptional state. 
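The size dispatch just shown maps the three-bit destination-format field onto byte counts; a compact C equivalent (the function name is invented, and the shift mirrors the bfextu extraction above):

    /* Destination size for an opclass-3 operr, from the format field in
     * the stacked CMDREG1B longword (0 = long, 4 = word, 6 = byte). */
    static int operr_dest_size(unsigned long cmdreg1b)
    {
        switch ((cmdreg1b >> 26) & 0x7) {
        case 0: return 4;     /* long */
        case 4: return 2;     /* word */
        case 6: return 1;     /* byte */
        default: return 0;    /* s/d/x: left to the kernel handler */
        }
    }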
-| -operr_end: - bsetb #operr_bit,FPSR_EXCEPT(%a6) - bsetb #aiop_bit,FPSR_AEXCEPT(%a6) - - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_operr - -operr_long: - moveql #4,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beq operr_nan - cmpil #0x80000000,FPTEMP_LO(%a6) |test if ls lword is special - bnes chklerr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chklerr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chklerr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes chklerr2 -fixlong: - movel FPTEMP_LO(%a6),%d0 - bsr operr_store - bra not_enabled -chklerr2: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x4000,%d0 - bcc store_max |exponent out of range - - movel FPTEMP_LO(%a6),%d0 - andl #0x7FFF0000,%d0 |look for all 1's on bits 30-16 - cmpl #0x7FFF0000,%d0 - beqs fixlong - - tstl FPTEMP_LO(%a6) - bpls chklepos - cmpl #0xFFFFFFFF,FPTEMP_HI(%a6) - beqs fixlong - bra store_max -chklepos: - tstl FPTEMP_HI(%a6) - beqs fixlong - bra store_max - -operr_word: - moveql #2,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beq operr_nan - cmpil #0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special - bnes chkwerr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chkwerr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chkwerr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes store_max - movel FPTEMP_LO(%a6),%d0 - swap %d0 - bsr operr_store - bra not_enabled - -operr_byte: - moveql #1,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beqs operr_nan - cmpil #0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special - bnes chkberr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chkberr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chkberr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes store_max - movel FPTEMP_LO(%a6),%d0 - asll #8,%d0 - swap %d0 - bsr operr_store - bra not_enabled - -| -| This operr condition is not of the special case. Set operr -| and aiop and write the portion of the nan to memory for the -| given size. 
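For the word and byte paths above, the spurious-operr test is narrow: once the sign is masked off, the only biased exponent that can produce the case is 0x3FFE, and the value to store is recovered from the low end of FPTEMP's least significant longword. A C sketch of the word variant, under those assumptions (names invented):

    #include <stdint.h>

    /* chkwerr in C: returns the corrected result through *out when the
     * operr was bogus, 0 when the maximum value applies instead. */
    static int recover_word_operr(uint16_t fptemp_ex, uint32_t fptemp_lo,
                                  uint16_t *out)
    {
        if ((fptemp_ex & 0x7fff) != 0x3ffe)  /* only possible exponent */
            return 0;                        /* genuine: store_max */
        *out = (uint16_t)fptemp_lo;          /* low word; the swap in
                                                chkwerr moves it into the
                                                position mem_write emits */
        return 1;
    }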
-| -operr_nan: - orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop - - movel ETEMP_HI(%a6),%d0 |output will be from upper 32 bits - bsr operr_store - bra end_operr -| -| Store_max loads the max pos or negative for the size, sets -| the operr and aiop bits, and clears inex and ainex, incorrectly -| set by the 040. -| -store_max: - orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop - bclrb #inex2_bit,FPSR_EXCEPT(%a6) - bclrb #ainex_bit,FPSR_AEXCEPT(%a6) - fmovel #0,%FPSR - - tstw FPTEMP_EX(%a6) |check sign - blts load_neg - movel #0x7fffffff,%d0 - bsr operr_store - bra end_operr -load_neg: - movel #0x80000000,%d0 - bsr operr_store - bra end_operr - -| -| This routine stores the data in d0, for the given size in d1, -| to memory or data register as required. A read of the fline -| is required to determine the destination. -| -operr_store: - movel %d0,L_SCR1(%a6) |move write data to L_SCR1 - movel %d1,-(%a7) |save register size - bsrl get_fline |fline returned in d0 - movel (%a7)+,%d1 - bftst %d0{#26:#3} |if mode is zero, dest is Dn - bnes dest_mem -| -| Destination is Dn. Get register number from d0. Data is on -| the stack at (a7). D1 has size: 1=byte,2=word,4=long/single -| - andil #7,%d0 |isolate register number - cmpil #4,%d1 - beqs op_long |the most frequent case - cmpil #2,%d1 - bnes op_con - orl #8,%d0 - bras op_con -op_long: - orl #0x10,%d0 -op_con: - movel %d0,%d1 |format size:reg for reg_dest - bral reg_dest |call to reg_dest returns to caller -| ;of operr_store -| -| Destination is memory. Get from integer exception frame -| and call mem_write. -| -dest_mem: - leal L_SCR1(%a6),%a0 |put ptr to write data in a0 - movel EXC_EA(%a6),%a1 |put user destination address in a1 - movel %d1,%d0 |put size in d0 - bsrl mem_write - rts -| -| Check the exponent for $c000 and the upper 32 bits of the -| mantissa for $ffffffff. If both are true, return d0 clr -| and store the lower n bits of the least lword of FPTEMP -| to d0 for write out. If not, it is a real operr, and set d0. -| -check_upper: - cmpil #0xffffffff,FPTEMP_HI(%a6) |check if first byte is all 1's - bnes true_operr |if not all 1's then was true operr - cmpiw #0xc000,FPTEMP_EX(%a6) |check if incorrectly signalled - beqs not_true_operr |branch if not true operr - cmpiw #0xbfff,FPTEMP_EX(%a6) |check if incorrectly signalled - beqs not_true_operr |branch if not true operr -true_operr: - movel #1,%d0 |signal real operr - rts -not_true_operr: - clrl %d0 |signal no real operr - rts - -| -| End_operr tests for operr enabled. If not, it cleans up the stack -| and does an rte. If enabled, it cleans up the stack and branches -| to the kernel operr handler with only the integer exception -| frame on the stack and the fpu in the original exceptional state -| with correct data written to the destination. -| -end_operr: - btstb #operr_bit,FPCR_ENABLE(%a6) - beqs not_enabled -enabled: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_operr - -not_enabled: -| -| It is possible to have either inex2 or inex1 exceptions with the -| operr. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beq operr_exit -| -| Inexact enabled and reported, and we must take an inexact exception. 
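The pending-inexact test described above is a two-byte AND: both the enable bits and the status bits for inex1/inex2 (mask 0x3) must be set before the handler diverts to real_inex. As a one-line C sketch (the function name is invented):

    /* ck_inex's test: take the inexact exception only when an inex1 or
     * inex2 status bit coincides with its enable bit. */
    static int must_take_inexact(unsigned char fpcr_enable,
                                 unsigned char fpsr_except)
    {
        return (fpcr_enable & fpsr_except & 0x3) != 0;
    }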
-| -take_inex: - moveb #INEX_VEC,EXC_VEC+1(%a6) - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex -| -| Since operr is only an E1 exception, there is no need to frestore -| any state back to the fpu. -| -operr_exit: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done - - |end diff --git a/arch/m68k/fpsp040/x_ovfl.S b/arch/m68k/fpsp040/x_ovfl.S deleted file mode 100644 index 6fe4989ee31f7c5644f521dc42213fa870038551..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_ovfl.S +++ /dev/null @@ -1,185 +0,0 @@ -| -| x_ovfl.sa 3.5 7/1/91 -| -| fpsp_ovfl --- FPSP handler for overflow exception -| -| Overflow occurs when a floating-point intermediate result is -| too large to be represented in a floating-point data register, -| or when storing to memory, the contents of a floating-point -| data register are too large to be represented in the -| destination format. -| -| Trap disabled results -| -| If the instruction is move_out, then garbage is stored in the -| destination. If the instruction is not move_out, then the -| destination is not affected. For 68881 compatibility, the -| following values should be stored at the destination, based -| on the current rounding mode: -| -| RN Infinity with the sign of the intermediate result. -| RZ Largest magnitude number, with the sign of the -| intermediate result. -| RM For pos overflow, the largest pos number. For neg overflow, -| -infinity -| RP For pos overflow, +infinity. For neg overflow, the largest -| neg number -| -| Trap enabled results -| All trap disabled code applies. In addition the exceptional -| operand needs to be made available to the users exception handler -| with a bias of $6000 subtracted from the exponent. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_OVFL: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref ovf_r_x2 - |xref ovf_r_x3 - |xref store - |xref real_ovfl - |xref real_inex - |xref fpsp_done - |xref g_opcls - |xref b1238_fix - - .global fpsp_ovfl -fpsp_ovfl: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| The 040 doesn't set the AINEX bit in the FPSR, the following -| line temporarily rectifies this error. -| - bsetb #ainex_bit,FPSR_AEXCEPT(%a6) -| - bsrl ovf_adj |denormalize, round & store interm op -| -| if overflow traps not enabled check for inexact exception -| - btstb #ovfl_bit,FPCR_ENABLE(%a6) - beqs ck_inex -| - btstb #E3,E_BYTE(%a6) - beqs no_e3_1 - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_1: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_ovfl -| -| It is possible to have either inex2 or inex1 exceptions with the -| ovfl. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. 
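Returning to the trap-disabled results enumerated in the x_ovfl header above: for every destination precision they reduce to a choice between infinity and the largest finite number, as a function of rounding mode and result sign, which is exactly what the ovf_res tables encode. Sketched in C (the function name is invented; the mode encoding is the FPCR's):

    /* Trap-disabled overflow default, per rounding mode (FPCR encoding:
     * 0 = RN, 1 = RZ, 2 = RM, 3 = RP).  Returns 1 when the stored result
     * is infinity, 0 when it is the largest-magnitude finite number. */
    static int ovf_default_is_inf(int rnd_mode, int negative)
    {
        switch (rnd_mode) {
        case 0: return 1;           /* RN: infinity, sign of result    */
        case 1: return 0;           /* RZ: largest magnitude, signed   */
        case 2: return negative;    /* RM: -inf if negative, else +max */
        case 3: return !negative;   /* RP: +inf if positive, else -max */
        }
        return 1;                   /* not reached */
    }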
-| -ck_inex: -| move.b FPCR_ENABLE(%a6),%d0 -| and.b FPSR_EXCEPT(%a6),%d0 -| andi.b #$3,%d0 - btstb #inex2_bit,FPCR_ENABLE(%a6) - beqs ovfl_exit -| -| Inexact enabled and reported, and we must take an inexact exception. -| -take_inex: - btstb #E3,E_BYTE(%a6) - beqs no_e3_2 - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_2: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex - -ovfl_exit: - bclrb #E3,E_BYTE(%a6) |test and clear E3 bit - beqs e1_set -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done -e1_set: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done - -| -| ovf_adj -| -ovf_adj: -| -| Have a0 point to the correct operand. -| - btstb #E3,E_BYTE(%a6) |test E3 bit - beqs ovf_e1 - - lea WBTEMP(%a6),%a0 - bras ovf_com -ovf_e1: - lea ETEMP(%a6),%a0 - -ovf_com: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - - bsrl g_opcls |returns opclass in d0 - cmpiw #3,%d0 |check for opclass3 - bnes not_opc011 - -| -| FPSR_CC is saved and restored because ovf_r_x3 affects it. The -| CCs are defined to be 'not affected' for the opclass3 instruction. -| - moveb FPSR_CC(%a6),L_SCR1(%a6) - bsrl ovf_r_x3 |returns a0 pointing to result - moveb L_SCR1(%a6),FPSR_CC(%a6) - bral store |stores to memory or register - -not_opc011: - bsrl ovf_r_x2 |returns a0 pointing to result - bral store |stores to memory or register - - |end diff --git a/arch/m68k/fpsp040/x_snan.S b/arch/m68k/fpsp040/x_snan.S deleted file mode 100644 index 4ed7664163781bccaebf477566550cb6404d776d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_snan.S +++ /dev/null @@ -1,276 +0,0 @@ -| -| x_snan.sa 3.3 7/1/91 -| -| fpsp_snan --- FPSP handler for signalling NAN exception -| -| SNAN for float -> integer conversions (integer conversion of -| an SNAN) is a non-maskable run-time exception. -| -| For trap disabled the 040 does the following: -| If the dest data format is s, d, or x, then the SNAN bit in the NAN -| is set to one and the resulting non-signaling NAN (truncated if -| necessary) is transferred to the dest. If the dest format is b, w, -| or l, then garbage is written to the dest (actually the upper 32 bits -| of the mantissa are sent to the integer unit). -| -| For trap enabled the 040 does the following: -| If the inst is move_out, then the results are the same as for trap -| disabled with the exception posted. If the instruction is not move_ -| out, the dest. is not modified, and the exception is posted. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
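The x_snan header above says the SNAN bit in the NaN is set to one before the (possibly truncated) non-signalling NaN reaches the destination. In the sto_long/sto_word/sto_byte paths of the handler that follows, that amounts to a single bit-set on the upper mantissa longword; sketched in C (the function name is invented):

    #include <stdint.h>

    /* Quiet a signalling NaN the way the move-out paths do: set bit 30
     * of ETEMP's upper mantissa longword (bit 62 of the full mantissa),
     * matching the bsetl #30,%d1 in the handler below. */
    static uint32_t quiet_snan_upper(uint32_t etemp_hi)
    {
        return etemp_hi | (1UL << 30);
    }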
- -X_SNAN: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_fline - |xref mem_write - |xref real_snan - |xref real_inex - |xref fpsp_done - |xref reg_dest - - .global fpsp_snan -fpsp_snan: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| Check if trap enabled -| - btstb #snan_bit,FPCR_ENABLE(%a6) - bnes ena |If enabled, then branch - - bsrl move_out |else SNAN disabled -| -| It is possible to have an inex1 exception with the -| snan. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beq end_snan -| -| Inexact enabled and reported, and we must take an inexact exception. -| -take_inex: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex -| -| SNAN is enabled. Check if inst is move_out. -| Make any corrections to the 040 output as necessary. -| -ena: - btstb #5,CMDREG1B(%a6) |if set, inst is move out - beq not_out - - bsrl move_out - -report_snan: - moveb (%a7),VER_TMP(%a6) - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev - moveql #13,%d0 |need to zero 14 lwords - bras rep_con -ck_rev: - moveql #11,%d0 |need to zero 12 lwords -rep_con: - clrl (%a7) -loop1: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop1 - moveb VER_TMP(%a6),(%a7) |format a busy frame - moveb #BUSY_SIZE-4,1(%a7) - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_snan -| -| Exit snan handler by expanding the unimp frame into a busy frame -| -end_snan: - bclrb #E1,E_BYTE(%a6) - - moveb (%a7),VER_TMP(%a6) - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev2 - moveql #13,%d0 |need to zero 14 lwords - bras rep_con2 -ck_rev2: - moveql #11,%d0 |need to zero 12 lwords -rep_con2: - clrl (%a7) -loop2: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop2 - moveb VER_TMP(%a6),(%a7) |format a busy frame - moveb #BUSY_SIZE-4,1(%a7) |write busy size - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done - -| -| Move_out -| -move_out: - movel EXC_EA(%a6),%a0 |get from exc frame - - bfextu CMDREG1B(%a6){#3:#3},%d0 |move rx field to d0{2:0} - cmpil #0,%d0 |check for long - beqs sto_long |branch if move_out long - - cmpil #4,%d0 |check for word - beqs sto_word |branch if move_out word - - cmpil #6,%d0 |check for byte - beqs sto_byte |branch if move_out byte - -| -| Not byte, word or long -| - rts -| -| Get the 32 most significant bits of etemp mantissa -| -sto_long: - movel ETEMP_HI(%a6),%d1 - movel #4,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |load src addr of snan into a0 - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off 
stack - rts -| -| Get the 16 most significant bits of etemp mantissa -| -sto_word: - movel ETEMP_HI(%a6),%d1 - movel #2,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |point to low word - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off stack - rts -| -| Get the 8 most significant bits of etemp mantissa -| -sto_byte: - movel ETEMP_HI(%a6),%d1 - movel #1,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |point to source byte - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off stack - rts - -| -| wrt_dn --- write to a data register -| -| We get here with D1 containing the data to write and D0 the -| number of bytes to write: 1=byte,2=word,4=long. -| -wrt_dn: - movel %d1,L_SCR1(%a6) |data - movel %d0,-(%a7) |size - bsrl get_fline |returns fline word in d0 - movel %d0,%d1 - andil #0x7,%d1 |d1 now holds register number - movel (%sp)+,%d0 |get original size - cmpil #4,%d0 - beqs wrt_long - cmpil #2,%d0 - bnes wrt_byte -wrt_word: - orl #0x8,%d1 - bral reg_dest -wrt_long: - orl #0x10,%d1 - bral reg_dest -wrt_byte: - bral reg_dest -| -| Check if it is a src nan or dst nan -| -not_out: - movel DTAG(%a6),%d0 - bfextu %d0{#0:#3},%d0 |isolate dtag in lsbs - - cmpib #3,%d0 |check for nan in destination - bnes issrc |destination nan has priority -dst_nan: - btstb #6,FPTEMP_HI(%a6) |check if dest nan is an snan - bnes issrc |no, so check source for snan - movew FPTEMP_EX(%a6),%d0 - bras cont -issrc: - movew ETEMP_EX(%a6),%d0 -cont: - btstl #15,%d0 |test for sign of snan - beqs clr_neg - bsetb #neg_bit,FPSR_CC(%a6) - bra report_snan -clr_neg: - bclrb #neg_bit,FPSR_CC(%a6) - bra report_snan - - |end diff --git a/arch/m68k/fpsp040/x_store.S b/arch/m68k/fpsp040/x_store.S deleted file mode 100644 index 402dc0c0ebc04d814b69f55636ae1320bf24b747..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_store.S +++ /dev/null @@ -1,255 +0,0 @@ -| -| x_store.sa 3.2 1/24/91 -| -| store --- store operand to memory or register -| -| Used by underflow and overflow handlers. -| -| a6 = points to fp value to be stored. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_STORE: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -fpreg_mask: - .byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01 - -#include "fpsp.h" - - |xref mem_write - |xref get_fline - |xref g_opcls - |xref g_dfmtou - |xref reg_dest - - .global dest_ext - .global dest_dbl - .global dest_sgl - - .global store -store: - btstb #E3,E_BYTE(%a6) - beqs E1_sto -E3_sto: - movel CMDREG3B(%a6),%d0 - bfextu %d0{#6:#3},%d0 |isolate dest. 
reg from cmdreg3b -sto_fp: - lea fpreg_mask,%a1 - moveb (%a1,%d0.w),%d0 |convert reg# to dynamic register mask - tstb LOCAL_SGN(%a0) - beqs is_pos - bsetb #sign_bit,LOCAL_EX(%a0) -is_pos: - fmovemx (%a0),%d0 |move to correct register -| -| if fp0-fp3 is being modified, we must put a copy -| in the USER_FPn variable on the stack because all exception -| handlers restore fp0-fp3 from there. -| - cmpb #0x80,%d0 - bnes not_fp0 - fmovemx %fp0-%fp0,USER_FP0(%a6) - rts -not_fp0: - cmpb #0x40,%d0 - bnes not_fp1 - fmovemx %fp1-%fp1,USER_FP1(%a6) - rts -not_fp1: - cmpb #0x20,%d0 - bnes not_fp2 - fmovemx %fp2-%fp2,USER_FP2(%a6) - rts -not_fp2: - cmpb #0x10,%d0 - bnes not_fp3 - fmovemx %fp3-%fp3,USER_FP3(%a6) - rts -not_fp3: - rts - -E1_sto: - bsrl g_opcls |returns opclass in d0 - cmpib #3,%d0 - beq opc011 |branch if opclass 3 - movel CMDREG1B(%a6),%d0 - bfextu %d0{#6:#3},%d0 |extract destination register - bras sto_fp - -opc011: - bsrl g_dfmtou |returns dest format in d0 -| ;ext=00, sgl=01, dbl=10 - movel %a0,%a1 |save source addr in a1 - movel EXC_EA(%a6),%a0 |get the address - cmpil #0,%d0 |if dest format is extended - beq dest_ext |then branch - cmpil #1,%d0 |if dest format is single - beq dest_sgl |then branch -| -| fall through to dest_dbl -| - -| -| dest_dbl --- write double precision value to user space -| -|Input -| a0 -> destination address -| a1 -> source in extended precision -|Output -| a0 -> destroyed -| a1 -> destroyed -| d0 -> 0 -| -|Changes extended precision to double precision. -| Note: no attempt is made to round the extended value to double. -| dbl_sign = ext_sign -| dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) -| get rid of ext integer bit -| dbl_mant = ext_mant{62:12} -| -| --------------- --------------- --------------- -| extended -> |s| exp | |1| ms mant | | ls mant | -| --------------- --------------- --------------- -| 95 64 63 62 32 31 11 0 -| | | -| | | -| | | -| v v -| --------------- --------------- -| double -> |s|exp| mant | | mant | -| --------------- --------------- -| 63 51 32 31 0 -| -dest_dbl: - clrl %d0 |clear d0 - movew LOCAL_EX(%a1),%d0 |get exponent - subw #0x3fff,%d0 |subtract extended precision bias - cmpw #0x4000,%d0 |check if inf - beqs inf |if so, special case - addw #0x3ff,%d0 |add double precision bias - swap %d0 |d0 now in upper word - lsll #4,%d0 |d0 now in proper place for dbl prec exp - tstb LOCAL_SGN(%a1) - beqs get_mant |if positive, go process mantissa - bsetl #31,%d0 |if negative, put in sign information -| ; before continuing - bras get_mant |go process mantissa -inf: - movel #0x7ff00000,%d0 |load dbl inf exponent - clrl LOCAL_HI(%a1) |clear msb - tstb LOCAL_SGN(%a1) - beqs dbl_inf |if positive, go ahead and write it - bsetl #31,%d0 |if negative put in sign information -dbl_inf: - movel %d0,LOCAL_EX(%a1) |put the new exp back on the stack - bras dbl_wrt -get_mant: - movel LOCAL_HI(%a1),%d1 |get ms mantissa - bfextu %d1{#1:#20},%d1 |get upper 20 bits of ms - orl %d1,%d0 |put these bits in ms word of double - movel %d0,LOCAL_EX(%a1) |put the new exp back on the stack - movel LOCAL_HI(%a1),%d1 |get ms mantissa - movel #21,%d0 |load shift count - lsll %d0,%d1 |put lower 11 bits in upper bits - movel %d1,LOCAL_HI(%a1) |build lower lword in memory - movel LOCAL_LO(%a1),%d1 |get ls mantissa - bfextu %d1{#0:#21},%d0 |get ls 21 bits of double - orl %d0,LOCAL_HI(%a1) |put them in double result -dbl_wrt: - movel #0x8,%d0 |byte count for double precision number - exg %a0,%a1 |a0=supervisor source, a1=user dest - bsrl mem_write |move the number to 
the user's memory - rts -| -| dest_sgl --- write single precision value to user space -| -|Input -| a0 -> destination address -| a1 -> source in extended precision -| -|Output -| a0 -> destroyed -| a1 -> destroyed -| d0 -> 0 -| -|Changes extended precision to single precision. -| sgl_sign = ext_sign -| sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) -| get rid of ext integer bit -| sgl_mant = ext_mant{62:12} -| -| --------------- --------------- --------------- -| extended -> |s| exp | |1| ms mant | | ls mant | -| --------------- --------------- --------------- -| 95 64 63 62 40 32 31 12 0 -| | | -| | | -| | | -| v v -| --------------- -| single -> |s|exp| mant | -| --------------- -| 31 22 0 -| -dest_sgl: - clrl %d0 - movew LOCAL_EX(%a1),%d0 |get exponent - subw #0x3fff,%d0 |subtract extended precision bias - cmpw #0x4000,%d0 |check if inf - beqs sinf |if so, special case - addw #0x7f,%d0 |add single precision bias - swap %d0 |put exp in upper word of d0 - lsll #7,%d0 |shift it into single exp bits - tstb LOCAL_SGN(%a1) - beqs get_sman |if positive, continue - bsetl #31,%d0 |if negative, put in sign first - bras get_sman |get mantissa -sinf: - movel #0x7f800000,%d0 |load single inf exp to d0 - tstb LOCAL_SGN(%a1) - beqs sgl_wrt |if positive, continue - bsetl #31,%d0 |if negative, put in sign info - bras sgl_wrt - -get_sman: - movel LOCAL_HI(%a1),%d1 |get ms mantissa - bfextu %d1{#1:#23},%d1 |get upper 23 bits of ms - orl %d1,%d0 |put these bits in ms word of single - -sgl_wrt: - movel %d0,L_SCR1(%a6) |put the new exp back on the stack - movel #0x4,%d0 |byte count for single precision number - tstl %a0 |users destination address - beqs sgl_Dn |destination is a data register - exg %a0,%a1 |a0=supervisor source, a1=user dest - leal L_SCR1(%a6),%a0 |point a0 to data - bsrl mem_write |move the number to the user's memory - rts -sgl_Dn: - bsrl get_fline |returns fline word in d0 - andw #0x7,%d0 |isolate register number - movel %d0,%d1 |d1 has size:reg formatted for reg_dest - orl #0x10,%d1 |reg_dest wants size added to reg# - bral reg_dest |size is X, rts in reg_dest will -| ;return to caller of dest_sgl - -dest_ext: - tstb LOCAL_SGN(%a1) |put back sign into exponent word - beqs dstx_cont - bsetb #sign_bit,LOCAL_EX(%a1) -dstx_cont: - clrb LOCAL_SGN(%a1) |clear out the sign byte - - movel #0x0c,%d0 |byte count for extended number - exg %a0,%a1 |a0=supervisor source, a1=user dest - bsrl mem_write |move the number to the user's memory - rts - - |end diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S deleted file mode 100644 index eb772ff3b812c71747de877f3a815b81300454cb..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unfl.S +++ /dev/null @@ -1,268 +0,0 @@ -| -| x_unfl.sa 3.4 7/1/91 -| -| fpsp_unfl --- FPSP handler for underflow exception -| -| Trap disabled results -| For 881/2 compatibility, sw must denormalize the intermediate -| result, then store the result. Denormalization is accomplished -| by taking the intermediate result (which is always normalized) and -| shifting the mantissa right while incrementing the exponent until -| it is equal to the denormalized exponent for the destination -| format. After denormalization, the result is rounded to the -| destination format. -| -| Trap enabled results -| All trap disabled code applies. In addition the exceptional -| operand needs to made available to the user with a bias of $6000 -| added to the exponent. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref denorm - |xref round - |xref store - |xref g_rndpr - |xref g_opcls - |xref g_dfmtou - |xref real_unfl - |xref real_inex - |xref fpsp_done - |xref b1238_fix - - .global fpsp_unfl -fpsp_unfl: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| - bsrl unf_res |denormalize, round & store interm op -| -| If underflow exceptions are not enabled, check for inexact -| exception -| - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs ck_inex - - btstb #E3,E_BYTE(%a6) - beqs no_e3_1 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_1: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_unfl -| -| It is possible to have either inex2 or inex1 exceptions with the -| unfl. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beqs unfl_done - -| -| Inexact enabled and reported, and we must take an inexact exception -| -take_inex: - btstb #E3,E_BYTE(%a6) - beqs no_e3_2 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_2: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex - -unfl_done: - bclrb #E3,E_BYTE(%a6) - beqs e1_set |if set then branch -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done -e1_set: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done -| -| unf_res --- underflow result calculation -| -unf_res: - bsrl g_rndpr |returns RND_PREC in d0 0=ext, -| ;1=sgl, 2=dbl -| ;we need the RND_PREC in the -| ;upper word for round - movew #0,-(%a7) - movew %d0,-(%a7) |copy RND_PREC to stack -| -| -| If the exception bit set is E3, the exceptional operand from the -| fpu is in WBTEMP; else it is in FPTEMP. -| - btstb #E3,E_BYTE(%a6) - beqs unf_E1 -unf_E3: - lea WBTEMP(%a6),%a0 |a0 now points to operand -| -| Test for fsgldiv and fsglmul. If the inst was one of these, then -| force the precision to extended for the denorm routine. 
Use -| the user's precision for the round routine. -| - movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul - andiw #0x7f,%d1 - cmpiw #0x30,%d1 |check for sgldiv - beqs unf_sgl - cmpiw #0x33,%d1 |check for sglmul - bnes unf_cont |if not, use fpcr prec in round -unf_sgl: - clrl %d0 - movew #0x1,(%a7) |override g_rndpr precision -| ;force single - bras unf_cont -unf_E1: - lea FPTEMP(%a6),%a0 |a0 now points to operand -unf_cont: - bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit - sne LOCAL_SGN(%a0) |store sign - - bsrl denorm |returns denorm, a0 points to it -| -| WARNING: -| ;d0 has guard,round sticky bit -| ;make sure that it is not corrupted -| ;before it reaches the round subroutine -| ;also ensure that a0 isn't corrupted - -| -| Set up d1 for round subroutine d1 contains the PREC/MODE -| information respectively on upper/lower register halves. -| - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR -| ;mode in lower d1 - addl (%a7)+,%d1 |merge PREC/MODE -| -| WARNING: a0 and d0 are assumed to be intact between the denorm and -| round subroutines. All code between these two subroutines -| must not corrupt a0 and d0. -| -| -| Perform Round -| Input: a0 points to input operand -| d0{31:29} has guard, round, sticky -| d1{01:00} has rounding mode -| d1{17:16} has rounding precision -| Output: a0 points to rounded operand -| - - bsrl round |returns rounded denorm at (a0) -| -| Differentiate between store to memory vs. store to register -| -unf_store: - bsrl g_opcls |returns opclass in d0{2:0} - cmpib #0x3,%d0 - bnes not_opc011 -| -| At this point, a store to memory is pending -| -opc011: - bsrl g_dfmtou - tstb %d0 - beqs ext_opc011 |If extended, do not subtract -| ;If destination format is sgl/dbl, - tstb LOCAL_HI(%a0) |If rounded result is normal,don't -| ;subtract - bmis ext_opc011 - subqw #1,LOCAL_EX(%a0) |account for denorm bias vs. -| ;normalized bias -| ; normalized denormalized -| ;single $7f $7e -| ;double $3ff $3fe -| -ext_opc011: - bsrl store |stores to memory - bras unf_done |finish up - -| -| At this point, a store to a float register is pending -| -not_opc011: - bsrl store |stores to float register -| ;a0 is not corrupted on a store to a -| ;float register. -| -| Set the condition codes according to result -| - tstl LOCAL_HI(%a0) |check upper mantissa - bnes ck_sgn - tstl LOCAL_LO(%a0) |check lower mantissa - bnes ck_sgn - bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero -ck_sgn: - btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit - beqs unf_done - bsetb #neg_bit,FPSR_CC(%a6) - -| -| Finish. -| -unf_done: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs no_aunfl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_aunfl: - rts - - |end diff --git a/arch/m68k/fpsp040/x_unimp.S b/arch/m68k/fpsp040/x_unimp.S deleted file mode 100644 index 6f382b21228b68c568e63c8567c7ca27873d78ae..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unimp.S +++ /dev/null @@ -1,76 +0,0 @@ -| -| x_unimp.sa 3.3 7/1/91 -| -| fpsp_unimp --- FPSP handler for unimplemented instruction -| exception. -| -| Invoked when the user program encounters a floating-point -| op-code that hardware does not support. Trap vector# 11 -| (See table 8-1 MC68030 User's Manual). -| -| -| Note: An fsave for an unimplemented inst. will create a short -| fsave stack. -| -| Input: 1. Six word stack frame for unimplemented inst, four word -| for illegal -| (See table 8-7 MC68030 User's Manual). -| 2. Unimp (short) fsave state frame created here by fsave -| instruction. -| -| -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNIMP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_op - |xref do_func - |xref sto_res - |xref gen_except - |xref fpsp_fmt_error - - .global fpsp_unimp - .global uni_2 -fpsp_unimp: - link %a6,#-LOCAL_SIZE - fsave -(%a7) -uni_2: - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - moveb (%a7),%d0 |test for valid version num - andib #0xf0,%d0 |test for $4x - cmpib #VER_4,%d0 |must be $4x or exit - bnel fpsp_fmt_error -| -| Temporary D25B Fix -| The following lines are used to ensure that the FPSR -| exception byte and condition codes are clear before proceeding -| - movel USER_FPSR(%a6),%d0 - andl #0xFF00FF,%d0 |clear all but accrued exceptions - movel %d0,USER_FPSR(%a6) - fmovel #0,%FPSR |clear all user bits - fmovel #0,%FPCR |clear all user exceptions for FPSP - - clrb UFLG_TMP(%a6) |clr flag for unsupp data - - bsrl get_op |go get operand(s) - clrb STORE_FLG(%a6) - bsrl do_func |do the function - fsave -(%a7) |capture possible exc state - tstb STORE_FLG(%a6) - bnes no_store |if STORE_FLG is set, no store - bsrl sto_res |store the result in user space -no_store: - bral gen_except |post any exceptions and return - - |end diff --git a/arch/m68k/fpsp040/x_unsupp.S b/arch/m68k/fpsp040/x_unsupp.S deleted file mode 100644 index d7cf46208c62900833e892c1e09f9d3784de7b3d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unsupp.S +++ /dev/null @@ -1,82 +0,0 @@ -| -| x_unsupp.sa 3.3 7/1/91 -| -| fpsp_unsupp --- FPSP handler for unsupported data type exception -| -| Trap vector #55 (See table 8-1 Mc68030 User's manual). -| Invoked when the user program encounters a data format (packed) that -| hardware does not support or a data type (denormalized numbers or un- -| normalized numbers). -| Normalizes denorms and unnorms, unpacks packed numbers then stores -| them back into the machine to let the 040 finish the operation. -| -| Unsupp calls two routines: -| 1. get_op - gets the operand(s) -| 2. res_func - restore the function back into the 040 or -| if fmove.p fpm, then pack source (fpm) -| and store in users memory . -| -| Input: Long fsave stack frame -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNSUPP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_op - |xref res_func - |xref gen_except - |xref fpsp_fmt_error - - .global fpsp_unsupp -fpsp_unsupp: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - - - moveb (%a7),VER_TMP(%a6) |save version number - moveb (%a7),%d0 |test for valid version num - andib #0xf0,%d0 |test for $4x - cmpib #VER_4,%d0 |must be $4x or exit - bnel fpsp_fmt_error - - fmovel #0,%FPSR |clear all user status bits - fmovel #0,%FPCR |clear all user control bits -| -| The following lines are used to ensure that the FPSR -| exception byte and condition codes are clear before proceeding, -| except in the case of fmove, which leaves the cc's intact. 
-| -unsupp_con: - movel USER_FPSR(%a6),%d1 - btst #5,CMDREG1B(%a6) |looking for fmove out - bne fmove_con - andl #0xFF00FF,%d1 |clear all but aexcs and qbyte - bras end_fix -fmove_con: - andl #0x0FFF40FF,%d1 |clear all but cc's, snan bit, aexcs, and qbyte -end_fix: - movel %d1,USER_FPSR(%a6) - - st UFLG_TMP(%a6) |set flag for unsupp data - - bsrl get_op |everything okay, go get operand(s) - bsrl res_func |fix up stack frame so can restore it - clrl -(%a7) - moveb VER_TMP(%a6),(%a7) |move idle fmt word to top of stack - bral gen_except -| - |end diff --git a/arch/m68k/hp300/reboot.S b/arch/m68k/hp300/reboot.S deleted file mode 100644 index 52eb852e6b0444618c2719bcd4843bbb2405656f..0000000000000000000000000000000000000000 --- a/arch/m68k/hp300/reboot.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * linux/arch/m68k/hp300/reboot.S - * - * Copyright (C) 1998 Philip Blundell - * - * Do the dirty work of rebooting the machine. Basically we need to undo all the - * good stuff that head.S did when we started up. The caches and MMU must be - * disabled and then we jump back to the PROM. This is a bit gruesome but we put - * a brave face on it. - */ - -/* XXX Doesn't work yet. Not sure why and can't be bothered to fix it at the moment. */ - - .globl hp300_reset -hp300_reset: - jmp hp300_reset diff --git a/arch/m68k/ifpsp060/fskeleton.S b/arch/m68k/ifpsp060/fskeleton.S deleted file mode 100644 index 0a1ae4f44130f4a1a8ed502314431f68d51eb360..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/fskeleton.S +++ /dev/null @@ -1,342 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. -| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. 
-|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| fskeleton.s -| -| This file contains: -| (1) example "Call-out"s -| (2) example package entry code -| (3) example "Call-out" table -| - -#include - -|################################ -| (1) EXAMPLE CALL-OUTS # -| # -| _060_fpsp_done() # -| _060_real_ovfl() # -| _060_real_unfl() # -| _060_real_operr() # -| _060_real_snan() # -| _060_real_dz() # -| _060_real_inex() # -| _060_real_bsun() # -| _060_real_fline() # -| _060_real_fpu_disabled() # -| _060_real_trap() # -|################################ - -| -| _060_fpsp_done(): -| -| This is the main exit point for the 68060 Floating-Point -| Software Package. For a normal exit, all 060FPSP routines call this -| routine. The operating system can do system dependent clean-up or -| simply execute an "rte" as with the sample code below. -| - .global _060_fpsp_done -_060_fpsp_done: - bral _060_isp_done | do the same as isp_done - -| -| _060_real_ovfl(): -| -| This is the exit point for the 060FPSP when an enabled overflow exception -| is present. The routine below should point to the operating system handler -| for enabled overflow conditions. The exception stack frame is an overflow -| stack frame. The FP state frame holds the EXCEPTIONAL OPERAND. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_ovfl -_060_real_ovfl: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - - -| -| _060_real_unfl(): -| -| This is the exit point for the 060FPSP when an enabled underflow exception -| is present. The routine below should point to the operating system handler -| for enabled underflow conditions. The exception stack frame is an underflow -| stack frame. The FP state frame holds the EXCEPTIONAL OPERAND. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_unfl -_060_real_unfl: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_operr(): -| -| This is the exit point for the 060FPSP when an enabled operand error exception -| is present. The routine below should point to the operating system handler -| for enabled operand error exceptions. The exception stack frame is an operand error -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_operr -_060_real_operr: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_snan(): -| -| This is the exit point for the 060FPSP when an enabled signalling NaN exception -| is present. The routine below should point to the operating system handler -| for enabled signalling NaN exceptions. The exception stack frame is a signalling NaN -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_snan -_060_real_snan: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_dz(): -| -| This is the exit point for the 060FPSP when an enabled divide-by-zero exception -| is present. The routine below should point to the operating system handler -| for enabled divide-by-zero exceptions. 
The exception stack frame is a divide-by-zero -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_dz -_060_real_dz: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_inex(): -| -| This is the exit point for the 060FPSP when an enabled inexact exception -| is present. The routine below should point to the operating system handler -| for enabled inexact exceptions. The exception stack frame is an inexact -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_inex -_060_real_inex: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_bsun(): -| -| This is the exit point for the 060FPSP when an enabled bsun exception -| is present. The routine below should point to the operating system handler -| for enabled bsun exceptions. The exception stack frame is a bsun -| stack frame. -| -| The sample routine below clears the exception status bit, clears the NaN -| bit in the FPSR, and does an "rte". The instruction that caused the -| bsun will now be re-executed but with the NaN FPSR bit cleared. -| - .global _060_real_bsun -_060_real_bsun: -| fsave -(%sp) - - fmove.l %fpsr,-(%sp) - andi.b #0xfe,(%sp) - fmove.l (%sp)+,%fpsr - - bral trap | jump to trap handler - -| -| _060_real_fline(): -| -| This is the exit point for the 060FPSP when an F-Line Illegal exception is -| encountered. Three different types of exceptions can enter the F-Line exception -| vector number 11: FP Unimplemented Instructions, FP implemented instructions when -| the FPU is disabled, and F-Line Illegal instructions. The 060FPSP module -| _fpsp_fline() distinguishes between the three and acts appropriately. F-Line -| Illegals branch here. -| - .global _060_real_fline -_060_real_fline: - bral trap | jump to trap handler - -| -| _060_real_fpu_disabled(): -| -| This is the exit point for the 060FPSP when an FPU disabled exception is -| encountered. Three different types of exceptions can enter the F-Line exception -| vector number 11: FP Unimplemented Instructions, FP implemented instructions when -| the FPU is disabled, and F-Line Illegal instructions. The 060FPSP module -| _fpsp_fline() distinguishes between the three and acts appropriately. FPU disabled -| exceptions branch here. -| -| The sample code below enables the FPU, sets the PC field in the exception stack -| frame to the PC of the instruction causing the exception, and does an "rte". -| The execution of the instruction then proceeds with an enabled floating-point -| unit. -| - .global _060_real_fpu_disabled -_060_real_fpu_disabled: - move.l %d0,-(%sp) | enabled the fpu - .long 0x4E7A0808 |movec pcr,%d0 - bclr #0x1,%d0 - .long 0x4E7B0808 |movec %d0,pcr - move.l (%sp)+,%d0 - - move.l 0xc(%sp),0x2(%sp) | set "Current PC" - rte - -| -| _060_real_trap(): -| -| This is the exit point for the 060FPSP when an emulated "ftrapcc" instruction -| discovers that the trap condition is true and it should branch to the operating -| system handler for the trap exception vector number 7. -| -| The sample code below simply executes an "rte". 
-| - .global _060_real_trap -_060_real_trap: - bral trap | jump to trap handler - -|############################################################################ - -|################################# -| (2) EXAMPLE PACKAGE ENTRY CODE # -|################################# - - .global _060_fpsp_snan -_060_fpsp_snan: - bra.l _FP_CALL_TOP+0x80+0x00 - - .global _060_fpsp_operr -_060_fpsp_operr: - bra.l _FP_CALL_TOP+0x80+0x08 - - .global _060_fpsp_ovfl -_060_fpsp_ovfl: - bra.l _FP_CALL_TOP+0x80+0x10 - - .global _060_fpsp_unfl -_060_fpsp_unfl: - bra.l _FP_CALL_TOP+0x80+0x18 - - .global _060_fpsp_dz -_060_fpsp_dz: - bra.l _FP_CALL_TOP+0x80+0x20 - - .global _060_fpsp_inex -_060_fpsp_inex: - bra.l _FP_CALL_TOP+0x80+0x28 - - .global _060_fpsp_fline -_060_fpsp_fline: - bra.l _FP_CALL_TOP+0x80+0x30 - - .global _060_fpsp_unsupp -_060_fpsp_unsupp: - bra.l _FP_CALL_TOP+0x80+0x38 - - .global _060_fpsp_effadd -_060_fpsp_effadd: - bra.l _FP_CALL_TOP+0x80+0x40 - -|############################################################################ - -|############################### -| (3) EXAMPLE CALL-OUT SECTION # -|############################### - -| The size of this section MUST be 128 bytes!!! - -_FP_CALL_TOP: - .long _060_real_bsun - _FP_CALL_TOP - .long _060_real_snan - _FP_CALL_TOP - .long _060_real_operr - _FP_CALL_TOP - .long _060_real_ovfl - _FP_CALL_TOP - .long _060_real_unfl - _FP_CALL_TOP - .long _060_real_dz - _FP_CALL_TOP - .long _060_real_inex - _FP_CALL_TOP - .long _060_real_fline - _FP_CALL_TOP - .long _060_real_fpu_disabled - _FP_CALL_TOP - .long _060_real_trap - _FP_CALL_TOP - .long _060_real_trace - _FP_CALL_TOP - .long _060_real_access - _FP_CALL_TOP - .long _060_fpsp_done - _FP_CALL_TOP - - .long 0x00000000, 0x00000000, 0x00000000 - - .long _060_imem_read - _FP_CALL_TOP - .long _060_dmem_read - _FP_CALL_TOP - .long _060_dmem_write - _FP_CALL_TOP - .long _060_imem_read_word - _FP_CALL_TOP - .long _060_imem_read_long - _FP_CALL_TOP - .long _060_dmem_read_byte - _FP_CALL_TOP - .long _060_dmem_read_word - _FP_CALL_TOP - .long _060_dmem_read_long - _FP_CALL_TOP - .long _060_dmem_write_byte - _FP_CALL_TOP - .long _060_dmem_write_word - _FP_CALL_TOP - .long _060_dmem_write_long - _FP_CALL_TOP - - .long 0x00000000 - - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -|############################################################################ - -| 060 FPSP KERNEL PACKAGE NEEDS TO GO HERE!!! - -#include "fpsp.sa" diff --git a/arch/m68k/ifpsp060/iskeleton.S b/arch/m68k/ifpsp060/iskeleton.S deleted file mode 100644 index 91a9c65fee8a9568b1bd1be5256b767d31476d9f..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/iskeleton.S +++ /dev/null @@ -1,347 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 
-| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| iskeleton.s -| -| This file contains: -| (1) example "Call-out"s -| (2) example package entry code -| (3) example "Call-out" table -| - -#include -#include -#include - - -|################################ -| (1) EXAMPLE CALL-OUTS # -| # -| _060_isp_done() # -| _060_real_chk() # -| _060_real_divbyzero() # -| # -| _060_real_cas() # -| _060_real_cas2() # -| _060_real_lock_page() # -| _060_real_unlock_page() # -|################################ - -| -| _060_isp_done(): -| -| This is and example main exit point for the Unimplemented Integer -| Instruction exception handler. For a normal exit, the -| _isp_unimp() branches to here so that the operating system -| can do any clean-up desired. The stack frame is the -| Unimplemented Integer Instruction stack frame with -| the PC pointing to the instruction following the instruction -| just emulated. -| To simply continue execution at the next instruction, just -| do an "rte". -| -| Linux/68k: If returning to user space, check for needed reselections. - - .global _060_isp_done -_060_isp_done: - btst #0x5,%sp@ | supervisor bit set in saved SR? - beq .Lnotkern - rte -.Lnotkern: - SAVE_ALL_INT - GET_CURRENT(%d0) - | deliver signals, reschedule etc.. - jra ret_from_exception - -| -| _060_real_chk(): -| -| This is an alternate exit point for the Unimplemented Integer -| Instruction exception handler. If the instruction was a "chk2" -| and the operand was out of bounds, then _isp_unimp() creates -| a CHK exception stack frame from the Unimplemented Integer Instrcution -| stack frame and branches to this routine. -| -| Linux/68k: commented out test for tracing - - .global _060_real_chk -_060_real_chk: -| tst.b (%sp) | is tracing enabled? -| bpls real_chk_end | no - -| -| CHK FRAME TRACE FRAME -| ***************** ***************** -| * Current PC * * Current PC * -| ***************** ***************** -| * 0x2 * 0x018 * * 0x2 * 0x024 * -| ***************** ***************** -| * Next * * Next * -| * PC * * PC * -| ***************** ***************** -| * SR * * SR * -| ***************** ***************** -| -| move.b #0x24,0x7(%sp) | set trace vecno -| bral _060_real_trace - -real_chk_end: - bral trap | jump to trap handler - -| -| _060_real_divbyzero: -| -| This is an alternate exit point for the Unimplemented Integer -| Instruction exception handler isp_unimp(). If the instruction is a 64-bit -| integer divide where the source operand is a zero, then the _isp_unimp() -| creates a Divide-by-zero exception stack frame from the Unimplemented -| Integer Instruction stack frame and branches to this routine. -| -| Remember that a trace exception may be pending. 
The code below performs -| no action associated with the "chk" exception. If tracing is enabled, -| then it create a Trace exception stack frame from the "chk" exception -| stack frame and branches to the _real_trace() entry point. -| -| Linux/68k: commented out test for tracing - - .global _060_real_divbyzero -_060_real_divbyzero: -| tst.b (%sp) | is tracing enabled? -| bpls real_divbyzero_end | no - -| -| DIVBYZERO FRAME TRACE FRAME -| ***************** ***************** -| * Current PC * * Current PC * -| ***************** ***************** -| * 0x2 * 0x014 * * 0x2 * 0x024 * -| ***************** ***************** -| * Next * * Next * -| * PC * * PC * -| ***************** ***************** -| * SR * * SR * -| ***************** ***************** -| -| move.b #0x24,0x7(%sp) | set trace vecno -| bral _060_real_trace - -real_divbyzero_end: - bral trap | jump to trap handler - -|########################## - -| -| _060_real_cas(): -| -| Entry point for the selected cas emulation code implementation. -| If the implementation provided by the 68060ISP is sufficient, -| then this routine simply re-enters the package through _isp_cas. -| - .global _060_real_cas -_060_real_cas: - bral _I_CALL_TOP+0x80+0x08 - -| -| _060_real_cas2(): -| -| Entry point for the selected cas2 emulation code implementation. -| If the implementation provided by the 68060ISP is sufficient, -| then this routine simply re-enters the package through _isp_cas2. -| - .global _060_real_cas2 -_060_real_cas2: - bral _I_CALL_TOP+0x80+0x10 - -| -| _060_lock_page(): -| -| Entry point for the operating system`s routine to "lock" a page -| from being paged out. This routine is needed by the cas/cas2 -| algorithms so that no page faults occur within the "core" code -| region. Note: the routine must lock two pages if the operand -| spans two pages. -| NOTE: THE ROUTINE SHOULD RETURN AN FSLW VALUE IN D0 ON FAILURE -| SO THAT THE 060SP CAN CREATE A PROPER ACCESS ERROR FRAME. -| Arguments: -| a0 = operand address -| d0 = `xxxxxxff -> supervisor; `xxxxxx00 -> user -| d1 = `xxxxxxff -> longword; `xxxxxx00 -> word -| Expected outputs: -| d0 = 0 -> success; non-zero -> failure -| -| Linux/m68k: Make sure the page is properly paged in, so we use -| plpaw and handle any exception here. The kernel must not be -| preempted until _060_unlock_page(), so that the page stays mapped. -| - .global _060_real_lock_page -_060_real_lock_page: - move.l %d2,-(%sp) - | load sfc/dfc - tst.b %d0 - jne 1f - moveq #1,%d0 - jra 2f -1: moveq #5,%d0 -2: movec.l %dfc,%d2 - movec.l %d0,%dfc - movec.l %d0,%sfc - - clr.l %d0 - | prefetch address - .chip 68060 - move.l %a0,%a1 -1: plpaw (%a1) - addq.w #1,%a0 - tst.b %d1 - jeq 2f - addq.w #2,%a0 -2: plpaw (%a0) -3: .chip 68k - - | restore sfc/dfc - movec.l %d2,%dfc - movec.l %d2,%sfc - move.l (%sp)+,%d2 - rts - -.section __ex_table,"a" - .align 4 - .long 1b,11f - .long 2b,21f -.previous -.section .fixup,"ax" - .even -11: move.l #0x020003c0,%d0 - or.l %d2,%d0 - swap %d0 - jra 3b -21: move.l #0x02000bc0,%d0 - or.l %d2,%d0 - swap %d0 - jra 3b -.previous - -| -| _060_unlock_page(): -| -| Entry point for the operating system`s routine to "unlock" a -| page that has been "locked" previously with _real_lock_page. -| Note: the routine must unlock two pages if the operand spans -| two pages. -| Arguments: -| a0 = operand address -| d0 = `xxxxxxff -> supervisor; `xxxxxx00 -> user -| d1 = `xxxxxxff -> longword; `xxxxxx00 -> word -| -| Linux/m68k: perhaps reenable preemption here... 
- - .global _060_real_unlock_page -_060_real_unlock_page: - clr.l %d0 - rts - -|########################################################################### - -|################################# -| (2) EXAMPLE PACKAGE ENTRY CODE # -|################################# - - .global _060_isp_unimp -_060_isp_unimp: - bral _I_CALL_TOP+0x80+0x00 - - .global _060_isp_cas -_060_isp_cas: - bral _I_CALL_TOP+0x80+0x08 - - .global _060_isp_cas2 -_060_isp_cas2: - bral _I_CALL_TOP+0x80+0x10 - - .global _060_isp_cas_finish -_060_isp_cas_finish: - bra.l _I_CALL_TOP+0x80+0x18 - - .global _060_isp_cas2_finish -_060_isp_cas2_finish: - bral _I_CALL_TOP+0x80+0x20 - - .global _060_isp_cas_inrange -_060_isp_cas_inrange: - bral _I_CALL_TOP+0x80+0x28 - - .global _060_isp_cas_terminate -_060_isp_cas_terminate: - bral _I_CALL_TOP+0x80+0x30 - - .global _060_isp_cas_restart -_060_isp_cas_restart: - bral _I_CALL_TOP+0x80+0x38 - -|########################################################################### - -|############################### -| (3) EXAMPLE CALL-OUT SECTION # -|############################### - -| The size of this section MUST be 128 bytes!!! - -_I_CALL_TOP: - .long _060_real_chk - _I_CALL_TOP - .long _060_real_divbyzero - _I_CALL_TOP - .long _060_real_trace - _I_CALL_TOP - .long _060_real_access - _I_CALL_TOP - .long _060_isp_done - _I_CALL_TOP - - .long _060_real_cas - _I_CALL_TOP - .long _060_real_cas2 - _I_CALL_TOP - .long _060_real_lock_page - _I_CALL_TOP - .long _060_real_unlock_page - _I_CALL_TOP - - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - .long 0x00000000, 0x00000000, 0x00000000 - - .long _060_imem_read - _I_CALL_TOP - .long _060_dmem_read - _I_CALL_TOP - .long _060_dmem_write - _I_CALL_TOP - .long _060_imem_read_word - _I_CALL_TOP - .long _060_imem_read_long - _I_CALL_TOP - .long _060_dmem_read_byte - _I_CALL_TOP - .long _060_dmem_read_word - _I_CALL_TOP - .long _060_dmem_read_long - _I_CALL_TOP - .long _060_dmem_write_byte - _I_CALL_TOP - .long _060_dmem_write_word - _I_CALL_TOP - .long _060_dmem_write_long - _I_CALL_TOP - - .long 0x00000000 - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -|########################################################################### - -| 060 INTEGER KERNEL PACKAGE MUST GO HERE!!! -#include "isp.sa" diff --git a/arch/m68k/ifpsp060/os.S b/arch/m68k/ifpsp060/os.S deleted file mode 100644 index 7a0d6e42806656a59f7b5da80f019aed1d1626c6..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/os.S +++ /dev/null @@ -1,396 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 
-| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| os.s -| -| This file contains: -| - example "Call-Out"s required by both the ISP and FPSP. -| - -#include - -|################################ -| EXAMPLE CALL-OUTS # -| # -| _060_dmem_write() # -| _060_dmem_read() # -| _060_imem_read() # -| _060_dmem_read_byte() # -| _060_dmem_read_word() # -| _060_dmem_read_long() # -| _060_imem_read_word() # -| _060_imem_read_long() # -| _060_dmem_write_byte() # -| _060_dmem_write_word() # -| _060_dmem_write_long() # -| # -| _060_real_trace() # -| _060_real_access() # -|################################ - -| -| Each IO routine checks to see if the memory write/read is to/from user -| or supervisor application space. The examples below use simple "move" -| instructions for supervisor mode applications and call _copyin()/_copyout() -| for user mode applications. -| When installing the 060SP, the _copyin()/_copyout() equivalents for a -| given operating system should be substituted. -| -| The addresses within the 060SP are guaranteed to be on the stack. -| The result is that Unix processes are allowed to sleep as a consequence -| of a page fault during a _copyout. -| -| Linux/68k: The _060_[id]mem_{read,write}_{byte,word,long} functions -| (i.e. all the known length <= 4) are implemented by single moves -| statements instead of (more expensive) copy{in,out} calls, if -| working in user space - -| -| _060_dmem_write(): -| -| Writes to data memory while in supervisor mode. -| -| INPUTS: -| a0 - supervisor source address -| a1 - user destination address -| d0 - number of bytes to write -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write -_060_dmem_write: - subq.l #1,%d0 - btst #0x5,0x4(%a6) | check for supervisor state - beqs user_write -super_write: - move.b (%a0)+,(%a1)+ | copy 1 byte - dbra %d0,super_write | quit if --ctr < 0 - clr.l %d1 | return success - rts -user_write: - move.b (%a0)+,%d1 | copy 1 byte -copyoutae: - movs.b %d1,(%a1)+ - dbra %d0,user_write | quit if --ctr < 0 - clr.l %d1 | return success - rts - -| -| _060_imem_read(), _060_dmem_read(): -| -| Reads from data/instruction memory while in supervisor mode. 
-| -| INPUTS: -| a0 - user source address -| a1 - supervisor destination address -| d0 - number of bytes to read -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_imem_read - .global _060_dmem_read -_060_imem_read: -_060_dmem_read: - subq.l #1,%d0 - btst #0x5,0x4(%a6) | check for supervisor state - beqs user_read -super_read: - move.b (%a0)+,(%a1)+ | copy 1 byte - dbra %d0,super_read | quit if --ctr < 0 - clr.l %d1 | return success - rts -user_read: -copyinae: - movs.b (%a0)+,%d1 - move.b %d1,(%a1)+ | copy 1 byte - dbra %d0,user_read | quit if --ctr < 0 - clr.l %d1 | return success - rts - -| -| _060_dmem_read_byte(): -| -| Read a data byte from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data byte in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_byte -_060_dmem_read_byte: - clr.l %d0 | clear whole longword - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrbs | supervisor -dmrbuae:movs.b (%a0),%d0 | fetch user byte - rts -dmrbs: move.b (%a0),%d0 | fetch super byte - rts - -| -| _060_dmem_read_word(): -| -| Read a data word from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data word in d0 -| d1 - 0 = success, !0 = failure -| -| _060_imem_read_word(): -| -| Read an instruction word from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - instruction word in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_word - .global _060_imem_read_word -_060_dmem_read_word: -_060_imem_read_word: - clr.l %d1 | assume success - clr.l %d0 | clear whole longword - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrws | supervisor -dmrwuae:movs.w (%a0), %d0 | fetch user word - rts -dmrws: move.w (%a0), %d0 | fetch super word - rts - -| -| _060_dmem_read_long(): -| - -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data longword in d0 -| d1 - 0 = success, !0 = failure -| -| _060_imem_read_long(): -| -| Read an instruction longword from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - instruction longword in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_long - .global _060_imem_read_long -_060_dmem_read_long: -_060_imem_read_long: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrls | supervisor -dmrluae:movs.l (%a0),%d0 | fetch user longword - rts -dmrls: move.l (%a0),%d0 | fetch super longword - rts - -| -| _060_dmem_write_byte(): -| -| Write a data byte to user memory. -| -| INPUTS: -| a0 - user destination address -| d0 - data byte in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_byte -_060_dmem_write_byte: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwbs | supervisor -dmwbuae:movs.b %d0,(%a0) | store user byte - rts -dmwbs: move.b %d0,(%a0) | store super byte - rts - -| -| _060_dmem_write_word(): -| -| Write a data word to user memory. 
-| -| INPUTS: -| a0 - user destination address -| d0 - data word in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_word -_060_dmem_write_word: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwws | supervisor -dmwwu: -dmwwuae:movs.w %d0,(%a0) | store user word - bras dmwwr -dmwws: move.w %d0,(%a0) | store super word -dmwwr: clr.l %d1 | return success - rts - -| -| _060_dmem_write_long(): -| -| Write a data longword to user memory. -| -| INPUTS: -| a0 - user destination address -| d0 - data longword in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_long -_060_dmem_write_long: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwls | supervisor -dmwluae:movs.l %d0,(%a0) | store user longword - rts -dmwls: move.l %d0,(%a0) | store super longword - rts - - -#if 0 -|############################################### - -| -| Use these routines if your kernel doesn't have _copyout/_copyin equivalents. -| Assumes that D0/D1/A0/A1 are scratch registers. The _copyin/_copyout -| below assume that the SFC/DFC have been set previously. -| -| Linux/68k: These are basically non-inlined versions of -| memcpy_{to,from}fs, but without long-transfer optimization -| Note: Assumed that SFC/DFC are pointing correctly to user data -| space... Should be right, or are there any exceptions? - -| -| int _copyout(supervisor_addr, user_addr, nbytes) -| - .global _copyout -_copyout: - move.l 4(%sp),%a0 | source - move.l 8(%sp),%a1 | destination - move.l 12(%sp),%d0 | count - subq.l #1,%d0 -moreout: - move.b (%a0)+,%d1 | fetch supervisor byte -copyoutae: - movs.b %d1,(%a1)+ | store user byte - dbra %d0,moreout | are we through yet? - moveq #0,%d0 | return success - rts - -| -| int _copyin(user_addr, supervisor_addr, nbytes) -| - .global _copyin -_copyin: - move.l 4(%sp),%a0 | source - move.l 8(%sp),%a1 | destination - move.l 12(%sp),%d0 | count - subq.l #1,%d0 -morein: -copyinae: - movs.b (%a0)+,%d1 | fetch user byte - move.b %d1,(%a1)+ | write supervisor byte - dbra %d0,morein | are we through yet? - moveq #0,%d0 | return success - rts -#endif - -|########################################################################### - -| -| _060_real_trace(): -| -| This is the exit point for the 060FPSP when an instruction is being traced -| and there are no other higher priority exceptions pending for this instruction -| or they have already been processed. -| -| The sample code below simply executes an "rte". -| - .global _060_real_trace -_060_real_trace: - bral trap - -| -| _060_real_access(): -| -| This is the exit point for the 060FPSP when an access error exception -| is encountered. The routine below should point to the operating system -| handler for access error exceptions. The exception stack frame is an -| 8-word access error frame. -| -| The sample routine below simply executes an "rte" instruction which -| is most likely the incorrect thing to do and could put the system -| into an infinite loop. 
-| - .global _060_real_access -_060_real_access: - bral buserr - - - -| Execption handling for movs access to illegal memory - .section .fixup,#alloc,#execinstr - .even -1: moveq #-1,%d1 - rts -.section __ex_table,#alloc - .align 4 - .long dmrbuae,1b - .long dmrwuae,1b - .long dmrluae,1b - .long dmwbuae,1b - .long dmwwuae,1b - .long dmwluae,1b - .long copyoutae,1b - .long copyinae,1b - .text diff --git a/arch/m68k/ifpsp060/src/fplsp.S b/arch/m68k/ifpsp060/src/fplsp.S deleted file mode 100644 index 3b7ea2dc9f1bbf098fe6cf03fdffeb1bb934e5ab..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/src/fplsp.S +++ /dev/null @@ -1,10980 +0,0 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -M68000 Hi-Performance Microprocessor Division -M68060 Software Package -Production Release P1.00 -- October 10, 1994 - -M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. - -THE SOFTWARE is provided on an "AS IS" basis and without warranty. -To the maximum extent permitted by applicable law, -MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -and any warranty against infringement with regard to the SOFTWARE -(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. - -To the maximum extent permitted by applicable law, -IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. - -You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -so long as this entire notice is retained without alteration in any modified and/or -redistributed versions, and that such modified versions are clearly identified as such. -No licenses are granted by implication, estoppel or otherwise under any patents -or trademarks of Motorola, Inc. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# lfptop.s: -# This file is appended to the top of the 060ILSP package -# and contains the entry points into the package. The user, in -# effect, branches to one of the branch table entries located here. 
-# - - bra.l _facoss_ - short 0x0000 - bra.l _facosd_ - short 0x0000 - bra.l _facosx_ - short 0x0000 - - bra.l _fasins_ - short 0x0000 - bra.l _fasind_ - short 0x0000 - bra.l _fasinx_ - short 0x0000 - - bra.l _fatans_ - short 0x0000 - bra.l _fatand_ - short 0x0000 - bra.l _fatanx_ - short 0x0000 - - bra.l _fatanhs_ - short 0x0000 - bra.l _fatanhd_ - short 0x0000 - bra.l _fatanhx_ - short 0x0000 - - bra.l _fcoss_ - short 0x0000 - bra.l _fcosd_ - short 0x0000 - bra.l _fcosx_ - short 0x0000 - - bra.l _fcoshs_ - short 0x0000 - bra.l _fcoshd_ - short 0x0000 - bra.l _fcoshx_ - short 0x0000 - - bra.l _fetoxs_ - short 0x0000 - bra.l _fetoxd_ - short 0x0000 - bra.l _fetoxx_ - short 0x0000 - - bra.l _fetoxm1s_ - short 0x0000 - bra.l _fetoxm1d_ - short 0x0000 - bra.l _fetoxm1x_ - short 0x0000 - - bra.l _fgetexps_ - short 0x0000 - bra.l _fgetexpd_ - short 0x0000 - bra.l _fgetexpx_ - short 0x0000 - - bra.l _fgetmans_ - short 0x0000 - bra.l _fgetmand_ - short 0x0000 - bra.l _fgetmanx_ - short 0x0000 - - bra.l _flog10s_ - short 0x0000 - bra.l _flog10d_ - short 0x0000 - bra.l _flog10x_ - short 0x0000 - - bra.l _flog2s_ - short 0x0000 - bra.l _flog2d_ - short 0x0000 - bra.l _flog2x_ - short 0x0000 - - bra.l _flogns_ - short 0x0000 - bra.l _flognd_ - short 0x0000 - bra.l _flognx_ - short 0x0000 - - bra.l _flognp1s_ - short 0x0000 - bra.l _flognp1d_ - short 0x0000 - bra.l _flognp1x_ - short 0x0000 - - bra.l _fmods_ - short 0x0000 - bra.l _fmodd_ - short 0x0000 - bra.l _fmodx_ - short 0x0000 - - bra.l _frems_ - short 0x0000 - bra.l _fremd_ - short 0x0000 - bra.l _fremx_ - short 0x0000 - - bra.l _fscales_ - short 0x0000 - bra.l _fscaled_ - short 0x0000 - bra.l _fscalex_ - short 0x0000 - - bra.l _fsins_ - short 0x0000 - bra.l _fsind_ - short 0x0000 - bra.l _fsinx_ - short 0x0000 - - bra.l _fsincoss_ - short 0x0000 - bra.l _fsincosd_ - short 0x0000 - bra.l _fsincosx_ - short 0x0000 - - bra.l _fsinhs_ - short 0x0000 - bra.l _fsinhd_ - short 0x0000 - bra.l _fsinhx_ - short 0x0000 - - bra.l _ftans_ - short 0x0000 - bra.l _ftand_ - short 0x0000 - bra.l _ftanx_ - short 0x0000 - - bra.l _ftanhs_ - short 0x0000 - bra.l _ftanhd_ - short 0x0000 - bra.l _ftanhx_ - short 0x0000 - - bra.l _ftentoxs_ - short 0x0000 - bra.l _ftentoxd_ - short 0x0000 - bra.l _ftentoxx_ - short 0x0000 - - bra.l _ftwotoxs_ - short 0x0000 - bra.l _ftwotoxd_ - short 0x0000 - bra.l _ftwotoxx_ - short 0x0000 - - bra.l _fabss_ - short 0x0000 - bra.l _fabsd_ - short 0x0000 - bra.l _fabsx_ - short 0x0000 - - bra.l _fadds_ - short 0x0000 - bra.l _faddd_ - short 0x0000 - bra.l _faddx_ - short 0x0000 - - bra.l _fdivs_ - short 0x0000 - bra.l _fdivd_ - short 0x0000 - bra.l _fdivx_ - short 0x0000 - - bra.l _fints_ - short 0x0000 - bra.l _fintd_ - short 0x0000 - bra.l _fintx_ - short 0x0000 - - bra.l _fintrzs_ - short 0x0000 - bra.l _fintrzd_ - short 0x0000 - bra.l _fintrzx_ - short 0x0000 - - bra.l _fmuls_ - short 0x0000 - bra.l _fmuld_ - short 0x0000 - bra.l _fmulx_ - short 0x0000 - - bra.l _fnegs_ - short 0x0000 - bra.l _fnegd_ - short 0x0000 - bra.l _fnegx_ - short 0x0000 - - bra.l _fsqrts_ - short 0x0000 - bra.l _fsqrtd_ - short 0x0000 - bra.l _fsqrtx_ - short 0x0000 - - bra.l _fsubs_ - short 0x0000 - bra.l _fsubd_ - short 0x0000 - bra.l _fsubx_ - short 0x0000 - -# leave room for future possible additions - align 0x400 - -# -# This file contains a set of define statements for constants -# in order to promote readability within the corecode itself. 
-# - -set LOCAL_SIZE, 192 # stack frame size(bytes) -set LV, -LOCAL_SIZE # stack offset - -set EXC_SR, 0x4 # stack status register -set EXC_PC, 0x6 # stack pc -set EXC_VOFF, 0xa # stacked vector offset -set EXC_EA, 0xc # stacked - -set EXC_FP, 0x0 # frame pointer - -set EXC_AREGS, -68 # offset of all address regs -set EXC_DREGS, -100 # offset of all data regs -set EXC_FPREGS, -36 # offset of all fp regs - -set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 -set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 -set EXC_A5, EXC_AREGS+(5*4) -set EXC_A4, EXC_AREGS+(4*4) -set EXC_A3, EXC_AREGS+(3*4) -set EXC_A2, EXC_AREGS+(2*4) -set EXC_A1, EXC_AREGS+(1*4) -set EXC_A0, EXC_AREGS+(0*4) -set EXC_D7, EXC_DREGS+(7*4) -set EXC_D6, EXC_DREGS+(6*4) -set EXC_D5, EXC_DREGS+(5*4) -set EXC_D4, EXC_DREGS+(4*4) -set EXC_D3, EXC_DREGS+(3*4) -set EXC_D2, EXC_DREGS+(2*4) -set EXC_D1, EXC_DREGS+(1*4) -set EXC_D0, EXC_DREGS+(0*4) - -set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 -set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 -set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) - -set FP_SCR1, LV+80 # fp scratch 1 -set FP_SCR1_EX, FP_SCR1+0 -set FP_SCR1_SGN, FP_SCR1+2 -set FP_SCR1_HI, FP_SCR1+4 -set FP_SCR1_LO, FP_SCR1+8 - -set FP_SCR0, LV+68 # fp scratch 0 -set FP_SCR0_EX, FP_SCR0+0 -set FP_SCR0_SGN, FP_SCR0+2 -set FP_SCR0_HI, FP_SCR0+4 -set FP_SCR0_LO, FP_SCR0+8 - -set FP_DST, LV+56 # fp destination operand -set FP_DST_EX, FP_DST+0 -set FP_DST_SGN, FP_DST+2 -set FP_DST_HI, FP_DST+4 -set FP_DST_LO, FP_DST+8 - -set FP_SRC, LV+44 # fp source operand -set FP_SRC_EX, FP_SRC+0 -set FP_SRC_SGN, FP_SRC+2 -set FP_SRC_HI, FP_SRC+4 -set FP_SRC_LO, FP_SRC+8 - -set USER_FPIAR, LV+40 # FP instr address register - -set USER_FPSR, LV+36 # FP status register -set FPSR_CC, USER_FPSR+0 # FPSR condition codes -set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte -set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte -set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte - -set USER_FPCR, LV+32 # FP control register -set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable -set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control - -set L_SCR3, LV+28 # integer scratch 3 -set L_SCR2, LV+24 # integer scratch 2 -set L_SCR1, LV+20 # integer scratch 1 - -set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) - -set EXC_TEMP2, LV+24 # temporary space -set EXC_TEMP, LV+16 # temporary space - -set DTAG, LV+15 # destination operand type -set STAG, LV+14 # source operand type - -set SPCOND_FLG, LV+10 # flag: special case (see below) - -set EXC_CC, LV+8 # saved condition codes -set EXC_EXTWPTR, LV+4 # saved current PC (active) -set EXC_EXTWORD, LV+2 # saved extension word -set EXC_CMDREG, LV+2 # saved extension word -set EXC_OPWORD, LV+0 # saved operation word - -################################ - -# Helpful macros - -set FTEMP, 0 # offsets within an -set FTEMP_EX, 0 # extended precision -set FTEMP_SGN, 2 # value saved in memory. -set FTEMP_HI, 4 -set FTEMP_LO, 8 -set FTEMP_GRS, 12 - -set LOCAL, 0 # offsets within an -set LOCAL_EX, 0 # extended precision -set LOCAL_SGN, 2 # value saved in memory. -set LOCAL_HI, 4 -set LOCAL_LO, 8 -set LOCAL_GRS, 12 - -set DST, 0 # offsets within an -set DST_EX, 0 # extended precision -set DST_HI, 4 # value saved in memory. -set DST_LO, 8 - -set SRC, 0 # offsets within an -set SRC_EX, 0 # extended precision -set SRC_HI, 4 # value saved in memory. 
-set SRC_LO, 8 - -set SGL_LO, 0x3f81 # min sgl prec exponent -set SGL_HI, 0x407e # max sgl prec exponent -set DBL_LO, 0x3c01 # min dbl prec exponent -set DBL_HI, 0x43fe # max dbl prec exponent -set EXT_LO, 0x0 # min ext prec exponent -set EXT_HI, 0x7ffe # max ext prec exponent - -set EXT_BIAS, 0x3fff # extended precision bias -set SGL_BIAS, 0x007f # single precision bias -set DBL_BIAS, 0x03ff # double precision bias - -set NORM, 0x00 # operand type for STAG/DTAG -set ZERO, 0x01 # operand type for STAG/DTAG -set INF, 0x02 # operand type for STAG/DTAG -set QNAN, 0x03 # operand type for STAG/DTAG -set DENORM, 0x04 # operand type for STAG/DTAG -set SNAN, 0x05 # operand type for STAG/DTAG -set UNNORM, 0x06 # operand type for STAG/DTAG - -################## -# FPSR/FPCR bits # -################## -set neg_bit, 0x3 # negative result -set z_bit, 0x2 # zero result -set inf_bit, 0x1 # infinite result -set nan_bit, 0x0 # NAN result - -set q_sn_bit, 0x7 # sign bit of quotient byte - -set bsun_bit, 7 # branch on unordered -set snan_bit, 6 # signalling NAN -set operr_bit, 5 # operand error -set ovfl_bit, 4 # overflow -set unfl_bit, 3 # underflow -set dz_bit, 2 # divide by zero -set inex2_bit, 1 # inexact result 2 -set inex1_bit, 0 # inexact result 1 - -set aiop_bit, 7 # accrued inexact operation bit -set aovfl_bit, 6 # accrued overflow bit -set aunfl_bit, 5 # accrued underflow bit -set adz_bit, 4 # accrued dz bit -set ainex_bit, 3 # accrued inexact bit - -############################# -# FPSR individual bit masks # -############################# -set neg_mask, 0x08000000 # negative bit mask (lw) -set inf_mask, 0x02000000 # infinity bit mask (lw) -set z_mask, 0x04000000 # zero bit mask (lw) -set nan_mask, 0x01000000 # nan bit mask (lw) - -set neg_bmask, 0x08 # negative bit mask (byte) -set inf_bmask, 0x02 # infinity bit mask (byte) -set z_bmask, 0x04 # zero bit mask (byte) -set nan_bmask, 0x01 # nan bit mask (byte) - -set bsun_mask, 0x00008000 # bsun exception mask -set snan_mask, 0x00004000 # snan exception mask -set operr_mask, 0x00002000 # operr exception mask -set ovfl_mask, 0x00001000 # overflow exception mask -set unfl_mask, 0x00000800 # underflow exception mask -set dz_mask, 0x00000400 # dz exception mask -set inex2_mask, 0x00000200 # inex2 exception mask -set inex1_mask, 0x00000100 # inex1 exception mask - -set aiop_mask, 0x00000080 # accrued illegal operation -set aovfl_mask, 0x00000040 # accrued overflow -set aunfl_mask, 0x00000020 # accrued underflow -set adz_mask, 0x00000010 # accrued divide by zero -set ainex_mask, 0x00000008 # accrued inexact - -###################################### -# FPSR combinations used in the FPSP # -###################################### -set dzinf_mask, inf_mask+dz_mask+adz_mask -set opnan_mask, nan_mask+operr_mask+aiop_mask -set nzi_mask, 0x01ffffff #clears N, Z, and I -set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask -set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask -set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask -set inx1a_mask, inex1_mask+ainex_mask -set inx2a_mask, inex2_mask+ainex_mask -set snaniop_mask, nan_mask+snan_mask+aiop_mask -set snaniop2_mask, snan_mask+aiop_mask -set naniop_mask, nan_mask+aiop_mask -set neginf_mask, neg_mask+inf_mask -set infaiop_mask, inf_mask+aiop_mask -set negz_mask, neg_mask+z_mask -set opaop_mask, operr_mask+aiop_mask -set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask -set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask - -######### -# misc. 
# -######### -set rnd_stky_bit, 29 # stky bit pos in longword - -set sign_bit, 0x7 # sign bit -set signan_bit, 0x6 # signalling nan bit - -set sgl_thresh, 0x3f81 # minimum sgl exponent -set dbl_thresh, 0x3c01 # minimum dbl exponent - -set x_mode, 0x0 # extended precision -set s_mode, 0x4 # single precision -set d_mode, 0x8 # double precision - -set rn_mode, 0x0 # round-to-nearest -set rz_mode, 0x1 # round-to-zero -set rm_mode, 0x2 # round-tp-minus-infinity -set rp_mode, 0x3 # round-to-plus-infinity - -set mantissalen, 64 # length of mantissa in bits - -set BYTE, 1 # len(byte) == 1 byte -set WORD, 2 # len(word) == 2 bytes -set LONG, 4 # len(longword) == 2 bytes - -set BSUN_VEC, 0xc0 # bsun vector offset -set INEX_VEC, 0xc4 # inexact vector offset -set DZ_VEC, 0xc8 # dz vector offset -set UNFL_VEC, 0xcc # unfl vector offset -set OPERR_VEC, 0xd0 # operr vector offset -set OVFL_VEC, 0xd4 # ovfl vector offset -set SNAN_VEC, 0xd8 # snan vector offset - -########################### -# SPecial CONDition FLaGs # -########################### -set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception -set fbsun_flg, 0x02 # flag bit: bsun exception -set mia7_flg, 0x04 # flag bit: (a7)+ -set mda7_flg, 0x08 # flag bit: -(a7) -set fmovm_flg, 0x40 # flag bit: fmovm instruction -set immed_flg, 0x80 # flag bit: & - -set ftrapcc_bit, 0x0 -set fbsun_bit, 0x1 -set mia7_bit, 0x2 -set mda7_bit, 0x3 -set immed_bit, 0x7 - -################################## -# TRANSCENDENTAL "LAST-OP" FLAGS # -################################## -set FMUL_OP, 0x0 # fmul instr performed last -set FDIV_OP, 0x1 # fdiv performed last -set FADD_OP, 0x2 # fadd performed last -set FMOV_OP, 0x3 # fmov performed last - -############# -# CONSTANTS # -############# -T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD -T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL - -PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 -PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - -TWOBYPI: - long 0x3FE45F30,0x6DC9C883 - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsins_ -_fsins_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L0_2s - bsr.l ssin # operand is a NORM - bra.b _L0_6s -_L0_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3s # no - bsr.l src_zero # yes - bra.b _L0_6s -_L0_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4s # no - bsr.l t_operr # yes - bra.b _L0_6s -_L0_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L0_5s # no - bsr.l src_qnan # yes - bra.b _L0_6s -_L0_5s: - bsr.l ssind # operand is a DENORM -_L0_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsind_ -_fsind_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L0_2d - bsr.l ssin # operand is a NORM - bra.b _L0_6d -_L0_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3d # no - bsr.l src_zero # yes - bra.b _L0_6d -_L0_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4d # no - bsr.l t_operr # yes - bra.b _L0_6d -_L0_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L0_5d # no - bsr.l src_qnan # yes - bra.b _L0_6d -_L0_5d: - bsr.l ssind # operand is a DENORM -_L0_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinx_ -_fsinx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L0_2x - bsr.l ssin # operand is a NORM - bra.b _L0_6x -_L0_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3x # no - bsr.l src_zero # yes - bra.b _L0_6x -_L0_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4x # no - bsr.l t_operr # yes - bra.b _L0_6x -_L0_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L0_5x # no - bsr.l src_qnan # yes - bra.b _L0_6x -_L0_5x: - bsr.l ssind # operand is a DENORM -_L0_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fcoss_ -_fcoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L1_2s - bsr.l scos # operand is a NORM - bra.b _L1_6s -_L1_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3s # no - bsr.l ld_pone # yes - bra.b _L1_6s -_L1_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4s # no - bsr.l t_operr # yes - bra.b _L1_6s -_L1_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L1_5s # no - bsr.l src_qnan # yes - bra.b _L1_6s -_L1_5s: - bsr.l scosd # operand is a DENORM -_L1_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcosd_ -_fcosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L1_2d - bsr.l scos # operand is a NORM - bra.b _L1_6d -_L1_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3d # no - bsr.l ld_pone # yes - bra.b _L1_6d -_L1_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4d # no - bsr.l t_operr # yes - bra.b _L1_6d -_L1_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L1_5d # no - bsr.l src_qnan # yes - bra.b _L1_6d -_L1_5d: - bsr.l scosd # operand is a DENORM -_L1_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcosx_ -_fcosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L1_2x - bsr.l scos # operand is a NORM - bra.b _L1_6x -_L1_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3x # no - bsr.l ld_pone # yes - bra.b _L1_6x -_L1_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4x # no - bsr.l t_operr # yes - bra.b _L1_6x -_L1_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L1_5x # no - bsr.l src_qnan # yes - bra.b _L1_6x -_L1_5x: - bsr.l scosd # operand is a DENORM -_L1_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsinhs_ -_fsinhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L2_2s - bsr.l ssinh # operand is a NORM - bra.b _L2_6s -_L2_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3s # no - bsr.l src_zero # yes - bra.b _L2_6s -_L2_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4s # no - bsr.l src_inf # yes - bra.b _L2_6s -_L2_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L2_5s # no - bsr.l src_qnan # yes - bra.b _L2_6s -_L2_5s: - bsr.l ssinhd # operand is a DENORM -_L2_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinhd_ -_fsinhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L2_2d - bsr.l ssinh # operand is a NORM - bra.b _L2_6d -_L2_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3d # no - bsr.l src_zero # yes - bra.b _L2_6d -_L2_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4d # no - bsr.l src_inf # yes - bra.b _L2_6d -_L2_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L2_5d # no - bsr.l src_qnan # yes - bra.b _L2_6d -_L2_5d: - bsr.l ssinhd # operand is a DENORM -_L2_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinhx_ -_fsinhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L2_2x - bsr.l ssinh # operand is a NORM - bra.b _L2_6x -_L2_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3x # no - bsr.l src_zero # yes - bra.b _L2_6x -_L2_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4x # no - bsr.l src_inf # yes - bra.b _L2_6x -_L2_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L2_5x # no - bsr.l src_qnan # yes - bra.b _L2_6x -_L2_5x: - bsr.l ssinhd # operand is a DENORM -_L2_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flognp1s_ -_flognp1s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L3_2s - bsr.l slognp1 # operand is a NORM - bra.b _L3_6s -_L3_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3s # no - bsr.l src_zero # yes - bra.b _L3_6s -_L3_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4s # no - bsr.l sopr_inf # yes - bra.b _L3_6s -_L3_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L3_5s # no - bsr.l src_qnan # yes - bra.b _L3_6s -_L3_5s: - bsr.l slognp1d # operand is a DENORM -_L3_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognp1d_ -_flognp1d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L3_2d - bsr.l slognp1 # operand is a NORM - bra.b _L3_6d -_L3_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3d # no - bsr.l src_zero # yes - bra.b _L3_6d -_L3_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4d # no - bsr.l sopr_inf # yes - bra.b _L3_6d -_L3_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L3_5d # no - bsr.l src_qnan # yes - bra.b _L3_6d -_L3_5d: - bsr.l slognp1d # operand is a DENORM -_L3_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognp1x_ -_flognp1x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L3_2x - bsr.l slognp1 # operand is a NORM - bra.b _L3_6x -_L3_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3x # no - bsr.l src_zero # yes - bra.b _L3_6x -_L3_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4x # no - bsr.l sopr_inf # yes - bra.b _L3_6x -_L3_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L3_5x # no - bsr.l src_qnan # yes - bra.b _L3_6x -_L3_5x: - bsr.l slognp1d # operand is a DENORM -_L3_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fetoxm1s_ -_fetoxm1s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L4_2s - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6s -_L4_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3s # no - bsr.l src_zero # yes - bra.b _L4_6s -_L4_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4s # no - bsr.l setoxm1i # yes - bra.b _L4_6s -_L4_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L4_5s # no - bsr.l src_qnan # yes - bra.b _L4_6s -_L4_5s: - bsr.l setoxm1d # operand is a DENORM -_L4_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxm1d_ -_fetoxm1d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L4_2d - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6d -_L4_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3d # no - bsr.l src_zero # yes - bra.b _L4_6d -_L4_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4d # no - bsr.l setoxm1i # yes - bra.b _L4_6d -_L4_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L4_5d # no - bsr.l src_qnan # yes - bra.b _L4_6d -_L4_5d: - bsr.l setoxm1d # operand is a DENORM -_L4_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxm1x_ -_fetoxm1x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L4_2x - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6x -_L4_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3x # no - bsr.l src_zero # yes - bra.b _L4_6x -_L4_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4x # no - bsr.l setoxm1i # yes - bra.b _L4_6x -_L4_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L4_5x # no - bsr.l src_qnan # yes - bra.b _L4_6x -_L4_5x: - bsr.l setoxm1d # operand is a DENORM -_L4_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftanhs_ -_ftanhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L5_2s - bsr.l stanh # operand is a NORM - bra.b _L5_6s -_L5_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3s # no - bsr.l src_zero # yes - bra.b _L5_6s -_L5_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4s # no - bsr.l src_one # yes - bra.b _L5_6s -_L5_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L5_5s # no - bsr.l src_qnan # yes - bra.b _L5_6s -_L5_5s: - bsr.l stanhd # operand is a DENORM -_L5_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanhd_ -_ftanhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L5_2d - bsr.l stanh # operand is a NORM - bra.b _L5_6d -_L5_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3d # no - bsr.l src_zero # yes - bra.b _L5_6d -_L5_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4d # no - bsr.l src_one # yes - bra.b _L5_6d -_L5_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L5_5d # no - bsr.l src_qnan # yes - bra.b _L5_6d -_L5_5d: - bsr.l stanhd # operand is a DENORM -_L5_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanhx_ -_ftanhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L5_2x - bsr.l stanh # operand is a NORM - bra.b _L5_6x -_L5_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3x # no - bsr.l src_zero # yes - bra.b _L5_6x -_L5_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4x # no - bsr.l src_one # yes - bra.b _L5_6x -_L5_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L5_5x # no - bsr.l src_qnan # yes - bra.b _L5_6x -_L5_5x: - bsr.l stanhd # operand is a DENORM -_L5_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fatans_ -_fatans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L6_2s - bsr.l satan # operand is a NORM - bra.b _L6_6s -_L6_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3s # no - bsr.l src_zero # yes - bra.b _L6_6s -_L6_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4s # no - bsr.l spi_2 # yes - bra.b _L6_6s -_L6_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L6_5s # no - bsr.l src_qnan # yes - bra.b _L6_6s -_L6_5s: - bsr.l satand # operand is a DENORM -_L6_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatand_ -_fatand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L6_2d - bsr.l satan # operand is a NORM - bra.b _L6_6d -_L6_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3d # no - bsr.l src_zero # yes - bra.b _L6_6d -_L6_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4d # no - bsr.l spi_2 # yes - bra.b _L6_6d -_L6_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L6_5d # no - bsr.l src_qnan # yes - bra.b _L6_6d -_L6_5d: - bsr.l satand # operand is a DENORM -_L6_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanx_ -_fatanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L6_2x - bsr.l satan # operand is a NORM - bra.b _L6_6x -_L6_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3x # no - bsr.l src_zero # yes - bra.b _L6_6x -_L6_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4x # no - bsr.l spi_2 # yes - bra.b _L6_6x -_L6_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L6_5x # no - bsr.l src_qnan # yes - bra.b _L6_6x -_L6_5x: - bsr.l satand # operand is a DENORM -_L6_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fasins_ -_fasins_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L7_2s - bsr.l sasin # operand is a NORM - bra.b _L7_6s -_L7_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3s # no - bsr.l src_zero # yes - bra.b _L7_6s -_L7_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4s # no - bsr.l t_operr # yes - bra.b _L7_6s -_L7_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L7_5s # no - bsr.l src_qnan # yes - bra.b _L7_6s -_L7_5s: - bsr.l sasind # operand is a DENORM -_L7_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fasind_ -_fasind_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L7_2d - bsr.l sasin # operand is a NORM - bra.b _L7_6d -_L7_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3d # no - bsr.l src_zero # yes - bra.b _L7_6d -_L7_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4d # no - bsr.l t_operr # yes - bra.b _L7_6d -_L7_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L7_5d # no - bsr.l src_qnan # yes - bra.b _L7_6d -_L7_5d: - bsr.l sasind # operand is a DENORM -_L7_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fasinx_ -_fasinx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L7_2x - bsr.l sasin # operand is a NORM - bra.b _L7_6x -_L7_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3x # no - bsr.l src_zero # yes - bra.b _L7_6x -_L7_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4x # no - bsr.l t_operr # yes - bra.b _L7_6x -_L7_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L7_5x # no - bsr.l src_qnan # yes - bra.b _L7_6x -_L7_5x: - bsr.l sasind # operand is a DENORM -_L7_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fatanhs_ -_fatanhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L8_2s - bsr.l satanh # operand is a NORM - bra.b _L8_6s -_L8_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3s # no - bsr.l src_zero # yes - bra.b _L8_6s -_L8_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4s # no - bsr.l t_operr # yes - bra.b _L8_6s -_L8_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L8_5s # no - bsr.l src_qnan # yes - bra.b _L8_6s -_L8_5s: - bsr.l satanhd # operand is a DENORM -_L8_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanhd_ -_fatanhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L8_2d - bsr.l satanh # operand is a NORM - bra.b _L8_6d -_L8_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3d # no - bsr.l src_zero # yes - bra.b _L8_6d -_L8_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4d # no - bsr.l t_operr # yes - bra.b _L8_6d -_L8_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L8_5d # no - bsr.l src_qnan # yes - bra.b _L8_6d -_L8_5d: - bsr.l satanhd # operand is a DENORM -_L8_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanhx_ -_fatanhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L8_2x - bsr.l satanh # operand is a NORM - bra.b _L8_6x -_L8_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3x # no - bsr.l src_zero # yes - bra.b _L8_6x -_L8_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4x # no - bsr.l t_operr # yes - bra.b _L8_6x -_L8_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L8_5x # no - bsr.l src_qnan # yes - bra.b _L8_6x -_L8_5x: - bsr.l satanhd # operand is a DENORM -_L8_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftans_ -_ftans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L9_2s - bsr.l stan # operand is a NORM - bra.b _L9_6s -_L9_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3s # no - bsr.l src_zero # yes - bra.b _L9_6s -_L9_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4s # no - bsr.l t_operr # yes - bra.b _L9_6s -_L9_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L9_5s # no - bsr.l src_qnan # yes - bra.b _L9_6s -_L9_5s: - bsr.l stand # operand is a DENORM -_L9_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftand_ -_ftand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L9_2d - bsr.l stan # operand is a NORM - bra.b _L9_6d -_L9_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3d # no - bsr.l src_zero # yes - bra.b _L9_6d -_L9_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4d # no - bsr.l t_operr # yes - bra.b _L9_6d -_L9_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L9_5d # no - bsr.l src_qnan # yes - bra.b _L9_6d -_L9_5d: - bsr.l stand # operand is a DENORM -_L9_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanx_ -_ftanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L9_2x - bsr.l stan # operand is a NORM - bra.b _L9_6x -_L9_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3x # no - bsr.l src_zero # yes - bra.b _L9_6x -_L9_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4x # no - bsr.l t_operr # yes - bra.b _L9_6x -_L9_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L9_5x # no - bsr.l src_qnan # yes - bra.b _L9_6x -_L9_5x: - bsr.l stand # operand is a DENORM -_L9_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fetoxs_ -_fetoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L10_2s - bsr.l setox # operand is a NORM - bra.b _L10_6s -_L10_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3s # no - bsr.l ld_pone # yes - bra.b _L10_6s -_L10_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4s # no - bsr.l szr_inf # yes - bra.b _L10_6s -_L10_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L10_5s # no - bsr.l src_qnan # yes - bra.b _L10_6s -_L10_5s: - bsr.l setoxd # operand is a DENORM -_L10_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxd_ -_fetoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L10_2d - bsr.l setox # operand is a NORM - bra.b _L10_6d -_L10_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3d # no - bsr.l ld_pone # yes - bra.b _L10_6d -_L10_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4d # no - bsr.l szr_inf # yes - bra.b _L10_6d -_L10_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L10_5d # no - bsr.l src_qnan # yes - bra.b _L10_6d -_L10_5d: - bsr.l setoxd # operand is a DENORM -_L10_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxx_ -_fetoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L10_2x - bsr.l setox # operand is a NORM - bra.b _L10_6x -_L10_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3x # no - bsr.l ld_pone # yes - bra.b _L10_6x -_L10_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4x # no - bsr.l szr_inf # yes - bra.b _L10_6x -_L10_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L10_5x # no - bsr.l src_qnan # yes - bra.b _L10_6x -_L10_5x: - bsr.l setoxd # operand is a DENORM -_L10_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftwotoxs_ -_ftwotoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L11_2s - bsr.l stwotox # operand is a NORM - bra.b _L11_6s -_L11_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3s # no - bsr.l ld_pone # yes - bra.b _L11_6s -_L11_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4s # no - bsr.l szr_inf # yes - bra.b _L11_6s -_L11_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L11_5s # no - bsr.l src_qnan # yes - bra.b _L11_6s -_L11_5s: - bsr.l stwotoxd # operand is a DENORM -_L11_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftwotoxd_ -_ftwotoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L11_2d - bsr.l stwotox # operand is a NORM - bra.b _L11_6d -_L11_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3d # no - bsr.l ld_pone # yes - bra.b _L11_6d -_L11_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4d # no - bsr.l szr_inf # yes - bra.b _L11_6d -_L11_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L11_5d # no - bsr.l src_qnan # yes - bra.b _L11_6d -_L11_5d: - bsr.l stwotoxd # operand is a DENORM -_L11_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftwotoxx_ -_ftwotoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L11_2x - bsr.l stwotox # operand is a NORM - bra.b _L11_6x -_L11_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3x # no - bsr.l ld_pone # yes - bra.b _L11_6x -_L11_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4x # no - bsr.l szr_inf # yes - bra.b _L11_6x -_L11_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L11_5x # no - bsr.l src_qnan # yes - bra.b _L11_6x -_L11_5x: - bsr.l stwotoxd # operand is a DENORM -_L11_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftentoxs_ -_ftentoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L12_2s - bsr.l stentox # operand is a NORM - bra.b _L12_6s -_L12_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3s # no - bsr.l ld_pone # yes - bra.b _L12_6s -_L12_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4s # no - bsr.l szr_inf # yes - bra.b _L12_6s -_L12_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L12_5s # no - bsr.l src_qnan # yes - bra.b _L12_6s -_L12_5s: - bsr.l stentoxd # operand is a DENORM -_L12_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftentoxd_ -_ftentoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L12_2d - bsr.l stentox # operand is a NORM - bra.b _L12_6d -_L12_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3d # no - bsr.l ld_pone # yes - bra.b _L12_6d -_L12_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4d # no - bsr.l szr_inf # yes - bra.b _L12_6d -_L12_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L12_5d # no - bsr.l src_qnan # yes - bra.b _L12_6d -_L12_5d: - bsr.l stentoxd # operand is a DENORM -_L12_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftentoxx_ -_ftentoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L12_2x - bsr.l stentox # operand is a NORM - bra.b _L12_6x -_L12_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3x # no - bsr.l ld_pone # yes - bra.b _L12_6x -_L12_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4x # no - bsr.l szr_inf # yes - bra.b _L12_6x -_L12_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L12_5x # no - bsr.l src_qnan # yes - bra.b _L12_6x -_L12_5x: - bsr.l stentoxd # operand is a DENORM -_L12_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flogns_ -_flogns_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L13_2s - bsr.l slogn # operand is a NORM - bra.b _L13_6s -_L13_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3s # no - bsr.l t_dz2 # yes - bra.b _L13_6s -_L13_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4s # no - bsr.l sopr_inf # yes - bra.b _L13_6s -_L13_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L13_5s # no - bsr.l src_qnan # yes - bra.b _L13_6s -_L13_5s: - bsr.l slognd # operand is a DENORM -_L13_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognd_ -_flognd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L13_2d - bsr.l slogn # operand is a NORM - bra.b _L13_6d -_L13_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3d # no - bsr.l t_dz2 # yes - bra.b _L13_6d -_L13_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4d # no - bsr.l sopr_inf # yes - bra.b _L13_6d -_L13_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L13_5d # no - bsr.l src_qnan # yes - bra.b _L13_6d -_L13_5d: - bsr.l slognd # operand is a DENORM -_L13_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognx_ -_flognx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L13_2x - bsr.l slogn # operand is a NORM - bra.b _L13_6x -_L13_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3x # no - bsr.l t_dz2 # yes - bra.b _L13_6x -_L13_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4x # no - bsr.l sopr_inf # yes - bra.b _L13_6x -_L13_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L13_5x # no - bsr.l src_qnan # yes - bra.b _L13_6x -_L13_5x: - bsr.l slognd # operand is a DENORM -_L13_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flog10s_ -_flog10s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L14_2s - bsr.l slog10 # operand is a NORM - bra.b _L14_6s -_L14_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3s # no - bsr.l t_dz2 # yes - bra.b _L14_6s -_L14_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4s # no - bsr.l sopr_inf # yes - bra.b _L14_6s -_L14_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L14_5s # no - bsr.l src_qnan # yes - bra.b _L14_6s -_L14_5s: - bsr.l slog10d # operand is a DENORM -_L14_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog10d_ -_flog10d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L14_2d - bsr.l slog10 # operand is a NORM - bra.b _L14_6d -_L14_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3d # no - bsr.l t_dz2 # yes - bra.b _L14_6d -_L14_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4d # no - bsr.l sopr_inf # yes - bra.b _L14_6d -_L14_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L14_5d # no - bsr.l src_qnan # yes - bra.b _L14_6d -_L14_5d: - bsr.l slog10d # operand is a DENORM -_L14_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog10x_ -_flog10x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L14_2x - bsr.l slog10 # operand is a NORM - bra.b _L14_6x -_L14_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3x # no - bsr.l t_dz2 # yes - bra.b _L14_6x -_L14_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4x # no - bsr.l sopr_inf # yes - bra.b _L14_6x -_L14_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L14_5x # no - bsr.l src_qnan # yes - bra.b _L14_6x -_L14_5x: - bsr.l slog10d # operand is a DENORM -_L14_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flog2s_ -_flog2s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L15_2s - bsr.l slog2 # operand is a NORM - bra.b _L15_6s -_L15_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3s # no - bsr.l t_dz2 # yes - bra.b _L15_6s -_L15_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4s # no - bsr.l sopr_inf # yes - bra.b _L15_6s -_L15_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L15_5s # no - bsr.l src_qnan # yes - bra.b _L15_6s -_L15_5s: - bsr.l slog2d # operand is a DENORM -_L15_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog2d_ -_flog2d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L15_2d - bsr.l slog2 # operand is a NORM - bra.b _L15_6d -_L15_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3d # no - bsr.l t_dz2 # yes - bra.b _L15_6d -_L15_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4d # no - bsr.l sopr_inf # yes - bra.b _L15_6d -_L15_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L15_5d # no - bsr.l src_qnan # yes - bra.b _L15_6d -_L15_5d: - bsr.l slog2d # operand is a DENORM -_L15_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog2x_ -_flog2x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L15_2x - bsr.l slog2 # operand is a NORM - bra.b _L15_6x -_L15_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3x # no - bsr.l t_dz2 # yes - bra.b _L15_6x -_L15_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4x # no - bsr.l sopr_inf # yes - bra.b _L15_6x -_L15_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L15_5x # no - bsr.l src_qnan # yes - bra.b _L15_6x -_L15_5x: - bsr.l slog2d # operand is a DENORM -_L15_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fcoshs_ -_fcoshs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L16_2s - bsr.l scosh # operand is a NORM - bra.b _L16_6s -_L16_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3s # no - bsr.l ld_pone # yes - bra.b _L16_6s -_L16_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4s # no - bsr.l ld_pinf # yes - bra.b _L16_6s -_L16_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L16_5s # no - bsr.l src_qnan # yes - bra.b _L16_6s -_L16_5s: - bsr.l scoshd # operand is a DENORM -_L16_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcoshd_ -_fcoshd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L16_2d - bsr.l scosh # operand is a NORM - bra.b _L16_6d -_L16_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3d # no - bsr.l ld_pone # yes - bra.b _L16_6d -_L16_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4d # no - bsr.l ld_pinf # yes - bra.b _L16_6d -_L16_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L16_5d # no - bsr.l src_qnan # yes - bra.b _L16_6d -_L16_5d: - bsr.l scoshd # operand is a DENORM -_L16_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcoshx_ -_fcoshx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L16_2x - bsr.l scosh # operand is a NORM - bra.b _L16_6x -_L16_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3x # no - bsr.l ld_pone # yes - bra.b _L16_6x -_L16_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4x # no - bsr.l ld_pinf # yes - bra.b _L16_6x -_L16_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L16_5x # no - bsr.l src_qnan # yes - bra.b _L16_6x -_L16_5x: - bsr.l scoshd # operand is a DENORM -_L16_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _facoss_ -_facoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L17_2s - bsr.l sacos # operand is a NORM - bra.b _L17_6s -_L17_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3s # no - bsr.l ld_ppi2 # yes - bra.b _L17_6s -_L17_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4s # no - bsr.l t_operr # yes - bra.b _L17_6s -_L17_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L17_5s # no - bsr.l src_qnan # yes - bra.b _L17_6s -_L17_5s: - bsr.l sacosd # operand is a DENORM -_L17_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _facosd_ -_facosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L17_2d - bsr.l sacos # operand is a NORM - bra.b _L17_6d -_L17_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3d # no - bsr.l ld_ppi2 # yes - bra.b _L17_6d -_L17_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4d # no - bsr.l t_operr # yes - bra.b _L17_6d -_L17_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L17_5d # no - bsr.l src_qnan # yes - bra.b _L17_6d -_L17_5d: - bsr.l sacosd # operand is a DENORM -_L17_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _facosx_ -_facosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L17_2x - bsr.l sacos # operand is a NORM - bra.b _L17_6x -_L17_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3x # no - bsr.l ld_ppi2 # yes - bra.b _L17_6x -_L17_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4x # no - bsr.l t_operr # yes - bra.b _L17_6x -_L17_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L17_5x # no - bsr.l src_qnan # yes - bra.b _L17_6x -_L17_5x: - bsr.l sacosd # operand is a DENORM -_L17_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fgetexps_ -_fgetexps_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L18_2s - bsr.l sgetexp # operand is a NORM - bra.b _L18_6s -_L18_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3s # no - bsr.l src_zero # yes - bra.b _L18_6s -_L18_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4s # no - bsr.l t_operr # yes - bra.b _L18_6s -_L18_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L18_5s # no - bsr.l src_qnan # yes - bra.b _L18_6s -_L18_5s: - bsr.l sgetexpd # operand is a DENORM -_L18_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetexpd_ -_fgetexpd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L18_2d - bsr.l sgetexp # operand is a NORM - bra.b _L18_6d -_L18_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3d # no - bsr.l src_zero # yes - bra.b _L18_6d -_L18_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4d # no - bsr.l t_operr # yes - bra.b _L18_6d -_L18_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L18_5d # no - bsr.l src_qnan # yes - bra.b _L18_6d -_L18_5d: - bsr.l sgetexpd # operand is a DENORM -_L18_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetexpx_ -_fgetexpx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L18_2x - bsr.l sgetexp # operand is a NORM - bra.b _L18_6x -_L18_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3x # no - bsr.l src_zero # yes - bra.b _L18_6x -_L18_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4x # no - bsr.l t_operr # yes - bra.b _L18_6x -_L18_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L18_5x # no - bsr.l src_qnan # yes - bra.b _L18_6x -_L18_5x: - bsr.l sgetexpd # operand is a DENORM -_L18_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fgetmans_ -_fgetmans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L19_2s - bsr.l sgetman # operand is a NORM - bra.b _L19_6s -_L19_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3s # no - bsr.l src_zero # yes - bra.b _L19_6s -_L19_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4s # no - bsr.l t_operr # yes - bra.b _L19_6s -_L19_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L19_5s # no - bsr.l src_qnan # yes - bra.b _L19_6s -_L19_5s: - bsr.l sgetmand # operand is a DENORM -_L19_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetmand_ -_fgetmand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L19_2d - bsr.l sgetman # operand is a NORM - bra.b _L19_6d -_L19_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3d # no - bsr.l src_zero # yes - bra.b _L19_6d -_L19_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4d # no - bsr.l t_operr # yes - bra.b _L19_6d -_L19_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L19_5d # no - bsr.l src_qnan # yes - bra.b _L19_6d -_L19_5d: - bsr.l sgetmand # operand is a DENORM -_L19_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetmanx_ -_fgetmanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L19_2x - bsr.l sgetman # operand is a NORM - bra.b _L19_6x -_L19_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3x # no - bsr.l src_zero # yes - bra.b _L19_6x -_L19_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4x # no - bsr.l t_operr # yes - bra.b _L19_6x -_L19_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L19_5x # no - bsr.l src_qnan # yes - bra.b _L19_6x -_L19_5x: - bsr.l sgetmand # operand is a DENORM -_L19_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsincoss_ -_fsincoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L20_2s - bsr.l ssincos # operand is a NORM - bra.b _L20_6s -_L20_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3s # no - bsr.l ssincosz # yes - bra.b _L20_6s -_L20_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4s # no - bsr.l ssincosi # yes - bra.b _L20_6s -_L20_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L20_5s # no - bsr.l ssincosqnan # yes - bra.b _L20_6s -_L20_5s: - bsr.l ssincosd # operand is a DENORM -_L20_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - global _fsincosd_ -_fsincosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L20_2d - bsr.l ssincos # operand is a NORM - bra.b _L20_6d -_L20_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3d # no - bsr.l ssincosz # yes - bra.b _L20_6d -_L20_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4d # no - bsr.l ssincosi # yes - bra.b _L20_6d -_L20_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L20_5d # no - bsr.l ssincosqnan # yes - bra.b _L20_6d -_L20_5d: - bsr.l ssincosd # operand is a DENORM -_L20_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - global _fsincosx_ -_fsincosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L20_2x - bsr.l ssincos # operand is a NORM - bra.b _L20_6x -_L20_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3x # no - bsr.l ssincosz # yes - bra.b _L20_6x -_L20_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4x # no - bsr.l ssincosi # yes - bra.b _L20_6x -_L20_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L20_5x # no - bsr.l ssincosqnan # yes - bra.b _L20_6x -_L20_5x: - bsr.l ssincosd # operand is a DENORM -_L20_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _frems_ -_frems_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2s - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6s -_L21_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3s # no - bsr.l srem_szero # yes - bra.b _L21_6s -_L21_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4s # no - bsr.l srem_sinf # yes - bra.b _L21_6s -_L21_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L21_5s # no - bsr.l sop_sqnan # yes - bra.b _L21_6s -_L21_5s: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fremd_ -_fremd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2d - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6d -_L21_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3d # no - bsr.l srem_szero # yes - bra.b _L21_6d -_L21_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4d # no - bsr.l srem_sinf # yes - bra.b _L21_6d -_L21_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L21_5d # no - bsr.l sop_sqnan # yes - bra.b _L21_6d -_L21_5d: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fremx_ -_fremx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2x - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6x -_L21_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3x # no - bsr.l srem_szero # yes - bra.b _L21_6x -_L21_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4x # no - bsr.l srem_sinf # yes - bra.b _L21_6x -_L21_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L21_5x # no - bsr.l sop_sqnan # yes - bra.b _L21_6x -_L21_5x: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _fmods_ -_fmods_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2s - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6s -_L22_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3s # no - bsr.l smod_szero # yes - bra.b _L22_6s -_L22_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4s # no - bsr.l smod_sinf # yes - bra.b _L22_6s -_L22_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L22_5s # no - bsr.l sop_sqnan # yes - bra.b _L22_6s -_L22_5s: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fmodd_ -_fmodd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2d - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6d -_L22_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3d # no - bsr.l smod_szero # yes - bra.b _L22_6d -_L22_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4d # no - bsr.l smod_sinf # yes - bra.b _L22_6d -_L22_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L22_5d # no - bsr.l sop_sqnan # yes - bra.b _L22_6d -_L22_5d: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fmodx_ -_fmodx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2x - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6x -_L22_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3x # no - bsr.l smod_szero # yes - bra.b _L22_6x -_L22_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4x # no - bsr.l smod_sinf # yes - bra.b _L22_6x -_L22_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L22_5x # no - bsr.l sop_sqnan # yes - bra.b _L22_6x -_L22_5x: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _fscales_ -_fscales_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2s - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6s -_L23_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3s # no - bsr.l sscale_szero # yes - bra.b _L23_6s -_L23_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4s # no - bsr.l sscale_sinf # yes - bra.b _L23_6s -_L23_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L23_5s # no - bsr.l sop_sqnan # yes - bra.b _L23_6s -_L23_5s: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fscaled_ -_fscaled_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2d - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6d -_L23_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3d # no - bsr.l sscale_szero # yes - bra.b _L23_6d -_L23_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4d # no - bsr.l sscale_sinf # yes - bra.b _L23_6d -_L23_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L23_5d # no - bsr.l sop_sqnan # yes - bra.b _L23_6d -_L23_5d: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fscalex_ -_fscalex_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2x - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6x -_L23_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3x # no - bsr.l sscale_szero # yes - bra.b _L23_6x -_L23_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4x # no - bsr.l sscale_sinf # yes - bra.b _L23_6x -_L23_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L23_5x # no - bsr.l sop_sqnan # yes - bra.b _L23_6x -_L23_5x: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# ssin(): computes the sine of a normalized input # -# ssind(): computes the sine of a denormalized input # -# scos(): computes the cosine of a normalized input # -# scosd(): computes the cosine of a denormalized input # -# ssincos(): computes the sine and cosine of a normalized input # -# ssincosd(): computes the sine and cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = sin(X) or cos(X) # -# # -# For ssincos(X): # -# fp0 = sin(X) # -# fp1 = cos(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 1 ulp in 64 significant bit, i.e. # -# within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# SIN and COS: # -# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. # -# # -# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. # -# # -# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # -# k = N mod 4, so in particular, k = 0,1,2,or 3. # -# Overwrite k by k := k + AdjN. # -# # -# 4. If k is even, go to 6. # -# # -# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. # -# Return sgn*cos(r) where cos(r) is approximated by an # -# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), # -# s = r*r. 
# -# Exit. # -# # -# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) # -# where sin(r) is approximated by an odd polynomial in r # -# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. # -# Exit. # -# # -# 7. If |X| > 1, go to 9. # -# # -# 8. (|X|<2**(-40)) If SIN is invoked, return X; # -# otherwise return 1. # -# # -# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # -# go back to 3. # -# # -# SINCOS: # -# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # -# # -# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # -# k = N mod 4, so in particular, k = 0,1,2,or 3. # -# # -# 3. If k is even, go to 5. # -# # -# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. # -# j1 exclusive or with the l.s.b. of k. # -# sgn1 := (-1)**j1, sgn2 := (-1)**j2. # -# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where # -# sin(r) and cos(r) are computed as odd and even # -# polynomials in r, respectively. Exit # -# # -# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. # -# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where # -# sin(r) and cos(r) are computed as odd and even # -# polynomials in r, respectively. Exit # -# # -# 6. If |X| > 1, go to 8. # -# # -# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. # -# # -# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # -# go back to 2. # -# # -######################################################################### - -SINA7: long 0xBD6AAA77,0xCCC994F5 -SINA6: long 0x3DE61209,0x7AAE8DA1 -SINA5: long 0xBE5AE645,0x2A118AE4 -SINA4: long 0x3EC71DE3,0xA5341531 -SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 -SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 -SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 - -COSB8: long 0x3D2AC4D0,0xD6011EE3 -COSB7: long 0xBDA9396F,0x9F45AC19 -COSB6: long 0x3E21EED9,0x0612C972 -COSB5: long 0xBE927E4F,0xB79D9FCF -COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 -COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 -COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E -COSB1: long 0xBF000000 - - set INARG,FP_SCR0 - - set X,FP_SCR0 -# set XDCARE,X+2 - set XFRAC,X+4 - - set RPRIME,FP_SCR0 - set SPRIME,FP_SCR1 - - set POSNEG1,L_SCR1 - set TWOTO63,L_SCR1 - - set ENDFLAG,L_SCR2 - set INT,L_SCR2 - - set ADJN,L_SCR3 - -############################################ - global ssin -ssin: - mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0 - bra.b SINBGN - -############################################ - global scos -scos: - mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1 - -############################################ -SINBGN: -#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE - - fmov.x (%a0),%fp0 # LOAD INPUT - fmov.x %fp0,X(%a6) # save input at X - -# "COMPACTIFY" X - mov.l (%a0),%d1 # put exp in hi word - mov.w 4(%a0),%d1 # fetch hi(man) - and.l &0x7FFFFFFF,%d1 # strip sign - - cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)? - bge.b SOK1 # no - bra.w SINSM # yes; input is very small - -SOK1: - cmp.l %d1,&0x4004BC7E # is |X| < 15 PI? - blt.b SINMAIN # no - bra.w SREDUCEX # yes; input is very large - -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 
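-#
-#--In C terms, the reduce-and-dispatch scheme of the ALGORITHM block above
-#--looks roughly like the sketch below (illustrative only: poly_sin and
-#--poly_cos stand in for the SINPOLY/COSPOLY expansions, and the two-piece
-#--Y1/Y2 subtraction performed below is collapsed into a single step):
-#
-#	#include <math.h>
-#	extern double poly_sin(double r), poly_cos(double r);
-#	double fpsp_sin(double x, int adjn)	/* adjn: 0 = sin, 1 = cos */
-#	{
-#		int n = (int)nearbyint(x * (2.0 / M_PI));
-#		double r = x - n * (M_PI / 2.0);	/* |r| <= pi/4 */
-#		int k = (n + adjn) & 3;			/* k = (N+AdjN) mod 4 */
-#		double v = (k & 1) ? poly_cos(r) : poly_sin(r);
-#		return (k & 2) ? -v : v;		/* sgn = (-1)**j */
-#	}
-#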
-SINMAIN: - fmov.x %fp0,%fp1 - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER - - mov.l INT(%a6),%d1 # make a copy of N - asl.l &4,%d1 # N *= 16 - add.l %d1,%a1 # tbl_addr = a1 + (N*16) - -# A1 IS THE ADDRESS OF N*PIBY2 -# ...WHICH IS IN TWO PIECES Y1 & Y2 - fsub.x (%a1)+,%fp0 # X-Y1 - fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 - -SINCONT: -#--continuation from REDUCEX - -#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED - mov.l INT(%a6),%d1 - add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN - ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE - cmp.l %d1,&0 - blt.w COSPOLY - -#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY -#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE -#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS -#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) -#--WHERE T=S*S. -#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION -#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. -SINPOLY: - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.x %fp0,X(%a6) # X IS R - fmul.x %fp0,%fp0 # FP0 IS S - - fmov.d SINA7(%pc),%fp3 - fmov.d SINA6(%pc),%fp2 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS T - - ror.l &1,%d1 - and.l &0x80000000,%d1 -# ...LEAST SIG. BIT OF D0 IN SIGN POSITION - eor.l %d1,X(%a6) # X IS NOW R'= SGN*R - - fmul.x %fp1,%fp3 # TA7 - fmul.x %fp1,%fp2 # TA6 - - fadd.d SINA5(%pc),%fp3 # A5+TA7 - fadd.d SINA4(%pc),%fp2 # A4+TA6 - - fmul.x %fp1,%fp3 # T(A5+TA7) - fmul.x %fp1,%fp2 # T(A4+TA6) - - fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) - fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) - - fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) - - fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) - fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) - fmul.x X(%a6),%fp0 # R'*S - - fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] - - fmul.x %fp1,%fp0 # SIN(R')-R' - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.x X(%a6),%fp0 # last inst - possible exception set - bra t_inx2 - -#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY -#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE -#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS -#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) -#--WHERE T=S*S. -#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION -#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 -#--AND IS THEREFORE STORED AS SINGLE PRECISION. -COSPOLY: - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.x %fp0,%fp0 # FP0 IS S - - fmov.d COSB8(%pc),%fp2 - fmov.d COSB7(%pc),%fp3 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS T - - fmov.x %fp0,X(%a6) # X IS S - ror.l &1,%d1 - and.l &0x80000000,%d1 -# ...LEAST SIG. 
BIT OF D0 IN SIGN POSITION - - fmul.x %fp1,%fp2 # TB8 - - eor.l %d1,X(%a6) # X IS NOW S'= SGN*S - and.l &0x80000000,%d1 - - fmul.x %fp1,%fp3 # TB7 - - or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE - mov.l %d1,POSNEG1(%a6) - - fadd.d COSB6(%pc),%fp2 # B6+TB8 - fadd.d COSB5(%pc),%fp3 # B5+TB7 - - fmul.x %fp1,%fp2 # T(B6+TB8) - fmul.x %fp1,%fp3 # T(B5+TB7) - - fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) - fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) - - fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) - fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) - - fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) - fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) - - fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) - - fadd.x %fp1,%fp0 - - fmul.x X(%a6),%fp0 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set - bra t_inx2 - -############################################## - -# SINe: Big OR Small? -#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -#--IF |X| < 2**(-40), RETURN X OR 1. -SINBORS: - cmp.l %d1,&0x3FFF8000 - bgt.l SREDUCEX - -SINSM: - mov.l ADJN(%a6),%d1 - cmp.l %d1,&0 - bgt.b COSTINY - -# here, the operation may underflow iff the precision is sgl or dbl. -# extended denorms are handled through another entry point. -SINTINY: -# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE - - fmov.l %d0,%fpcr # restore users round mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - bra t_catch - -COSTINY: - fmov.s &0x3F800000,%fp0 # fp0 = 1.0 - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.s &0x80800000,%fp0 # last inst - possible exception set - bra t_pinx2 - -################################################ - global ssind -#--SIN(X) = X FOR DENORMALIZED X -ssind: - bra t_extdnrm - -############################################ - global scosd -#--COS(X) = 1 FOR DENORMALIZED X -scosd: - fmov.s &0x3F800000,%fp0 # fp0 = 1.0 - bra t_pinx2 - -################################################## - - global ssincos -ssincos: -#--SET ADJN TO 4 - mov.l &4,ADJN(%a6) - - fmov.x (%a0),%fp0 # LOAD INPUT - fmov.x %fp0,X(%a6) - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 # COMPACTIFY X - - cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? - bge.b SCOK1 - bra.w SCSM - -SCOK1: - cmp.l %d1,&0x4004BC7E # |X| < 15 PI? - blt.b SCMAIN - bra.w SREDUCEX - - -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. -SCMAIN: - fmov.x %fp0,%fp1 - - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER - - mov.l INT(%a6),%d1 - asl.l &4,%d1 - add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2 - - fsub.x (%a1)+,%fp0 # X-Y1 - fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 - -SCCONT: -#--continuation point from REDUCEX - - mov.l INT(%a6),%d1 - ror.l &1,%d1 - cmp.l %d1,&0 # D0 < 0 IFF N IS ODD - bge.w NEVEN - -SNODD: -#--REGISTERS SAVED SO FAR: D0, A0, FP2. 
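-#--For odd N, step 4 of the SINCOS algorithm above applies: SIN(X) comes
-#--from the cosine polynomial and COS(X) from the sine polynomial, and the
-#--two Horner chains below are evaluated interleaved so that consecutive
-#--FP operations are independent.  A rough C sketch (illustrative only;
-#--sgn1/sgn2 carry the (-1)**j1/(-1)**j2 signs, poly_* are stand-ins):
-#
-#	sin_x = sgn1 * poly_cos(r);	/* fp0: COSB1..COSB8 chain */
-#	cos_x = sgn2 * poly_sin(r);	/* fp1: SINA1..SINA7 chain */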
- fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,RPRIME(%a6) - fmul.x %fp0,%fp0 # FP0 IS S = R*R - fmov.d SINA7(%pc),%fp1 # A7 - fmov.d COSB8(%pc),%fp2 # B8 - fmul.x %fp0,%fp1 # SA7 - fmul.x %fp0,%fp2 # SB8 - - mov.l %d2,-(%sp) - mov.l %d1,%d2 - ror.l &1,%d2 - and.l &0x80000000,%d2 - eor.l %d1,%d2 - and.l &0x80000000,%d2 - - fadd.d SINA6(%pc),%fp1 # A6+SA7 - fadd.d COSB7(%pc),%fp2 # B7+SB8 - - fmul.x %fp0,%fp1 # S(A6+SA7) - eor.l %d2,RPRIME(%a6) - mov.l (%sp)+,%d2 - fmul.x %fp0,%fp2 # S(B7+SB8) - ror.l &1,%d1 - and.l &0x80000000,%d1 - mov.l &0x3F800000,POSNEG1(%a6) - eor.l %d1,POSNEG1(%a6) - - fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7) - fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8) - - fmul.x %fp0,%fp1 # S(A5+S(A6+SA7)) - fmul.x %fp0,%fp2 # S(B6+S(B7+SB8)) - fmov.x %fp0,SPRIME(%a6) - - fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7)) - eor.l %d1,SPRIME(%a6) - fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8)) - - fmul.x %fp0,%fp1 # S(A4+...) - fmul.x %fp0,%fp2 # S(B5+...) - - fadd.d SINA3(%pc),%fp1 # A3+S(A4+...) - fadd.d COSB4(%pc),%fp2 # B4+S(B5+...) - - fmul.x %fp0,%fp1 # S(A3+...) - fmul.x %fp0,%fp2 # S(B4+...) - - fadd.x SINA2(%pc),%fp1 # A2+S(A3+...) - fadd.x COSB3(%pc),%fp2 # B3+S(B4+...) - - fmul.x %fp0,%fp1 # S(A2+...) - fmul.x %fp0,%fp2 # S(B3+...) - - fadd.x SINA1(%pc),%fp1 # A1+S(A2+...) - fadd.x COSB2(%pc),%fp2 # B2+S(B3+...) - - fmul.x %fp0,%fp1 # S(A1+...) - fmul.x %fp2,%fp0 # S(B2+...) - - fmul.x RPRIME(%a6),%fp1 # R'S(A1+...) - fadd.s COSB1(%pc),%fp0 # B1+S(B2...) - fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...)) - - fmovm.x (%sp)+,&0x20 # restore fp2 - - fmov.l %d0,%fpcr - fadd.x RPRIME(%a6),%fp1 # COS(X) - bsr sto_cos # store cosine result - fadd.s POSNEG1(%a6),%fp0 # SIN(X) - bra t_inx2 - -NEVEN: -#--REGISTERS SAVED SO FAR: FP2. - fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,RPRIME(%a6) - fmul.x %fp0,%fp0 # FP0 IS S = R*R - - fmov.d COSB8(%pc),%fp1 # B8 - fmov.d SINA7(%pc),%fp2 # A7 - - fmul.x %fp0,%fp1 # SB8 - fmov.x %fp0,SPRIME(%a6) - fmul.x %fp0,%fp2 # SA7 - - ror.l &1,%d1 - and.l &0x80000000,%d1 - - fadd.d COSB7(%pc),%fp1 # B7+SB8 - fadd.d SINA6(%pc),%fp2 # A6+SA7 - - eor.l %d1,RPRIME(%a6) - eor.l %d1,SPRIME(%a6) - - fmul.x %fp0,%fp1 # S(B7+SB8) - - or.l &0x3F800000,%d1 - mov.l %d1,POSNEG1(%a6) - - fmul.x %fp0,%fp2 # S(A6+SA7) - - fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8) - fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7) - - fmul.x %fp0,%fp1 # S(B6+S(B7+SB8)) - fmul.x %fp0,%fp2 # S(A5+S(A6+SA7)) - - fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8)) - fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7)) - - fmul.x %fp0,%fp1 # S(B5+...) - fmul.x %fp0,%fp2 # S(A4+...) - - fadd.d COSB4(%pc),%fp1 # B4+S(B5+...) - fadd.d SINA3(%pc),%fp2 # A3+S(A4+...) - - fmul.x %fp0,%fp1 # S(B4+...) - fmul.x %fp0,%fp2 # S(A3+...) - - fadd.x COSB3(%pc),%fp1 # B3+S(B4+...) - fadd.x SINA2(%pc),%fp2 # A2+S(A3+...) - - fmul.x %fp0,%fp1 # S(B3+...) - fmul.x %fp0,%fp2 # S(A2+...) - - fadd.x COSB2(%pc),%fp1 # B2+S(B3+...) - fadd.x SINA1(%pc),%fp2 # A1+S(A2+...) - - fmul.x %fp0,%fp1 # S(B2+...) - fmul.x %fp2,%fp0 # s(a1+...) - - - fadd.s COSB1(%pc),%fp1 # B1+S(B2...) - fmul.x RPRIME(%a6),%fp0 # R'S(A1+...) 
- fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
-
- fmovm.x (%sp)+,&0x20 # restore fp2
-
- fmov.l %d0,%fpcr
- fadd.s POSNEG1(%a6),%fp1 # COS(X)
- bsr sto_cos # store cosine result
- fadd.x RPRIME(%a6),%fp0 # SIN(X)
- bra t_inx2
-
-################################################
-
-SCBORS:
- cmp.l %d1,&0x3FFF8000
- bgt.w SREDUCEX
-
-################################################
-
-SCSM:
-# mov.w &0x0000,XDCARE(%a6)
- fmov.s &0x3F800000,%fp1
-
- fmov.l %d0,%fpcr
- fsub.s &0x00800000,%fp1
- bsr sto_cos # store cosine result
- fmov.l %fpcr,%d0 # d0 must have fpcr,too
- mov.b &FMOV_OP,%d1 # last inst is MOVE
- fmov.x X(%a6),%fp0
- bra t_catch
-
-##############################################
-
- global ssincosd
-#--SIN AND COS OF X FOR DENORMALIZED X
-ssincosd:
- mov.l %d0,-(%sp) # save d0
- fmov.s &0x3F800000,%fp1
- bsr sto_cos # store cosine result
- mov.l (%sp)+,%d0 # restore d0
- bra t_extdnrm
-
-############################################
-
-#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
-#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
-#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
-SREDUCEX:
- fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
- mov.l %d2,-(%sp) # save d2
- fmov.s &0x00000000,%fp1 # fp1 = 0
-
-#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
-#--there is a danger of unwanted overflow in first LOOP iteration. In this
-#--case, reduce argument by one remainder step to make subsequent reduction
-#--safe.
- cmp.l %d1,&0x7ffeffff # is arg dangerously large?
- bne.b SLOOP # no
-
-# yes; create 2**16383*PI/2
- mov.w &0x7ffe,FP_SCR0_EX(%a6)
- mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6)
-
-# create low half of 2**16383*PI/2 at FP_SCR1
- mov.w &0x7fdc,FP_SCR1_EX(%a6)
- mov.l &0x85a308d3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6)
-
- ftest.x %fp0 # test sign of argument
- fblt.w sred_neg
-
- or.b &0x80,FP_SCR0_EX(%a6) # positive arg
- or.b &0x80,FP_SCR1_EX(%a6)
-sred_neg:
- fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
- fmov.x %fp0,%fp1 # save high result in fp1
- fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
- fsub.x %fp0,%fp1 # determine low component of result
- fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
-
-#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
-#--integer quotient will be stored in N
-#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
-SLOOP:
- fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
- mov.w INARG(%a6),%d1
- mov.l %d1,%a1 # save a copy of D0
- and.l &0x00007FFF,%d1
- sub.l &0x00003FFF,%d1 # d0 = K
- cmp.l %d1,&28
- ble.b SLASTLOOP
-SCONTLOOP:
- sub.l &27,%d1 # d0 = L := K-27
- mov.b &0,ENDFLAG(%a6)
- bra.b SWORK
-SLASTLOOP:
- clr.l %d1 # d0 = L := 0
- mov.b &1,ENDFLAG(%a6)
-
-SWORK:
-#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
-#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
-
-#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
-#--2**L * (PIby2_1), 2**L * (PIby2_2)
-
- mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
- sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
-
- mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
- mov.l &0x4E44152A,FP_SCR0_LO(%a6)
- mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
-
- fmov.x %fp0,%fp2
- fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
-
-#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
-#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
-#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
-#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
-#--US THE DESIRED VALUE IN FLOATING POINT.
- mov.l %a1,%d2
- swap %d2
- and.l &0x80000000,%d2
- or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
- mov.l %d2,TWOTO63(%a6)
- fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
- fsub.s TWOTO63(%a6),%fp2 # fp2 = N
-# fint.x %fp2
-
-#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
- mov.l %d1,%d2 # d2 = L
-
- add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
- mov.w %d2,FP_SCR0_EX(%a6)
- mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
-
- add.l &0x00003FDD,%d1
- mov.w %d1,FP_SCR1_EX(%a6)
- mov.l &0x85A308D3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
-
- mov.b ENDFLAG(%a6),%d1
-
-#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
-#--P2 = 2**(L) * Piby2_2
- fmov.x %fp2,%fp4 # fp4 = N
- fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
- fmov.x %fp2,%fp5 # fp5 = N
- fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
- fmov.x %fp4,%fp3 # fp3 = W = N*P1
-
-#--we want P+p = W+w but |p| <= half ulp of P
-#--Then, we need to compute A := R-P and a := r-p
- fadd.x %fp5,%fp3 # fp3 = P
- fsub.x %fp3,%fp4 # fp4 = W-P
-
- fsub.x %fp3,%fp0 # fp0 = A := R - P
- fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
-
- fmov.x %fp0,%fp3 # fp3 = A
- fsub.x %fp4,%fp1 # fp1 = a := r - p
-
-#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
-#--|r| <= half ulp of R.
- fadd.x %fp1,%fp0 # fp0 = R := A+a
-#--No need to calculate r if this is the last loop
- cmp.b %d1,&0
- bgt.w SRESTORE
-
-#--Need to calculate r
- fsub.x %fp0,%fp3 # fp3 = A-R
- fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
- bra.w SLOOP
-
-SRESTORE:
- fmov.l %fp2,INT(%a6)
- mov.l (%sp)+,%d2 # restore d2
- fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
-
- mov.l ADJN(%a6),%d1
- cmp.l %d1,&4
-
- blt.w SINCONT
- bra.w SCCONT
-
-#########################################################################
-# stan(): computes the tangent of a normalized input #
-# stand(): computes the tangent of a denormalized input #
-# #
-# INPUT *************************************************************** #
-# a0 = pointer to extended precision input #
-# d0 = round precision,mode #
-# #
-# OUTPUT ************************************************************** #
-# fp0 = tan(X) #
-# #
-# ACCURACY and MONOTONICITY ******************************************* #
-# The returned result is within 3 ulp in 64 significant bit, i.e. #
-# within 0.5001 ulp to 53 bits if the result is subsequently #
-# rounded to double precision. The result is provably monotonic #
-# in double precision. #
-# #
-# ALGORITHM *********************************************************** #
-# #
-# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
-# #
-# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
-# k = N mod 2, so in particular, k = 0 or 1. #
-# #
-# 3. If k is odd, go to 5. #
-# #
-# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
-# rational function U/V where #
-# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
-# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
-# Exit. #
-# #
-# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
-# a rational function U/V where #
-# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
-# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
-# -Cot(r) = -V/U. Exit. #
-# #
-# 6. If |X| > 1, go to 8. #
-# #
-# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
-# #
-# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
-# to 2.
# -# # -######################################################################### - -TANQ4: - long 0x3EA0B759,0xF50F8688 -TANP3: - long 0xBEF2BAA5,0xA8924F04 - -TANQ3: - long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 - -TANP2: - long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 - -TANQ2: - long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 - -TANP1: - long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 - -TANQ1: - long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 - -INVTWOPI: - long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 - -TWOPI1: - long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: - long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - -#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING -#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT -#--MOST 69 BITS LONG. -# global PITBL -PITBL: - long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 - long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 - long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 - long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 - long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 - long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 - long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 - long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 - long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 - long 0xC0040000,0x90836524,0x88034B96,0x20B00000 - long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 - long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 - long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 - long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 - long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 - long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 - long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 - long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 - long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 - long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 - long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 - long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 - long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 - long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 - long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 - long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 - long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 - long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 - long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 - long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 - long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 - long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 - long 0x00000000,0x00000000,0x00000000,0x00000000 - long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 - long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 - long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 - long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 - long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 - long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 - long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 - long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 - long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 - long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 - long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 - long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 - long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 - long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 - long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 - long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 - long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 - long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 - long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 - long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 - long 
0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 - long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 - long 0x40040000,0x90836524,0x88034B96,0xA0B00000 - long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 - long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 - long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 - long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 - long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 - long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 - long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 - long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 - long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 - - set INARG,FP_SCR0 - - set TWOTO63,L_SCR1 - set INT,L_SCR1 - set ENDFLAG,L_SCR2 - - global stan -stan: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? - bge.b TANOK1 - bra.w TANSM -TANOK1: - cmp.l %d1,&0x4004BC7E # |X| < 15 PI? - blt.b TANMAIN - bra.w REDUCEX - -TANMAIN: -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmov.x %fp0,%fp1 - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,%d1 # CONVERT TO INTEGER - - asl.l &4,%d1 - add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2 - - fsub.x (%a1)+,%fp0 # X-Y1 - - fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 - - ror.l &5,%d1 - and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0 - -TANCONT: - fmovm.x &0x0c,-(%sp) # save fp2,fp3 - - cmp.l %d1,&0 - blt.w NODD - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # S = R*R - - fmov.d TANQ4(%pc),%fp3 - fmov.d TANP3(%pc),%fp2 - - fmul.x %fp1,%fp3 # SQ4 - fmul.x %fp1,%fp2 # SP3 - - fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 - fadd.x TANP2(%pc),%fp2 # P2+SP3 - - fmul.x %fp1,%fp3 # S(Q3+SQ4) - fmul.x %fp1,%fp2 # S(P2+SP3) - - fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) - fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) - - fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) - fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) - - fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) - fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) - - fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) - - fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) - - fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) - - fmovm.x (%sp)+,&0x30 # restore fp2,fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fdiv.x %fp1,%fp0 # last inst - possible exception set - bra t_inx2 - -NODD: - fmov.x %fp0,%fp1 - fmul.x %fp0,%fp0 # S = R*R - - fmov.d TANQ4(%pc),%fp3 - fmov.d TANP3(%pc),%fp2 - - fmul.x %fp0,%fp3 # SQ4 - fmul.x %fp0,%fp2 # SP3 - - fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 - fadd.x TANP2(%pc),%fp2 # P2+SP3 - - fmul.x %fp0,%fp3 # S(Q3+SQ4) - fmul.x %fp0,%fp2 # S(P2+SP3) - - fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) - fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) - - fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) - fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) - - fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) - fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) - - fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) - - fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) - fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) - - fmovm.x (%sp)+,&0x30 # restore fp2,fp3 - - fmov.x %fp1,-(%sp) - eor.l &0x80000000,(%sp) - - fmov.l %d0,%fpcr # restore users round mode,prec - fdiv.x (%sp)+,%fp0 # last inst - possible exception set - bra t_inx2 - -TANBORS: -#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -#--IF |X| < 2**(-40), RETURN X OR 1. 
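-#
-#--Taken together, the dispatch in TANCONT/NODD above and the small/large
-#--paths here amount to this rough C sketch (illustrative only: poly_u and
-#--poly_v stand in for the TANP/TANQ expansions, and the reduction of x to
-#--n and r is either the table lookup above or the REDUCEX loop below):
-#
-#	if (fabs(x) < 0x1p-40) return x;	/* TANSM: tan(X) ~= X */
-#	/* reduce: x = n*(pi/2) + r, |r| <= pi/4 */
-#	double u = poly_u(r), v = poly_v(r);	/* tan(r) ~= u/v */
-#	return (n & 1) ? -v / u : u / v;	/* odd n: tan(X) = -cot(r) */
-#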
- cmp.l %d1,&0x3FFF8000
- bgt.b REDUCEX
-
-TANSM:
- fmov.x %fp0,-(%sp)
- fmov.l %d0,%fpcr # restore users round mode,prec
- mov.b &FMOV_OP,%d1 # last inst is MOVE
- fmov.x (%sp)+,%fp0 # last inst - possible exception set
- bra t_catch
-
- global stand
-#--TAN(X) = X FOR DENORMALIZED X
-stand:
- bra t_extdnrm
-
-#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
-#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
-#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
-REDUCEX:
- fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
- mov.l %d2,-(%sp) # save d2
- fmov.s &0x00000000,%fp1 # fp1 = 0
-
-#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
-#--there is a danger of unwanted overflow in first LOOP iteration. In this
-#--case, reduce argument by one remainder step to make subsequent reduction
-#--safe.
- cmp.l %d1,&0x7ffeffff # is arg dangerously large?
- bne.b LOOP # no
-
-# yes; create 2**16383*PI/2
- mov.w &0x7ffe,FP_SCR0_EX(%a6)
- mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6)
-
-# create low half of 2**16383*PI/2 at FP_SCR1
- mov.w &0x7fdc,FP_SCR1_EX(%a6)
- mov.l &0x85a308d3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6)
-
- ftest.x %fp0 # test sign of argument
- fblt.w red_neg
-
- or.b &0x80,FP_SCR0_EX(%a6) # positive arg
- or.b &0x80,FP_SCR1_EX(%a6)
-red_neg:
- fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
- fmov.x %fp0,%fp1 # save high result in fp1
- fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
- fsub.x %fp0,%fp1 # determine low component of result
- fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
-
-#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
-#--integer quotient will be stored in N
-#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
-LOOP:
- fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
- mov.w INARG(%a6),%d1
- mov.l %d1,%a1 # save a copy of D0
- and.l &0x00007FFF,%d1
- sub.l &0x00003FFF,%d1 # d0 = K
- cmp.l %d1,&28
- ble.b LASTLOOP
-CONTLOOP:
- sub.l &27,%d1 # d0 = L := K-27
- mov.b &0,ENDFLAG(%a6)
- bra.b WORK
-LASTLOOP:
- clr.l %d1 # d0 = L := 0
- mov.b &1,ENDFLAG(%a6)
-
-WORK:
-#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
-#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
-
-#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
-#--2**L * (PIby2_1), 2**L * (PIby2_2)
-
- mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
- sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
-
- mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
- mov.l &0x4E44152A,FP_SCR0_LO(%a6)
- mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
-
- fmov.x %fp0,%fp2
- fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
-
-#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
-#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
-#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
-#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
-#--US THE DESIRED VALUE IN FLOATING POINT.
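-#
-#--In C terms the rounding trick is roughly the following (illustrative
-#--sketch; 2**63 has unit weight 1.0 in the last place of the 64-bit
-#--extended mantissa, so the add forces the fraction of the sum to be
-#--rounded away; for 53-bit doubles the analogous constant is 2**52):
-#
-#	#include <math.h>
-#	double big = copysign(0x1p63, x);	/* SIGN(INARG)*2**63 */
-#	double n = (y + big) - big;	/* y = X*2**(-L)*(2/PI); n = round(y) */
-#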
- mov.l %a1,%d2 - swap %d2 - and.l &0x80000000,%d2 - or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL - mov.l %d2,TWOTO63(%a6) - fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED - fsub.s TWOTO63(%a6),%fp2 # fp2 = N -# fintrz.x %fp2,%fp2 - -#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - mov.l %d1,%d2 # d2 = L - - add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) - mov.w %d2,FP_SCR0_EX(%a6) - mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) - clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 - - add.l &0x00003FDD,%d1 - mov.w %d1,FP_SCR1_EX(%a6) - mov.l &0x85A308D3,FP_SCR1_HI(%a6) - clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 - - mov.b ENDFLAG(%a6),%d1 - -#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -#--P2 = 2**(L) * Piby2_2 - fmov.x %fp2,%fp4 # fp4 = N - fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 - fmov.x %fp2,%fp5 # fp5 = N - fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 - fmov.x %fp4,%fp3 # fp3 = W = N*P1 - -#--we want P+p = W+w but |p| <= half ulp of P -#--Then, we need to compute A := R-P and a := r-p - fadd.x %fp5,%fp3 # fp3 = P - fsub.x %fp3,%fp4 # fp4 = W-P - - fsub.x %fp3,%fp0 # fp0 = A := R - P - fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w - - fmov.x %fp0,%fp3 # fp3 = A - fsub.x %fp4,%fp1 # fp1 = a := r - p - -#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -#--|r| <= half ulp of R. - fadd.x %fp1,%fp0 # fp0 = R := A+a -#--No need to calculate r if this is the last loop - cmp.b %d1,&0 - bgt.w RESTORE - -#--Need to calculate r - fsub.x %fp0,%fp3 # fp3 = A-R - fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a - bra.w LOOP - -RESTORE: - fmov.l %fp2,INT(%a6) - mov.l (%sp)+,%d2 # restore d2 - fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} - - mov.l INT(%a6),%d1 - ror.l &1,%d1 - - bra.w TANCONT - -######################################################################### -# satan(): computes the arctangent of a normalized number # -# satand(): computes the arctangent of a denormalized number # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arctan(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. # -# # -# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. # -# Note that k = -4, -3,..., or 3. # -# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 # -# significant bits of X with a bit-1 attached at the 6-th # -# bit position. Define u to be u = (X-F) / (1 + X*F). # -# # -# Step 3. Approximate arctan(u) by a polynomial poly. # -# # -# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # -# table of values calculated beforehand. Exit. # -# # -# Step 5. If |X| >= 16, go to Step 7. # -# # -# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # -# # -# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # -# polynomial in X'. # -# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. 
# -# # -######################################################################### - -ATANA3: long 0xBFF6687E,0x314987D8 -ATANA2: long 0x4002AC69,0x34A26DB3 -ATANA1: long 0xBFC2476F,0x4E1DA28E - -ATANB6: long 0x3FB34444,0x7F876989 -ATANB5: long 0xBFB744EE,0x7FAF45DB -ATANB4: long 0x3FBC71C6,0x46940220 -ATANB3: long 0xBFC24924,0x921872F9 -ATANB2: long 0x3FC99999,0x99998FA9 -ATANB1: long 0xBFD55555,0x55555555 - -ATANC5: long 0xBFB70BF3,0x98539E6A -ATANC4: long 0x3FBC7187,0x962D1D7D -ATANC3: long 0xBFC24924,0x827107B8 -ATANC2: long 0x3FC99999,0x9996263E -ATANC1: long 0xBFD55555,0x55555536 - -PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 -NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 - -PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 -NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 - -ATANTBL: - long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 - long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 - long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 - long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 - long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 - long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 - long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 - long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 - long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 - long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 - long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 - long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 - long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 - long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 - long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 - long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 - long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 - long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 - long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 - long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 - long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 - long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 - long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 - long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 - long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 - long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 - long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 - long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 - long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 - long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 - long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 - long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 - long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 - long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 - long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 - long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 - long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 - long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 - long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 - long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 - long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 - long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 - long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 - long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 - long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 - long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 - long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 - long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 - long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 - long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 - long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 - long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 - long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 - long 
0x3FFE0000,0x97731420,0x365E538C,0x00000000 - long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 - long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 - long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 - long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 - long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 - long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 - long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 - long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 - long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 - long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 - long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 - long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 - long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 - long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 - long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 - long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 - long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 - long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 - long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 - long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 - long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 - long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 - long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 - long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 - long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 - long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 - long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 - long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 - long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 - long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 - long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 - long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 - long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 - long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 - long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 - long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 - long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 - long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 - long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 - long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 - long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 - long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 - long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 - long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 - long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 - long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 - long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 - long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 - long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 - long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 - long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 - long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 - long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 - long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 - long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 - long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 - long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 - long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 - long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 - long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 - long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 - long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 - long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 - long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 - long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 - long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 - long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 - long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 - long 
0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 - long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 - long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 - long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 - long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 - long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - set XFRACLO,X+8 - - set ATANF,FP_SCR1 - set ATANFHI,ATANF+4 - set ATANFLO,ATANF+8 - - global satan -#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -satan: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? - bge.b ATANOK1 - bra.w ATANSM - -ATANOK1: - cmp.l %d1,&0x4002FFFF # |X| < 16 ? - ble.b ATANMAIN - bra.w ATANBIG - -#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE -#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). -#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN -#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE -#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS -#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR -#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO -#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE -#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL -#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE -#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION -#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION -#--WILL INVOLVE A VERY LONG POLYNOMIAL. - -#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS -#--WE CHOSE F TO BE +-2^K * 1.BBBB1 -#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE -#--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE -#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS -#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). - -ATANMAIN: - - and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS - or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 - mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F - - fmov.x %fp0,%fp1 # FP1 IS X - fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 - fsub.x X(%a6),%fp0 # FP0 IS X-F - fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F - fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) - -#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) -#--CREATE ATAN(F) AND STORE IT IN ATANF, AND -#--SAVE REGISTERS FP2. - - mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY - mov.l %d1,%d2 # THE EXP AND 16 BITS OF X - and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION - and.l &0x7FFF0000,%d2 # EXPONENT OF F - sub.l &0x3FFB0000,%d2 # K+4 - asr.l &1,%d2 - add.l %d2,%d1 # THE 7 BITS IDENTIFYING F - asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) - lea ATANTBL(%pc),%a1 - add.l %d1,%a1 # ADDRESS OF ATAN(|F|) - mov.l (%a1)+,ATANF(%a6) - mov.l (%a1)+,ATANFHI(%a6) - mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) - mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN - and.l &0x80000000,%d1 # SIGN(F) - or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) - mov.l (%sp)+,%d2 # RESTORE d2 - -#--THAT'S ALL I HAVE TO DO FOR NOW, -#--BUT ALAS, THE DIVIDE IS STILL CRANKING! - -#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS -#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U -#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. -#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) -#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 
-#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT -#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED - - fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 - fmov.d ATANA3(%pc),%fp2 - fadd.x %fp1,%fp2 # A3+V - fmul.x %fp1,%fp2 # V*(A3+V) - fmul.x %fp0,%fp1 # U*V - fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) - fmul.d ATANA1(%pc),%fp1 # A1*U*V - fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) - fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED - - fmovm.x (%sp)+,&0x20 # restore fp2 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - fadd.x ATANF(%a6),%fp0 # ATAN(X) - bra t_inx2 - -ATANBORS: -#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. -#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. - cmp.l %d1,&0x3FFF8000 - bgt.w ATANBIG # I.E. |X| >= 16 - -ATANSM: -#--|X| <= 1/16 -#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE -#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) -#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) -#--WHERE Y = X*X, AND Z = Y*Y. - - cmp.l %d1,&0x3FD78000 - blt.w ATANTINY - -#--COMPUTE POLYNOMIAL - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.x %fp0,%fp0 # FPO IS Y = X*X - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y - - fmov.d ATANB6(%pc),%fp2 - fmov.d ATANB5(%pc),%fp3 - - fmul.x %fp1,%fp2 # Z*B6 - fmul.x %fp1,%fp3 # Z*B5 - - fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 - fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 - - fmul.x %fp1,%fp2 # Z*(B4+Z*B6) - fmul.x %fp3,%fp1 # Z*(B3+Z*B5) - - fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) - fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) - - fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) - fmul.x X(%a6),%fp0 # X*Y - - fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] - - fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - fadd.x X(%a6),%fp0 - bra t_inx2 - -ATANTINY: -#--|X| < 2^(-40), ATAN(X) = X - - fmov.l %d0,%fpcr # restore users rnd mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - - bra t_catch - -ATANBIG: -#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, -#--RETURN SIGN(X)*PI/2 + ATAN(-1/X). - cmp.l %d1,&0x40638000 - bgt.w ATANHUGE - -#--APPROXIMATE ATAN(-1/X) BY -#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' -#--THIS CAN BE RE-WRITTEN AS -#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. - - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.s &0xBF800000,%fp1 # LOAD -1 - fdiv.x %fp0,%fp1 # FP1 IS -1/X - -#--DIVIDE IS STILL CRANKING - - fmov.x %fp1,%fp0 # FP0 IS X' - fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' - fmov.x %fp1,X(%a6) # X IS REALLY X' - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y - - fmov.d ATANC5(%pc),%fp3 - fmov.d ATANC4(%pc),%fp2 - - fmul.x %fp1,%fp3 # Z*C5 - fmul.x %fp1,%fp2 # Z*B4 - - fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 - fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 - - fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED - fmul.x %fp0,%fp2 # Y*(C2+Z*C4) - - fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) - fmul.x X(%a6),%fp0 # X'*Y - - fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] - - fmul.x %fp1,%fp0 # X'*Y*([B1+Z*(B3+Z*B5)] -# ... 
+[Y*(B2+Z*(B4+Z*B6))]) - fadd.x X(%a6),%fp0 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - tst.b (%a0) - bpl.b pos_big - -neg_big: - fadd.x NPIBY2(%pc),%fp0 - bra t_minx2 - -pos_big: - fadd.x PPIBY2(%pc),%fp0 - bra t_pinx2 - -ATANHUGE: -#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY - tst.b (%a0) - bpl.b pos_huge - -neg_huge: - fmov.x NPIBY2(%pc),%fp0 - fmov.l %d0,%fpcr - fadd.x PTINY(%pc),%fp0 - bra t_minx2 - -pos_huge: - fmov.x PPIBY2(%pc),%fp0 - fmov.l %d0,%fpcr - fadd.x NTINY(%pc),%fp0 - bra t_pinx2 - - global satand -#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT -satand: - bra t_extdnrm - -######################################################################### -# sasin(): computes the inverse sine of a normalized input # -# sasind(): computes the inverse sine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arcsin(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ASIN # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate asin(X) by # -# z := sqrt( [1-X][1+X] ) # -# asin(X) = atan( x / z ). # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.# -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global sasin -sasin: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ASINBIG - -# This catch is added here for the '060 QSP. Originally, the call to -# satan() would handle this case by causing the exception which would -# not be caught until gen_except(). Now, with the exceptions being -# detected inside of satan(), the exception would have been handled there -# instead of inside sasin() as expected. - cmp.l %d1,&0x3FD78000 - blt.w ASINTINY - -#--THIS IS THE USUAL CASE, |X| < 1 -#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) - -ASINMAIN: - fmov.s &0x3F800000,%fp1 - fsub.x %fp0,%fp1 # 1-X - fmovm.x &0x4,-(%sp) # {fp2} - fmov.s &0x3F800000,%fp2 - fadd.x %fp0,%fp2 # 1+X - fmul.x %fp2,%fp1 # (1+X)(1-X) - fmovm.x (%sp)+,&0x20 # {fp2} - fsqrt.x %fp1 # SQRT([1-X][1+X]) - fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X]) - fmovm.x &0x01,-(%sp) # save X/SQRT(...) - lea (%sp),%a0 # pass ptr to X/SQRT(...) - bsr satan - add.l &0xc,%sp # clear X/SQRT(...) from stack - bra t_inx2 - -ASINBIG: - fabs.x %fp0 # |X| - fcmp.s %fp0,&0x3F800000 - fbgt t_operr # cause an operr exception - -#--|X| = 1, ASIN(X) = +- PI/2. 
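The ASINMAIN path above is the entire algorithm for |X| < 1; the ASINONE path that follows handles |X| = 1 by returning sign(X)*Pi/2. A hedged C rendering of the |X| < 1 identity (double precision, libm atan() in place of satan(), hypothetical helper name):

	#include <math.h>

	/* asin(x) = atan( x / sqrt((1-x)(1+x)) ), valid for |x| < 1.
	 * The product (1-x)*(1+x) is used instead of 1 - x*x because it
	 * loses less accuracy when |x| is near 1. */
	static double asin_via_atan(double x)
	{
		double z = sqrt((1.0 - x) * (1.0 + x));
		return atan(x / z);
	}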
-ASINONE: - fmov.x PIBY2(%pc),%fp0 - mov.l (%a0),%d1 - and.l &0x80000000,%d1 # SIGN BIT OF X - or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT - mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT - fmov.l %d0,%fpcr - fmul.s (%sp)+,%fp0 - bra t_inx2 - -#--|X| < 2^(-40), ATAN(X) = X -ASINTINY: - fmov.l %d0,%fpcr # restore users rnd mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x (%a0),%fp0 # last inst - possible exception - bra t_catch - - global sasind -#--ASIN(X) = X FOR DENORMALIZED X -sasind: - bra t_extdnrm - -######################################################################### -# sacos(): computes the inverse cosine of a normalized input # -# sacosd(): computes the inverse cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arccos(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ACOS # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate acos(X) by # -# z := (1-X) / (1+X) # -# acos(X) = 2 * atan( sqrt(z) ). # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. # -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global sacos -sacos: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 # pack exp w/ upper 16 fraction - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ACOSBIG - -#--THIS IS THE USUAL CASE, |X| < 1 -#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) - -ACOSMAIN: - fmov.s &0x3F800000,%fp1 - fadd.x %fp0,%fp1 # 1+X - fneg.x %fp0 # -X - fadd.s &0x3F800000,%fp0 # 1-X - fdiv.x %fp1,%fp0 # (1-X)/(1+X) - fsqrt.x %fp0 # SQRT((1-X)/(1+X)) - mov.l %d0,-(%sp) # save original users fpcr - clr.l %d0 - fmovm.x &0x01,-(%sp) # save SQRT(...) to stack - lea (%sp),%a0 # pass ptr to sqrt - bsr satan # ATAN(SQRT([1-X]/[1+X])) - add.l &0xc,%sp # clear SQRT(...) from stack - - fmov.l (%sp)+,%fpcr # restore users round prec,mode - fadd.x %fp0,%fp0 # 2 * ATAN( STUFF ) - bra t_pinx2 - -ACOSBIG: - fabs.x %fp0 - fcmp.s %fp0,&0x3F800000 - fbgt t_operr # cause an operr exception - -#--|X| = 1, ACOS(X) = 0 OR PI - tst.b (%a0) # is X positive or negative? 
- bpl.b ACOSP1 - -#--X = -1 -#Returns PI and inexact exception -ACOSM1: - fmov.x PI(%pc),%fp0 # load PI - fmov.l %d0,%fpcr # load round mode,prec - fadd.s &0x00800000,%fp0 # add a small value - bra t_pinx2 - -ACOSP1: - bra ld_pzero # answer is positive zero - - global sacosd -#--ACOS(X) = PI/2 FOR DENORMALIZED X -sacosd: - fmov.l %d0,%fpcr # load user's rnd mode/prec - fmov.x PIBY2(%pc),%fp0 - bra t_pinx2 - -######################################################################### -# setox(): computes the exponential for a normalized input # -# setoxd(): computes the exponential for a denormalized input # -# setoxm1(): computes the exponential minus 1 for a normalized input # -# setoxm1d(): computes the exponential minus 1 for a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = exp(X) or exp(X)-1 # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 0.85 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM and IMPLEMENTATION **************************************** # -# # -# setoxd # -# ------ # -# Step 1. Set ans := 1.0 # -# # -# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. # -# Notes: This will always generate one exception -- inexact. # -# # -# # -# setox # -# ----- # -# # -# Step 1. Filter out extreme cases of input argument. # -# 1.1 If |X| >= 2^(-65), go to Step 1.3. # -# 1.2 Go to Step 7. # -# 1.3 If |X| < 16380 log(2), go to Step 2. # -# 1.4 Go to Step 8. # -# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# -# To avoid the use of floating-point comparisons, a # -# compact representation of |X| is used. This format is a # -# 32-bit integer, the upper (more significant) 16 bits # -# are the sign and biased exponent field of |X|; the # -# lower 16 bits are the 16 most significant fraction # -# (including the explicit bit) bits of |X|. Consequently, # -# the comparisons in Steps 1.1 and 1.3 can be performed # -# by integer comparison. Note also that the constant # -# 16380 log(2) used in Step 1.3 is also in the compact # -# form. Thus taking the branch to Step 2 guarantees # -# |X| < 16380 log(2). There is no harm to have a small # -# number of cases where |X| is less than, but close to, # -# 16380 log(2) and the branch to Step 9 is taken. # -# # -# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # -# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 # -# was taken) # -# 2.2 N := round-to-nearest-integer( X * 64/log2 ). # -# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., # -# or 63. # -# 2.4 Calculate M = (N - J)/64; so N = 64M + J. # -# 2.5 Calculate the address of the stored value of # -# 2^(J/64). # -# 2.6 Create the value Scale = 2^M. # -# Notes: The calculation in 2.2 is really performed by # -# Z := X * constant # -# N := round-to-nearest-integer(Z) # -# where # -# constant := single-precision( 64/log 2 ). # -# # -# Using a single-precision constant avoids memory # -# access. Another effect of using a single-precision # -# "constant" is that the calculated value Z is # -# # -# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). # -# # -# This error has to be considered later in Steps 3 and 4. # -# # -# Step 3. 
Calculate X - N*log2/64. # -# 3.1 R := X + N*L1, # -# where L1 := single-precision(-log2/64). # -# 3.2 R := R + N*L2, # -# L2 := extended-precision(-log2/64 - L1).# -# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # -# approximate the value -log2/64 to 88 bits of accuracy. # -# b) N*L1 is exact because N is no longer than 22 bits # -# and L1 is no longer than 24 bits. # -# c) The calculation X+N*L1 is also exact due to # -# cancellation. Thus, R is practically X+N(L1+L2) to full # -# 64 bits. # -# d) It is important to estimate how large |R| can be # -# after Step 3.2. # -# # -# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # -# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # -# X*64/log2 - N = f - eps*X*64/log2 # -# X - N*log2/64 = f*log2/64 - eps*X # -# # -# # -# Now |X| <= 16446 log2, thus # -# # -# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # -# <= 0.57 log2/64. # -# This bound will be used in Step 4. # -# # -# Step 4. Approximate exp(R)-1 by a polynomial # -# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: A1 (which is 1/2), A4 # -# and A5 are single precision; A2 and A3 are double # -# precision. # -# b) Even with the restrictions above, # -# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # -# Note that 0.0062 is slightly bigger than 0.57 log2/64. # -# c) To fully utilize the pipeline, p is separated into # -# two independent pieces of roughly equal complexity # -# p = [ R + R*S*(A2 + S*A4) ] + # -# [ S*(A1 + S*(A3 + S*A5)) ] # -# where S = R*R. # -# # -# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # -# ans := T + ( T*p + t) # -# where T and t are the stored values for 2^(J/64). # -# Notes: 2^(J/64) is stored as T and t where T+t approximates # -# 2^(J/64) to roughly 85 bits; T is in extended precision # -# and t is in single precision. Note also that T is # -# rounded to 62 bits so that the last two bits of T are # -# zero. The reason for such a special form is that T-1, # -# T-2, and T-8 will all be exact --- a property that will # -# give much more accurate computation of the function # -# EXPM1. # -# # -# Step 6. Reconstruction of exp(X) # -# exp(X) = 2^M * 2^(J/64) * exp(R). # -# 6.1 If AdjFlag = 0, go to 6.3 # -# 6.2 ans := ans * AdjScale # -# 6.3 Restore the user FPCR # -# 6.4 Return ans := ans * Scale. Exit. # -# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, # -# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will # -# neither overflow nor underflow. If AdjFlag = 1, that # -# means that # -# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. # -# Hence, exp(X) may overflow or underflow or neither. # -# When that is the case, AdjScale = 2^(M1) where M1 is # -# approximately M. Thus 6.2 will never cause # -# over/underflow. Possible exception in 6.4 is overflow # -# or underflow. The inexact exception is not generated in # -# 6.4. Although one can argue that the inexact flag # -# should always be raised, simulating that exception # -# costs too much for what the flag is worth in practice. # -# # -# Step 7. Return 1 + X. # -# 7.1 ans := X # -# 7.2 Restore user FPCR. # -# 7.3 Return ans := 1 + ans. Exit # -# Notes: For non-zero X, the inexact exception will always be # -# raised by 7.3. That is the only exception raised by 7.3.# -# Note also that we use the FMOVEM instruction to move X # -# in Step 7.1 to avoid unnecessary trapping.
(Although # -# the FMOVEM may not seem relevant since X is normalized, # -# the precaution will be useful in the library version of # -# this code where the separate entry for denormalized # -# inputs will be done away with.) # -# # -# Step 8. Handle exp(X) where |X| >= 16380log2. # -# 8.1 If |X| > 16480 log2, go to Step 9. # -# (mimic 2.2 - 2.6) # -# 8.2 N := round-to-integer( X * 64/log2 ) # -# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # -# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # -# AdjFlag := 1. # -# 8.5 Calculate the address of the stored value # -# 2^(J/64). # -# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # -# 8.7 Go to Step 3. # -# Notes: Refer to notes for 2.2 - 2.6. # -# # -# Step 9. Handle exp(X), |X| > 16480 log2. # -# 9.1 If X < 0, go to 9.3 # -# 9.2 ans := Huge, go to 9.4 # -# 9.3 ans := Tiny. # -# 9.4 Restore user FPCR. # -# 9.5 Return ans := ans * ans. Exit. # -# Notes: Exp(X) will surely overflow or underflow, depending on # -# X's sign. "Huge" and "Tiny" are respectively large/tiny # -# extended-precision numbers whose square over/underflow # -# with an inexact result. Thus, 9.5 always raises the # -# inexact together with either overflow or underflow. # -# # -# setoxm1d # -# -------- # -# # -# Step 1. Set ans := 0 # -# # -# Step 2. Return ans := X + ans. Exit. # -# Notes: This will return X with the appropriate rounding # -# precision prescribed by the user FPCR. # -# # -# setoxm1 # -# ------- # -# # -# Step 1. Check |X| # -# 1.1 If |X| >= 1/4, go to Step 1.3. # -# 1.2 Go to Step 7. # -# 1.3 If |X| < 70 log(2), go to Step 2. # -# 1.4 Go to Step 10. # -# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# -# However, it is conceivable |X| can be small very often # -# because EXPM1 is intended to evaluate exp(X)-1 # -# accurately when |X| is small. For further details on # -# the comparisons, see the notes on Step 1 of setox. # -# # -# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # -# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # -# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # -# or 63. # -# 2.3 Calculate M = (N - J)/64; so N = 64M + J. # -# 2.4 Calculate the address of the stored value of # -# 2^(J/64). # -# 2.5 Create the values Sc = 2^M and # -# OnebySc := -2^(-M). # -# Notes: See the notes on Step 2 of setox. # -# # -# Step 3. Calculate X - N*log2/64. # -# 3.1 R := X + N*L1, # -# where L1 := single-precision(-log2/64). # -# 3.2 R := R + N*L2, # -# L2 := extended-precision(-log2/64 - L1).# -# Notes: Applying the analysis of Step 3 of setox in this case # -# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # -# this case). # -# # -# Step 4. Approximate exp(R)-1 by a polynomial # -# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: A1 (which is 1/2), A5 # -# and A6 are single precision; A2, A3 and A4 are double # -# precision. # -# b) Even with the restriction above, # -# |p - (exp(R)-1)| < |R| * 2^(-72.7) # -# for all |R| <= 0.0055. # -# c) To fully utilize the pipeline, p is separated into # -# two independent pieces of roughly equal complexity # -# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # -# [ R + S*(A1 + S*(A3 + S*A5)) ] # -# where S = R*R. # -# # -# Step 5. Compute 2^(J/64)*p by # -# p := T*p # -# where T and t are the stored values for 2^(J/64). # -# Notes: 2^(J/64) is stored as T and t where T+t approximates # -# 2^(J/64) to roughly 85 bits; T is in extended precision # -# and t is in single precision. 
Note also that T is # -# rounded to 62 bits so that the last two bits of T are # -# zero. The reason for such a special form is that T-1, # -# T-2, and T-8 will all be exact --- a property that will # -# be exploited in Step 6 below. The total relative error # -# in p is no bigger than 2^(-67.7) compared to the final # -# result. # -# # -# Step 6. Reconstruction of exp(X)-1 # -# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # -# 6.1 If M <= 63, go to Step 6.3. # -# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # -# 6.3 If M >= -3, go to 6.5. # -# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # -# 6.5 ans := (T + OnebySc) + (p + t). # -# 6.6 Restore user FPCR. # -# 6.7 Return ans := Sc * ans. Exit. # -# Notes: The various arrangements of the expressions give # -# accurate evaluations. # -# # -# Step 7. exp(X)-1 for |X| < 1/4. # -# 7.1 If |X| >= 2^(-65), go to Step 9. # -# 7.2 Go to Step 8. # -# # -# Step 8. Calculate exp(X)-1, |X| < 2^(-65). # -# 8.1 If |X| < 2^(-16312), goto 8.3 # -# 8.2 Restore FPCR; return ans := X - 2^(-16382). # -# Exit. # -# 8.3 X := X * 2^(140). # -# 8.4 Restore FPCR; ans := ans - 2^(-16382). # -# Return ans := ans*2^(140). Exit # -# Notes: The idea is to return "X - tiny" under the user # -# precision and rounding modes. To avoid unnecessary # -# inefficiency, we stay away from denormalized numbers # -# the best we can. For |X| >= 2^(-16312), the # -# straightforward 8.2 generates the inexact exception as # -# the case warrants. # -# # -# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # -# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: B1 (which is 1/2), B9 # -# to B12 are single precision; B3 to B8 are double # -# precision; and B2 is double extended. # -# b) Even with the restriction above, # -# |p - (exp(X)-1)| < |X| 2^(-70.6) # -# for all |X| <= 0.251. # -# Note that 0.251 is slightly bigger than 1/4. # -# c) To fully preserve accuracy, the polynomial is # -# computed as # -# X + ( S*B1 + Q ) where S = X*X and # -# Q = X*S*(B2 + X*(B3 + ... + X*B12)) # -# d) To fully utilize the pipeline, Q is separated into # -# two independent pieces of roughly equal complexity # -# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # -# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] # -# # -# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # -# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # -# practical purposes. Therefore, go to Step 1 of setox. # -# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # -# purposes. # -# ans := -1 # -# Restore user FPCR # -# Return ans := ans + 2^(-126). Exit. # -# Notes: 10.2 will always create an inexact and return -1 + tiny # -# in the user rounding precision and mode. 
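Steps 2-6 of setox above reduce to a few lines of C once the table and the polynomial are replaced by libm calls. The sketch below is a model, not the emulator's arithmetic: exp2() stands in for the 64-entry 2^(J/64) table (EEXPTBL), expm1() for the degree-5 polynomial, the L1/L2 split is recomputed generically rather than taken from the stored constants, and the helper name exp_by_table is invented here.

	#include <math.h>

	/* exp(x) = 2^M * 2^(J/64) * exp(R), with R = x - N*log2/64. */
	static double exp_by_table(double x)
	{
		long n = lrint(x * 64.0 / M_LN2);	/* Step 2: N = rnd-to-int */
		long j = n & 63;			/* J = N mod 64 */
		long m = (n - j) / 64;			/* M: N = 64M + J */

		/* Step 3: two-piece reduction.  l1 is a short head of
		 * -log2/64, so n*l1 is exact; l2 carries the tail. */
		double l1 = (double)(float)(-M_LN2 / 64.0);
		double l2 = -M_LN2 / 64.0 - l1;
		double r = (x + n * l1) + n * l2;

		/* Steps 4-6: exp(R)-1, table value, scale by 2^M. */
		return ldexp(exp2((double)j / 64.0) * (1.0 + expm1(r)), (int)m);
	}

setoxm1 follows the same reduction but assembles T + p + t against -2^(-M) (OnebySc) in an order chosen by the size of M, which is what Steps 6.2-6.5 above spell out.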
# -# # -######################################################################### - -L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 - -EEXPA3: long 0x3FA55555,0x55554CC1 -EEXPA2: long 0x3FC55555,0x55554A54 - -EM1A4: long 0x3F811111,0x11174385 -EM1A3: long 0x3FA55555,0x55554F5A - -EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 - -EM1B8: long 0x3EC71DE3,0xA5774682 -EM1B7: long 0x3EFA01A0,0x19D7CB68 - -EM1B6: long 0x3F2A01A0,0x1A019DF3 -EM1B5: long 0x3F56C16C,0x16C170E2 - -EM1B4: long 0x3F811111,0x11111111 -EM1B3: long 0x3FA55555,0x55555555 - -EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB - long 0x00000000 - -TWO140: long 0x48B00000,0x00000000 -TWON140: - long 0x37300000,0x00000000 - -EEXPTBL: - long 0x3FFF0000,0x80000000,0x00000000,0x00000000 - long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B - long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 - long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 - long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C - long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F - long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 - long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF - long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF - long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA - long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 - long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 - long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 - long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 - long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D - long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 - long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD - long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 - long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 - long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D - long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 - long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C - long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 - long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 - long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 - long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA - long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A - long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC - long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC - long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 - long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 - long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A - long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 - long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 - long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC - long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 - long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 - long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 - long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 - long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B - long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 - long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E - long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 - long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D - long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 - long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C - long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 - long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 - long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F - long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F - long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 - long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 - long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B - long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 - long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A - long 
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 - long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 - long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B - long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 - long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 - long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 - long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 - long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 - long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A - - set ADJFLAG,L_SCR2 - set SCALE,FP_SCR0 - set ADJSCALE,FP_SCR1 - set SC,FP_SCR0 - set ONEBYSC,FP_SCR1 - - global setox -setox: -#--entry point for EXP(X), here X is finite, non-zero, and not NaN's - -#--Step 1. - mov.l (%a0),%d1 # load part of input X - and.l &0x7FFF0000,%d1 # biased expo. of X - cmp.l %d1,&0x3FBE0000 # 2^(-65) - bge.b EXPC1 # normal case - bra EXPSM - -EXPC1: -#--The case |X| >= 2^(-65) - mov.w 4(%a0),%d1 # expo. and partial sig. of |X| - cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits - blt.b EXPMAIN # normal case - bra EEXPBIG - -EXPMAIN: -#--Step 2. -#--This is the normal branch: 2^(-65) <= |X| < 16380 log2. - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - mov.l &0,ADJFLAG(%a6) - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M) - mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB - -EXPCONT1: -#--Step 3. -#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) - fmov.x %fp0,%fp2 - fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) - fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 - fadd.x %fp1,%fp0 # X + N*L1 - fadd.x %fp2,%fp0 # fp0 is R, reduced arg. - -#--Step 4. -#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # fp1 IS S = R*R - - fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 - - fmul.x %fp1,%fp2 # fp2 IS S*A5 - fmov.x %fp1,%fp3 - fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 - - fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 - fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 - - fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) - mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended - mov.l &0x80000000,SCALE+4(%a6) - clr.l SCALE+8(%a6) - - fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) - - fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5) - fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) - - fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) - fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4), - - fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64) - fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 - -#--Step 5 -#--final reconstruction process -#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) - - fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - fadd.s (%a1),%fp0 # accurate 2^(J/64) - - fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... 
- mov.l ADJFLAG(%a6),%d1 - -#--Step 6 - tst.l %d1 - beq.b NORMAL -ADJUST: - fmul.x ADJSCALE(%a6),%fp0 -NORMAL: - fmov.l %d0,%fpcr # restore user FPCR - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x SCALE(%a6),%fp0 # multiply 2^(M) - bra t_catch - -EXPSM: -#--Step 7 - fmovm.x (%a0),&0x80 # load X - fmov.l %d0,%fpcr - fadd.s &0x3F800000,%fp0 # 1+X in user mode - bra t_pinx2 - -EEXPBIG: -#--Step 8 - cmp.l %d1,&0x400CB27C # 16480 log2 - bgt.b EXP2BIG -#--Steps 8.2 -- 8.6 - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - mov.l &1,ADJFLAG(%a6) - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is K - mov.l %d1,L_SCR1(%a6) # save K temporarily - asr.l &1,%d1 # D0 is M1 - sub.l %d1,L_SCR1(%a6) # a1 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M1) - mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) - mov.l &0x80000000,ADJSCALE+4(%a6) - clr.l ADJSCALE+8(%a6) - mov.l L_SCR1(%a6),%d1 # D0 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M) - bra.w EXPCONT1 # go back to Step 3 - -EXP2BIG: -#--Step 9 - tst.b (%a0) # is X positive or negative? - bmi t_unfl2 - bra t_ovfl2 - - global setoxd -setoxd: -#--entry point for EXP(X), X is denormalized - mov.l (%a0),-(%sp) - andi.l &0x80000000,(%sp) - ori.l &0x00800000,(%sp) # sign(X)*2^(-126) - - fmov.s &0x3F800000,%fp0 - - fmov.l %d0,%fpcr - fadd.s (%sp)+,%fp0 - bra t_pinx2 - - global setoxm1 -setoxm1: -#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN - -#--Step 1. -#--Step 1.1 - mov.l (%a0),%d1 # load part of input X - and.l &0x7FFF0000,%d1 # biased expo. of X - cmp.l %d1,&0x3FFD0000 # 1/4 - bge.b EM1CON1 # |X| >= 1/4 - bra EM1SM - -EM1CON1: -#--Step 1.3 -#--The case |X| >= 1/4 - mov.w 4(%a0),%d1 # expo. and partial sig. of |X| - cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits - ble.b EM1MAIN # 1/4 <= |X| <= 70log2 - bra EM1BIG - -EM1MAIN: -#--Step 2. -#--This is the case: 1/4 <= |X| <= 70 log2. - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is M - mov.l %d1,L_SCR1(%a6) # save a copy of M - -#--Step 3. -#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -#--a0 points to 2^(J/64), D0 and a1 both contain M - fmov.x %fp0,%fp2 - fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) - fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 - fadd.x %fp1,%fp0 # X + N*L1 - fadd.x %fp2,%fp0 # fp0 is R, reduced arg. - add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M - -#--Step 4. 
-#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) -#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # fp1 IS S = R*R - - fmov.s &0x3950097B,%fp2 # fp2 IS a6 - - fmul.x %fp1,%fp2 # fp2 IS S*A6 - fmov.x %fp1,%fp3 - fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 - - fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 - fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 - mov.w %d1,SC(%a6) # SC is 2^(M) in extended - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - - fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) - mov.l L_SCR1(%a6),%d1 # D0 is M - neg.w %d1 # D0 is -M - fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) - add.w &0x3FFF,%d1 # biased expo. of 2^(-M) - fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) - fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5) - - fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) - or.w &0x8000,%d1 # signed/expo. of -2^(-M) - mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) - mov.l &0x80000000,ONEBYSC+4(%a6) - clr.l ONEBYSC+8(%a6) - fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) - - fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) - fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) - - fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1 - - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - -#--Step 5 -#--Compute 2^(J/64)*p - - fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1) - -#--Step 6 -#--Step 6.1 - mov.l L_SCR1(%a6),%d1 # retrieve M - cmp.l %d1,&63 - ble.b MLE63 -#--Step 6.2 M >= 64 - fmov.s 12(%a1),%fp1 # fp1 is t - fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc - fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released - fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) - bra EM1SCALE -MLE63: -#--Step 6.3 M <= 63 - cmp.l %d1,&-3 - bge.b MGEN3 -MLTN3: -#--Step 6.4 M <= -4 - fadd.s 12(%a1),%fp0 # p+t - fadd.x (%a1),%fp0 # T+(p+t) - fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) - bra EM1SCALE -MGEN3: -#--Step 6.5 -3 <= M <= 63 - fmov.x (%a1)+,%fp1 # fp1 is T - fadd.s (%a1),%fp0 # fp0 is p+t - fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc - fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) - -EM1SCALE: -#--Step 6.6 - fmov.l %d0,%fpcr - fmul.x SC(%a6),%fp0 - bra t_inx2 - -EM1SM: -#--Step 7 |X| < 1/4. - cmp.l %d1,&0x3FBE0000 # 2^(-65) - bge.b EM1POLY - -EM1TINY: -#--Step 8 |X| < 2^(-65) - cmp.l %d1,&0x00330000 # 2^(-16312) - blt.b EM12TINY -#--Step 8.2 - mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - fmov.x (%a0),%fp0 - fmov.l %d0,%fpcr - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x SC(%a6),%fp0 - bra t_catch - -EM12TINY: -#--Step 8.3 - fmov.x (%a0),%fp0 - fmul.d TWO140(%pc),%fp0 - mov.l &0x80010000,SC(%a6) - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - fadd.x SC(%a6),%fp0 - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.d TWON140(%pc),%fp0 - bra t_catch - -EM1POLY: -#--Step 9 exp(X)-1 by a simple polynomial - fmov.x (%a0),%fp0 # fp0 is X - fmul.x %fp0,%fp0 # fp0 is S := X*X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 - fmul.x %fp0,%fp1 # fp1 is S*B12 - fmov.s &0x310F8290,%fp2 # fp2 is B11 - fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 - - fmul.x %fp0,%fp2 # fp2 is S*B11 - fmul.x %fp0,%fp1 # fp1 is S*(B10 + ... - - fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... - fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B9+... - fmul.x %fp0,%fp1 # fp1 is S*(B8+... - - fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... - fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B7+... - fmul.x %fp0,%fp1 # fp1 is S*(B6+... 
- - fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... - fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B5+... - fmul.x %fp0,%fp1 # fp1 is S*(B4+... - - fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... - fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B3+... - fmul.x %fp0,%fp1 # fp1 is S*(B2+... - - fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) - fmul.x (%a0),%fp1 # fp1 is X*S*(B2... - - fmul.s &0x3F000000,%fp0 # fp0 is S*B1 - fadd.x %fp2,%fp1 # fp1 is Q - - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - - fadd.x %fp1,%fp0 # fp0 is S*B1+Q - - fmov.l %d0,%fpcr - fadd.x (%a0),%fp0 - bra t_inx2 - -EM1BIG: -#--Step 10 |X| > 70 log2 - mov.l (%a0),%d1 - cmp.l %d1,&0 - bgt.w EXPC1 -#--Step 10.2 - fmov.s &0xBF800000,%fp0 # fp0 is -1 - fmov.l %d0,%fpcr - fadd.s &0x00800000,%fp0 # -1 + 2^(-126) - bra t_minx2 - - global setoxm1d -setoxm1d: -#--entry point for EXPM1(X), here X is denormalized -#--Step 0. - bra t_extdnrm - -######################################################################### -# sgetexp(): returns the exponent portion of the input argument. # -# The exponent bias is removed and the exponent value is # -# returned as an extended precision number in fp0. # -# sgetexpd(): handles denormalized numbers. # -# # -# sgetman(): extracts the mantissa of the input argument. The # -# mantissa is converted to an extended precision number w/ # -# an exponent of $3fff and is returned in fp0. The range of # -# the result is [1.0 - 2.0). # -# sgetmand(): handles denormalized numbers. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# # -# OUTPUT ************************************************************** # -# fp0 = exponent(X) or mantissa(X) # -# # -######################################################################### - - global sgetexp -sgetexp: - mov.w SRC_EX(%a0),%d0 # get the exponent - bclr &0xf,%d0 # clear the sign bit - subi.w &0x3fff,%d0 # subtract off the bias - fmov.w %d0,%fp0 # return exp in fp0 - blt.b sgetexpn # it's negative - rts - -sgetexpn: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - - global sgetexpd -sgetexpd: - bsr.l norm # normalize - neg.w %d0 # new exp = -(shft amt) - subi.w &0x3fff,%d0 # subtract off the bias - fmov.w %d0,%fp0 # return exp in fp0 - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - - global sgetman -sgetman: - mov.w SRC_EX(%a0),%d0 # get the exp - ori.w &0x7fff,%d0 # clear old exp - bclr &0xe,%d0 # make it the new exp +-3fff - -# here, we build the result in a tmp location so as not to disturb the input - mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc - mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc - mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent - fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0 - bmi.b sgetmann # it's negative - rts - -sgetmann: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - -# -# For denormalized numbers, shift the mantissa until the j-bit = 1, -# then load the exponent with +/1 $3fff. 
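sgetexp()/sgetman() above are pure field manipulations: sgetexp strips the bias from the exponent field and returns it as a floating-point value, while sgetman forces the biased exponent to $3fff (keeping the sign bit) so the significand comes back with magnitude in [1.0, 2.0); sgetmand, which follows, normalizes a denorm first and falls into sgetman. A rough C analogue, assuming a finite nonzero double in place of the 80-bit extended operand, with frexp() doing the field surgery and invented helper names:

	#include <math.h>

	/* x = f * 2^e with |f| in [0.5,1), i.e. x = (2f) * 2^(e-1). */
	static double getexp(double x)
	{
		int e;
		frexp(x, &e);
		return (double)(e - 1);	/* unbiased exponent as a FP value */
	}

	static double getman(double x)
	{
		int e;
		return 2.0 * frexp(x, &e); /* signed mantissa, |.| in [1,2) */
	}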
-# - global sgetmand -sgetmand: - bsr.l norm # normalize exponent - bra.b sgetman - -######################################################################### -# scosh(): computes the hyperbolic cosine of a normalized input # -# scoshd(): computes the hyperbolic cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = cosh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# COSH # -# 1. If |X| > 16380 log2, go to 3. # -# # -# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # -# y = |X|, z = exp(y), and # -# cosh(X) = (1/2)*( z + 1/z ). # -# Exit. # -# # -# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # -# # -# 4. (16380 log2 < |X| <= 16480 log2) # -# cosh(X) = exp(|X|)/2. # -# However, invoking exp(|X|) may cause premature # -# overflow. Thus, we calculate cosh(X) as follows: # -# Y := |X| # -# Fact := 2**(16380) # -# Y' := Y - 16381 log2 # -# cosh(X) := Fact * exp(Y'). # -# Exit. # -# # -# 5. (|X| > 16480 log2) cosh(X) must overflow. Return # -# Huge*Huge to generate overflow and a positive # -# infinity. Huge is the largest finite number # -# in extended format. Exit. # -# # -######################################################################### - -TWO16380: - long 0x7FFB0000,0x80000000,0x00000000,0x00000000 - - global scosh -scosh: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x400CB167 - bgt.b COSHBIG - -#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) - - fabs.x %fp0 # |X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save |X| to stack - lea (%sp),%a0 # pass ptr to |X| - bsr setox # FP0 IS EXP(|X|) - add.l &0xc,%sp # erase |X| from stack - fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|) - mov.l (%sp)+,%d0 - - fmov.s &0x3E800000,%fp1 # (1/4) - fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|)) - - fmov.l %d0,%fpcr - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x %fp1,%fp0 - bra t_catch - -COSHBIG: - cmp.l %d1,&0x400CB2B3 - bgt.b COSHHUGE - - fabs.x %fp0 - fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) - fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save fp0 to stack - lea (%sp),%a0 # pass ptr to fp0 - bsr setox - add.l &0xc,%sp # clear fp0 from stack - mov.l (%sp)+,%d0 - - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x TWO16380(%pc),%fp0 - bra t_catch - -COSHHUGE: - bra t_ovfl2 - - global scoshd -#--COSH(X) = 1 FOR DENORMALIZED X -scoshd: - fmov.s &0x3F800000,%fp0 - - fmov.l %d0,%fpcr - fadd.s &0x00800000,%fp0 - bra t_pinx2 - -######################################################################### -# ssinh(): computes the hyperbolic sine of a normalized input # -# ssinhd(): computes the hyperbolic sine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT
************************************************************** # -# fp0 = sinh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# SINH # -# 1. If |X| > 16380 log2, go to 3. # -# # -# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # -# y = |X|, sgn = sign(X), and z = expm1(Y), # -# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # -# Exit. # -# # -# 3. If |X| > 16480 log2, go to 5. # -# # -# 4. (16380 log2 < |X| <= 16480 log2) # -# sinh(X) = sign(X) * exp(|X|)/2. # -# However, invoking exp(|X|) may cause premature overflow. # -# Thus, we calculate sinh(X) as follows: # -# Y := |X| # -# sgn := sign(X) # -# sgnFact := sgn * 2**(16380) # -# Y' := Y - 16381 log2 # -# sinh(X) := sgnFact * exp(Y'). # -# Exit. # -# # -# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # -# sign(X)*Huge*Huge to generate overflow and an infinity with # -# the appropriate sign. Huge is the largest finite number in # -# extended format. Exit. # -# # -######################################################################### - - global ssinh -ssinh: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - mov.l %d1,%a1 # save (compacted) operand - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x400CB167 - bgt.b SINHBIG - -#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) - - fabs.x %fp0 # Y = |X| - - movm.l &0x8040,-(%sp) # {a1/d0} - fmovm.x &0x01,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - clr.l %d0 - bsr setoxm1 # FP0 IS Z = EXPM1(Y) - add.l &0xc,%sp # clear Y from stack - fmov.l &0,%fpcr - movm.l (%sp)+,&0x0201 # {a1/d0} - - fmov.x %fp0,%fp1 - fadd.s &0x3F800000,%fp1 # 1+Z - fmov.x %fp0,-(%sp) - fdiv.x %fp1,%fp0 # Z/(1+Z) - mov.l %a1,%d1 - and.l &0x80000000,%d1 - or.l &0x3F000000,%d1 - fadd.x (%sp)+,%fp0 - mov.l %d1,-(%sp) - - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set - bra t_catch - -SINHBIG: - cmp.l %d1,&0x400CB2B3 - bgt t_ovfl - fabs.x %fp0 - fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) - mov.l &0,-(%sp) - mov.l &0x80000000,-(%sp) - mov.l %a1,%d1 - and.l &0x80000000,%d1 - or.l &0x7FFB0000,%d1 - mov.l %d1,-(%sp) # EXTENDED FMT - fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save fp0 on stack - lea (%sp),%a0 # pass ptr to fp0 - bsr setox - add.l &0xc,%sp # clear fp0 from stack - - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x (%sp)+,%fp0 # possible exception - bra t_catch - - global ssinhd -#--SINH(X) = X FOR DENORMALIZED X -ssinhd: - bra t_extdnrm - -######################################################################### -# stanh(): computes the hyperbolic tangent of a normalized input # -# stanhd(): computes the hyperbolic tangent of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = tanh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is 
within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# TANH # -# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # -# # -# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # -# sgn := sign(X), y := 2|X|, z := expm1(Y), and # -# tanh(X) = sgn*( z/(2+z) ). # -# Exit. # -# # -# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # -# go to 7. # -# # -# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # -# # -# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # -# sgn := sign(X), y := 2|X|, z := exp(Y), # -# tanh(X) = sgn - [ sgn*2/(1+z) ]. # -# Exit. # -# # -# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # -# calculate Tanh(X) by # -# sgn := sign(X), Tiny := 2**(-126), # -# tanh(X) := sgn - sgn*Tiny. # -# Exit. # -# # -# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. # -# # -######################################################################### - - set X,FP_SCR0 - set XFRAC,X+4 - - set SGN,L_SCR3 - - set V,FP_SCR0 - - global stanh -stanh: - fmov.x (%a0),%fp0 # LOAD INPUT - - fmov.x %fp0,X(%a6) - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - mov.l %d1,X(%a6) - and.l &0x7FFFFFFF,%d1 - cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? - blt.w TANHBORS # yes - cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? - bgt.w TANHBORS # yes - -#--THIS IS THE USUAL CASE -#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). - - mov.l X(%a6),%d1 - mov.l %d1,SGN(%a6) - and.l &0x7FFF0000,%d1 - add.l &0x00010000,%d1 # EXPONENT OF 2|X| - mov.l %d1,X(%a6) - and.l &0x80000000,SGN(%a6) - fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x1,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - bsr setoxm1 # FP0 IS Z = EXPM1(Y) - add.l &0xc,%sp # clear Y from stack - mov.l (%sp)+,%d0 - - fmov.x %fp0,%fp1 - fadd.s &0x40000000,%fp1 # Z+2 - mov.l SGN(%a6),%d1 - fmov.x %fp1,V(%a6) - eor.l %d1,V(%a6) - - fmov.l %d0,%fpcr # restore users round prec,mode - fdiv.x V(%a6),%fp0 - bra t_inx2 - -TANHBORS: - cmp.l %d1,&0x3FFF8000 - blt.w TANHSM - - cmp.l %d1,&0x40048AA1 - bgt.w TANHHUGE - -#-- (5/2) LOG2 < |X| < 50 LOG2, -#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), -#--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. 
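The three live branches of stanh (Steps 2, 5, and 6/7 above) map directly onto C. In this hedged sketch, libm's expm1()/exp() stand in for setoxm1()/setox(), the thresholds are the round numbers named in the algorithm rather than the compact-format constants tested in the code, and the function name tanh_sketch is invented:

	#include <math.h>

	static double tanh_sketch(double x)
	{
		double ax = fabs(x);
		double sgn = (x < 0.0) ? -1.0 : 1.0;

		if (ax < 0x1.0p-40)		/* Step 7: tanh(X) = X */
			return x;
		if (ax < 2.5 * M_LN2) {		/* Step 2: z = expm1(2|X|) */
			double z = expm1(2.0 * ax);
			return sgn * z / (2.0 + z);
		}
		if (ax < 50.0 * M_LN2)		/* Step 5: sgn - sgn*2/(exp(2|X|)+1) */
			return sgn - sgn * 2.0 / (exp(2.0 * ax) + 1.0);
		return sgn;			/* Step 6: +-1 (minus a tiny, to
						 * raise inexact, in the real code) */
	}

Both nontrivial branches are the same algebra: tanh(x) = (e^y - 1)/(e^y + 1) with y = 2|x|, written once in terms of expm1 (accurate for small y) and once in terms of exp (safe from cancellation for large y).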
- - mov.l X(%a6),%d1 - mov.l %d1,SGN(%a6) - and.l &0x7FFF0000,%d1 - add.l &0x00010000,%d1 # EXPO OF 2|X| - mov.l %d1,X(%a6) # Y = 2|X| - and.l &0x80000000,SGN(%a6) - mov.l SGN(%a6),%d1 - fmov.x X(%a6),%fp0 # Y = 2|X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - bsr setox # FP0 IS EXP(Y) - add.l &0xc,%sp # clear Y from stack - mov.l (%sp)+,%d0 - mov.l SGN(%a6),%d1 - fadd.s &0x3F800000,%fp0 # EXP(Y)+1 - - eor.l &0xC0000000,%d1 # -SIGN(X)*2 - fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT - fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ] - - mov.l SGN(%a6),%d1 - or.l &0x3F800000,%d1 # SGN - fmov.s %d1,%fp0 # SGN IN SGL FMT - - fmov.l %d0,%fpcr # restore users round prec,mode - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x %fp1,%fp0 - bra t_inx2 - -TANHSM: - fmov.l %d0,%fpcr # restore users round prec,mode - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - bra t_catch - -#---RETURN SGN(X) - SGN(X)EPS -TANHHUGE: - mov.l X(%a6),%d1 - and.l &0x80000000,%d1 - or.l &0x3F800000,%d1 - fmov.s %d1,%fp0 - and.l &0x80000000,%d1 - eor.l &0x80800000,%d1 # -SIGN(X)*EPS - - fmov.l %d0,%fpcr # restore users round prec,mode - fadd.s %d1,%fp0 - bra t_inx2 - - global stanhd -#--TANH(X) = X FOR DENORMALIZED X -stanhd: - bra t_extdnrm - -######################################################################### -# slogn(): computes the natural logarithm of a normalized input # -# slognd(): computes the natural logarithm of a denormalized input # -# slognp1(): computes the log(1+X) of a normalized input # -# slognp1d(): computes the log(1+X) of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = log(X) or log(1+X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# LOGN: # -# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # -# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # -# move on to Step 2. # -# # -# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # -# seven significant bits of Y plus 2**(-7), i.e. # -# F = 1.xxxxxx1 in base 2 where the six "x" match those # -# of Y. Note that |Y-F| <= 2**(-7). # -# # -# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # -# polynomial in u, log(1+u) = poly. # -# # -# Step 4. Reconstruct # -# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # -# by k*log(2) + (log(F) + poly). The values of log(F) are # -# calculated beforehand and stored in the program. # -# # -# lognp1: # -# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # -# polynomial in u where u = 2X/(2+X). Otherwise, move on # -# to Step 2. # -# # -# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # -# in Step 2 of the algorithm for LOGN and compute # -# log(1+X) as k*log(2) + log(F) + poly where poly # -# approximates log(1+u), u = (Y-F)/F. # -# # -# Implementation Notes: # -# Note 1. There are 64 different possible values for F, thus 64 # -# log(F)'s need to be tabulated. 
Moreover, the values of # -# 1/F are also tabulated so that the division in (Y-F)/F # -# can be performed by a multiplication. # -# # -# Note 2. In Step 2 of lognp1, in order to preserved accuracy, # -# the value Y-F has to be calculated carefully when # -# 1/2 <= X < 3/2. # -# # -# Note 3. To fully exploit the pipeline, polynomials are usually # -# separated into two parts evaluated independently before # -# being added up. # -# # -######################################################################### -LOGOF2: - long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -one: - long 0x3F800000 -zero: - long 0x00000000 -infty: - long 0x7F800000 -negone: - long 0xBF800000 - -LOGA6: - long 0x3FC2499A,0xB5E4040B -LOGA5: - long 0xBFC555B5,0x848CB7DB - -LOGA4: - long 0x3FC99999,0x987D8730 -LOGA3: - long 0xBFCFFFFF,0xFF6F7E97 - -LOGA2: - long 0x3FD55555,0x555555A4 -LOGA1: - long 0xBFE00000,0x00000008 - -LOGB5: - long 0x3F175496,0xADD7DAD6 -LOGB4: - long 0x3F3C71C2,0xFE80C7E0 - -LOGB3: - long 0x3F624924,0x928BCCFF -LOGB2: - long 0x3F899999,0x999995EC - -LOGB1: - long 0x3FB55555,0x55555555 -TWO: - long 0x40000000,0x00000000 - -LTHOLD: - long 0x3f990000,0x80000000,0x00000000,0x00000000 - -LOGTBL: - long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 - long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 - long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 - long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 - long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 - long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 - long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 - long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 - long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 - long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 - long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 - long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 - long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 - long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 - long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 - long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 - long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 - long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 - long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 - long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 - long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 - long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 - long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 - long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 - long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 - long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 - long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 - long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 - long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 - long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 - long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 - long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 - long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 - long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 - long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 - long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 - long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 - long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 - long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 - long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 - long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 - long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 - long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 - long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 - long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 - long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 - long 
0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 - long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 - long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 - long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 - long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 - long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 - long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 - long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 - long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 - long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 - long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 - long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 - long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 - long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 - long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 - long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 - long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 - long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 - long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 - long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 - long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 - long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 - long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 - long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 - long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 - long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 - long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 - long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 - long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 - long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 - long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 - long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 - long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 - long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 - long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 - long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 - long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 - long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 - long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 - long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 - long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 - long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 - long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 - long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 - long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 - long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 - long 0x3FFE0000,0x94458094,0x45809446,0x00000000 - long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 - long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 - long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 - long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 - long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 - long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 - long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 - long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 - long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 - long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 - long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 - long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 - long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 - long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 - long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 - long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 - long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 - long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 - long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 - long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 - long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 - long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 - long 
0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 - long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 - long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 - long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 - long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 - long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 - long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 - long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 - long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 - long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 - long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 - long 0x3FFE0000,0x80808080,0x80808081,0x00000000 - long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 - - set ADJK,L_SCR1 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - - set F,FP_SCR1 - set FFRAC,F+4 - - set KLOG2,FP_SCR0 - - set SAVEU,FP_SCR0 - - global slogn -#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S -slogn: - fmov.x (%a0),%fp0 # LOAD INPUT - mov.l &0x00000000,ADJK(%a6) - -LOGBGN: -#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS -#--A FINITE, NON-ZERO, NORMALIZED NUMBER. - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - - mov.l (%a0),X(%a6) - mov.l 4(%a0),X+4(%a6) - mov.l 8(%a0),X+8(%a6) - - cmp.l %d1,&0 # CHECK IF X IS NEGATIVE - blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID -# X IS POSITIVE, CHECK IF X IS NEAR 1 - cmp.l %d1,&0x3ffef07d # IS X < 15/16? - blt.b LOGMAIN # YES - cmp.l %d1,&0x3fff8841 # IS X > 17/16? - ble.w LOGNEAR1 # NO - -LOGMAIN: -#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 - -#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. -#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. -#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) -#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). -#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING -#--LOG(1+U) CAN BE VERY EFFICIENT. -#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO -#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. - -#--GET K, Y, F, AND ADDRESS OF 1/F. - asr.l &8,%d1 - asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X - sub.l &0x3FFF,%d1 # THIS IS K - add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM. - lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F) - fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT - -#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F - mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 
2^(-K)*X - mov.l XFRAC(%a6),FFRAC(%a6) - and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y - or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT - mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT - add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F - - fmov.x X(%a6),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # Y-F - fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY -#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K -#--REGISTERS SAVED: FPCR, FP1, FP2 - -LP1CONT1: -#--A RE-ENTRY POINT FOR LOGNP1 - fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F - fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY - fmov.x %fp0,%fp2 - fmul.x %fp2,%fp2 # FP2 IS V=U*U - fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1 - -#--LOG(1+U) IS APPROXIMATED BY -#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS -#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] - - fmov.x %fp2,%fp3 - fmov.x %fp2,%fp1 - - fmul.d LOGA6(%pc),%fp1 # V*A6 - fmul.d LOGA5(%pc),%fp2 # V*A5 - - fadd.d LOGA4(%pc),%fp1 # A4+V*A6 - fadd.d LOGA3(%pc),%fp2 # A3+V*A5 - - fmul.x %fp3,%fp1 # V*(A4+V*A6) - fmul.x %fp3,%fp2 # V*(A3+V*A5) - - fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6) - fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5) - - fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6)) - add.l &16,%a0 # ADDRESS OF LOG(F) - fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5)) - - fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6)) - fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5)) - - fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6)) - fmovm.x (%sp)+,&0x30 # RESTORE FP2-3 - fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U) - - fmov.l %d0,%fpcr - fadd.x KLOG2(%a6),%fp0 # FINAL ADD - bra t_inx2 - - -LOGNEAR1: - -# if the input is exactly equal to one, then exit through ld_pzero. -# if these 2 lines weren't here, the correct answer would be returned -# but the INEX2 bit would be set. - fcmp.b %fp0,&0x1 # is it equal to one? - fbeq.l ld_pzero # yes - -#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. - fmov.x %fp0,%fp1 - fsub.s one(%pc),%fp1 # FP1 IS X-1 - fadd.s one(%pc),%fp0 # FP0 IS X+1 - fadd.x %fp1,%fp1 # FP1 IS 2(X-1) -#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL -#--IN U, U = 2(X-1)/(X+1) = FP1/FP0 - -LP1CONT2: -#--THIS IS A RE-ENTRY POINT FOR LOGNP1 - fdiv.x %fp0,%fp1 # FP1 IS U - fmovm.x &0xc,-(%sp) # SAVE FP2-3 -#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 -#--LET V=U*U, W=V*V, CALCULATE -#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY -#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) - fmov.x %fp1,%fp0 - fmul.x %fp0,%fp0 # FP0 IS V - fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1 - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS W - - fmov.d LOGB5(%pc),%fp3 - fmov.d LOGB4(%pc),%fp2 - - fmul.x %fp1,%fp3 # W*B5 - fmul.x %fp1,%fp2 # W*B4 - - fadd.d LOGB3(%pc),%fp3 # B3+W*B5 - fadd.d LOGB2(%pc),%fp2 # B2+W*B4 - - fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED - - fmul.x %fp0,%fp2 # V*(B2+W*B4) - - fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5) - fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V - - fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED - fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED - - fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - fmov.l %d0,%fpcr - fadd.x SAVEU(%a6),%fp0 - bra t_inx2 - -#--REGISTERS SAVED FPCR.
LOG(-VE) IS INVALID -LOGNEG: - bra t_operr - - global slognd -slognd: -#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT - - mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0 - -#----normalize the input value by left shifting k bits (k to be determined -#----below), adjusting exponent and storing -k to ADJK -#----the value TWOTO100 is no longer needed. -#----Note that this code assumes the denormalized input is NON-ZERO. - - movm.l &0x3f00,-(%sp) # save some registers {d2-d7} - mov.l (%a0),%d3 # D3 is exponent of smallest norm. # - mov.l 4(%a0),%d4 - mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X) - clr.l %d2 # D2 used for holding K - - tst.l %d4 - bne.b Hi_not0 - -Hi_0: - mov.l %d5,%d4 - clr.l %d5 - mov.l &32,%d2 - clr.l %d6 - bfffo %d4{&0:&32},%d6 - lsl.l %d6,%d4 - add.l %d6,%d2 # (D3,D4,D5) is normalized - - mov.l %d3,X(%a6) - mov.l %d4,XFRAC(%a6) - mov.l %d5,XFRAC+4(%a6) - neg.l %d2 - mov.l %d2,ADJK(%a6) - fmov.x X(%a6),%fp0 - movm.l (%sp)+,&0xfc # restore registers {d2-d7} - lea X(%a6),%a0 - bra.w LOGBGN # begin regular log(X) - -Hi_not0: - clr.l %d6 - bfffo %d4{&0:&32},%d6 # find first 1 - mov.l %d6,%d2 # get k - lsl.l %d6,%d4 - mov.l %d5,%d7 # a copy of D5 - lsl.l %d6,%d5 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d4 # (D3,D4,D5) normalized - - mov.l %d3,X(%a6) - mov.l %d4,XFRAC(%a6) - mov.l %d5,XFRAC+4(%a6) - neg.l %d2 - mov.l %d2,ADJK(%a6) - fmov.x X(%a6),%fp0 - movm.l (%sp)+,&0xfc # restore registers {d2-d7} - lea X(%a6),%a0 - bra.w LOGBGN # begin regular log(X) - - global slognp1 -#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S -slognp1: - fmov.x (%a0),%fp0 # LOAD INPUT - fabs.x %fp0 # test magnitude - fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold - fbgt.w LP1REAL # if greater, continue - fmov.l %d0,%fpcr - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x (%a0),%fp0 # return signed argument - bra t_catch - -LP1REAL: - fmov.x (%a0),%fp0 # LOAD INPUT - mov.l &0x00000000,ADJK(%a6) - fmov.x %fp0,%fp1 # FP1 IS INPUT Z - fadd.s one(%pc),%fp0 # X := ROUND(1+Z) - fmov.x %fp0,X(%a6) - mov.w XFRAC(%a6),XDCARE(%a6) - mov.l X(%a6),%d1 - cmp.l %d1,&0 - ble.w LP1NEG0 # LOG OF ZERO OR -VE - cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]? - blt.w LOGMAIN - cmp.l %d1,&0x3fffc000 - bgt.w LOGMAIN -#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, -#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, -#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). - -LP1NEAR1: -#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) - cmp.l %d1,&0x3ffef07d - blt.w LP1CARE - cmp.l %d1,&0x3fff8841 - bgt.w LP1CARE - -LP1ONE16: -#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) -#--WHERE U = 2Z/(2+Z) = 2Z/(1+X). - fadd.x %fp1,%fp1 # FP1 IS 2Z - fadd.s one(%pc),%fp0 # FP0 IS 1+X -#--U = FP1/FP0 - bra.w LP1CONT2 - -LP1CARE: -#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE -#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST -#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], -#--THERE ARE ONLY TWO CASES. -#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z -#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z -#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF -#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. 
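(A minimal C restatement of the two cases above, assuming F and K have already been derived from the rounded 1+Z as in Step 2 of lognp1; the function name and double precision are illustrative, and the point is only the operation order that keeps Y-F free of rounding.)

    /* K is -1 when 1+Z < 1 (so Y = 2(1+Z)); otherwise K is 0 (Y = 1+Z). */
    static double y_minus_f(double z, double f, int k)
    {
        if (k == -1)
            return (2.0 - f) + 2.0 * z;    /* case 1: Y-F = (2-F) + 2Z */
        return (1.0 - f) + z;              /* case 2: Y-F = (1-F) + Z */
    }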
- - mov.l XFRAC(%a6),FFRAC(%a6) - and.l &0xFE000000,FFRAC(%a6) - or.l &0x01000000,FFRAC(%a6) # F OBTAINED - cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1 - bge.b KISZERO - -KISNEG1: - fmov.s TWO(%pc),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # 2-F - mov.l FFRAC(%a6),%d1 - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 # D0 CONTAINS DISPLACEMENT FOR 1/F - fadd.x %fp1,%fp1 # GET 2Z - fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3} - fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z - lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F - add.l %d1,%a0 - fmov.s negone(%pc),%fp1 # FP1 IS K = -1 - bra.w LP1CONT1 - -KISZERO: - fmov.s one(%pc),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # 1-F - mov.l FFRAC(%a6),%d1 - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 - fadd.x %fp1,%fp0 # FP0 IS Y-F - fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3} - lea LOGTBL(%pc),%a0 - add.l %d1,%a0 # A0 IS ADDRESS OF 1/F - fmov.s zero(%pc),%fp1 # FP1 IS K = 0 - bra.w LP1CONT1 - -LP1NEG0: -#--FPCR SAVED. D0 IS X IN COMPACT FORM. - cmp.l %d1,&0 - blt.b LP1NEG -LP1ZERO: - fmov.s negone(%pc),%fp0 - - fmov.l %d0,%fpcr - bra t_dz - -LP1NEG: - fmov.s zero(%pc),%fp0 - - fmov.l %d0,%fpcr - bra t_operr - - global slognp1d -#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT -# Simply return the denorm -slognp1d: - bra t_extdnrm - -######################################################################### -# satanh(): computes the inverse hyperbolic tangent of a norm input # -# satanhd(): computes the inverse hyperbolic tangent of a denorm input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arctanh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ATANH # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate atanh(X) by # -# sgn := sign(X) # -# y := |X| # -# z := 2y/(1-y) # -# atanh(X) := sgn * (1/2) * logp1(z) # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) Generate infinity with an appropriate sign and # -# divide-by-zero by # -# sgn := sign(X) # -# atan(X) := sgn / (+0). # -# Exit. # -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global satanh -satanh: - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ATANHBIG - -#--THIS IS THE USUAL CASE, |X| < 1 -#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). 
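(The whole satanh flow, including the two ATANHBIG outcomes handled just below, can be sketched in C as follows. The name atanh_sketch and double precision are assumptions; the divide by zero and the 0*INF product are written out deliberately so they raise divide-by-zero and invalid, as steps 4 and 5 of the ALGORITHM block require.)

    #include <math.h>

    static double atanh_sketch(double x)
    {
        double y = fabs(x);
        if (y < 1.0) {                          /* the usual case */
            double z = 2.0 * y / (1.0 - y);     /* Z = 2Y/(1-Y) */
            return copysign(0.5 * log1p(z), x); /* sgn * (1/2) * log1p(Z) */
        }
        if (y == 1.0)                           /* |X| = 1: sgn / (+0) */
            return copysign(1.0, x) / 0.0;
        return 0.0 * INFINITY;                  /* |X| > 1: invalid, NaN */
    }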
- - fabs.x (%a0),%fp0 # Y = |X| - fmov.x %fp0,%fp1 - fneg.x %fp1 # -Y - fadd.x %fp0,%fp0 # 2Y - fadd.s &0x3F800000,%fp1 # 1-Y - fdiv.x %fp1,%fp0 # 2Y/(1-Y) - mov.l (%a0),%d1 - and.l &0x80000000,%d1 - or.l &0x3F000000,%d1 # SIGN(X)*HALF - mov.l %d1,-(%sp) - - mov.l %d0,-(%sp) # save rnd prec,mode - clr.l %d0 # pass ext prec,RN - fmovm.x &0x01,-(%sp) # save Z on stack - lea (%sp),%a0 # pass ptr to Z - bsr slognp1 # LOG1P(Z) - add.l &0xc,%sp # clear Z from stack - - mov.l (%sp)+,%d0 # fetch old prec,mode - fmov.l %d0,%fpcr # load it - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.s (%sp)+,%fp0 - bra t_catch - -ATANHBIG: - fabs.x (%a0),%fp0 # |X| - fcmp.s %fp0,&0x3F800000 - fbgt t_operr - bra t_dz - - global satanhd -#--ATANH(X) = X FOR DENORMALIZED X -satanhd: - bra t_extdnrm - -######################################################################### -# slog10(): computes the base-10 logarithm of a normalized input # -# slog10d(): computes the base-10 logarithm of a denormalized input # -# slog2(): computes the base-2 logarithm of a normalized input # -# slog2d(): computes the base-2 logarithm of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = log_10(X) or log_2(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 1.7 ulps in 64 significant bit, # -# i.e. within 0.5003 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# slog10d: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # -# Notes: Even if X is denormalized, log(X) is always normalized. # -# # -# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L10. # -# # -# slog10: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # -# # -# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L10. # -# # -# sLog2d: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # -# Notes: Even if X is denormalized, log(X) is always normalized. # -# # -# Step 2. Compute log_2(X) = log(X) * (1/log(2)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L2. # -# # -# sLog2: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default.
# -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. If X is not an integer power of two, i.e., X != 2^k, # -# go to Step 3. # -# # -# Step 2. Return k. # -# 2.1 Get integer k, X = 2^k. # -# 2.2 Restore the user FPCR. # -# 2.3 Return ans := convert-to-double-extended(k). # -# # -# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. # -# # -# Step 4. Compute log_2(X) = log(X) * (1/log(2)). # -# 4.1 Restore the user FPCR # -# 4.2 Return ans := Y * INV_L2. # -# # -######################################################################### - -INV_L10: - long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 - -INV_L2: - long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 - - global slog10 -#--entry point for Log10(X), X is normalized -slog10: - fmov.b &0x1,%fp0 - fcmp.x %fp0,(%a0) # if operand == 1, - fbeq.l ld_pzero # return an EXACT zero - - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slogn # log(X), X normal. - fmov.l (%sp)+,%fpcr - fmul.x INV_L10(%pc),%fp0 - bra t_inx2 - - global slog10d -#--entry point for Log10(X), X is denormalized -slog10d: - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slognd # log(X), X denorm. - fmov.l (%sp)+,%fpcr - fmul.x INV_L10(%pc),%fp0 - bra t_minx2 - - global slog2 -#--entry point for Log2(X), X is normalized -slog2: - mov.l (%a0),%d1 - blt.w invalid - - mov.l 8(%a0),%d1 - bne.b continue # X is not 2^k - - mov.l 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - bne.b continue - -#--X = 2^k. - mov.w (%a0),%d1 - and.l &0x00007FFF,%d1 - sub.l &0x3FFF,%d1 - beq.l ld_pzero - fmov.l %d0,%fpcr - fmov.l %d1,%fp0 - bra t_inx2 - -continue: - mov.l %d0,-(%sp) - clr.l %d0 - bsr slogn # log(X), X normal. - fmov.l (%sp)+,%fpcr - fmul.x INV_L2(%pc),%fp0 - bra t_inx2 - -invalid: - bra t_operr - - global slog2d -#--entry point for Log2(X), X is denormalized -slog2d: - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slognd # log(X), X denorm. - fmov.l (%sp)+,%fpcr - fmul.x INV_L2(%pc),%fp0 - bra t_minx2 - -######################################################################### -# stwotox(): computes 2**X for a normalized input # -# stwotoxd(): computes 2**X for a denormalized input # -# stentox(): computes 10**X for a normalized input # -# stentoxd(): computes 10**X for a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = 2**X or 10**X # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# twotox # -# 1. If |X| > 16480, go to ExpBig. # -# # -# 2. If |X| < 2**(-70), go to ExpSm. # -# # -# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore # -# decompose N as # -# N = 64(M + M') + j, j = 0,1,2,...,63. # -# # -# 4. Overwrite r := r * log2. Then # -# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # -# Go to expr to compute that expression. # -# # -# tentox # -# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # -# # -# 2. If |X| < 2**(-70), go to ExpSm. # -# # -# 3. 
Set y := X*log_2(10)*64 (base 2 log of 10). Set # -# N := round-to-int(y). Decompose N as # -# N = 64(M + M') + j, j = 0,1,2,...,63. # -# # -# 4. Define r as # -# r := ((X - N*L1)-N*L2) * L10 # -# where L1, L2 are the leading and trailing parts of # -# log_10(2)/64 and L10 is the natural log of 10. Then # -# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # -# Go to expr to compute that expression. # -# # -# expr # -# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # -# # -# 2. Overwrite Fact1 and Fact2 by # -# Fact1 := 2**(M) * Fact1 # -# Fact2 := 2**(M) * Fact2 # -# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # -# # -# 3. Calculate P where 1 + P approximates exp(r): # -# P = r + r*r*(A1+r*(A2+...+r*A5)). # -# # -# 4. Let AdjFact := 2**(M'). Return # -# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # -# Exit. # -# # -# ExpBig # -# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # -# generate underflow by Tiny * Tiny. # -# # -# ExpSm # -# 1. Return 1 + X. # -# # -######################################################################### - -L2TEN64: - long 0x406A934F,0x0979A371 # 64LOG10/LOG2 -L10TWO1: - long 0x3F734413,0x509F8000 # LOG2/64LOG10 - -L10TWO2: - long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 - -LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 - -LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -EXPA5: long 0x3F56C16D,0x6F7BD0B2 -EXPA4: long 0x3F811112,0x302C712C -EXPA3: long 0x3FA55555,0x55554CC1 -EXPA2: long 0x3FC55555,0x55554A54 -EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 - -TEXPTBL: - long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 - long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA - long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 - long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 - long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA - long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C - long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 - long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA - long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 - long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 - long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 - long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 - long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D - long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 - long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B - long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 - long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A - long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B - long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF - long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA - long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD - long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E - long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B - long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB - long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB - long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 - long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C - long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 - long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 - long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 - long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F - long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C - long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB - long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB - long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C - long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA - long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD - long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 - long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A - long 
0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 - long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB - long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 - long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C - long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 - long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 - long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE - long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 - long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 - long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A - long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 - long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 - long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 - long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 - long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE - long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 - long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F - long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A - long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A - long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC - long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F - long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A - long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 - long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B - long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 - - set INT,L_SCR1 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - - set ADJFACT,FP_SCR0 - - set FACT1,FP_SCR0 - set FACT1HI,FACT1+4 - set FACT1LOW,FACT1+8 - - set FACT2,FP_SCR1 - set FACT2HI,FACT2+4 - set FACT2LOW,FACT2+8 - - global stwotox -#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -stwotox: - fmovm.x (%a0),&0x80 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? - bge.b TWOOK1 - bra.w EXPBORS - -TWOOK1: - cmp.l %d1,&0x400D80C0 # |X| > 16480? - ble.b TWOMAIN - bra.w EXPBORS - -TWOMAIN: -#--USUAL CASE, 2^(-70) <= |X| <= 16480 - - fmov.x %fp0,%fp1 - fmul.s &0x42800000,%fp1 # 64 * X - fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) - mov.l %d2,-(%sp) - lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) - fmov.l INT(%a6),%fp1 # N --> FLOATING FMT - mov.l INT(%a6),%d1 - mov.l %d1,%d2 - and.l &0x3F,%d1 # D0 IS J - asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) - add.l %d1,%a1 # ADDRESS FOR 2^(J/64) - asr.l &6,%d2 # d2 IS L, N = 64L + J - mov.l %d2,%d1 - asr.l &1,%d1 # D0 IS M - sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J - add.l &0x3FFF,%d2 - -#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -#--ADJFACT = 2^(M'). -#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
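(A hedged C sketch of the TWOMAIN decomposition summarized above, with exp2() standing in for the tabulated FACT1/FACT2 split and ldexp() for the 2**(M) and 2**(M') scalings. The names, double precision, and the assumption that >> on a negative long is an arithmetic shift, as the asr instructions are, are all illustrative.)

    #include <math.h>

    static const double LN2 = 0.69314718055994530942;

    static double twotox_sketch(double x)        /* usual-case path only */
    {
        long n  = lrint(64.0 * x);               /* N = ROUND-TO-INT(64 X) */
        long j  = n & 0x3f;                      /* J, 0 <= J <= 63 */
        long l  = n >> 6;                        /* L, where N = 64L + J */
        long m  = l >> 1;                        /* M */
        long mp = l - m;                         /* M', N = 64(M+M') + J */
        double r = (x - (double)n / 64.0) * LN2; /* r := r * log2 */
        double f = exp2((double)j / 64.0);       /* 2^(J/64) = FACT1+FACT2 */
        double p = expm1(r);                     /* exp(r) - 1 polynomial */
        return ldexp(ldexp(f + f * p, (int)m), (int)mp);
    }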
- - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.s &0x3C800000,%fp1 # (1/64)*N - mov.l (%a1)+,FACT1(%a6) - mov.l (%a1)+,FACT1HI(%a6) - mov.l (%a1)+,FACT1LOW(%a6) - mov.w (%a1)+,FACT2(%a6) - - fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) - - mov.w (%a1)+,FACT2HI(%a6) - clr.w FACT2HI+2(%a6) - clr.l FACT2LOW(%a6) - add.w %d1,FACT1(%a6) - fmul.x LOG2(%pc),%fp0 # FP0 IS R - add.w %d1,FACT2(%a6) - - bra.w expr - -EXPBORS: -#--FPCR, D0 SAVED - cmp.l %d1,&0x3FFF8000 - bgt.b TEXPBIG - -#--|X| IS SMALL, RETURN 1 + X - - fmov.l %d0,%fpcr # restore users round prec,mode - fadd.s &0x3F800000,%fp0 # RETURN 1 + X - bra t_pinx2 - -TEXPBIG: -#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW -#--REGISTERS SAVE SO FAR ARE FPCR AND D0 - mov.l X(%a6),%d1 - cmp.l %d1,&0 - blt.b EXPNEG - - bra t_ovfl2 # t_ovfl expects positive value - -EXPNEG: - bra t_unfl2 # t_unfl expects positive value - - global stwotoxd -stwotoxd: -#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT - - fmov.l %d0,%fpcr # set user's rounding mode/precision - fmov.s &0x3F800000,%fp0 # RETURN 1 + X - mov.l (%a0),%d1 - or.l &0x00800001,%d1 - fadd.s %d1,%fp0 - bra t_pinx2 - - global stentox -#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -stentox: - fmovm.x (%a0),&0x80 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? - bge.b TENOK1 - bra.w EXPBORS - -TENOK1: - cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? - ble.b TENMAIN - bra.w EXPBORS - -TENMAIN: -#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 - - fmov.x %fp0,%fp1 - fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 - fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) - mov.l %d2,-(%sp) - lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) - fmov.l INT(%a6),%fp1 # N --> FLOATING FMT - mov.l INT(%a6),%d1 - mov.l %d1,%d2 - and.l &0x3F,%d1 # D0 IS J - asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) - add.l %d1,%a1 # ADDRESS FOR 2^(J/64) - asr.l &6,%d2 # d2 IS L, N = 64L + J - mov.l %d2,%d1 - asr.l &1,%d1 # D0 IS M - sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J - add.l &0x3FFF,%d2 - -#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -#--ADJFACT = 2^(M'). -#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.x %fp1,%fp2 - - fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD - mov.l (%a1)+,FACT1(%a6) - - fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL - - mov.l (%a1)+,FACT1HI(%a6) - mov.l (%a1)+,FACT1LOW(%a6) - fsub.x %fp1,%fp0 # X - N L_LEAD - mov.w (%a1)+,FACT2(%a6) - - fsub.x %fp2,%fp0 # X - N L_TRAIL - - mov.w (%a1)+,FACT2HI(%a6) - clr.w FACT2HI+2(%a6) - clr.l FACT2LOW(%a6) - - fmul.x LOG10(%pc),%fp0 # FP0 IS R - add.w %d1,FACT1(%a6) - add.w %d1,FACT2(%a6) - -expr: -#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. -#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). -#--FP0 IS R. 
THE FOLLOWING CODE COMPUTES -#-- 2**(M'+M) * 2**(J/64) * EXP(R) - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS S = R*R - - fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 - fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 - - fmul.x %fp1,%fp2 # FP2 IS S*A5 - fmul.x %fp1,%fp3 # FP3 IS S*A4 - - fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 - fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 - - fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) - fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) - - fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) - fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) - - fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) - fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) - fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - -#--FINAL RECONSTRUCTION PROCESS -#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) - - fmul.x FACT1(%a6),%fp0 - fadd.x FACT2(%a6),%fp0 - fadd.x FACT1(%a6),%fp0 - - fmov.l %d0,%fpcr # restore users round prec,mode - mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT - mov.l (%sp)+,%d2 - mov.l &0x80000000,ADJFACT+4(%a6) - clr.l ADJFACT+8(%a6) - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT - bra t_catch - - global stentoxd -stentoxd: -#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT - - fmov.l %d0,%fpcr # set user's rounding mode/precision - fmov.s &0x3F800000,%fp0 # RETURN 1 + X - mov.l (%a0),%d1 - or.l &0x00800001,%d1 - fadd.s %d1,%fp0 - bra t_pinx2 - -######################################################################### -# sscale(): computes the destination operand scaled by the source # -# operand. If the absolute value of the source operand is # -# >= 2^14, an overflow or underflow is returned. # -# # -# INPUT *************************************************************** # -# a0 = pointer to double-extended source operand X # -# a1 = pointer to double-extended destination operand Y # -# # -# OUTPUT ************************************************************** # -# fp0 = scale(X,Y) # -# # -######################################################################### - -set SIGN, L_SCR1 - - global sscale -sscale: - mov.l %d0,-(%sp) # store off ctrl bits for now - - mov.w DST_EX(%a1),%d1 # get dst exponent - smi.b SIGN(%a6) # use SIGN to hold dst sign - andi.l &0x00007fff,%d1 # strip sign from dst exp - - mov.w SRC_EX(%a0),%d0 # check src bounds - andi.w &0x7fff,%d0 # clr src sign bit - cmpi.w %d0,&0x3fff # is src ~ ZERO? - blt.w src_small # yes - cmpi.w %d0,&0x400c # no; is src too big? - bgt.w src_out # yes - -# -# Source is within 2^14 range. -# -src_ok: - fintrz.x SRC(%a0),%fp0 # calc int of src - fmov.l %fp0,%d0 # int src to d0 -# don't want any accrued bits from the fintrz showing up later since -# we may need to read the fpsr for the last fp op in t_catch2(). - fmov.l &0x0,%fpsr - - tst.b DST_HI(%a1) # is dst denormalized? - bmi.b sok_norm - -# the dst is a DENORM. normalize the DENORM and add the adjustment to -# the src value. then, jump to the norm part of the routine. -sok_dnrm: - mov.l %d0,-(%sp) # save src for now - - mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy - mov.l DST_HI(%a1),FP_SCR0_HI(%a6) - mov.l DST_LO(%a1),FP_SCR0_LO(%a6) - - lea FP_SCR0(%a6),%a0 # pass ptr to DENORM - bsr.l norm # normalize the DENORM - neg.l %d0 - add.l (%sp)+,%d0 # add adjustment to src - - fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM - - cmpi.w %d0,&-0x3fff # is the shft amt really low? - bge.b sok_norm2 # thank goodness no - -# the multiply factor that we're trying to create should be a denorm -# for the multiply to work.
therefore, we're going to actually do a -# multiply with a denorm which will cause an unimplemented data type -# exception to be put into the machine which will be caught and corrected -# later. we don't do this with the DENORMs above because this method -# is slower. but, don't fret, I don't see it being used much either. - fmov.l (%sp)+,%fpcr # restore user fpcr - mov.l &0x80000000,%d1 # load normalized mantissa - subi.l &-0x3fff,%d0 # how many should we shift? - neg.l %d0 # make it positive - cmpi.b %d0,&0x20 # is it > 32? - bge.b sok_dnrm_32 # yes - lsr.l %d0,%d1 # no; bit stays in upper lw - clr.l -(%sp) # insert zero low mantissa - mov.l %d1,-(%sp) # insert new high mantissa - clr.l -(%sp) # make zero exponent - bra.b sok_norm_cont -sok_dnrm_32: - subi.b &0x20,%d0 # get shift count - lsr.l %d0,%d1 # make low mantissa longword - mov.l %d1,-(%sp) # insert new low mantissa - clr.l -(%sp) # insert zero high mantissa - clr.l -(%sp) # make zero exponent - bra.b sok_norm_cont - -# the src will force the dst to a DENORM value or worse. so, let's -# create an fp multiply that will create the result. -sok_norm: - fmovm.x DST(%a1),&0x80 # load fp0 with normalized src -sok_norm2: - fmov.l (%sp)+,%fpcr # restore user fpcr - - addi.w &0x3fff,%d0 # turn src amt into exp value - swap %d0 # put exponent in high word - clr.l -(%sp) # insert new exponent - mov.l &0x80000000,-(%sp) # insert new high mantissa - mov.l %d0,-(%sp) # insert new lo mantissa - -sok_norm_cont: - fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2 - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x (%sp)+,%fp0 # do the multiply - bra t_catch2 # catch any exceptions - -# -# Source is outside of 2^14 range. Test the sign and branch -# to the appropriate exception handler. -# -src_out: - mov.l (%sp)+,%d0 # restore ctrl bits - exg %a0,%a1 # swap src,dst ptrs - tst.b SRC_EX(%a1) # is src negative? - bmi t_unfl # yes; underflow - bra t_ovfl_sc # no; overflow - -# -# The source input is below 1, so we check for denormalized numbers -# and set unfl. -# -src_small: - tst.b DST_HI(%a1) # is dst denormalized? - bpl.b ssmall_done # yes - - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr # no; load control bits - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x DST(%a1),%fp0 # simply return dest - bra t_catch2 -ssmall_done: - mov.l (%sp)+,%d0 # load control bits into d1 - mov.l %a1,%a0 # pass ptr to dst - bra t_resdnrm - -######################################################################### -# smod(): computes the fp MOD of the input values X,Y. # -# srem(): computes the fp (IEEE) REM of the input values X,Y. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input X # -# a1 = pointer to extended precision input Y # -# d0 = round precision,mode # -# # -# The input operands X and Y can be either normalized or # -# denormalized. # -# # -# OUTPUT ************************************************************** # -# fp0 = FREM(X,Y) or FMOD(X,Y) # -# # -# ALGORITHM *********************************************************** # -# # -# Step 1. Save and strip signs of X and Y: signX := sign(X), # -# signY := sign(Y), X := |X|, Y := |Y|, # -# signQ := signX EOR signY. Record whether MOD or REM # -# is requested. # -# # -# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. # -# If (L < 0) then # -# R := X, go to Step 4. # -# else # -# R := 2^(-L)X, j := L. # -# endif # -# # -# Step 3. Perform MOD(X,Y) # -# 3.1 If R = Y, go to Step 9. 
# -# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # -# 3.3 If j = 0, go to Step 4. # -# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # -# Step 3.1. # -# # -# Step 4. At this point, R = X - QY = MOD(X,Y). Set # -# Last_Subtract := false (used in Step 7 below). If # -# MOD is requested, go to Step 6. # -# # -# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # -# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # -# Step 6. # -# 5.2 If R > Y/2, then { set Last_Subtract := true, # -# Q := Q + 1, Y := signY*Y }. Go to Step 6. # -# 5.3 This is the tricky case of R = Y/2. If Q is odd, # -# then { Q := Q + 1, signX := -signX }. # -# # -# Step 6. R := signX*R. # -# # -# Step 7. If Last_Subtract = true, R := R - Y. # -# # -# Step 8. Return signQ, last 7 bits of Q, and R as required. # -# # -# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # -# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # -# R := 0. Return signQ, last 7 bits of Q, and R. # -# # -######################################################################### - - set Mod_Flag,L_SCR3 - set Sc_Flag,L_SCR3+1 - - set SignY,L_SCR2 - set SignX,L_SCR2+2 - set SignQ,L_SCR3+2 - - set Y,FP_SCR0 - set Y_Hi,Y+4 - set Y_Lo,Y+8 - - set R,FP_SCR1 - set R_Hi,R+4 - set R_Lo,R+8 - -Scale: - long 0x00010000,0x80000000,0x00000000,0x00000000 - - global smod -smod: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) # save ctrl bits - clr.b Mod_Flag(%a6) - bra.b Mod_Rem - - global srem -srem: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) # save ctrl bits - mov.b &0x1,Mod_Flag(%a6) - -Mod_Rem: -#..Save sign of X and Y - movm.l &0x3f00,-(%sp) # save data registers - mov.w SRC_EX(%a0),%d3 - mov.w %d3,SignY(%a6) - and.l &0x00007FFF,%d3 # Y := |Y| - -# - mov.l SRC_HI(%a0),%d4 - mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y| - - tst.l %d3 - bne.b Y_Normal - - mov.l &0x00003FFE,%d3 # $3FFD + 1 - tst.l %d4 - bne.b HiY_not0 - -HiY_0: - mov.l %d5,%d4 - clr.l %d5 - sub.l &32,%d3 - clr.l %d6 - bfffo %d4{&0:&32},%d6 - lsl.l %d6,%d4 - sub.l %d6,%d3 # (D3,D4,D5) is normalized -# ...with bias $7FFD - bra.b Chk_X - -HiY_not0: - clr.l %d6 - bfffo %d4{&0:&32},%d6 - sub.l %d6,%d3 - lsl.l %d6,%d4 - mov.l %d5,%d7 # a copy of D5 - lsl.l %d6,%d5 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d4 # (D3,D4,D5) normalized -# ...with bias $7FFD - bra.b Chk_X - -Y_Normal: - add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized -# ...with bias $7FFD - -Chk_X: - mov.w DST_EX(%a1),%d0 - mov.w %d0,SignX(%a6) - mov.w SignY(%a6),%d1 - eor.l %d0,%d1 - and.l &0x00008000,%d1 - mov.w %d1,SignQ(%a6) # sign(Q) obtained - and.l &0x00007FFF,%d0 - mov.l DST_HI(%a1),%d1 - mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X| - tst.l %d0 - bne.b X_Normal - mov.l &0x00003FFE,%d0 - tst.l %d1 - bne.b HiX_not0 - -HiX_0: - mov.l %d2,%d1 - clr.l %d2 - sub.l &32,%d0 - clr.l %d6 - bfffo %d1{&0:&32},%d6 - lsl.l %d6,%d1 - sub.l %d6,%d0 # (D0,D1,D2) is normalized -# ...with bias $7FFD - bra.b Init - -HiX_not0: - clr.l %d6 - bfffo %d1{&0:&32},%d6 - sub.l %d6,%d0 - lsl.l %d6,%d1 - mov.l %d2,%d7 # a copy of D2 - lsl.l %d6,%d2 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d1 # (D0,D1,D2) normalized -# ...with bias $7FFD - bra.b Init - -X_Normal: - add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized -# ...with bias $7FFD - -Init: -# - mov.l %d3,L_SCR1(%a6) # save biased exp(Y) - mov.l %d0,-(%sp) # save biased exp(X) - sub.l %d3,%d0 # L := expo(X)-expo(Y) - - clr.l %d6 # D6 := carry <- 0 - clr.l %d3 # D3 is Q - mov.l &0,%a1 # A1 is k; j+k=L, Q=0 - -#..(Carry,D1,D2) is R - tst.l %d0 - bge.b Mod_Loop_pre - -#..expo(X) < expo(Y). 
Thus X = mod(X,Y) -# - mov.l (%sp)+,%d0 # restore d0 - bra.w Get_Mod - -Mod_Loop_pre: - addq.l &0x4,%sp # erase exp(X) -#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L -Mod_Loop: - tst.l %d6 # test carry bit - bgt.b R_GT_Y - -#..At this point carry = 0, R = (D1,D2), Y = (D4,D5) - cmp.l %d1,%d4 # compare hi(R) and hi(Y) - bne.b R_NE_Y - cmp.l %d2,%d5 # compare lo(R) and lo(Y) - bne.b R_NE_Y - -#..At this point, R = Y - bra.w Rem_is_0 - -R_NE_Y: -#..use the borrow of the previous compare - bcs.b R_LT_Y # borrow is set iff R < Y - -R_GT_Y: -#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 -#..and Y < (D1,D2) < 2Y. Either way, perform R - Y - sub.l %d5,%d2 # lo(R) - lo(Y) - subx.l %d4,%d1 # hi(R) - hi(Y) - clr.l %d6 # clear carry - addq.l &1,%d3 # Q := Q + 1 - -R_LT_Y: -#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. - tst.l %d0 # see if j = 0. - beq.b PostLoop - - add.l %d3,%d3 # Q := 2Q - add.l %d2,%d2 # lo(R) = 2lo(R) - roxl.l &1,%d1 # hi(R) = 2hi(R) + carry - scs %d6 # set Carry if 2(R) overflows - addq.l &1,%a1 # k := k+1 - subq.l &1,%d0 # j := j - 1 -#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. - - bra.b Mod_Loop - -PostLoop: -#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. - -#..normalize R. - mov.l L_SCR1(%a6),%d0 # new biased expo of R - tst.l %d1 - bne.b HiR_not0 - -HiR_0: - mov.l %d2,%d1 - clr.l %d2 - sub.l &32,%d0 - clr.l %d6 - bfffo %d1{&0:&32},%d6 - lsl.l %d6,%d1 - sub.l %d6,%d0 # (D0,D1,D2) is normalized -# ...with bias $7FFD - bra.b Get_Mod - -HiR_not0: - clr.l %d6 - bfffo %d1{&0:&32},%d6 - bmi.b Get_Mod # already normalized - sub.l %d6,%d0 - lsl.l %d6,%d1 - mov.l %d2,%d7 # a copy of D2 - lsl.l %d6,%d2 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d1 # (D0,D1,D2) normalized - -# -Get_Mod: - cmp.l %d0,&0x000041FE - bge.b No_Scale -Do_Scale: - mov.w %d0,R(%a6) - mov.l %d1,R_Hi(%a6) - mov.l %d2,R_Lo(%a6) - mov.l L_SCR1(%a6),%d6 - mov.w %d6,Y(%a6) - mov.l %d4,Y_Hi(%a6) - mov.l %d5,Y_Lo(%a6) - fmov.x R(%a6),%fp0 # no exception - mov.b &1,Sc_Flag(%a6) - bra.b ModOrRem -No_Scale: - mov.l %d1,R_Hi(%a6) - mov.l %d2,R_Lo(%a6) - sub.l &0x3FFE,%d0 - mov.w %d0,R(%a6) - mov.l L_SCR1(%a6),%d6 - sub.l &0x3FFE,%d6 - mov.l %d6,L_SCR1(%a6) - fmov.x R(%a6),%fp0 - mov.w %d6,Y(%a6) - mov.l %d4,Y_Hi(%a6) - mov.l %d5,Y_Lo(%a6) - clr.b Sc_Flag(%a6) - -# -ModOrRem: - tst.b Mod_Flag(%a6) - beq.b Fix_Sign - - mov.l L_SCR1(%a6),%d6 # new biased expo(Y) - subq.l &1,%d6 # biased expo(Y/2) - cmp.l %d0,%d6 - blt.b Fix_Sign - bgt.b Last_Sub - - cmp.l %d1,%d4 - bne.b Not_EQ - cmp.l %d2,%d5 - bne.b Not_EQ - bra.w Tie_Case - -Not_EQ: - bcs.b Fix_Sign - -Last_Sub: -# - fsub.x Y(%a6),%fp0 # no exceptions - addq.l &1,%d3 # Q := Q + 1 - -# -Fix_Sign: -#..Get sign of X - mov.w SignX(%a6),%d6 - bge.b Get_Q - fneg.x %fp0 - -#..Get Q -# -Get_Q: - clr.l %d6 - mov.w SignQ(%a6),%d6 # D6 is sign(Q) - mov.l &8,%d7 - lsr.l %d7,%d6 - and.l &0x0000007F,%d3 # 7 bits of Q - or.l %d6,%d3 # sign and bits of Q -# swap %d3 -# fmov.l %fpsr,%d6 -# and.l &0xFF00FFFF,%d6 -# or.l %d3,%d6 -# fmov.l %d6,%fpsr # put Q in fpsr - mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr - -# -Restore: - movm.l (%sp)+,&0xfc # {%d2-%d7} - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr - tst.b Sc_Flag(%a6) - beq.b Finish - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x Scale(%pc),%fp0 # may cause underflow - bra t_catch2 -# the '040 package did this apparently to see if the dst operand for the -# preceding fmul was a denorm. 
but, it better not have been since the -# algorithm just got done playing with fp0 and expected no exceptions -# as a result. trust me... -# bra t_avoid_unsupp # check for denorm as a -# ;result of the scaling - -Finish: - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x %fp0,%fp0 # capture exceptions & round - bra t_catch2 - -Rem_is_0: -#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) - addq.l &1,%d3 - cmp.l %d0,&8 # D0 is j - bge.b Q_Big - - lsl.l %d0,%d3 - bra.b Set_R_0 - -Q_Big: - clr.l %d3 - -Set_R_0: - fmov.s &0x00000000,%fp0 - clr.b Sc_Flag(%a6) - bra.w Fix_Sign - -Tie_Case: -#..Check parity of Q - mov.l %d3,%d6 - and.l &0x00000001,%d6 - tst.l %d6 - beq.w Fix_Sign # Q is even - -#..Q is odd, Q := Q + 1, signX := -signX - addq.l &1,%d3 - mov.w SignX(%a6),%d6 - eor.l &0x00008000,%d6 - mov.w %d6,SignX(%a6) - bra.w Fix_Sign - -######################################################################### -# XDEF **************************************************************** # -# tag(): return the optype of the input ext fp number # -# # -# This routine is used by the 060FPLSP. # -# # -# XREF **************************************************************** # -# None # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision operand # -# # -# OUTPUT ************************************************************** # -# d0 = value of type tag # -# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # -# # -# ALGORITHM *********************************************************** # -# Simply test the exponent, j-bit, and mantissa values to # -# determine the type of operand. # -# If it's an unnormalized zero, alter the operand and force it # -# to be a normal zero. # -# # -######################################################################### - - global tag -tag: - mov.w FTEMP_EX(%a0), %d0 # extract exponent - andi.w &0x7fff, %d0 # strip off sign - cmpi.w %d0, &0x7fff # is (EXP == MAX)? - beq.b inf_or_nan_x -not_inf_or_nan_x: - btst &0x7,FTEMP_HI(%a0) - beq.b not_norm_x -is_norm_x: - mov.b &NORM, %d0 - rts -not_norm_x: - tst.w %d0 # is exponent = 0? - bne.b is_unnorm_x -not_unnorm_x: - tst.l FTEMP_HI(%a0) - bne.b is_denorm_x - tst.l FTEMP_LO(%a0) - bne.b is_denorm_x -is_zero_x: - mov.b &ZERO, %d0 - rts -is_denorm_x: - mov.b &DENORM, %d0 - rts -is_unnorm_x: - bsr.l unnorm_fix # convert to norm,denorm,or zero - rts -is_unnorm_reg_x: - mov.b &UNNORM, %d0 - rts -inf_or_nan_x: - tst.l FTEMP_LO(%a0) - bne.b is_nan_x - mov.l FTEMP_HI(%a0), %d0 - and.l &0x7fffffff, %d0 # msb is a don't care! - bne.b is_nan_x -is_inf_x: - mov.b &INF, %d0 - rts -is_nan_x: - mov.b &QNAN, %d0 - rts - -############################################################# - -qnan: long 0x7fff0000, 0xffffffff, 0xffffffff - -######################################################################### -# XDEF **************************************************************** # -# t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. # -# t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand. # -# # -# OUTPUT ************************************************************** # -# fp0 = default DZ result.
# -# # -# ALGORITHM *********************************************************** # -# Transcendental emulation for the 060FPLSP has detected that # -# a DZ exception should occur for the instruction. If DZ is disabled, # -# return the default result. # -# If DZ is enabled, the dst operand should be returned unscathed # -# in fp0 while fp1 is used to create a DZ exception so that the # -# operating system can log that such an event occurred. # -# # -######################################################################### - - global t_dz -t_dz: - tst.b SRC_EX(%a0) # check sign for neg or pos - bpl.b dz_pinf # branch if pos sign - - global t_dz2 -t_dz2: - ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ - - btst &dz_bit,FPCR_ENABLE(%a6) - bne.b dz_minf_ena - -# dz is disabled. return a -INF. - fmov.s &0xff800000,%fp0 # return -INF - rts - -# dz is enabled. create a dz exception so the user can record it -# but use fp1 instead. return the dst operand unscathed in fp0. -dz_minf_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmov.s &0xbf800000,%fp1 # load -1 - fdiv.s &0x00000000,%fp1 # -1 / 0 - rts - -dz_pinf: - ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ - - btst &dz_bit,FPCR_ENABLE(%a6) - bne.b dz_pinf_ena - -# dz is disabled. return a +INF. - fmov.s &0x7f800000,%fp0 # return +INF - rts - -# dz is enabled. create a dz exception so the user can record it -# but use fp1 instead. return the dst operand unscathed in fp0. -dz_pinf_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmov.s &0x3f800000,%fp1 # load +1 - fdiv.s &0x00000000,%fp1 # +1 / 0 - rts - -######################################################################### -# XDEF **************************************************************** # -# t_operr(): Handle 060FPLSP OPERR exception during emulation. # -# # -# This routine is used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp1 = source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# fp1 = unchanged # -# # -# ALGORITHM *********************************************************** # -# An operand error should occur as the result of transcendental # -# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN # -# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 # -# and the source operand in fp1. Use fp2 to create an OPERR exception # -# so that the operating system can log the event. # -# # -######################################################################### - - global t_operr -t_operr: - ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP - - btst &operr_bit,FPCR_ENABLE(%a6) - bne.b operr_ena - -# operr is disabled. return a QNAN in fp0 - fmovm.x qnan(%pc),&0x80 # return QNAN - rts - -# operr is enabled. create an operr exception so the user can record it -# but use fp2 instead. return the dst operand unscathed in fp0. 
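(The same trick of manufacturing an invalid-operation exception with +INF x 0, so the event is visible to the operating system, can be sketched in portable C99 with <fenv.h>; the volatile qualifiers are an assumption to keep the compiler from folding the product away.)

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        feclearexcept(FE_ALL_EXCEPT);
        volatile float inf = INFINITY;
        volatile float r = inf * 0.0f;     /* +INF x 0, like fp2 above */
        if (fetestexcept(FE_INVALID))
            printf("invalid-operation flag raised, r = %f\n", (double)r);
        return 0;
    }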
-operr_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x &0x04,-(%sp) # save fp2 - fmov.s &0x7f800000,%fp2 # load +INF - fmul.s &0x00000000,%fp2 # +INF x 0 - fmovm.x (%sp)+,&0x20 # restore fp2 - rts - -pls_huge: - long 0x7ffe0000,0xffffffff,0xffffffff -mns_huge: - long 0xfffe0000,0xffffffff,0xffffffff -pls_tiny: - long 0x00000000,0x80000000,0x00000000 -mns_tiny: - long 0x80000000,0x80000000,0x00000000 - -######################################################################### -# XDEF **************************************************************** # -# t_unfl(): Handle 060FPLSP underflow exception during emulation. # -# t_unfl2(): Handle 060FPLSP underflow exception during # -# emulation. Result always positive. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default underflow result # -# # -# ALGORITHM *********************************************************** # -# An underflow should occur as the result of transcendental # -# emulation in the 060FPLSP. Create an underflow by using "fmul" # -# and two very small numbers of appropriate sign so the operating # -# system can log the event. # -# # -######################################################################### - - global t_unfl -t_unfl: - tst.b SRC_EX(%a0) - bpl.b unf_pos - - global t_unfl2 -t_unfl2: - ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX - - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x mns_tiny(%pc),&0x80 - fmul.x pls_tiny(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts -unf_pos: - ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX - - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_tiny(%pc),&0x80 - fmul.x %fp0,%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - -######################################################################### -# XDEF **************************************************************** # -# t_ovfl(): Handle 060FPLSP overflow exception during emulation. # -# (monadic) # -# t_ovfl2(): Handle 060FPLSP overflow exception during # -# emulation. Result always positive. (dyadic) # -# t_ovfl_sc(): Handle 060FPLSP overflow exception during # -# emulation for "fscale". # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default overflow result # -# # -# ALGORITHM *********************************************************** # -# An overflow should occur as the result of transcendental # -# emulation in the 060FPLSP. Create an overflow by using "fmul" # -# and two very large numbers of appropriate sign so the operating # -# system can log the event. # -# For t_ovfl_sc() we take special care not to lose the INEX2 bit.
# -# # -######################################################################### - - global t_ovfl_sc -t_ovfl_sc: - ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX - - mov.b %d0,%d1 # fetch rnd prec,mode - andi.b &0xc0,%d1 # extract prec - beq.w ovfl_work - -# dst op is a DENORM. we have to normalize the mantissa to see if the -# result would be inexact for the given precision. make a copy of the -# dst so we don't screw up the version passed to us. - mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) - mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) - mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) - lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0 - movm.l &0xc080,-(%sp) # save d0-d1/a0 - bsr.l norm # normalize mantissa - movm.l (%sp)+,&0x0103 # restore d0-d1/a0 - - cmpi.b %d1,&0x40 # is precision sgl? - bne.b ovfl_sc_dbl # no; dbl -ovfl_sc_sgl: - tst.l LOCAL_LO(%a0) # is lo lw of sgl set? - bne.b ovfl_sc_inx # yes - tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set? - bne.b ovfl_sc_inx # yes - bra.w ovfl_work # don't set INEX2 -ovfl_sc_dbl: - mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of - andi.l &0x7ff,%d1 # dbl mantissa set? - beq.w ovfl_work # no; don't set INEX2 -ovfl_sc_inx: - ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2 - bra.b ovfl_work # continue - - global t_ovfl -t_ovfl: - ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX -ovfl_work: - tst.b SRC_EX(%a0) - bpl.b ovfl_p -ovfl_m: - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x mns_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - ori.b &neg_mask,%d0 - mov.b %d0,FPSR_CC(%a6) - rts -ovfl_p: - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - - global t_ovfl2 -t_ovfl2: - ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - -######################################################################### -# XDEF **************************************************************** # -# t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # -# emulation. # -# t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # -# emulation. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp0 = default underflow or overflow result # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# If an overflow or underflow occurred during the last # -# instruction of transcendental 060FPLSP emulation, then it has already # -# occurred and has been logged. Now we need to see if an inexact # -# exception should occur. # -# # -######################################################################### - - global t_catch2 -t_catch2: - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - bra.b inx2_work - - global t_catch -t_catch: - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - -######################################################################### -# XDEF **************************************************************** # -# t_inx2(): Handle inexact 060FPLSP exception during emulation. # -# t_pinx2(): Handle inexact 060FPLSP exception for "+" results. 
# -# t_minx2(): Handle inexact 060FPLSP exception for "-" results. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp0 = default result # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# The last instruction of transcendental emulation for the # -# 060FPLSP should be inexact. So, if inexact is enabled, then we create # -# the event here by adding a large and very small number together # -# so that the operating system can log the event. # -# Must check, too, if the result was zero, in which case we just # -# set the FPSR bits and return. # -# # -######################################################################### - - global t_inx2 -t_inx2: - fblt.w t_minx2 - fbeq.w inx2_zero - - global t_pinx2 -t_pinx2: - ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX - bra.b inx2_work - - global t_minx2 -t_minx2: - ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) - -inx2_work: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - bne.b inx2_work_ena # yes - rts -inx2_work_ena: - fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions - fmov.s &0x3f800000,%fp1 # load +1 - fadd.x pls_tiny(%pc),%fp1 # cause exception - rts - -inx2_zero: - mov.b &z_bmask,FPSR_CC(%a6) - ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX - rts - -######################################################################### -# XDEF **************************************************************** # -# t_extdnrm(): Handle DENORM inputs in 060FPLSP. # -# t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". # -# # -# This routine is used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# For all functions that have a denormalized input and that # -# f(x)=x, this is the entry point. # -# DENORM value is moved using "fmove" which triggers an exception # -# if enabled so the operating system can log the event. # -# # -######################################################################### - - global t_extdnrm -t_extdnrm: - fmov.l USER_FPCR(%a6),%fpcr - fmov.x SRC_EX(%a0),%fp0 - fmov.l %fpsr,%d0 - ori.l &unfinx_mask,%d0 - or.l %d0,USER_FPSR(%a6) - rts - - global t_resdnrm -t_resdnrm: - fmov.l USER_FPCR(%a6),%fpcr - fmov.x SRC_EX(%a0),%fp0 - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - rts - -########################################## - -# -# sto_cos: -# This is used by fsincos library emulation. The correct -# values are already in fp0 and fp1 so we do nothing here. 
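The t_inx2/t_pinx2/t_minx2 mechanism above is the mirror image of t_dz/t_operr: when inexact is enabled, the package adds +1 to pls_tiny so the rounded fadd genuinely sets INEX2 in hardware. The same effect in C; a sketch under the assumption of a hosted C99 environment with <fenv.h>, not code taken from the package:

    #include <fenv.h>
    #include <stdio.h>

    #pragma STDC FENV_ACCESS ON

    int main(void)
    {
        volatile double one = 1.0, tiny = 0x1p-60; /* stand-in for pls_tiny */
        volatile double r;

        feclearexcept(FE_ALL_EXCEPT);
        r = one + tiny;          /* exact sum needs 61 bits, so it rounds */
        if (fetestexcept(FE_INEXACT))
            printf("inexact raised, r = %.17g\n", r);
        return 0;
    }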
-# - global sto_cos -sto_cos: - rts - -########################################## - -# -# dst_qnan --- force result when destination is a NaN -# - global dst_qnan -dst_qnan: - fmov.x DST(%a1),%fp0 - tst.b DST_EX(%a1) - bmi.b dst_qnan_m -dst_qnan_p: - mov.b &nan_bmask,FPSR_CC(%a6) - rts -dst_qnan_m: - mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) - rts - -# -# src_qnan --- force result when source is a NaN -# - global src_qnan -src_qnan: - fmov.x SRC(%a0),%fp0 - tst.b SRC_EX(%a0) - bmi.b src_qnan_m -src_qnan_p: - mov.b &nan_bmask,FPSR_CC(%a6) - rts -src_qnan_m: - mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) - rts - -########################################## - -# -# Native instruction support -# -# Some systems may need entry points even for 68060 native -# instructions. These routines are provided for -# convenience. -# - global _fadds_ -_fadds_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src - rts - - global _faddd_ -_faddd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src - rts - - global _faddx_ -_faddx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fadd.x 0x10(%sp),%fp0 # fadd w/ ext src - rts - - global _fsubs_ -_fsubs_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src - rts - - global _fsubd_ -_fsubd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src - rts - - global _fsubx_ -_fsubx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fsub.x 0x10(%sp),%fp0 # fsub w/ ext src - rts - - global _fmuls_ -_fmuls_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src - rts - - global _fmuld_ -_fmuld_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src - rts - - global _fmulx_ -_fmulx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fmul.x 0x10(%sp),%fp0 # fmul w/ ext src - rts - - global _fdivs_ -_fdivs_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src - rts - - global _fdivd_ -_fdivd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src - rts - - global _fdivx_ -_fdivx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src - rts - - global _fabss_ -_fabss_: - fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src - rts - - global _fabsd_ -_fabsd_: - fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src - rts - - global _fabsx_ -_fabsx_: - fabs.x 0x4(%sp),%fp0 # fabs w/ ext src - rts - - global _fnegs_ -_fnegs_: - fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src - rts - - global _fnegd_ -_fnegd_: - fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src - rts - - global 
_fnegx_
-_fnegx_:
- fneg.x 0x4(%sp),%fp0 # fneg w/ ext src
- rts
-
- global _fsqrts_
-_fsqrts_:
- fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src
- rts
-
- global _fsqrtd_
-_fsqrtd_:
- fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src
- rts
-
- global _fsqrtx_
-_fsqrtx_:
- fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src
- rts
-
- global _fints_
-_fints_:
- fint.s 0x4(%sp),%fp0 # fint w/ sgl src
- rts
-
- global _fintd_
-_fintd_:
- fint.d 0x4(%sp),%fp0 # fint w/ dbl src
- rts
-
- global _fintx_
-_fintx_:
- fint.x 0x4(%sp),%fp0 # fint w/ ext src
- rts
-
- global _fintrzs_
-_fintrzs_:
- fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src
- rts
-
- global _fintrzd_
-_fintrzd_:
- fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src
- rts
-
- global _fintrzx_
-_fintrzx_:
- fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src
- rts
-
-########################################################################
-
-#########################################################################
-# src_zero(): Return signed zero according to sign of src operand. #
-#########################################################################
- global src_zero
-src_zero:
- tst.b SRC_EX(%a0) # get sign of src operand
- bmi.b ld_mzero # if neg, load neg zero
-
-#
-# ld_pzero(): return a positive zero.
-#
- global ld_pzero
-ld_pzero:
- fmov.s &0x00000000,%fp0 # load +0
- mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
- rts
-
-# ld_mzero(): return a negative zero.
- global ld_mzero
-ld_mzero:
- fmov.s &0x80000000,%fp0 # load -0
- mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
- rts
-
-#########################################################################
-# dst_zero(): Return signed zero according to sign of dst operand. #
-#########################################################################
- global dst_zero
-dst_zero:
- tst.b DST_EX(%a1) # get sign of dst operand
- bmi.b ld_mzero # if neg, load neg zero
- bra.b ld_pzero # load positive zero
-
-#########################################################################
-# src_inf(): Return signed inf according to sign of src operand. #
-#########################################################################
- global src_inf
-src_inf:
- tst.b SRC_EX(%a0) # get sign of src operand
- bmi.b ld_minf # if negative branch
-
-#
-# ld_pinf(): return a positive infinity.
-#
- global ld_pinf
-ld_pinf:
- fmov.s &0x7f800000,%fp0 # load +INF
- mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
- rts
-
-#
-# ld_minf(): return a negative infinity.
-#
- global ld_minf
-ld_minf:
- fmov.s &0xff800000,%fp0 # load -INF
- mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
- rts
-
-#########################################################################
-# dst_inf(): Return signed inf according to sign of dst operand. #
-#########################################################################
- global dst_inf
-dst_inf:
- tst.b DST_EX(%a1) # get sign of dst operand
- bmi.b ld_minf # if negative branch
- bra.b ld_pinf
-
- global szr_inf
-#################################################################
-# szr_inf(): Return +ZERO for a negative src operand or #
-# +INF for a positive src operand. #
-# Routine used for fetox, ftwotox, and ftentox. #
-#################################################################
-szr_inf:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_pzero
- bra.b ld_pinf
-
-#########################################################################
-# sopr_inf(): Return +INF for a positive src operand or #
-# jump to operand error routine for a negative src operand.
#
-# Routine used for flogn, flognp1, flog10, and flog2. #
-#########################################################################
- global sopr_inf
-sopr_inf:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.w t_operr
- bra.b ld_pinf
-
-#################################################################
-# setoxm1i(): Return minus one for a negative src operand or #
-# positive infinity for a positive src operand. #
-# Routine used for fetoxm1. #
-#################################################################
- global setoxm1i
-setoxm1i:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mone
- bra.b ld_pinf
-
-#########################################################################
-# src_one(): Return signed one according to sign of src operand. #
-#########################################################################
- global src_one
-src_one:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mone
-
-#
-# ld_pone(): return positive one.
-#
- global ld_pone
-ld_pone:
- fmov.s &0x3f800000,%fp0 # load +1
- clr.b FPSR_CC(%a6)
- rts
-
-#
-# ld_mone(): return negative one.
-#
- global ld_mone
-ld_mone:
- fmov.s &0xbf800000,%fp0 # load -1
- mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
- rts
-
-ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
-mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
-
-#################################################################
-# spi_2(): Return signed PI/2 according to sign of src operand. #
-#################################################################
- global spi_2
-spi_2:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mpi2
-
-#
-# ld_ppi2(): return positive PI/2.
-#
- global ld_ppi2
-ld_ppi2:
- fmov.l %d0,%fpcr
- fmov.x ppiby2(%pc),%fp0 # load +pi/2
- bra.w t_pinx2 # set INEX2
-
-#
-# ld_mpi2(): return negative PI/2.
-#
- global ld_mpi2
-ld_mpi2:
- fmov.l %d0,%fpcr
- fmov.x mpiby2(%pc),%fp0 # load -pi/2
- bra.w t_minx2 # set INEX2
-
-####################################################
-# The following routines give support for fsincos. #
-####################################################
-
-#
-# ssincosz(): When the src operand is ZERO, store a one in the
-# cosine register and return a ZERO in fp0 w/ the same sign
-# as the src operand.
-#
- global ssincosz
-ssincosz:
- fmov.s &0x3f800000,%fp1
- tst.b SRC_EX(%a0) # test sign
- bpl.b sincoszp
- fmov.s &0x80000000,%fp0 # return sin result in fp0
- mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
- rts
-sincoszp:
- fmov.s &0x00000000,%fp0 # return sin result in fp0
- mov.b &z_bmask,FPSR_CC(%a6)
- rts
-
-#
-# ssincosi(): When the src operand is INF, store a QNAN in the cosine
-# register and jump to the operand error routine.
-#
- global ssincosi
-ssincosi:
- fmov.x qnan(%pc),%fp1 # load NAN
- bra.w t_operr
-
-#
-# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
-# register and branch to the src QNAN routine.
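The loaders above (src_zero/src_inf/src_one, and the ssincosz zero case) are all one idiom: test the sign byte of the source operand and return a canned constant carrying that sign. In C the whole family collapses to copysign(); this is a sketch with hypothetical names, not code from the package (link with -lm):

    #include <math.h>
    #include <stdio.h>

    /* pick a canned magnitude whose sign follows the source operand */
    static double sign_dispatch(double src, double magnitude)
    {
        return copysign(magnitude, src);
    }

    int main(void)
    {
        printf("%g\n", sign_dispatch(-2.5, INFINITY)); /* -inf, cf. src_inf  */
        printf("%g\n", sign_dispatch(-2.5, 0.0));      /* -0,   cf. src_zero */
        /* the ssincosz case: sin(+-0) = +-0 while cos(+-0) = +1 */
        printf("sin=%g cos=%g\n", sin(-0.0), cos(-0.0));
        return 0;
    }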
-# - global ssincosqnan -ssincosqnan: - fmov.x LOCAL_EX(%a0),%fp1 - bra.w src_qnan - -######################################################################## - - global smod_sdnrm - global smod_snorm -smod_sdnrm: -smod_snorm: - mov.b DTAG(%a6),%d1 - beq.l smod - cmpi.b %d1,&ZERO - beq.w smod_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l smod - bra.l dst_qnan - - global smod_szero -smod_szero: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&ZERO - beq.l t_operr - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l t_operr - bra.l dst_qnan - - global smod_sinf -smod_sinf: - mov.b DTAG(%a6),%d1 - beq.l smod_fpn - cmpi.b %d1,&ZERO - beq.l smod_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l smod_fpn - bra.l dst_qnan - -smod_zro: -srem_zro: - mov.b SRC_EX(%a0),%d1 # get src sign - mov.b DST_EX(%a1),%d0 # get dst sign - eor.b %d0,%d1 # get qbyte sign - andi.b &0x80,%d1 - mov.b %d1,FPSR_QBYTE(%a6) - tst.b %d0 - bpl.w ld_pzero - bra.w ld_mzero - -smod_fpn: -srem_fpn: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) - mov.b SRC_EX(%a0),%d1 # get src sign - mov.b DST_EX(%a1),%d0 # get dst sign - eor.b %d0,%d1 # get qbyte sign - andi.b &0x80,%d1 - mov.b %d1,FPSR_QBYTE(%a6) - cmpi.b DTAG(%a6),&DENORM - bne.b smod_nrm - lea DST(%a1),%a0 - mov.l (%sp)+,%d0 - bra t_resdnrm -smod_nrm: - fmov.l (%sp)+,%fpcr - fmov.x DST(%a1),%fp0 - tst.b DST_EX(%a1) - bmi.b smod_nrm_neg - rts - -smod_nrm_neg: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code - rts - -######################################################################### - global srem_snorm - global srem_sdnrm -srem_sdnrm: -srem_snorm: - mov.b DTAG(%a6),%d1 - beq.l srem - cmpi.b %d1,&ZERO - beq.w srem_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l srem - bra.l dst_qnan - - global srem_szero -srem_szero: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&ZERO - beq.l t_operr - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l t_operr - bra.l dst_qnan - - global srem_sinf -srem_sinf: - mov.b DTAG(%a6),%d1 - beq.w srem_fpn - cmpi.b %d1,&ZERO - beq.w srem_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l srem_fpn - bra.l dst_qnan - -######################################################################### - - global sscale_snorm - global sscale_sdnrm -sscale_snorm: -sscale_sdnrm: - mov.b DTAG(%a6),%d1 - beq.l sscale - cmpi.b %d1,&ZERO - beq.l dst_zero - cmpi.b %d1,&INF - beq.l dst_inf - cmpi.b %d1,&DENORM - beq.l sscale - bra.l dst_qnan - - global sscale_szero -sscale_szero: - mov.b DTAG(%a6),%d1 - beq.l sscale - cmpi.b %d1,&ZERO - beq.l dst_zero - cmpi.b %d1,&INF - beq.l dst_inf - cmpi.b %d1,&DENORM - beq.l sscale - bra.l dst_qnan - - global sscale_sinf -sscale_sinf: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&QNAN - beq.l dst_qnan - bra.l t_operr - -######################################################################## - - global sop_sqnan -sop_sqnan: - mov.b DTAG(%a6),%d1 - cmpi.b %d1,&QNAN - beq.l dst_qnan - bra.l src_qnan - -######################################################################### -# norm(): normalize the mantissa of an extended precision input. the # -# input operand should not be normalized already. 
# -# # -# XDEF **************************************************************** # -# norm() # -# # -# XREF **************************************************************** # -# none # -# # -# INPUT *************************************************************** # -# a0 = pointer fp extended precision operand to normalize # -# # -# OUTPUT ************************************************************** # -# d0 = number of bit positions the mantissa was shifted # -# a0 = the input operand's mantissa is normalized; the exponent # -# is unchanged. # -# # -######################################################################### - global norm -norm: - mov.l %d2, -(%sp) # create some temp regs - mov.l %d3, -(%sp) - - mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) - mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) - - bfffo %d0{&0:&32}, %d2 # how many places to shift? - beq.b norm_lo # hi(man) is all zeroes! - -norm_hi: - lsl.l %d2, %d0 # left shift hi(man) - bfextu %d1{&0:%d2}, %d3 # extract lo bits - - or.l %d3, %d0 # create hi(man) - lsl.l %d2, %d1 # create lo(man) - - mov.l %d0, FTEMP_HI(%a0) # store new hi(man) - mov.l %d1, FTEMP_LO(%a0) # store new lo(man) - - mov.l %d2, %d0 # return shift amount - - mov.l (%sp)+, %d3 # restore temp regs - mov.l (%sp)+, %d2 - - rts - -norm_lo: - bfffo %d1{&0:&32}, %d2 # how many places to shift? - lsl.l %d2, %d1 # shift lo(man) - add.l &32, %d2 # add 32 to shft amount - - mov.l %d1, FTEMP_HI(%a0) # store hi(man) - clr.l FTEMP_LO(%a0) # lo(man) is now zero - - mov.l %d2, %d0 # return shift amount - - mov.l (%sp)+, %d3 # restore temp regs - mov.l (%sp)+, %d2 - - rts - -######################################################################### -# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # -# - returns corresponding optype tag # -# # -# XDEF **************************************************************** # -# unnorm_fix() # -# # -# XREF **************************************************************** # -# norm() - normalize the mantissa # -# # -# INPUT *************************************************************** # -# a0 = pointer to unnormalized extended precision number # -# # -# OUTPUT ************************************************************** # -# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # -# a0 = input operand has been converted to a norm, denorm, or # -# zero; both the exponent and mantissa are changed. # -# # -######################################################################### - - global unnorm_fix -unnorm_fix: - bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? - bne.b unnorm_shift # hi(man) is not all zeroes - -# -# hi(man) is all zeroes so see if any bits in lo(man) are set -# -unnorm_chk_lo: - bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? - beq.w unnorm_zero # yes - - add.w &32, %d0 # no; fix shift distance - -# -# d0 = # shifts needed for complete normalization -# -unnorm_shift: - clr.l %d1 # clear top word - mov.w FTEMP_EX(%a0), %d1 # extract exponent - and.w &0x7fff, %d1 # strip off sgn - - cmp.w %d0, %d1 # will denorm push exp < 0? - bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 - -# -# exponent would not go < 0. 
therefore, number stays normalized -# - sub.w %d0, %d1 # shift exponent value - mov.w FTEMP_EX(%a0), %d0 # load old exponent - and.w &0x8000, %d0 # save old sign - or.w %d0, %d1 # {sgn,new exp} - mov.w %d1, FTEMP_EX(%a0) # insert new exponent - - bsr.l norm # normalize UNNORM - - mov.b &NORM, %d0 # return new optype tag - rts - -# -# exponent would go < 0, so only denormalize until exp = 0 -# -unnorm_nrm_zero: - cmp.b %d1, &32 # is exp <= 32? - bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent - - bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) - mov.l %d0, FTEMP_HI(%a0) # save new hi(man) - - mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) - lsl.l %d1, %d0 # extract new lo(man) - mov.l %d0, FTEMP_LO(%a0) # save new lo(man) - - and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 - - mov.b &DENORM, %d0 # return new optype tag - rts - -# -# only mantissa bits set are in lo(man) -# -unnorm_nrm_zero_lrg: - sub.w &32, %d1 # adjust shft amt by 32 - - mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) - lsl.l %d1, %d0 # left shift lo(man) - - mov.l %d0, FTEMP_HI(%a0) # store new hi(man) - clr.l FTEMP_LO(%a0) # lo(man) = 0 - - and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 - - mov.b &DENORM, %d0 # return new optype tag - rts - -# -# whole mantissa is zero so this UNNORM is actually a zero -# -unnorm_zero: - and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero - - mov.b &ZERO, %d0 # fix optype tag - rts diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S deleted file mode 100644 index 9bbffebe3eb504833ed0937670bd4168751d61a4..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ /dev/null @@ -1,24785 +0,0 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -M68000 Hi-Performance Microprocessor Division -M68060 Software Package -Production Release P1.00 -- October 10, 1994 - -M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. - -THE SOFTWARE is provided on an "AS IS" basis and without warranty. -To the maximum extent permitted by applicable law, -MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -and any warranty against infringement with regard to the SOFTWARE -(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. - -To the maximum extent permitted by applicable law, -IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. - -You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -so long as this entire notice is retained without alteration in any modified and/or -redistributed versions, and that such modified versions are clearly identified as such. -No licenses are granted by implication, estoppel or otherwise under any patents -or trademarks of Motorola, Inc. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# freal.s: -# This file is appended to the top of the 060FPSP package -# and contains the entry points into the package. The user, in -# effect, branches to one of the branch table entries located -# after _060FPSP_TABLE. 
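Each entry in that table is a 6-byte "bra.l" padded by a 2-byte "short 0x0000", so the entries sit a fixed 8 bytes apart and a kernel reaches handler i at table base + 8*i. A small C sketch of that addressing (the names and the example base address are hypothetical; the real base is wherever the OS links the package):

    #include <stdint.h>
    #include <stdio.h>

    #define FPSP_ENTRY_SIZE 8u  /* bra.l (6 bytes) + short pad (2 bytes) */

    /* hypothetical indices matching the order of the branch table */
    enum { FPSP_SNAN, FPSP_OPERR, FPSP_OVFL, FPSP_UNFL, FPSP_DZ, FPSP_INEX };

    static uintptr_t fpsp_entry(uintptr_t table_base, unsigned idx)
    {
        return table_base + (uintptr_t)idx * FPSP_ENTRY_SIZE;
    }

    int main(void)
    {
        uintptr_t base = 0x1000;    /* pretend load address */
        printf("dz entry at %#lx\n", (unsigned long)fpsp_entry(base, FPSP_DZ));
        return 0;
    }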
-# Also, subroutine stubs exist in this file (_fpsp_done for
-# example) that are referenced by the FPSP package itself in order
-# to call a given routine. The stub routine actually performs the
-# callout. The FPSP code does a "bsr" to the stub routine. This
-# extra layer of hierarchy adds a slight performance penalty but
-# it makes the FPSP code easier to read and more maintainable.
-#
-
-set _off_bsun, 0x00
-set _off_snan, 0x04
-set _off_operr, 0x08
-set _off_ovfl, 0x0c
-set _off_unfl, 0x10
-set _off_dz, 0x14
-set _off_inex, 0x18
-set _off_fline, 0x1c
-set _off_fpu_dis, 0x20
-set _off_trap, 0x24
-set _off_trace, 0x28
-set _off_access, 0x2c
-set _off_done, 0x30
-
-set _off_imr, 0x40
-set _off_dmr, 0x44
-set _off_dmw, 0x48
-set _off_irw, 0x4c
-set _off_irl, 0x50
-set _off_drb, 0x54
-set _off_drw, 0x58
-set _off_drl, 0x5c
-set _off_dwb, 0x60
-set _off_dww, 0x64
-set _off_dwl, 0x68
-
-_060FPSP_TABLE:
-
-###############################################################
-
-# Here's the table of ENTRY POINTS for those linking the package.
- bra.l _fpsp_snan
- short 0x0000
- bra.l _fpsp_operr
- short 0x0000
- bra.l _fpsp_ovfl
- short 0x0000
- bra.l _fpsp_unfl
- short 0x0000
- bra.l _fpsp_dz
- short 0x0000
- bra.l _fpsp_inex
- short 0x0000
- bra.l _fpsp_fline
- short 0x0000
- bra.l _fpsp_unsupp
- short 0x0000
- bra.l _fpsp_effadd
- short 0x0000
-
- space 56
-
-###############################################################
- global _fpsp_done
-_fpsp_done:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_ovfl
-_real_ovfl:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_unfl
-_real_unfl:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_inex
-_real_inex:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_bsun
-_real_bsun:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_operr
-_real_operr:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_snan
-_real_snan:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_dz
-_real_dz:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_fline
-_real_fline:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_fpu_disabled
-_real_fpu_disabled:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_trap
-_real_trap:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_trace
-_real_trace:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_access
-_real_access: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - -####################################### - - global _imem_read -_imem_read: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read -_dmem_read: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write -_dmem_write: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _imem_read_word -_imem_read_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _imem_read_long -_imem_read_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_byte -_dmem_read_byte: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_word -_dmem_read_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_long -_dmem_read_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_byte -_dmem_write_byte: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_word -_dmem_write_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_long -_dmem_write_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - -# -# This file contains a set of define statements for constants -# in order to promote readability within the corecode itself. 
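One more note on the stubs above before the constants: each _real_*/_imem_*/_dmem_* stub fetches a 32-bit offset from the 0x80-byte vector that the host OS fills in immediately below _060FPSP_TABLE (indexed by the _off_* constants), adds it to that vector's base, and jumps there; the mov.l/pea.l/rtd sequence is just a jump that preserves d0. A C sketch of the lookup, with hypothetical names and the caveat that converting a data address to a function pointer is implementation-defined in C (on the 060 it is a plain address computation):

    #include <stdint.h>
    #include <string.h>

    typedef void (*fpsp_callout)(void);

    /* table_base points at _060FPSP_TABLE; off is e.g. _off_dz = 0x14 */
    static fpsp_callout fpsp_get_callout(uint8_t *table_base, uint32_t off)
    {
        uint8_t *vec = table_base - 0x80;      /* OS-filled offset vector */
        int32_t  rel;

        memcpy(&rel, vec + off, sizeof rel);   /* offset stored by the OS */
        return (fpsp_callout)(uintptr_t)(vec + rel); /* target = vec + rel */
    }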
-# - -set LOCAL_SIZE, 192 # stack frame size(bytes) -set LV, -LOCAL_SIZE # stack offset - -set EXC_SR, 0x4 # stack status register -set EXC_PC, 0x6 # stack pc -set EXC_VOFF, 0xa # stacked vector offset -set EXC_EA, 0xc # stacked - -set EXC_FP, 0x0 # frame pointer - -set EXC_AREGS, -68 # offset of all address regs -set EXC_DREGS, -100 # offset of all data regs -set EXC_FPREGS, -36 # offset of all fp regs - -set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 -set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 -set EXC_A5, EXC_AREGS+(5*4) -set EXC_A4, EXC_AREGS+(4*4) -set EXC_A3, EXC_AREGS+(3*4) -set EXC_A2, EXC_AREGS+(2*4) -set EXC_A1, EXC_AREGS+(1*4) -set EXC_A0, EXC_AREGS+(0*4) -set EXC_D7, EXC_DREGS+(7*4) -set EXC_D6, EXC_DREGS+(6*4) -set EXC_D5, EXC_DREGS+(5*4) -set EXC_D4, EXC_DREGS+(4*4) -set EXC_D3, EXC_DREGS+(3*4) -set EXC_D2, EXC_DREGS+(2*4) -set EXC_D1, EXC_DREGS+(1*4) -set EXC_D0, EXC_DREGS+(0*4) - -set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 -set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 -set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) - -set FP_SCR1, LV+80 # fp scratch 1 -set FP_SCR1_EX, FP_SCR1+0 -set FP_SCR1_SGN, FP_SCR1+2 -set FP_SCR1_HI, FP_SCR1+4 -set FP_SCR1_LO, FP_SCR1+8 - -set FP_SCR0, LV+68 # fp scratch 0 -set FP_SCR0_EX, FP_SCR0+0 -set FP_SCR0_SGN, FP_SCR0+2 -set FP_SCR0_HI, FP_SCR0+4 -set FP_SCR0_LO, FP_SCR0+8 - -set FP_DST, LV+56 # fp destination operand -set FP_DST_EX, FP_DST+0 -set FP_DST_SGN, FP_DST+2 -set FP_DST_HI, FP_DST+4 -set FP_DST_LO, FP_DST+8 - -set FP_SRC, LV+44 # fp source operand -set FP_SRC_EX, FP_SRC+0 -set FP_SRC_SGN, FP_SRC+2 -set FP_SRC_HI, FP_SRC+4 -set FP_SRC_LO, FP_SRC+8 - -set USER_FPIAR, LV+40 # FP instr address register - -set USER_FPSR, LV+36 # FP status register -set FPSR_CC, USER_FPSR+0 # FPSR condition codes -set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte -set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte -set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte - -set USER_FPCR, LV+32 # FP control register -set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable -set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control - -set L_SCR3, LV+28 # integer scratch 3 -set L_SCR2, LV+24 # integer scratch 2 -set L_SCR1, LV+20 # integer scratch 1 - -set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) - -set EXC_TEMP2, LV+24 # temporary space -set EXC_TEMP, LV+16 # temporary space - -set DTAG, LV+15 # destination operand type -set STAG, LV+14 # source operand type - -set SPCOND_FLG, LV+10 # flag: special case (see below) - -set EXC_CC, LV+8 # saved condition codes -set EXC_EXTWPTR, LV+4 # saved current PC (active) -set EXC_EXTWORD, LV+2 # saved extension word -set EXC_CMDREG, LV+2 # saved extension word -set EXC_OPWORD, LV+0 # saved operation word - -################################ - -# Helpful macros - -set FTEMP, 0 # offsets within an -set FTEMP_EX, 0 # extended precision -set FTEMP_SGN, 2 # value saved in memory. -set FTEMP_HI, 4 -set FTEMP_LO, 8 -set FTEMP_GRS, 12 - -set LOCAL, 0 # offsets within an -set LOCAL_EX, 0 # extended precision -set LOCAL_SGN, 2 # value saved in memory. -set LOCAL_HI, 4 -set LOCAL_LO, 8 -set LOCAL_GRS, 12 - -set DST, 0 # offsets within an -set DST_EX, 0 # extended precision -set DST_HI, 4 # value saved in memory. -set DST_LO, 8 - -set SRC, 0 # offsets within an -set SRC_EX, 0 # extended precision -set SRC_HI, 4 # value saved in memory. 
-set SRC_LO, 8 - -set SGL_LO, 0x3f81 # min sgl prec exponent -set SGL_HI, 0x407e # max sgl prec exponent -set DBL_LO, 0x3c01 # min dbl prec exponent -set DBL_HI, 0x43fe # max dbl prec exponent -set EXT_LO, 0x0 # min ext prec exponent -set EXT_HI, 0x7ffe # max ext prec exponent - -set EXT_BIAS, 0x3fff # extended precision bias -set SGL_BIAS, 0x007f # single precision bias -set DBL_BIAS, 0x03ff # double precision bias - -set NORM, 0x00 # operand type for STAG/DTAG -set ZERO, 0x01 # operand type for STAG/DTAG -set INF, 0x02 # operand type for STAG/DTAG -set QNAN, 0x03 # operand type for STAG/DTAG -set DENORM, 0x04 # operand type for STAG/DTAG -set SNAN, 0x05 # operand type for STAG/DTAG -set UNNORM, 0x06 # operand type for STAG/DTAG - -################## -# FPSR/FPCR bits # -################## -set neg_bit, 0x3 # negative result -set z_bit, 0x2 # zero result -set inf_bit, 0x1 # infinite result -set nan_bit, 0x0 # NAN result - -set q_sn_bit, 0x7 # sign bit of quotient byte - -set bsun_bit, 7 # branch on unordered -set snan_bit, 6 # signalling NAN -set operr_bit, 5 # operand error -set ovfl_bit, 4 # overflow -set unfl_bit, 3 # underflow -set dz_bit, 2 # divide by zero -set inex2_bit, 1 # inexact result 2 -set inex1_bit, 0 # inexact result 1 - -set aiop_bit, 7 # accrued inexact operation bit -set aovfl_bit, 6 # accrued overflow bit -set aunfl_bit, 5 # accrued underflow bit -set adz_bit, 4 # accrued dz bit -set ainex_bit, 3 # accrued inexact bit - -############################# -# FPSR individual bit masks # -############################# -set neg_mask, 0x08000000 # negative bit mask (lw) -set inf_mask, 0x02000000 # infinity bit mask (lw) -set z_mask, 0x04000000 # zero bit mask (lw) -set nan_mask, 0x01000000 # nan bit mask (lw) - -set neg_bmask, 0x08 # negative bit mask (byte) -set inf_bmask, 0x02 # infinity bit mask (byte) -set z_bmask, 0x04 # zero bit mask (byte) -set nan_bmask, 0x01 # nan bit mask (byte) - -set bsun_mask, 0x00008000 # bsun exception mask -set snan_mask, 0x00004000 # snan exception mask -set operr_mask, 0x00002000 # operr exception mask -set ovfl_mask, 0x00001000 # overflow exception mask -set unfl_mask, 0x00000800 # underflow exception mask -set dz_mask, 0x00000400 # dz exception mask -set inex2_mask, 0x00000200 # inex2 exception mask -set inex1_mask, 0x00000100 # inex1 exception mask - -set aiop_mask, 0x00000080 # accrued illegal operation -set aovfl_mask, 0x00000040 # accrued overflow -set aunfl_mask, 0x00000020 # accrued underflow -set adz_mask, 0x00000010 # accrued divide by zero -set ainex_mask, 0x00000008 # accrued inexact - -###################################### -# FPSR combinations used in the FPSP # -###################################### -set dzinf_mask, inf_mask+dz_mask+adz_mask -set opnan_mask, nan_mask+operr_mask+aiop_mask -set nzi_mask, 0x01ffffff #clears N, Z, and I -set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask -set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask -set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask -set inx1a_mask, inex1_mask+ainex_mask -set inx2a_mask, inex2_mask+ainex_mask -set snaniop_mask, nan_mask+snan_mask+aiop_mask -set snaniop2_mask, snan_mask+aiop_mask -set naniop_mask, nan_mask+aiop_mask -set neginf_mask, neg_mask+inf_mask -set infaiop_mask, inf_mask+aiop_mask -set negz_mask, neg_mask+z_mask -set opaop_mask, operr_mask+aiop_mask -set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask -set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask - -######### -# misc. 
#
-#########
-set rnd_stky_bit, 29 # stky bit pos in longword
-
-set sign_bit, 0x7 # sign bit
-set signan_bit, 0x6 # signalling nan bit
-
-set sgl_thresh, 0x3f81 # minimum sgl exponent
-set dbl_thresh, 0x3c01 # minimum dbl exponent
-
-set x_mode, 0x0 # extended precision
-set s_mode, 0x4 # single precision
-set d_mode, 0x8 # double precision
-
-set rn_mode, 0x0 # round-to-nearest
-set rz_mode, 0x1 # round-to-zero
-set rm_mode, 0x2 # round-to-minus-infinity
-set rp_mode, 0x3 # round-to-plus-infinity
-
-set mantissalen, 64 # length of mantissa in bits
-
-set BYTE, 1 # len(byte) == 1 byte
-set WORD, 2 # len(word) == 2 bytes
-set LONG, 4 # len(longword) == 4 bytes
-
-set BSUN_VEC, 0xc0 # bsun vector offset
-set INEX_VEC, 0xc4 # inexact vector offset
-set DZ_VEC, 0xc8 # dz vector offset
-set UNFL_VEC, 0xcc # unfl vector offset
-set OPERR_VEC, 0xd0 # operr vector offset
-set OVFL_VEC, 0xd4 # ovfl vector offset
-set SNAN_VEC, 0xd8 # snan vector offset
-
-###########################
-# SPecial CONDition FLaGs #
-###########################
-set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
-set fbsun_flg, 0x02 # flag bit: bsun exception
-set mia7_flg, 0x04 # flag bit: (a7)+
-set mda7_flg, 0x08 # flag bit: -(a7)
-set fmovm_flg, 0x40 # flag bit: fmovm instruction
-set immed_flg, 0x80 # flag bit: &<data>
-
-set ftrapcc_bit, 0x0
-set fbsun_bit, 0x1
-set mia7_bit, 0x2
-set mda7_bit, 0x3
-set immed_bit, 0x7
-
-##################################
-# TRANSCENDENTAL "LAST-OP" FLAGS #
-##################################
-set FMUL_OP, 0x0 # fmul instr performed last
-set FDIV_OP, 0x1 # fdiv performed last
-set FADD_OP, 0x2 # fadd performed last
-set FMOV_OP, 0x3 # fmov performed last
-
-#############
-# CONSTANTS #
-#############
-T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
-T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
-
-PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
-PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
-
-TWOBYPI:
- long 0x3FE45F30,0x6DC9C883
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Overflow exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
-# _fpsp_done() - "callout" for 060FPSP exit (all work done!)
#
-# _real_ovfl() - "callout" for Overflow exception enabled code #
-# _real_inex() - "callout" for Inexact exception enabled code #
-# _real_trace() - "callout" for Trace exception code #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the FP Ovfl exception stack frame #
-# - The fsave frame contains the source operand #
-# #
-# OUTPUT ************************************************************** #
-# Overflow Exception enabled: #
-# - The system stack is unchanged #
-# - The fsave frame contains the adjusted src op for opclass 0,2 #
-# Overflow Exception disabled: #
-# - The system stack is unchanged #
-# - The "exception present" flag in the fsave frame is cleared #
-# #
-# ALGORITHM *********************************************************** #
-# On the 060, if an FP overflow is present as the result of any #
-# instruction, the 060 will take an overflow exception whether the #
-# exception is enabled or disabled in the FPCR. For the disabled case, #
-# this handler emulates the instruction to determine what the correct #
-# default result should be for the operation. This default result is #
-# then stored in either the FP regfile, data regfile, or memory. #
-# Finally, the handler exits through the "callout" _fpsp_done() #
-# denoting that no exceptional conditions exist within the machine. #
-# If the exception is enabled, then this handler must create the #
-# exceptional operand and place it in the fsave state frame, and store #
-# the default result (only if the instruction is opclass 3). For #
-# exceptions enabled, this handler must exit through the "callout" #
-# _real_ovfl() so that the operating system enabled overflow handler #
-# can handle this case. #
-# Two other conditions exist. First, if overflow was disabled #
-# but the inexact exception was enabled, this handler must exit #
-# through the "callout" _real_inex() regardless of whether the result #
-# was inexact. #
-# Also, in the case of an opclass three instruction where #
-# overflow was disabled and the trace exception was enabled, this #
-# handler must exit through the "callout" _real_trace(). #
-# #
-#########################################################################
-
- global _fpsp_ovfl
-_fpsp_ovfl:
-
-#$# sub.l &24,%sp # make room for src/dst
-
- link.w %a6,&-LOCAL_SIZE # init stack frame
-
- fsave FP_SRC(%a6) # grab the "busy" frame
-
- movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
- fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
- fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
-
-# the FPIAR holds the "current PC" of the faulting instruction
- mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
- mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
- addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
- bsr.l _imem_read_long # fetch the instruction words
- mov.l %d0,EXC_OPWORD(%a6)
-
-##############################################################################
-
- btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
- bne.w fovfl_out
-
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l fix_skewed_ops # fix src op
-
-# since, I believe, only NORMs and DENORMs can come through here,
-# maybe we can avoid the subroutine call.
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l set_tag_x # tag the operand type
- mov.b %d0,STAG(%a6) # maybe NORM,DENORM
-
-# bit five of the fp extension word separates the monadic and dyadic operations
-# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
-# will never take this exception.
- btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
- beq.b fovfl_extract # monadic
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
- bsr.l load_fpn2 # load dst into FP_DST
-
- lea FP_DST(%a6),%a0 # pass: ptr to dst op
- bsr.l set_tag_x # tag the operand type
- cmpi.b %d0,&UNNORM # is operand an UNNORM?
- bne.b fovfl_op2_done # no
- bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
-fovfl_op2_done:
- mov.b %d0,DTAG(%a6) # save dst optype tag
-
-fovfl_extract:
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
-#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
-#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- mov.b 1+EXC_CMDREG(%a6),%d1
- andi.w &0x007f,%d1 # extract extension
-
- andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0
- lea FP_DST(%a6),%a1
-
-# maybe we can make these entry points ONLY the OVFL entry points of each routine.
- mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
- jsr (tbl_unsupp.l,%pc,%d1.l*1)
-
-# the operation has been emulated. the result is in fp0.
-# the EXOP, if an exception occurred, is in fp1.
-# we must save the default result regardless of whether
-# traps are enabled or disabled.
- bfextu EXC_CMDREG(%a6){&6:&3},%d0
- bsr.l store_fpreg
-
-# the exceptional possibilities we have left ourselves with are ONLY overflow
-# and inexact. and, the inexact is such that overflow occurred and was disabled
-# but inexact was enabled.
- btst &ovfl_bit,FPCR_ENABLE(%a6)
- bne.b fovfl_ovfl_on
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.b fovfl_inex_on
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
- bra.l _fpsp_done
-
-# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
-# in fp1. now, simply jump to _real_ovfl()!
-fovfl_ovfl_on:
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.w &0xe005,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_ovfl
-
-# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
-# we must jump to real_inex().
-fovfl_inex_on:
-
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
- mov.w &0xe001,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_inex
-
-########################################################################
-fovfl_out:
-
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-
-# the src operand is definitely a NORM(!), so tag it as such
- mov.b &NORM,STAG(%a6) # set src optype tag
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0 # pass ptr to src operand
-
- bsr.l fout
-
- btst &ovfl_bit,FPCR_ENABLE(%a6)
- bne.w fovfl_ovfl_on
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.w fovfl_inex_on
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
-
- btst &0x7,(%sp) # is trace on?
- beq.l _fpsp_done # no
-
- fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
- mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
- bra.l _real_trace
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Underflow exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
-# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
-# _real_unfl() - "callout" for Underflow exception enabled code #
-# _real_inex() - "callout" for Inexact exception enabled code #
-# _real_trace() - "callout" for Trace exception code #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the FP Unfl exception stack frame #
-# - The fsave frame contains the source operand #
-# #
-# OUTPUT ************************************************************** #
-# Underflow Exception enabled: #
-# - The system stack is unchanged #
-# - The fsave frame contains the adjusted src op for opclass 0,2 #
-# Underflow Exception disabled: #
-# - The system stack is unchanged #
-# - The "exception present" flag in the fsave frame is cleared #
-# #
-# ALGORITHM *********************************************************** #
-# On the 060, if an FP underflow is present as the result of any #
-# instruction, the 060 will take an underflow exception whether the #
-# exception is enabled or disabled in the FPCR. For the disabled case, #
-# this handler emulates the instruction to determine what the correct #
-# default result should be for the operation. This default result is #
-# then stored in either the FP regfile, data regfile, or memory. #
-# Finally, the handler exits through the "callout" _fpsp_done() #
-# denoting that no exceptional conditions exist within the machine.
#
-# If the exception is enabled, then this handler must create the #
-# exceptional operand and place it in the fsave state frame, and store #
-# the default result (only if the instruction is opclass 3). For #
-# exceptions enabled, this handler must exit through the "callout" #
-# _real_unfl() so that the operating system enabled underflow handler #
-# can handle this case. #
-# Two other conditions exist. First, if underflow was disabled #
-# but the inexact exception was enabled and the result was inexact, #
-# this handler must exit through the "callout" _real_inex(). #
-# Also, in the case of an opclass three instruction where #
-# underflow was disabled and the trace exception was enabled, this #
-# handler must exit through the "callout" _real_trace(). #
-# #
-#########################################################################
-
- global _fpsp_unfl
-_fpsp_unfl:
-
-#$# sub.l &24,%sp # make room for src/dst
-
- link.w %a6,&-LOCAL_SIZE # init stack frame
-
- fsave FP_SRC(%a6) # grab the "busy" frame
-
- movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
- fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
- fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
-
-# the FPIAR holds the "current PC" of the faulting instruction
- mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
- mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
- addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
- bsr.l _imem_read_long # fetch the instruction words
- mov.l %d0,EXC_OPWORD(%a6)
-
-##############################################################################
-
- btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
- bne.w funfl_out
-
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l fix_skewed_ops # fix src op
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l set_tag_x # tag the operand type
- mov.b %d0,STAG(%a6) # maybe NORM,DENORM
-
-# bit five of the fp ext word separates the monadic and dyadic operations
-# that can pass through fpsp_unfl(). remember that fcmp, and ftst
-# will never take this exception.
- btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
- beq.b funfl_extract # monadic
-
-# now, what's left that's not dyadic is fsincos. we can distinguish it
-# from all dyadics by the '0110xxx pattern
- btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
- bne.b funfl_extract # yes
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
- bsr.l load_fpn2 # load dst into FP_DST
-
- lea FP_DST(%a6),%a0 # pass: ptr to dst op
- bsr.l set_tag_x # tag the operand type
- cmpi.b %d0,&UNNORM # is operand an UNNORM?
- bne.b funfl_op2_done # no
- bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
-funfl_op2_done:
- mov.b %d0,DTAG(%a6) # save dst optype tag
-
-funfl_extract:
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
-#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
-#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- mov.b 1+EXC_CMDREG(%a6),%d1
- andi.w &0x007f,%d1 # extract extension
-
- andi.l &0x00ff01ff,USER_FPSR(%a6)
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0
- lea FP_DST(%a6),%a1
-
-# maybe we can make these entry points ONLY the UNFL entry points of each routine.
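The two instructions that follow are the entire dispatch: the 7-bit extension field in d1 indexes tbl_unsupp, whose 32-bit entries are each routine's offset from the table itself, and the jsr lands at table + offset. The C equivalent is a function-pointer table indexed by the masked extension word; this is a sketch with hypothetical routine names (the 0x0e/0x1d slots follow the 68881 fsin/fcos extension encodings):

    #include <stdio.h>

    typedef void (*emul_fn)(const void *src, void *dst);

    static void emul_fsin(const void *src, void *dst)
    { (void)src; (void)dst; puts("fsin emulated"); }
    static void emul_fcos(const void *src, void *dst)
    { (void)src; (void)dst; puts("fcos emulated"); }

    /* one slot per 7-bit instruction extension, like tbl_unsupp */
    static const emul_fn tbl_unsupp_c[0x80] = {
        [0x0e] = emul_fsin,
        [0x1d] = emul_fcos,
    };

    static void dispatch(unsigned ext, const void *src, void *dst)
    {
        emul_fn fn = tbl_unsupp_c[ext & 0x7f]; /* cf. andi.w &0x007f,%d1 */
        if (fn)
            fn(src, dst);
    }

    int main(void) { dispatch(0x0e, 0, 0); return 0; }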
- mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
- jsr (tbl_unsupp.l,%pc,%d1.l*1)
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0
- bsr.l store_fpreg
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception. Since this is incorrect, we need to check
-# if our emulation, after re-doing the operation, decided that
-# no underflow was called for. We do these checks only in
-# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
-# special case will simply exit gracefully with the correct result.
-
-# the exceptional possibilities we have left ourselves with are ONLY underflow
-# and inexact. and, the inexact is such that underflow occurred and was disabled
-# but inexact was enabled.
- btst &unfl_bit,FPCR_ENABLE(%a6)
- bne.b funfl_unfl_on
-
-funfl_chkinex:
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.b funfl_inex_on
-
-funfl_exit:
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
- bra.l _fpsp_done
-
-# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
-# in fp1 (don't forget to save fp0). what to do now?
-# well, we simply have to go to _real_unfl()!
-funfl_unfl_on:
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception. Since this is incorrect, we check here to see
-# if our emulation, after re-doing the operation, decided that
-# no underflow was called for.
- btst &unfl_bit,FPSR_EXCEPT(%a6)
- beq.w funfl_chkinex
-
-funfl_unfl_on2:
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.w &0xe003,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_unfl
-
-# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
-# we must jump to real_inex().
-funfl_inex_on:
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception.
-# But, whether bogus or not, if inexact is enabled AND it occurred,
-# then we have to branch to real_inex.
-
- btst &inex2_bit,FPSR_EXCEPT(%a6)
- beq.w funfl_exit
-
-funfl_inex_on2:
-
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
-
- mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
- mov.w &0xe001,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_inex
-
-#######################################################################
-funfl_out:
-
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-
-# the src operand is definitely a NORM(!), so tag it as such
- mov.b &NORM,STAG(%a6) # set src optype tag
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0 # pass ptr to src operand
-
- bsr.l fout
-
- btst &unfl_bit,FPCR_ENABLE(%a6)
- bne.w funfl_unfl_on2
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.w funfl_inex_on2
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
-
- btst &0x7,(%sp) # is trace on?
- beq.l _fpsp_done # no
-
- fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
- mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
- bra.l _real_trace
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
-# Data Type" exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Unimplemented Data Type exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_{word,long}() - read instruction word/longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# load_fpn1() - load src operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
-# _real_inex() - "callout" to operating system inexact handler #
-# _fpsp_done() - "callout" for exit; work all done #
-# _real_trace() - "callout" for Trace enabled exception #
-# funimp_skew() - adjust fsave src ops to "incorrect" value #
-# _real_snan() - "callout" for SNAN exception #
-# _real_operr() - "callout" for OPERR exception #
-# _real_ovfl() - "callout" for OVFL exception #
-# _real_unfl() - "callout" for UNFL exception #
-# get_packed() - fetch packed operand from memory #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the "Unimp Data Type" stk frame #
-# - The fsave frame contains the src op (for UNNORM/DENORM) #
-# #
-# OUTPUT ************************************************************** #
-# If Inexact exception (opclass 3): #
-# - The system stack is changed to an Inexact exception stk frame #
-# If SNAN exception (opclass 3): #
-# - The system stack is changed to an SNAN exception stk frame #
-# If OPERR exception (opclass 3): #
-# - The system stack is changed to an OPERR exception stk frame #
-# If OVFL exception (opclass 3): #
-# - The system stack is changed to an OVFL exception stk frame #
-# If UNFL exception (opclass 3): #
-# - The system stack is changed to an UNFL exception stack frame #
-# If Trace exception enabled: #
-# - The system stack is changed to a Trace exception stack frame #
-# Else: (normal case) #
-# - 
Correct result has been stored as appropriate # -# # -# ALGORITHM *********************************************************** # -# Two main instruction types can enter here: (1) DENORM or UNNORM # -# unimplemented data types. These can be either opclass 0,2 or 3 # -# instructions, and (2) PACKED unimplemented data format instructions # -# also of opclasses 0,2, or 3. # -# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # -# operand from the fsave state frame and the dst operand (if dyadic) # -# from the FP register file. The instruction is then emulated by # -# choosing an emulation routine from a table of routines indexed by # -# instruction type. Once the instruction has been emulated and result # -# saved, then we check to see if any enabled exceptions resulted from # -# instruction emulation. If none, then we exit through the "callout" # -# _fpsp_done(). If there is an enabled FP exception, then we insert # -# this exception into the FPU in the fsave state frame and then exit # -# through _fpsp_done(). # -# PACKED opclass 0 and 2 is similar in how the instruction is # -# emulated and exceptions handled. The differences occur in how the # -# handler loads the packed op (by calling get_packed() routine) and # -# by the fact that a Trace exception could be pending for PACKED ops. # -# If a Trace exception is pending, then the current exception stack # -# frame is changed to a Trace exception stack frame and an exit is # -# made through _real_trace(). # -# For UNNORM/DENORM opclass 3, the actual move out to memory is # -# performed by calling the routine fout(). If no exception should occur # -# as the result of emulation, then an exit either occurs through # -# _fpsp_done() or through _real_trace() if a Trace exception is pending # -# (a Trace stack frame must be created here, too). If an FP exception # -# should occur, then we must create an exception stack frame of that # -# type and jump to either _real_snan(), _real_operr(), _real_inex(), # -# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # -# emulation is performed in a similar manner. # -# # -######################################################################### - -# -# (1) DENORM and UNNORM (unimplemented) data types: -# -# post-instruction -# ***************** -# * EA * -# pre-instruction * * -# ***************** ***************** -# * 0x0 * 0x0dc * * 0x3 * 0x0dc * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# -# (2) PACKED format (unsupported) opclasses two and three: -# ***************** -# * EA * -# * * -# ***************** -# * 0x2 * 0x0dc * -# ***************** -# * Next * -# * PC * -# ***************** -# * SR * -# ***************** -# - global _fpsp_unsupp -_fpsp_unsupp: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # save fp state - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - - btst &0x5,EXC_SR(%a6) # user or supervisor mode? - bne.b fu_s -fu_u: - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # save on stack - bra.b fu_cont -# if the exception is an opclass zero or two unimplemented data type -# exception, then the a7' calculated here is wrong since it doesn't -# stack an ea. however, we don't need an a7' for this case anyways. 
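-
-# in short: in user mode the instruction's a7 is the USP, so that is what
-# gets snapshotted into EXC_A7; in supervisor mode a7 is the system stack
-# pointer itself, taken here as the address just past the stacked ea field.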
-fu_s: - lea 0x4+EXC_EA(%a6),%a0 # load old a7' - mov.l %a0,EXC_A7(%a6) # save on stack - -fu_cont: - -# the FPIAR holds the "current PC" of the faulting instruction -# the FPIAR should be set correctly for ALL exceptions passing through -# this point. - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - -############################ - - clr.b SPCOND_FLG(%a6) # clear special condition flag - -# Separate opclass three (fpn-to-mem) ops since they have a different -# stack frame and protocol. - btst &0x5,EXC_CMDREG(%a6) # is it an fmove out? - bne.w fu_out # yes - -# Separate packed opclass two instructions. - bfextu EXC_CMDREG(%a6){&0:&6},%d0 - cmpi.b %d0,&0x13 - beq.w fu_in_pack - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field - andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - -# Opclass two w/ memory-to-fpn operation will have an incorrect extended -# precision format if the src format was single or double and the -# source data type was an INF, NAN, DENORM, or UNNORM - lea FP_SRC(%a6),%a0 # pass ptr to input - bsr.l fix_skewed_ops - -# we don't know whether the src operand or the dst operand (or both) is the -# UNNORM or DENORM. call the function that tags the operand type. if the -# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2 # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO - -fu_op2: - mov.b %d0,STAG(%a6) # save src optype tag - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - -# bit five of the fp extension word separates the monadic and dyadic operations -# at this point - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b fu_extract # monadic - cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? - beq.b fu_extract # yes, so it's monadic, too - - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_done # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -fu_op2_done: - mov.b %d0,DTAG(%a6) # save dst optype tag - -fu_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all dyadic ops -# OPERR : fsqrt(-NORM) -# OVFL : all except ftst,fcmp -# UNFL : all except ftst,fcmp -# DZ : fdiv -# INEX2 : all except ftst,fcmp -# INEX1 : none (packed doesn't go through here) -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set - bne.b fu_in_ena # some are enabled - -fu_in_cont: -# fcmp and ftst do not store any result. - mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension - andi.b &0x38,%d0 # extract bits 3-5 - cmpi.b %d0,&0x38 # is instr fcmp or ftst? 
- beq.b fu_in_exit # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l store_fpreg # store the result - -fu_in_exit: - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - bra.l _fpsp_done - -fu_in_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_in_exc # there is at least one set - -# -# No exceptions occurred that were also enabled. Now: -# -# if (OVFL && ovfl_disabled && inexact_enabled) { -# branch to _real_inex() (even if the result was exact!); -# } else { -# save the result in the proper fp reg (unless the op is fcmp or ftst); -# return; -# } -# - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.b fu_in_cont # no - -fu_in_ovflchk: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.b fu_in_cont # no - bra.w fu_in_exc_ovfl # go insert overflow frame - -# -# An exception occurred and that exception was enabled: -# -# shift enabled exception field into lo byte of d0; -# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || -# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { -# /* -# * this is the case where we must call _real_inex() now or else -# * there will be no other way to pass it the exceptional operand -# */ -# call _real_inex(); -# } else { -# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; -# } -# -fu_in_exc: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? (6) - bne.b fu_in_exc_exit # no - -# the enabled exception was inexact - btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? - bne.w fu_in_exc_unfl # yes - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? - bne.w fu_in_exc_ovfl # yes - -# here, we insert the correct fsave status value into the fsave frame for the -# corresponding exception. the operand in the fsave frame should be the original -# src operand. -fu_in_exc_exit: - mov.l %d0,-(%sp) # save d0 - bsr.l funimp_skew # skew sgl or dbl inputs - mov.l (%sp)+,%d0 # restore d0 - - mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 - - bra.l _fpsp_done - -tbl_except: - short 0xe000,0xe006,0xe004,0xe005 - short 0xe003,0xe002,0xe001,0xe001 - -fu_in_exc_unfl: - mov.w &0x4,%d0 - bra.b fu_in_exc_exit -fu_in_exc_ovfl: - mov.w &0x03,%d0 - bra.b fu_in_exc_exit - -# If the input operand to this operation was opclass two and a single -# or double precision denorm, inf, or nan, the operand needs to be -# "corrected" in order to have the proper equivalent extended precision -# number. - global fix_skewed_ops -fix_skewed_ops: - bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt - cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl? - beq.b fso_sgl # yes - cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl? - beq.b fso_dbl # yes - rts # no - -fso_sgl: - mov.w LOCAL_EX(%a0),%d0 # fetch src exponent - andi.w &0x7fff,%d0 # strip sign - cmpi.w %d0,&0x3f80 # is |exp| == $3f80? - beq.b fso_sgl_dnrm_zero # yes - cmpi.w %d0,&0x407f # no; is |exp| == $407f? 
- beq.b fso_infnan # yes - rts # no - -fso_sgl_dnrm_zero: - andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit - beq.b fso_zero # it's a skewed zero -fso_sgl_dnrm: -# here, we count on norm not to alter a0... - bsr.l norm # normalize mantissa - neg.w %d0 # -shft amt - addi.w &0x3f81,%d0 # adjust new exponent - andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent - or.w %d0,LOCAL_EX(%a0) # insert new exponent - rts - -fso_zero: - andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent - rts - -fso_infnan: - andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit - ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff - rts - -fso_dbl: - mov.w LOCAL_EX(%a0),%d0 # fetch src exponent - andi.w &0x7fff,%d0 # strip sign - cmpi.w %d0,&0x3c00 # is |exp| == $3c00? - beq.b fso_dbl_dnrm_zero # yes - cmpi.w %d0,&0x43ff # no; is |exp| == $43ff? - beq.b fso_infnan # yes - rts # no - -fso_dbl_dnrm_zero: - andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit - bne.b fso_dbl_dnrm # it's a skewed denorm - tst.l LOCAL_LO(%a0) # is it a zero? - beq.b fso_zero # yes -fso_dbl_dnrm: -# here, we count on norm not to alter a0... - bsr.l norm # normalize mantissa - neg.w %d0 # -shft amt - addi.w &0x3c01,%d0 # adjust new exponent - andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent - or.w %d0,LOCAL_EX(%a0) # insert new exponent - rts - -################################################################# - -# fmove out took an unimplemented data type exception. -# the src operand is in FP_SRC. Call _fout() to write out the result and -# to determine which exceptions, if any, to take. -fu_out: - -# Separate packed move outs from the UNNORM and DENORM move outs. - bfextu EXC_CMDREG(%a6){&3:&3},%d0 - cmpi.b %d0,&0x3 - beq.w fu_out_pack - cmpi.b %d0,&0x7 - beq.w fu_out_pack - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field. -# fmove out doesn't affect ccodes. - and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - -# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine -# call here. just figure out what it is... - mov.w FP_SRC_EX(%a6),%d0 # get exponent - andi.w &0x7fff,%d0 # strip sign - beq.b fu_out_denorm # it's a DENORM - - lea FP_SRC(%a6),%a0 - bsr.l unnorm_fix # yes; fix it - - mov.b %d0,STAG(%a6) - - bra.b fu_out_cont -fu_out_denorm: - mov.b &DENORM,STAG(%a6) -fu_out_cont: - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - mov.l (%a6),EXC_A6(%a6) # in case a6 changes - bsr.l fout # call fmove out routine - -# Exceptions in order of precedence: -# BSUN : none -# SNAN : none -# OPERR : fmove.{b,w,l} out of large UNNORM -# OVFL : fmove.{s,d} -# UNFL : fmove.{s,d,x} -# DZ : none -# INEX2 : all -# INEX1 : none (packed doesn't travel through here) - -# determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_out_ena # some are enabled - -fu_out_done: - - mov.l EXC_A6(%a6),(%a6) # in case a6 changed - -# on extended precision opclass three instructions using pre-decrement or -# post-increment addressing mode, the address register is not updated. is the -# address register was the stack pointer used from user mode, then let's update -# it here. if it was used from supervisor mode, then we have to handle this -# as a special case. 
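-
-# the decision made below, as a C-style sketch (hypothetical names, not
-# the real frame layout):
-#
-#	if (!supervisor)
-#		usp = exc_a7;		/* just write the updated a7 back */
-#	else if (spcond_flg == MDA7)	/* "fmov.x fpN,-(a7)" from super */
-#		shift_frame_down_12();	/* make room for the result */
-#	/* else: no stack pointer fixup is needed */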
- btst &0x5,EXC_SR(%a6) - bne.b fu_out_done_s - - mov.l EXC_A7(%a6),%a0 # restore a7 - mov.l %a0,%usp - -fu_out_done_cont: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - btst &0x7,(%sp) # is trace on? - bne.b fu_out_trace # yes - - bra.l _fpsp_done - -# is the ea mode pre-decrement of the stack pointer from supervisor mode? -# ("fmov.x fpm,-(a7)") if so, -fu_out_done_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.b fu_out_done_cont - -# the extended precision result is still in fp0. but, we need to save it -# somewhere on the stack until we can copy it to its final resting place. -# here, we're counting on the top of the stack to be the old place-holders -# for fp0/fp1 which have already been restored. that way, we can write -# over those destinations with the shifted stack frame. - fmovm.x &0x80,FP_SRC(%a6) # put answer on stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - btst &0x7,(%sp) - bne.b fu_out_trace - - bra.l _fpsp_done - -fu_out_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_out_exc # there is at least one set - -# no exceptions were set. -# if a disabled overflow occurred and inexact was enabled but the result -# was exact, then a branch to _real_inex() is made. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.w fu_out_done # no - -fu_out_ovflchk: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.w fu_out_done # no - bra.w fu_inex # yes - -# -# The fp move out that took the "Unimplemented Data Type" exception was -# being traced. Since the stack frames are similar, get the "current" PC -# from FPIAR and put it in the trace stack frame then jump to _real_trace(). -# -# UNSUPP FRAME TRACE FRAME -# ***************** ***************** -# * EA * * Current * -# * * * PC * -# ***************** ***************** -# * 0x3 * 0x0dc * * 0x2 * 0x024 * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# -fu_out_trace: - mov.w &0x2024,0x6(%sp) - fmov.l %fpiar,0x8(%sp) - bra.l _real_trace - -# an exception occurred and that exception was enabled. -fu_out_exc: - subi.l &24,%d0 # fix offset to be 0-8 - -# we don't mess with the existing fsave frame. just re-insert it and -# jump to the "_real_{}()" handler... 
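-
-# the jump below uses the same pc-relative trick as tbl_unsupp, but with
-# 16-bit entries: each short holds "handler - tbl_fu_out", and the jmp
-# adds the fetched word back to the table base. as a sketch:
-#
-#	short tbl[8];			/* entry = handler - tbl */
-#	int idx = prio;			/* 0-7, from the bfffo above */
-#	void (*h)(void) = (void (*)(void))((char *)tbl + tbl[idx]);
-#	h();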
- mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 - jmp (tbl_fu_out.b,%pc,%d0.w*1) - - swbeg &0x8 -tbl_fu_out: - short tbl_fu_out - tbl_fu_out # BSUN can't happen - short tbl_fu_out - tbl_fu_out # SNAN can't happen - short fu_operr - tbl_fu_out # OPERR - short fu_ovfl - tbl_fu_out # OVFL - short fu_unfl - tbl_fu_out # UNFL - short tbl_fu_out - tbl_fu_out # DZ can't happen - short fu_inex - tbl_fu_out # INEX2 - short tbl_fu_out - tbl_fu_out # INEX1 won't make it here - -# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just -# frestore it. -fu_snan: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 - mov.w &0xe006,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) - - unlk %a6 - - - bra.l _real_snan - -fu_operr: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe004,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) - - unlk %a6 - - - bra.l _real_operr - -fu_ovfl: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4 - mov.w &0xe005,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - bra.l _real_ovfl - -# underflow can happen for extended precision. extended precision opclass -# three instruction exceptions don't update the stack pointer. so, if the -# exception occurred from user mode, then simply update a7 and exit normally. -# if the exception occurred from supervisor mode, check if -fu_unfl: - mov.l EXC_A6(%a6),(%a6) # restore a6 - - btst &0x5,EXC_SR(%a6) - bne.w fu_unfl_s - - mov.l EXC_A7(%a6),%a0 # restore a7 whether we need - mov.l %a0,%usp # to or not... - -fu_unfl_cont: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc - mov.w &0xe003,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - bra.l _real_unfl - -fu_unfl_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the mode -(sp)? - bne.b fu_unfl_cont - -# the extended precision result is still in fp0. but, we need to save it -# somewhere on the stack until we can copy it to its final resting place -# (where the exc frame is currently). make sure it's not at the top of the -# frame or it will get overwritten when the exc stack frame is shifted "down". 
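-
-# what follows slides the exception frame 12 bytes toward lower addresses
-# and writes the 12-byte extended result where the frame used to sit,
-# completing the -(a7) store the hardware never performed. in C-like
-# terms (hypothetical names):
-#
-#	memmove(frame - 12, frame, frame_size);	/* shift frame "down" */
-#	memcpy(frame, &result, 12);	/* result lands in the old slot */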
- fmovm.x &0x80,FP_SRC(%a6) # put answer on stack - fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc - mov.w &0xe003,2+FP_DST(%a6) - - frestore FP_DST(%a6) # restore EXOP - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - bra.l _real_unfl - -# fmove in and out enter here. -fu_inex: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 - mov.w &0xe001,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - - bra.l _real_inex - -######################################################################### -######################################################################### -fu_in_pack: - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field - andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bsr.l get_packed # fetch packed src operand - - lea FP_SRC(%a6),%a0 # pass ptr to src - bsr.l set_tag_x # set src optype tag - - mov.b %d0,STAG(%a6) # save src optype tag - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - -# bit five of the fp extension word separates the monadic and dyadic operations -# at this point - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b fu_extract_p # monadic - cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? - beq.b fu_extract_p # yes, so it's monadic, too - - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_done_p # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -fu_op2_done_p: - mov.b %d0,DTAG(%a6) # save dst optype tag - -fu_extract_p: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all dyadic ops -# OPERR : fsqrt(-NORM) -# OVFL : all except ftst,fcmp -# UNFL : all except ftst,fcmp -# DZ : fdiv -# INEX2 : all except ftst,fcmp -# INEX1 : all -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_in_ena_p # some are enabled - -fu_in_cont_p: -# fcmp and ftst do not store any result. 
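-
-# note: the fcmp/ftst check below keys off extension-word opcode bits 3-5
-# all being set; masking with 0x38 and comparing to 0x38 catches those
-# two, and they are exactly the ops that must not write a result back.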
- mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension - andi.b &0x38,%d0 # extract bits 3-5 - cmpi.b %d0,&0x38 # is instr fcmp or ftst? - beq.b fu_in_exit_p # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l store_fpreg # store the result - -fu_in_exit_p: - - btst &0x5,EXC_SR(%a6) # user or supervisor? - bne.w fu_in_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_in_exit_cont_p: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# the exception occurred in supervisor mode. check to see if the -# addressing mode was (a7)+. if so, we'll need to shift the -# stack frame "up". -fu_in_exit_s_p: - btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+ - beq.b fu_in_exit_cont_p # no - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - -# shift the stack frame "up". we don't really care about the field. - mov.l 0x4(%sp),0x10(%sp) - mov.l 0x0(%sp),0xc(%sp) - add.l &0xc,%sp - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -fu_in_ena_p: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_in_exc_p # at least one was set - -# -# No exceptions occurred that were also enabled. Now: -# -# if (OVFL && ovfl_disabled && inexact_enabled) { -# branch to _real_inex() (even if the result was exact!); -# } else { -# save the result in the proper fp reg (unless the op is fcmp or ftst); -# return; -# } -# - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.w fu_in_cont_p # no - -fu_in_ovflchk_p: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.w fu_in_cont_p # no - bra.w fu_in_exc_ovfl_p # do _real_inex() now - -# -# An exception occurred and that exception was enabled: -# -# shift enabled exception field into lo byte of d0; -# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || -# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { -# /* -# * this is the case where we must call _real_inex() now or else -# * there will be no other way to pass it the exceptional operand -# */ -# call _real_inex(); -# } else { -# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; -# } -# -fu_in_exc_p: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? (6 or 7) - blt.b fu_in_exc_exit_p # no - -# the enabled exception was inexact - btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? - bne.w fu_in_exc_unfl_p # yes - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? - bne.w fu_in_exc_ovfl_p # yes - -# here, we insert the correct fsave status value into the fsave frame for the -# corresponding exception. the operand in the fsave frame should be the original -# src operand. -# as a reminder for future predicted pain and agony, we are passing in fsave the -# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs. -# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!! -fu_in_exc_exit_p: - btst &0x5,EXC_SR(%a6) # user or supervisor? 
- bne.w fu_in_exc_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_in_exc_exit_cont_p: - mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 - - btst &0x7,(%sp) # is trace enabled? - bne.w fu_trace_p # yes - - bra.l _fpsp_done - -tbl_except_p: - short 0xe000,0xe006,0xe004,0xe005 - short 0xe003,0xe002,0xe001,0xe001 - -fu_in_exc_ovfl_p: - mov.w &0x3,%d0 - bra.w fu_in_exc_exit_p - -fu_in_exc_unfl_p: - mov.w &0x4,%d0 - bra.w fu_in_exc_exit_p - -fu_in_exc_exit_s_p: - btst &mia7_bit,SPCOND_FLG(%a6) - beq.b fu_in_exc_exit_cont_p - - mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 # unravel stack frame - -# shift stack frame "up". who cares about field. - mov.l 0x4(%sp),0x10(%sp) - mov.l 0x0(%sp),0xc(%sp) - add.l &0xc,%sp - - btst &0x7,(%sp) # is trace on? - bne.b fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# -# The opclass two PACKED instruction that took an "Unimplemented Data Type" -# exception was being traced. Make the "current" PC the FPIAR and put it in the -# trace stack frame then jump to _real_trace(). -# -# UNSUPP FRAME TRACE FRAME -# ***************** ***************** -# * EA * * Current * -# * * * PC * -# ***************** ***************** -# * 0x2 * 0x0dc * * 0x2 * 0x024 * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -fu_trace_p: - mov.w &0x2024,0x6(%sp) - fmov.l %fpiar,0x8(%sp) - - bra.l _real_trace - -######################################################### -######################################################### -fu_out_pack: - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field. -# fmove out doesn't affect ccodes. - and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 - bsr.l load_fpn1 - -# unlike other opclass 3, unimplemented data type exceptions, packed must be -# able to detect all operand types. - lea FP_SRC(%a6),%a0 - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_p # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO - -fu_op2_p: - mov.b %d0,STAG(%a6) # save src optype tag - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - mov.l (%a6),EXC_A6(%a6) # in case a6 changes - bsr.l fout # call fmove out routine - -# Exceptions in order of precedence: -# BSUN : no -# SNAN : yes -# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits)) -# OVFL : no -# UNFL : no -# DZ : no -# INEX2 : yes -# INEX1 : no - -# determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. 
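-
-# the ENABLE/EXCEPT intersection below is the priority scan used all
-# through this handler; in C (bit 7 = BSUN down to bit 0 = INEX1):
-#
-#	unsigned char pend = fpcr_enable & fpsr_except;
-#	int prio = -1;			/* -1: nothing enabled is pending */
-#	for (int b = 7; b >= 0; b--)	/* bfffo: scan from the MSB */
-#		if (pend & (1 << b)) { prio = 7 - b; break; }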
- mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_out_ena_p # some are enabled - -fu_out_exit_p: - mov.l EXC_A6(%a6),(%a6) # restore a6 - - btst &0x5,EXC_SR(%a6) # user or supervisor? - bne.b fu_out_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_out_exit_cont_p: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# the exception occurred in supervisor mode. check to see if the -# addressing mode was -(a7). if so, we'll need to shift the -# stack frame "down". -fu_out_exit_s_p: - btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7) - beq.b fu_out_exit_cont_p # no - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - btst &0x7,(%sp) - bne.w fu_trace_p - - bra.l _fpsp_done - -fu_out_ena_p: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - beq.w fu_out_exit_p - - mov.l EXC_A6(%a6),(%a6) # restore a6 - -# an exception occurred and that exception was enabled. -# the only exception possible on packed move out are INEX, OPERR, and SNAN. -fu_out_exc_p: - cmpi.b %d0,&0x1a - bgt.w fu_inex_p2 - beq.w fu_operr_p - -fu_snan_p: - btst &0x5,EXC_SR(%a6) - bne.b fu_snan_s_p - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_snan - -fu_snan_s_p: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_snan - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe006,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_snan - -fu_operr_p: - btst &0x5,EXC_SR(%a6) - bne.w fu_operr_p_s - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_operr - -fu_operr_p_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_operr - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. 
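-
-# for reference, the voff/status pairs stuffed on these packed-out paths:
-# SNAN -> voff 0xd8, fsave status 0xe006; OPERR -> 0xd0, 0xe004;
-# INEX -> 0xc4, 0xe001. the "vector offset = 0xd0" remark beside the
-# 0x30d8 store in the SNAN path above looks like a stale copy of the
-# OPERR comment; the value actually written there is the SNAN offset 0xd8.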
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe004,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_operr - -fu_inex_p2: - btst &0x5,EXC_SR(%a6) - bne.w fu_inex_s_p2 - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_inex - -fu_inex_s_p2: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_inex - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 - mov.w &0xe001,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_inex - -######################################################################### - -# -# if we're stuffing a source operand back into an fsave frame then we -# have to make sure that for single or double source operands that the -# format stuffed is as weird as the hardware usually makes it. -# - global funimp_skew -funimp_skew: - bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier - cmpi.b %d0,&0x1 # was src sgl? - beq.b funimp_skew_sgl # yes - cmpi.b %d0,&0x5 # was src dbl? 
- beq.b funimp_skew_dbl # yes - rts - -funimp_skew_sgl: - mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent - andi.w &0x7fff,%d0 # strip sign - beq.b funimp_skew_sgl_not - cmpi.w %d0,&0x3f80 - bgt.b funimp_skew_sgl_not - neg.w %d0 # make exponent negative - addi.w &0x3f81,%d0 # find amt to shift - mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man) - lsr.l %d0,%d1 # shift it - bset &31,%d1 # set j-bit - mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man) - andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent - ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent -funimp_skew_sgl_not: - rts - -funimp_skew_dbl: - mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent - andi.w &0x7fff,%d0 # strip sign - beq.b funimp_skew_dbl_not - cmpi.w %d0,&0x3c00 - bgt.b funimp_skew_dbl_not - - tst.b FP_SRC_EX(%a6) # make "internal format" - smi.b 0x2+FP_SRC(%a6) - mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign - clr.l %d0 # clear g,r,s - lea FP_SRC(%a6),%a0 # pass ptr to src op - mov.w &0x3c01,%d1 # pass denorm threshold - bsr.l dnrm_lp # denorm it - mov.w &0x3c00,%d0 # new exponent - tst.b 0x2+FP_SRC(%a6) # is sign set? - beq.b fss_dbl_denorm_done # no - bset &15,%d0 # set sign -fss_dbl_denorm_done: - bset &0x7,FP_SRC_HI(%a6) # set j-bit - mov.w %d0,FP_SRC_EX(%a6) # insert new exponent -funimp_skew_dbl_not: - rts - -######################################################################### - global _mem_write2 -_mem_write2: - btst &0x5,EXC_SR(%a6) - beq.l _dmem_write - mov.l 0x0(%a0),FP_DST_EX(%a6) - mov.l 0x4(%a0),FP_DST_HI(%a6) - mov.l 0x8(%a0),FP_DST_LO(%a6) - clr.l %d1 - rts - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented # -# effective address" exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Unimplemented Effective Address exception in an operating # -# system. 
#
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
-# decbin() - convert packed data to FP binary data #
-# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
-# _real_access() - "callout" for access error exception #
-# _mem_read() - read extended immediate operand from memory #
-# _fpsp_done() - "callout" for exit; work all done #
-# _real_trace() - "callout" for Trace enabled exception #
-# fmovm_dynamic() - emulate dynamic fmovm instruction #
-# fmovm_ctrl() - emulate fmovm control instruction #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the "Unimplemented <ea>" stk frame #
-# #
-# OUTPUT ************************************************************** #
-# If access error: #
-# - The system stack is changed to an access error stack frame #
-# If FPU disabled: #
-# - The system stack is changed to an FPU disabled stack frame #
-# If Trace exception enabled: #
-# - The system stack is changed to a Trace exception stack frame #
-# Else: (normal case) #
-# - None (correct result has been stored as appropriate) #
-# #
-# ALGORITHM *********************************************************** #
-# This exception handles 3 types of operations: #
-# (1) FP Instructions using extended precision or packed immediate #
-# addressing mode. #
-# (2) The "fmovm.x" instruction w/ dynamic register specification. #
-# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
-# #
-# For immediate data operations, the data is read in w/ a #
-# _mem_read() "callout", converted to FP binary (if packed), and used #
-# as the source operand to the instruction specified by the instruction #
-# word. If no FP exception should be reported as a result of the #
-# emulation, then the result is stored to the destination register and #
-# the handler exits through _fpsp_done(). If an enabled exc has been #
-# signalled as a result of emulation, then an fsave state frame #
-# corresponding to the FP exception type must be entered into the 060 #
-# FPU before exiting. In either the enabled or disabled cases, we #
-# must also check if a Trace exception is pending, in which case, we #
-# must create a Trace exception stack frame from the current exception #
-# stack frame. If no Trace is pending, we simply exit through #
-# _fpsp_done(). #
-# For "fmovm.x", call the routine fmovm_dynamic() which will #
-# decode and emulate the instruction. No FP exceptions can be pending #
-# as a result of this operation emulation. A Trace exception can be #
-# pending, though, which means the current stack frame must be changed #
-# to a Trace stack frame and an exit made through _real_trace(). #
-# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
-# was executed from supervisor mode, this handler must store the FP #
-# register file values to the system stack by itself since #
-# fmovm_dynamic() can't handle this. A normal exit is made through #
-# _fpsp_done(). #
-# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.
# -# Again, a Trace exception may be pending and an exit made through # -# _real_trace(). Else, a normal exit is made through _fpsp_done(). # -# # -# Before any of the above is attempted, it must be checked to # -# see if the FPU is disabled. Since the "Unimp " exception is taken # -# before the "FPU disabled" exception, but the "FPU disabled" exception # -# has higher priority, we check the disabled bit in the PCR. If set, # -# then we must create an 8 word "FPU disabled" exception stack frame # -# from the current 4 word exception stack frame. This includes # -# reproducing the effective address of the instruction to put on the # -# new stack frame. # -# # -# In the process of all emulation work, if a _mem_read() # -# "callout" returns a failing result indicating an access error, then # -# we must create an access error stack frame from the current stack # -# frame. This information includes a faulting address and a fault- # -# status-longword. These are created within this handler. # -# # -######################################################################### - - global _fpsp_effadd -_fpsp_effadd: - -# This exception type takes priority over the "Line F Emulator" -# exception. Therefore, the FPU could be disabled when entering here. -# So, we must check to see if it's disabled and handle that case separately. - mov.l %d0,-(%sp) # save d0 - movc %pcr,%d0 # load proc cr - btst &0x1,%d0 # is FPU disabled? - bne.w iea_disabled # yes - mov.l (%sp)+,%d0 # restore d0 - - link %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# PC of instruction that took the exception is the PC in the frame - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - -######################################################################### - - tst.w %d0 # is operation fmovem? - bmi.w iea_fmovm # yes - -# -# here, we will have: -# fabs fdabs fsabs facos fmod -# fadd fdadd fsadd fasin frem -# fcmp fatan fscale -# fdiv fddiv fsdiv fatanh fsin -# fint fcos fsincos -# fintrz fcosh fsinh -# fmove fdmove fsmove fetox ftan -# fmul fdmul fsmul fetoxm1 ftanh -# fneg fdneg fsneg fgetexp ftentox -# fsgldiv fgetman ftwotox -# fsglmul flog10 -# fsqrt flog2 -# fsub fdsub fssub flogn -# ftst flognp1 -# which can all use f.{x,p} -# so, now it's immediate data extended precision AND PACKED FORMAT! -# -iea_op: - andi.l &0x00ff00ff,USER_FPSR(%a6) - - btst &0xa,%d0 # is src fmt x or p? - bne.b iea_op_pack # packed - - - mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to # - lea FP_SRC(%a6),%a1 # pass: ptr to super addr - mov.l &0xc,%d0 # pass: 12 bytes - bsr.l _imem_read # read extended immediate - - tst.l %d1 # did ifetch fail? - bne.w iea_iacc # yes - - bra.b iea_op_setsrc - -iea_op_pack: - - mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to # - lea FP_SRC(%a6),%a1 # pass: ptr to super dst - mov.l &0xc,%d0 # pass: 12 bytes - bsr.l _imem_read # read packed operand - - tst.l %d1 # did ifetch fail? - bne.w iea_iacc # yes - -# The packed operand is an INF or a NAN if the exponent field is all ones. - bfextu FP_SRC(%a6){&1:&15},%d0 # get exp - cmpi.w %d0,&0x7fff # INF or NAN? 
- beq.b iea_op_setsrc # operand is an INF or NAN - -# The packed operand is a zero if the mantissa is all zero, else it's -# a normal packed op. - mov.b 3+FP_SRC(%a6),%d0 # get byte 4 - andi.b &0x0f,%d0 # clear all but last nybble - bne.b iea_op_gp_not_spec # not a zero - tst.l FP_SRC_HI(%a6) # is lw 2 zero? - bne.b iea_op_gp_not_spec # not a zero - tst.l FP_SRC_LO(%a6) # is lw 3 zero? - beq.b iea_op_setsrc # operand is a ZERO -iea_op_gp_not_spec: - lea FP_SRC(%a6),%a0 # pass: ptr to packed op - bsr.l decbin # convert to extended - fmovm.x &0x80,FP_SRC(%a6) # make this the srcop - -iea_op_setsrc: - addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer - -# FP_SRC now holds the src operand. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - mov.b %d0,STAG(%a6) # could be ANYTHING!!! - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b iea_op_getdst # no - bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO - mov.b %d0,STAG(%a6) # set new optype tag -iea_op_getdst: - clr.b STORE_FLG(%a6) # clear "store result" boolean - - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b iea_op_extract # monadic - btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? - bne.b iea_op_spec # yes - -iea_op_loaddst: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno - bsr.l load_fpn2 # load dst operand - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - mov.b %d0,DTAG(%a6) # could be ANYTHING!!! - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b iea_op_extract # no - bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO - mov.b %d0,DTAG(%a6) # set new optype tag - bra.b iea_op_extract - -# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic -iea_op_spec: - btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? - beq.b iea_op_extract # yes -# now, we're left with ftst and fcmp. so, first let's tag them so that they don't -# store a result. then, only fcmp will branch back and pick up a dst operand. - st STORE_FLG(%a6) # don't store a final result - btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? - beq.b iea_op_loaddst # yes - -iea_op_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x007f,%d1 # extract extension - - fmov.l &0x0,%fpcr - fmov.l &0x0,%fpsr - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all operations -# OPERR : all reg-reg or mem-reg operations that can normally operr -# OVFL : same as OPERR -# UNFL : same as OPERR -# DZ : same as OPERR -# INEX2 : same as OPERR -# INEX1 : all packed immediate operations -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.b iea_op_ena # some are enabled - -# now, we save the result, unless, of course, the operation was ftst or fcmp. -# these don't save results. -iea_op_save: - tst.b STORE_FLG(%a6) # does this op store a result? 
- bne.b iea_op_exit1 # exit with no frestore - -iea_op_store: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno - bsr.l store_fpreg # store the result - -iea_op_exit1: - mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel the frame - - btst &0x7,(%sp) # is trace on? - bne.w iea_op_trace # yes - - bra.l _fpsp_done # exit to os - -iea_op_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b iea_op_exc # at least one was set - -# no exception occurred. now, did a disabled, exact overflow occur with inexact -# enabled? if so, then we have to stuff an overflow frame into the FPU. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - beq.b iea_op_save - -iea_op_ovfl: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - beq.b iea_op_store # no - bra.b iea_op_exc_ovfl # yes - -# an enabled exception occurred. we have to insert the exception type back into -# the machine. -iea_op_exc: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? - bne.b iea_op_exc_force # no - -# the enabled exception was inexact. so, if it occurs with an overflow -# or underflow that was disabled, then we have to force an overflow or -# underflow frame. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - bne.b iea_op_exc_ovfl # yes - btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? - bne.b iea_op_exc_unfl # yes - -iea_op_exc_force: - mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) - bra.b iea_op_exit2 # exit with frestore - -tbl_iea_except: - short 0xe002, 0xe006, 0xe004, 0xe005 - short 0xe003, 0xe002, 0xe001, 0xe001 - -iea_op_exc_ovfl: - mov.w &0xe005,2+FP_SRC(%a6) - bra.b iea_op_exit2 - -iea_op_exc_unfl: - mov.w &0xe003,2+FP_SRC(%a6) - -iea_op_exit2: - mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore exceptional state - - unlk %a6 # unravel the frame - - btst &0x7,(%sp) # is trace on? - bne.b iea_op_trace # yes - - bra.l _fpsp_done # exit to os - -# -# The opclass two instruction that took an "Unimplemented Effective Address" -# exception was being traced. Make the "current" PC the FPIAR and put it in -# the trace stack frame then jump to _real_trace(). 
-# -# UNIMP EA FRAME TRACE FRAME -# ***************** ***************** -# * 0x0 * 0x0f0 * * Current * -# ***************** * PC * -# * Current * ***************** -# * PC * * 0x2 * 0x024 * -# ***************** ***************** -# * SR * * Next * -# ***************** * PC * -# ***************** -# * SR * -# ***************** -iea_op_trace: - mov.l (%sp),-(%sp) # shift stack frame "down" - mov.w 0x8(%sp),0x4(%sp) - mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 - fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR - - bra.l _real_trace - -######################################################################### -iea_fmovm: - btst &14,%d0 # ctrl or data reg - beq.w iea_fmovm_ctrl - -iea_fmovm_data: - - btst &0x5,EXC_SR(%a6) # user or supervisor mode - bne.b iea_fmovm_data_s - -iea_fmovm_data_u: - mov.l %usp,%a0 - mov.l %a0,EXC_A7(%a6) # store current a7 - bsr.l fmovm_dynamic # do dynamic fmovm - mov.l EXC_A7(%a6),%a0 # load possibly new a7 - mov.l %a0,%usp # update usp - bra.w iea_fmovm_exit - -iea_fmovm_data_s: - clr.b SPCOND_FLG(%a6) - lea 0x2+EXC_VOFF(%a6),%a0 - mov.l %a0,EXC_A7(%a6) - bsr.l fmovm_dynamic # do dynamic fmovm - - cmpi.b SPCOND_FLG(%a6),&mda7_flg - beq.w iea_fmovm_data_predec - cmpi.b SPCOND_FLG(%a6),&mia7_flg - bne.w iea_fmovm_exit - -# right now, d0 = the size. -# the data has been fetched from the supervisor stack, but we have not -# incremented the stack pointer by the appropriate number of bytes. -# do it here. -iea_fmovm_data_postinc: - btst &0x7,EXC_SR(%a6) - bne.b iea_fmovm_data_pi_trace - - mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) - mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) - mov.w &0x00f0,(EXC_VOFF,%a6,%d0) - - lea (EXC_SR,%a6,%d0),%a0 - mov.l %a0,EXC_SR(%a6) - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - mov.l (%sp)+,%sp - bra.l _fpsp_done - -iea_fmovm_data_pi_trace: - mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) - mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) - mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) - mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) - - lea (EXC_SR-0x4,%a6,%d0),%a0 - mov.l %a0,EXC_SR(%a6) - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - mov.l (%sp)+,%sp - bra.l _real_trace - -# right now, d1 = size and d0 = the strg. -iea_fmovm_data_predec: - mov.b %d1,EXC_VOFF(%a6) # store strg - mov.b %d0,0x1+EXC_VOFF(%a6) # store size - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),-(%sp) # make a copy of a6 - mov.l %d0,-(%sp) # save d0 - mov.l %d1,-(%sp) # save d1 - mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC - - clr.l %d0 - mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size - neg.l %d0 # get negative of size - - btst &0x7,EXC_SR(%a6) # is trace enabled? 
- beq.b iea_fmovm_data_p2 - - mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) - mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) - mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) - mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) - - pea (%a6,%d0) # create final sp - bra.b iea_fmovm_data_p3 - -iea_fmovm_data_p2: - mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) - mov.l (%sp)+,(EXC_PC,%a6,%d0) - mov.w &0x00f0,(EXC_VOFF,%a6,%d0) - - pea (0x4,%a6,%d0) # create final sp - -iea_fmovm_data_p3: - clr.l %d1 - mov.b EXC_VOFF(%a6),%d1 # fetch strg - - tst.b %d1 - bpl.b fm_1 - fmovm.x &0x80,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_1: - lsl.b &0x1,%d1 - bpl.b fm_2 - fmovm.x &0x40,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_2: - lsl.b &0x1,%d1 - bpl.b fm_3 - fmovm.x &0x20,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_3: - lsl.b &0x1,%d1 - bpl.b fm_4 - fmovm.x &0x10,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_4: - lsl.b &0x1,%d1 - bpl.b fm_5 - fmovm.x &0x08,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_5: - lsl.b &0x1,%d1 - bpl.b fm_6 - fmovm.x &0x04,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_6: - lsl.b &0x1,%d1 - bpl.b fm_7 - fmovm.x &0x02,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_7: - lsl.b &0x1,%d1 - bpl.b fm_end - fmovm.x &0x01,(0x4+0x8,%a6,%d0) -fm_end: - mov.l 0x4(%sp),%d1 - mov.l 0x8(%sp),%d0 - mov.l 0xc(%sp),%a6 - mov.l (%sp)+,%sp - - btst &0x7,(%sp) # is trace enabled? - beq.l _fpsp_done - bra.l _real_trace - -######################################################################### -iea_fmovm_ctrl: - - bsr.l fmovm_ctrl # load ctrl regs - -iea_fmovm_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - btst &0x7,EXC_SR(%a6) # is trace on? - bne.b iea_fmovm_trace # yes - - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC - - unlk %a6 # unravel the frame - - bra.l _fpsp_done # exit to os - -# -# The control reg instruction that took an "Unimplemented Effective Address" -# exception was being traced. The "Current PC" for the trace frame is the -# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR. -# After fixing the stack frame, jump to _real_trace(). -# -# UNIMP EA FRAME TRACE FRAME -# ***************** ***************** -# * 0x0 * 0x0f0 * * Current * -# ***************** * PC * -# * Current * ***************** -# * PC * * 0x2 * 0x024 * -# ***************** ***************** -# * SR * * Next * -# ***************** * PC * -# ***************** -# * SR * -# ***************** -# this ain't a pretty solution, but it works: -# -restore a6 (not with unlk) -# -shift stack frame down over where old a6 used to be -# -add LOCAL_SIZE to stack pointer -iea_fmovm_trace: - mov.l (%a6),%a6 # restore frame pointer - mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) - mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) - mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) - mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024 - add.l &LOCAL_SIZE,%sp # clear stack frame - - bra.l _real_trace - -######################################################################### -# The FPU is disabled and so we should really have taken the "Line -# F Emulator" exception. So, here we create an 8-word stack frame -# from our 4-word stack frame. This means we must calculate the length -# of the faulting instruction to get the "next PC". This is trivial for -# immediate operands but requires some extra work for fmovm dynamic -# which can use most addressing modes. 
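The length computation that follows is compact but easy to misread, so here is a rough C model of the same decision tree. This sketch is not part of the original package: the names fline_insn_len and fmovm_dynamic_len are hypothetical, and insn is assumed to hold the opword in its upper 16 bits and the extension word in its lower 16 bits, exactly as _imem_read_long leaves them in %d0.

#include <stdint.h>

/* Hypothetical stand-in for the length by-product of fmovm_calc_ea(). */
extern uint32_t fmovm_dynamic_len(uint32_t insn);

/* insn = (opword << 16) | extension word */
static uint32_t fline_insn_len(uint32_t insn)
{
	uint16_t ext = (uint16_t)insn;

	if (!(ext & 0x8000))		/* tst.w %d0; bpl -> not an fmovm */
		return 0x10;		/* ext-prec immediate: 16 bytes total */

	if (ext & 0x4000)		/* btst &0xe set -> fmovm.x dynamic */
		return fmovm_dynamic_len(insn);

	/* fmovm.l of control registers: 12 bytes, 16 if all three move */
	return (((ext >> 10) & 0x7) == 0x7) ? 0x10 : 0xc;
}

The dynamic fmovm.x case cannot be computed from the words alone, which is why the assembly goes through the full effective-address calculation and subtracts the two instruction pointers afterward.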
-iea_disabled: - mov.l (%sp)+,%d0 # restore d0 - - link %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - -# PC of instruction that took the exception is the PC in the frame - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - - tst.w %d0 # is instr fmovm? - bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. Therefore, -# the total instruction length is 16 bytes. -iea_dis_immed: - mov.l &0x10,%d0 # 16 bytes of instruction - bra.b iea_dis_cont -iea_dis_fmovm: - btst &0xe,%d0 # is instr fmovm ctrl - bne.b iea_dis_fmovm_data # no -# the instruction is a fmovm.l with 2 or 3 registers. - bfextu %d0{&19:&3},%d1 - mov.l &0xc,%d0 - cmpi.b %d1,&0x7 # move all regs? - bne.b iea_dis_cont - addq.l &0x4,%d0 - bra.b iea_dis_cont -# the instruction is an fmovm.x dynamic which can use many addressing -# modes and thus can have several different total instruction lengths. -# call fmovm_calc_ea which will go through the ea calc process and, -# as a by-product, will tell us how long the instruction is. -iea_dis_fmovm_data: - clr.l %d0 - bsr.l fmovm_calc_ea - mov.l EXC_EXTWPTR(%a6),%d0 - sub.l EXC_PC(%a6),%d0 -iea_dis_cont: - mov.w %d0,EXC_VOFF(%a6) # store stack shift value - - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - -# here, we actually create the 8-word frame from the 4-word frame, -# with the "next PC" as additional info. -# the <ea> field is left as undefined. - subq.l &0x8,%sp # make room for new stack - mov.l %d0,-(%sp) # save d0 - mov.w 0xc(%sp),0x4(%sp) # move SR - mov.l 0xe(%sp),0x6(%sp) # move Current PC - clr.l %d0 - mov.w 0x12(%sp),%d0 - mov.l 0x6(%sp),0x10(%sp) # move Current PC - add.l %d0,0x6(%sp) # make Next PC - mov.w &0x402c,0xa(%sp) # insert offset,frame format - mov.l (%sp)+,%d0 # restore d0 - - bra.l _real_fpu_disabled - -########## - -iea_iacc: - movc %pcr,%d0 - btst &0x1,%d0 - bne.b iea_iacc_cont - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack -iea_iacc_cont: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - subq.w &0x8,%sp # make stack frame bigger - mov.l 0x8(%sp),(%sp) # store SR,hi(PC) - mov.w 0xc(%sp),0x4(%sp) # store lo(PC) - mov.w &0x4008,0x6(%sp) # store voff - mov.l 0x2(%sp),0x8(%sp) # store ea - mov.l &0x09428001,0xc(%sp) # store fslw - -iea_acc_done: - btst &0x5,(%sp) # user or supervisor mode? 
- beq.b iea_acc_done2 # user - bset &0x2,0xd(%sp) # set supervisor TM bit - -iea_acc_done2: - bra.l _real_access - -iea_dacc: - lea -LOCAL_SIZE(%a6),%sp - - movc %pcr,%d1 - btst &0x1,%d1 - bne.b iea_dacc_cont - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack - fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs -iea_dacc_cont: - mov.l (%a6),%a6 - - mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) - mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) - mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) - mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) - mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) - mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) - - movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1 - add.w &LOCAL_SIZE-0x4,%sp - - bra.b iea_acc_done - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_operr(): 060FPSP entry point for FP Operr exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Operand Error exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# _real_operr() - "callout" to operating system operr handler # -# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # -# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # -# facc_out_{b,w,l}() - store to memory took access error (opcl 3) # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP Operr exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# No access error: # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP Operr exception is enabled, the goal # -# is to get to the handler specified at _real_operr(). But, on the 060, # -# for opclass zero and two instructions taking this exception, the # -# input operand in the fsave frame may be incorrect for some cases # -# and needs to be corrected. This handler calls fix_skewed_ops() to # -# do just this and then exits through _real_operr(). # -# For opclass 3 instructions, the 060 doesn't store the default # -# operr result out to memory or data register file as it should. # -# This code must emulate the move out before finally exiting through # -# _real_operr(). The move out, if to memory, is performed using # -# _mem_write() "callout" routines that may return a failing result. # -# In this special case, the handler must exit through facc_out() # -# which creates an access error stack frame from the current operr # -# stack frame. 
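As a reading aid for the opclass-3 move-out emulated below (foperr_out), here is a hedged C model of how the default operr result is chosen: a NAN source passes its upper mantissa longword through, while anything else yields the largest positive or "largest negative" integer depending on the source sign. The function name and the src_ex/src_hi/src_lo parameters are hypothetical stand-ins for the FP_SRC_* fields of the fsave frame.

#include <stdint.h>

static uint32_t operr_default_result(uint16_t src_ex, uint32_t src_hi,
				     uint32_t src_lo)
{
	/* exponent all ones with a non-zero mantissa is a NAN: pass the
	 * upper mantissa longword through as the stored result */
	if ((src_ex & 0x7fff) == 0x7fff &&
	    (src_lo != 0 || (src_hi & 0x7fffffff) != 0))
		return src_hi;

	/* otherwise the maximum positive integer, or 0x7fffffff + 1 =
	 * 0x80000000 when the source sign bit is set */
	return (src_ex & 0x8000) ? 0x80000000u : 0x7fffffffu;
}

For byte and word destinations, the handler then stores only the low byte or word of this longword, which is why foperr_out_{b,w} load L_SCR1 with the matching operand size.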
# -# # -######################################################################### - - global _fpsp_operr -_fpsp_operr: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.b foperr_out # fmove out - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source infinity or -# denorm operand in the sgl or dbl format. NANs also become skewed, but can't -# cause an operr so we don't need to check for them here. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -foperr_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_operr - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# operand error exceptions. we do this here before passing control to -# the user operand error handler. -# -# byte, word, and long destination format operations can pass -# through here. we simply need to test the sign of the src -# operand and save the appropriate minimum or maximum integer value -# to the effective address as pointed to by the stacked effective address. -# -# although packed opclass three operations can take operand error -# exceptions, they won't pass through here since they are caught -# first by the unsupported data format exception handler. that handler -# sends them directly to _real_operr() if necessary. -# -foperr_out: - - mov.w FP_SRC_EX(%a6),%d1 # fetch exponent - andi.w &0x7fff,%d1 - cmpi.w %d1,&0x7fff - bne.b foperr_out_not_qnan -# the operand is either an infinity or a QNAN. - tst.l FP_SRC_LO(%a6) - bne.b foperr_out_qnan - mov.l FP_SRC_HI(%a6),%d1 - andi.l &0x7fffffff,%d1 - beq.b foperr_out_not_qnan -foperr_out_qnan: - mov.l FP_SRC_HI(%a6),L_SCR1(%a6) - bra.b foperr_out_jmp - -foperr_out_not_qnan: - mov.l &0x7fffffff,%d1 - tst.b FP_SRC_EX(%a6) - bpl.b foperr_out_not_qnan2 - addq.l &0x1,%d1 -foperr_out_not_qnan2: - mov.l %d1,L_SCR1(%a6) - -foperr_out_jmp: - bfextu %d0{&19:&3},%d0 # extract dst format field - mov.b 1+EXC_OPWORD(%a6),%d1 # extract mode,reg - mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 - jmp (tbl_operr.b,%pc,%a0) - -tbl_operr: - short foperr_out_l - tbl_operr # long word integer - short tbl_operr - tbl_operr # sgl prec shouldn't happen - short tbl_operr - tbl_operr # ext prec shouldn't happen - short foperr_exit - tbl_operr # packed won't enter here - short foperr_out_w - tbl_operr # word integer - short tbl_operr - tbl_operr # dbl prec shouldn't happen - short foperr_out_b - tbl_operr # byte integer - short tbl_operr - tbl_operr # packed won't enter here - -foperr_out_b: - mov.b L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? 
- ble.b foperr_out_b_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_byte # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_b # yes - - bra.w foperr_exit -foperr_out_b_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_b # store result to regfile - bra.w foperr_exit - -foperr_out_w: - mov.w L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b foperr_out_w_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_word # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_w # yes - - bra.w foperr_exit -foperr_out_w_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_w # store result to regfile - bra.w foperr_exit - -foperr_out_l: - mov.l L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b foperr_out_l_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w foperr_exit -foperr_out_l_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w foperr_exit - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Signalling NAN exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# _real_snan() - "callout" to operating system SNAN handler # -# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # -# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # -# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) # -# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP SNAN exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# No access error: # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP SNAN exception is enabled, the goal # -# is to get to the handler specified at _real_snan(). But, on the 060, # -# for opclass zero and two instructions taking this exception, the # -# input operand in the fsave frame may be incorrect for some cases # -# and needs to be corrected. This handler calls fix_skewed_ops() to # -# do just this and then exits through _real_snan(). # -# For opclass 3 instructions, the 060 doesn't store the default # -# SNAN result out to memory or data register file as it should. # -# This code must emulate the move out before finally exiting through # -# _real_snan(). The move out, if to memory, is performed using # -# _mem_write() "callout" routines that may return a failing result. # -# In this special case, the handler must exit through facc_out() # -# which creates an access error stack frame from the current SNAN # -# stack frame. 
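For the analogous SNAN move-out handled below (fsnan_out), the default result per destination format can be summarized with a small C model. These helpers are illustrative only; they mirror the bset &6/&14/&30 and sign/exponent merges performed by fsnan_out_{b,w,l,s}, with src_hi and src_ex_hi standing in for the FP_SRC_HI mantissa longword and the FP_SRC_EX longword (sign in the top bit).

#include <stdint.h>

static uint8_t snan_default_b(uint32_t src_hi)
{
	return (uint8_t)(src_hi >> 24) | 0x40;		/* bset &6 */
}

static uint16_t snan_default_w(uint32_t src_hi)
{
	return (uint16_t)(src_hi >> 16) | 0x4000;	/* bset &14 */
}

static uint32_t snan_default_l(uint32_t src_hi)
{
	return src_hi | 0x40000000u;			/* bset &30 */
}

static uint32_t snan_default_s(uint32_t src_ex_hi, uint32_t src_hi)
{
	/* keep the sign, force a single-precision exponent plus the quiet
	 * bit, then fold in the mantissa shifted for single precision */
	return (src_ex_hi & 0x80000000u) | 0x7fc00000u | (src_hi >> 8);
}

In every case the transformation quiets the NAN (sets the most significant mantissa bit of the destination format) rather than synthesizing a min/max integer as the operr path does.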
# -# For the case of an extended precision opclass 3 instruction, # -# if the effective addressing mode was -() or ()+, then the address # -# register must get updated by calling _calc_ea_fout(). If the <ea> # -# was -(a7) from supervisor mode, then the exception frame currently # -# on the system stack must be carefully moved "down" to make room # -# for the operand being moved. # -# # -######################################################################### - - global _fpsp_snan -_fpsp_snan: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.w fsnan_out # fmove out - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source infinity or -# denorm operand in the sgl or dbl format. NANs also become skewed and must be -# fixed here. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -fsnan_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_snan - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# snan exceptions. we do this here before passing control to -# the user snan handler. -# -# byte, word, long, and packed destination format operations can pass -# through here. since packed format operations already were handled by -# fpsp_unsupp(), then we need to do nothing else for them here. -# for byte, word, and long, we simply need to set the SNAN's quiet bit -# and save the appropriate piece of the upper mantissa -# to the effective address as pointed to by the stacked effective address. -# -fsnan_out: - - bfextu %d0{&19:&3},%d0 # extract dst format field - mov.b 1+EXC_OPWORD(%a6),%d1 # extract mode,reg - mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 - jmp (tbl_snan.b,%pc,%a0) - -tbl_snan: - short fsnan_out_l - tbl_snan # long word integer - short fsnan_out_s - tbl_snan # sgl prec - short fsnan_out_x - tbl_snan # ext prec - short tbl_snan - tbl_snan # packed needs no help - short fsnan_out_w - tbl_snan # word integer - short fsnan_out_d - tbl_snan # dbl prec - short fsnan_out_b - tbl_snan # byte integer - short tbl_snan - tbl_snan # packed needs no help - -fsnan_out_b: - mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN - bset &6,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_b_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_byte # write the default result - - tst.l %d1 # did dstore fail? 
- bne.l facc_out_b # yes - - bra.w fsnan_exit -fsnan_out_b_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_b # store result to regfile - bra.w fsnan_exit - -fsnan_out_w: - mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN - bset &14,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_w_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_word # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_w # yes - - bra.w fsnan_exit -fsnan_out_w_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_w # store result to regfile - bra.w fsnan_exit - -fsnan_out_l: - mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN - bset &30,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_l_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w fsnan_exit -fsnan_out_l_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w fsnan_exit - -fsnan_out_s: - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_d_dn # yes - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit - mov.l FP_SRC_HI(%a6),%d1 # load mantissa - lsr.l &0x8,%d1 # shift mantissa for sgl - or.l %d1,%d0 # create sgl SNAN - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w fsnan_exit -fsnan_out_d_dn: - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit - mov.l %d1,-(%sp) - mov.l FP_SRC_HI(%a6),%d1 # load mantissa - lsr.l &0x8,%d1 # shift mantissa for sgl - or.l %d1,%d0 # create sgl SNAN - mov.l (%sp)+,%d1 - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w fsnan_exit - -fsnan_out_d: - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit - mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa - mov.l %d0,FP_SCR0_EX(%a6) # store to temp space - mov.l &11,%d0 # load shift amt - lsr.l %d0,%d1 - or.l %d1,FP_SCR0_EX(%a6) # create dbl hi - mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa - andi.l &0x000007ff,%d1 - ror.l %d0,%d1 - mov.l %d1,FP_SCR0_HI(%a6) # store to temp space - mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa - lsr.l %d0,%d1 - or.l %d1,FP_SCR0_HI(%a6) # create dbl lo - lea FP_SCR0(%a6),%a0 # pass: ptr to operand - mov.l EXC_EA(%a6),%a1 # pass: dst addr - movq.l &0x8,%d0 # pass: size of 8 bytes - bsr.l _dmem_write # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_d # yes - - bra.w fsnan_exit - -# for extended precision, if the addressing mode is pre-decrement or -# post-increment, then the address register did not get updated. -# in addition, for pre-decrement, the stacked <ea> is incorrect. -fsnan_out_x: - clr.b SPCOND_FLG(%a6) # clear special case flag - - mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) - clr.w 2+FP_SCR0(%a6) - mov.l FP_SRC_HI(%a6),%d0 - bset &30,%d0 - mov.l %d0,FP_SCR0_HI(%a6) - mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) - - btst &0x5,EXC_SR(%a6) # supervisor mode exception? 
- bne.b fsnan_out_x_s # yes - - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea() - mov.l (%a6),EXC_A6(%a6) - - bsr.l _calc_ea_fout # find the correct ea,update An - mov.l %a0,%a1 - mov.l %a0,EXC_EA(%a6) # stack correct <ea> - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp # restore user stack pointer - mov.l EXC_A6(%a6),(%a6) - -fsnan_out_x_save: - lea FP_SCR0(%a6),%a0 # pass: ptr to operand - movq.l &0xc,%d0 # pass: size of extended - bsr.l _dmem_write # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_x # yes - - bra.w fsnan_exit - -fsnan_out_x_s: - mov.l (%a6),EXC_A6(%a6) - - bsr.l _calc_ea_fout # find the correct ea,update An - mov.l %a0,%a1 - mov.l %a0,EXC_EA(%a6) # stack correct <ea> - - mov.l EXC_A6(%a6),(%a6) - - cmpi.b SPCOND_FLG(%a6),&mda7_flg # is mode -(a7)? - bne.b fsnan_out_x_save # no - -# the operation was "fmove.x SNAN,-(a7)" from supervisor mode. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - mov.l EXC_A6(%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - - mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) - mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) - mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - bra.l _real_snan - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Inexact exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# set_tag_x() - determine optype of src/dst operands # -# store_fpreg() - store opclass 0 or 2 result to FP regfile # -# unnorm_fix() - change UNNORM operands to NORM or ZERO # -# load_fpn2() - load dst operand from FP regfile # -# smovcr() - emulate an "fmovcr" instruction # -# fout() - emulate an opclass 3 instruction # -# tbl_unsupp - addr of table of emulation routines for opclass 0,2 # -# _real_inex() - "callout" to operating system inexact handler # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP Inexact exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP Inexact exception is enabled, the goal # -# is to get to the handler specified at _real_inex(). But, on the 060, # -# for opclass zero and two instruction taking this exception, the # -# hardware doesn't store the correct result to the destination FP # -# register as did the '040 and '881/2. This handler must emulate the # -# instruction in order to get this value and then store it to the # -# correct register before calling _real_inex(). 
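Before the opclass-3 discussion continues, a rough C model of the emulation dispatch used below (finex_extract) may help: bit 5 of the low command byte marks a dyadic operation, except when bit 4 also flags fsincos, and dyadic ops load the destination register before the 7-bit extension indexes the emulation table. All names here (fpx_t, load_dst_fpreg, tbl_unsupp_c) are hypothetical analogues of the assembly-level structures, not the package's actual interfaces.

#include <stdint.h>

typedef struct { uint16_t ex; uint32_t hi, lo; } fpx_t;	/* ext-prec operand */
typedef void (*fp_emul_fn)(fpx_t *src, fpx_t *dst, int rnd_mode);

extern fp_emul_fn tbl_unsupp_c[0x80];		   /* analogue of tbl_unsupp */
extern void load_dst_fpreg(int regno, fpx_t *out); /* analogue of load_fpn2 */

static void inex_emulate(uint16_t cmdreg, fpx_t *src, int rnd_mode)
{
	fpx_t dst = { 0, 0, 0 };

	/* bit 5 of the low command byte set => dyadic op; bit 4 set on
	 * top of that => fsincos, which needs no destination operand */
	if ((cmdreg & 0x20) && !(cmdreg & 0x10))
		load_dst_fpreg((cmdreg >> 7) & 0x7, &dst); /* dst reg field */

	/* the low 7 bits of the command word index the emulation table */
	tbl_unsupp_c[cmdreg & 0x7f](src, &dst, rnd_mode);
}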
# -# For opclass 3 instructions, the 060 doesn't store the default # -# inexact result out to memory or data register file as it should. # -# This code must emulate the move out by calling fout() before finally # -# exiting through _real_inex(). # -# # -######################################################################### - - global _fpsp_inex -_fpsp_inex: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.w finex_out # fmove out - - -# the hardware, for "fabs" and "fneg" w/ a long source format, puts the -# longword integer directly into the upper longword of the mantissa along -# w/ an exponent value of 0x401e. we convert this to extended precision here. - bfextu %d0{&19:&3},%d0 # fetch instr size - bne.b finex_cont # instr size is not long - cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e? - bne.b finex_cont # no - fmov.l &0x0,%fpcr - fmov.l FP_SRC_HI(%a6),%fp0 # load integer src - fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision - mov.w &0xe001,0x2+FP_SRC(%a6) - -finex_cont: - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -# Here, we zero the ccode and exception byte field since we're going to -# emulate the whole instruction. Notice, though, that we don't kill the -# INEX1 bit. This is because a packed op has long since been converted -# to extended before arriving here. Therefore, we need to retain the -# INEX1 bit from when the operand was first converted. - andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg - cmpi.b %d1,&0x17 # is op an fmovecr? - beq.w finex_fmovcr # yes - - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - mov.b %d0,STAG(%a6) # maybe NORM,DENORM - -# bits four and five of the fp extension word separate the monadic and dyadic -# operations that can pass through fpsp_inex(). remember that fcmp and ftst -# will never take this exception, but fsincos will. - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b finex_extract # monadic - - btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos? - bne.b finex_extract # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? 
- bne.b finex_op2_done # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -finex_op2_done: - mov.b %d0,DTAG(%a6) # save dst optype tag - -finex_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x007f,%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# the operation has been emulated. the result is in fp0. -finex_save: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 - bsr.l store_fpreg - -finex_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_inex - -finex_fmovcr: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.l &0x0000007f,%d1 # pass rom offset - bsr.l smovcr - bra.b finex_save - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# inexact exceptions. we do this here before passing control to -# the user inexact handler. -# -# byte, word, and long destination format operations can pass -# through here. so can double and single precision. -# although packed opclass three operations can take inexact -# exceptions, they won't pass through here since they are caught -# first by the unsupported data format exception handler. that handler -# sends them directly to _real_inex() if necessary. -# -finex_out: - - mov.b &NORM,STAG(%a6) # src is a NORM - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode - - andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - bsr.l fout # store the default result - - bra.b finex_exit - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_dz(): 060FPSP entry point for FP DZ exception. # -# # -# This handler should be the first code executed upon taking # -# the FP DZ exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword from memory # -# fix_skewed_ops() - adjust fsave operand # -# _real_dz() - "callout" exit point from FP DZ handler # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP DZ exception stack. # -# - The fsave frame contains the source operand. # -# # -# OUTPUT ************************************************************** # -# - The system stack contains the FP DZ exception stack. # -# - The fsave frame contains the adjusted source operand. # -# # -# ALGORITHM *********************************************************** # -# In a system where the DZ exception is enabled, the goal is to # -# get to the handler specified at _real_dz(). But, on the 060, when the # -# exception is taken, the input operand in the fsave state frame may # -# be incorrect for some cases and need to be adjusted. So, this package # -# adjusts the operand using fix_skewed_ops() and then branches to # -# _real_dz(). 
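Stripped of the register shuffling, the DZ handler that follows reduces to a few steps. The C sketch below models them under assumed names: imem_read_long, fix_skewed_op, and real_dz are illustrative stand-ins for the package's callouts and helpers, and the frame size is a placeholder rather than the documented fsave layout.

#include <stdint.h>

extern uint32_t imem_read_long(uint32_t addr);
extern void fix_skewed_op(uint8_t *fsave_frame);
extern void real_dz(void);

struct dz_ctx {
	uint8_t  fsave_frame[0x60];	/* "busy" frame captured by fsave */
	uint32_t fpiar;			/* current PC of the faulting instr */
	uint32_t opword;
};

static void fpsp_dz_model(struct dz_ctx *ctx)
{
	/* fetch the instruction words for reference, as the handler does */
	ctx->opword = imem_read_long(ctx->fpiar);
	/* adjust a skewed sgl/dbl zero source operand in place */
	fix_skewed_op(ctx->fsave_frame);
	/* the real code then does frestore and exits via _real_dz() */
	real_dz();
}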
# -# # -######################################################################### - - global _fpsp_dz -_fpsp_dz: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source zero -# in the sgl or dbl format. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -fdz_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_dz - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. # -# # -# This handler should be the first code executed upon taking the # -# "Line F Emulator" exception in an operating system. # -# # -# XREF **************************************************************** # -# _fpsp_unimp() - handle "FP Unimplemented" exceptions # -# _real_fpu_disabled() - handle "FPU disabled" exceptions # -# _real_fline() - handle "FLINE" exceptions # -# _imem_read_long() - read instruction longword # -# # -# INPUT *************************************************************** # -# - The system stack contains a "Line F Emulator" exception # -# stack frame. # -# # -# OUTPUT ************************************************************** # -# - The system stack is unchanged # -# # -# ALGORITHM *********************************************************** # -# When a "Line F Emulator" exception occurs, there are 3 possible # -# exception types, denoted by the exception stack frame format number: # -# (1) FPU unimplemented instruction (6 word stack frame) # -# (2) FPU disabled (8 word stack frame) # -# (3) Line F (4 word stack frame) # -# # -# This module determines which and forks the flow off to the # -# appropriate "callout" (for "disabled" and "Line F") or to the # -# correct emulation code (for "FPU unimplemented"). # -# This code also must check for "fmovecr" instructions w/ a # -# non-zero <ea> field. These may get flagged as "Line F" but should # -# really be flagged as "FPU Unimplemented". (This is a "feature" on # -# the '060.) # -# # -######################################################################### - - global _fpsp_fline -_fpsp_fline: - -# check to see if this exception is a "FP Unimplemented Instruction" -# exception. if so, branch directly to that handler's entry point. - cmpi.w 0x6(%sp),&0x202c - beq.l _fpsp_unimp - -# check to see if the FPU is disabled. if so, jump to the OS entry -# point for that condition. - cmpi.w 0x6(%sp),&0x402c - beq.l _real_fpu_disabled - -# the exception was an "F-Line Illegal" exception. we check to see -# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. 
if -# so, convert the F-Line exception stack frame to an FP Unimplemented -# Instruction exception stack frame else branch to the OS entry -# point for the F-Line exception handler. - link.w %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch instruction words - - bfextu %d0{&0:&10},%d1 # is it an fmovecr? - cmpi.w %d1,&0x03c8 - bne.b fline_fline # no - - bfextu %d0{&16:&6},%d1 # is it an fmovecr? - cmpi.b %d1,&0x17 - bne.b fline_fline # no - -# it's an fmovecr w/ a non-zero <ea> that has entered through -# the F-Line Illegal exception. -# so, we need to convert the F-Line exception stack frame into an -# FP Unimplemented Instruction stack frame and jump to that entry -# point. -# -# but, if the FPU is disabled, then we need to jump to the FPU disabled -# entry point. - movc %pcr,%d0 - btst &0x1,%d0 - beq.b fline_fmovcr - - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - sub.l &0x8,%sp # make room for "Next PC", - mov.w 0x8(%sp),(%sp) - mov.l 0xa(%sp),0x2(%sp) # move "Current PC" - mov.w &0x402c,0x6(%sp) - mov.l 0x2(%sp),0xc(%sp) - addq.l &0x4,0x2(%sp) # set "Next PC" - - bra.l _real_fpu_disabled - -fline_fmovcr: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - fmov.l 0x2(%sp),%fpiar # set current PC - addq.l &0x4,0x2(%sp) # set Next PC - - mov.l (%sp),-(%sp) - mov.l 0x8(%sp),0x4(%sp) - mov.b &0x20,0x6(%sp) - - bra.l _fpsp_unimp - -fline_fline: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - bra.l _real_fline - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented # -# Instruction" exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Unimplemented Instruction exception in an operating system. 
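The fmovecr screen in the Line-F handler above packs two bit-field tests into bfextu instructions. A C model of the same checks may make them easier to verify; the is_fmovecr helper is hypothetical, and insn is again assumed to be the combined opword/extension longword fetched by _imem_read_long.

#include <stdint.h>

static int is_fmovecr(uint32_t insn)
{
	/* bfextu %d0{&0:&10}: the top 10 bits must equal
	 * 0xf200 >> 6 = 0x3c8, i.e. the fmovecr opword */
	if (((insn >> 22) & 0x3ff) != 0x03c8)
		return 0;
	/* bfextu %d0{&16:&6}: the top 6 bits of the extension word
	 * must be 0x17, the fmovecr command prefix */
	return ((insn >> 10) & 0x3f) == 0x17;
}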
# -# # -# XREF **************************************************************** # -# _imem_read_{word,long}() - read instruction word/longword # -# load_fop() - load src/dst ops from memory and/or FP regfile # -# store_fpreg() - store opclass 0 or 2 result to FP regfile # -# tbl_trans - addr of table of emulation routines for trnscndls # -# _real_access() - "callout" for access error exception # -# _fpsp_done() - "callout" for exit; work all done # -# _real_trace() - "callout" for Trace enabled exception # -# smovcr() - emulate "fmovecr" instruction # -# funimp_skew() - adjust fsave src ops to "incorrect" value # -# _ftrapcc() - emulate an "ftrapcc" instruction # -# _fdbcc() - emulate an "fdbcc" instruction # -# _fscc() - emulate an "fscc" instruction # -# _real_trap() - "callout" for Trap exception # -# _real_bsun() - "callout" for enabled Bsun exception # -# # -# INPUT *************************************************************** # -# - The system stack contains the "Unimplemented Instr" stk frame # -# # -# OUTPUT ************************************************************** # -# If access error: # -# - The system stack is changed to an access error stack frame # -# If Trace exception enabled: # -# - The system stack is changed to a Trace exception stack frame # -# Else: (normal case) # -# - Correct result has been stored as appropriate # -# # -# ALGORITHM *********************************************************** # -# There are two main cases of instructions that may enter here to # -# be emulated: (1) the FPgen instructions, most of which were also # -# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". # -# For the first set, this handler calls the routine load_fop() # -# to load the source and destination (for dyadic) operands to be used # -# for instruction emulation. The correct emulation routine is then # -# chosen by decoding the instruction type and indexing into an # -# emulation subroutine index table. After emulation returns, this # -# handler checks to see if an exception should occur as a result of the # -# FP instruction emulation. If so, then an FP exception of the correct # -# type is inserted into the FPU state frame using the "frestore" # -# instruction before exiting through _fpsp_done(). In either the # -# exceptional or non-exceptional cases, we must check to see if the # -# Trace exception is enabled. If so, then we must create a Trace # -# exception frame from the current exception frame and exit through # -# _real_trace(). # -# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines # -# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three # -# may flag that a BSUN exception should be taken. If so, then the # -# current exception stack frame is converted into a BSUN exception # -# stack frame and an exit is made through _real_bsun(). If the # -# instruction was "ftrapcc" and a Trap exception should result, a Trap # -# exception stack frame is created from the current frame and an exit # -# is made through _real_trap(). If a Trace exception is pending, then # -# a Trace exception frame is created from the current frame and a jump # -# is made to _real_trace(). Finally, if none of these conditions exist, # -# then the handler exits through the callout _fpsp_done(). # -# # -# In any of the above scenarios, if a _mem_read() or _mem_write() # -# "callout" returns a failing value, then an access error stack frame # -# is created from the current stack frame and an exit is made through # -# _real_access(). 
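One detail of the algorithm above that recurs throughout the package is the post-emulation exception scan (the bfffo idiom in funimp_ena and iea_op_ena): AND the FPSR exception byte with the FPCR enable byte and take the highest set bit. A hedged C model follows; the function name is hypothetical and the bit numbering simply mirrors the byte the assembly scans.

#include <stdint.h>

/* Returns the bit number of the highest-priority pending-and-enabled
 * exception (7 = BSUN down to 0 = INEX1), or -1 if none is pending. */
static int highest_enabled_exc(uint8_t fpsr_except, uint8_t fpcr_enable)
{
	uint8_t pending = fpsr_except & fpcr_enable;	/* and.b */
	int bit;

	for (bit = 7; bit >= 0; bit--)			/* bfffo scan */
		if (pending & (1u << bit))
			return bit;
	return -1;
}

The returned bit then indexes an 8-entry table of fsave status words (tbl_iea_except / tbl_funimp_except) that is stuffed into the frame before frestore re-raises the exception.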
# -# # -######################################################################### - -# -# FP UNIMPLEMENTED INSTRUCTION STACK FRAME: -# -# ***************** -# * * => <ea> of fp unimp instr. -# - EA - -# * * -# ***************** -# * 0x2 * 0x02c * => frame format and vector offset(vector #11) -# ***************** -# * * -# - Next PC - => PC of instr to execute after exc handling -# * * -# ***************** -# * SR * => SR at the time the exception was taken -# ***************** -# -# Note: the !NULL bit does not get set in the fsave frame when the -# machine encounters an fp unimp exception. Therefore, it must be set -# before leaving this handler. -# - global _fpsp_unimp -_fpsp_unimp: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 - - btst &0x5,EXC_SR(%a6) # user mode exception? - bne.b funimp_s # no; supervisor mode - -# save the value of the user stack pointer onto the stack frame -funimp_u: - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # store in stack frame - bra.b funimp_cont - -# store the value of the supervisor stack pointer BEFORE the exc occurred. -# old_sp is address just above stacked effective address. -funimp_s: - lea 4+EXC_EA(%a6),%a0 # load old a7' - mov.l %a0,EXC_A7(%a6) # store a7' - mov.l %a0,OLD_A7(%a6) # make a copy - -funimp_cont: - -# the FPIAR holds the "current PC" of the faulting instruction. - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################ - - fmov.l &0x0,%fpcr # clear FPCR - fmov.l &0x0,%fpsr # clear FPSR - - clr.b SPCOND_FLG(%a6) # clear "special case" flag - -# Divide the fp instructions into 8 types based on the TYPE field in -# bits 6-8 of the opword(classes 6,7 are undefined). -# (for the '060, only two types can take this exception) -# bftst %d0{&7:&3} # test TYPE - btst &22,%d0 # type 0 or 1 ? - bne.w funimp_misc # type 1 - -######################################### -# TYPE == 0: General instructions # -######################################### -funimp_gen: - - clr.b STORE_FLG(%a6) # clear "store result" flag - -# clear the ccode byte and exception status byte - andi.l &0x00ff00ff,USER_FPSR(%a6) - - bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg - cmpi.b %d1,&0x17 # is op an fmovecr? 
- beq.w funimp_fmovcr # yes - -funimp_gen_op: - bsr.l _load_fop # load - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x003f,%d1 # extract extension bits - lsl.w &0x3,%d1 # shift left 3 bits - or.b STAG(%a6),%d1 # insert src optag bits - - lea FP_DST(%a6),%a1 # pass dst ptr in a1 - lea FP_SRC(%a6),%a0 # pass src ptr in a0 - - mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 - jsr (tbl_trans.w,%pc,%d1.w*1) # emulate - -funimp_fsave: - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w funimp_ena # some are enabled - -funimp_store: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn - bsr.l store_fpreg # store result to fp regfile - -funimp_gen_exit: - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - -funimp_gen_exit_cmp: - cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ? - beq.b funimp_gen_exit_a7 # yes - - cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ? - beq.b funimp_gen_exit_a7 # yes - -funimp_gen_exit_cont: - unlk %a6 - -funimp_gen_exit_cont2: - btst &0x7,(%sp) # is trace on? - beq.l _fpsp_done # no - -# this catches a problem with the case where an exception will be re-inserted -# into the machine. the frestore has already been executed...so, the fmov.l -# alone of the control register would trigger an unwanted exception. -# until I feel like fixing this, we'll sidestep the exception. - fsave -(%sp) - fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR - frestore (%sp)+ - mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24 - bra.l _real_trace - -funimp_gen_exit_a7: - btst &0x5,EXC_SR(%a6) # supervisor or user mode? - bne.b funimp_gen_exit_a7_s # supervisor - - mov.l %a0,-(%sp) - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - mov.l (%sp)+,%a0 - bra.b funimp_gen_exit_cont - -# if the instruction was executed from supervisor mode and the addressing -# mode was (a7)+, then the stack frame for the rte must be shifted "up" -# "n" bytes where "n" is the size of the src operand type. -# f<op>.{b,w,l,s,d,x,p} -funimp_gen_exit_a7_s: - mov.l %d0,-(%sp) # save d0 - mov.l EXC_A7(%a6),%d0 # load new a7' - sub.l OLD_A7(%a6),%d0 # subtract old a7' - mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame - mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame - mov.w %d0,EXC_SR(%a6) # store incr number - mov.l (%sp)+,%d0 # restore d0 - - unlk %a6 - - add.w (%sp),%sp # stack frame shifted - bra.b funimp_gen_exit_cont2 - -###################### -# fmovecr.x #ccc,fpn # -###################### -funimp_fmovcr: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.l &0x0000007f,%d1 # pass rom offset in d1 - bsr.l smovcr - bra.w funimp_fsave - -######################################################################### - -# -# the user has enabled some exceptions. we figure not to see this too -# often so that's why it gets lower priority. -# -funimp_ena: - -# was an exception set that was also enabled? - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b funimp_exc # at least one was set - -# no exception that was enabled was set BUT if we got an exact overflow -# and overflow wasn't enabled but inexact was (yech!) then this is -# an inexact exception; otherwise, return to normal non-exception flow. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 
- beq.w funimp_store # no; return to normal flow - -# the overflow w/ exact result happened but was inexact enabled in the FPCR? -funimp_ovfl: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - beq.w funimp_store # no; return to normal flow - bra.b funimp_exc_ovfl # yes - -# some exception happened that was actually enabled. -# we'll insert this new exception into the FPU and then return. -funimp_exc: - subi.l &24,%d0 # fix offset to be 0-7 - cmpi.b %d0,&0x6 # is exception INEX? - bne.b funimp_exc_force # no - -# the enabled exception was inexact. so, if it occurs with an overflow -# or underflow that was disabled, then we have to force an overflow or -# underflow frame. the eventual overflow or underflow handler will see that -# it's actually an inexact and act appropriately. this is the only easy -# way to have the EXOP available for the enabled inexact handler when -# a disabled overflow or underflow has also happened. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - bne.b funimp_exc_ovfl # yes - btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? - bne.b funimp_exc_unfl # yes - -# force the fsave exception status bits to signal an exception of the -# appropriate type. don't forget to "skew" the source operand in case we -# "unskewed" the one the hardware initially gave us. -funimp_exc_force: - mov.l %d0,-(%sp) # save d0 - bsr.l funimp_skew # check for special case - mov.l (%sp)+,%d0 # restore d0 - mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) - bra.b funimp_gen_exit2 # exit with frestore - -tbl_funimp_except: - short 0xe002, 0xe006, 0xe004, 0xe005 - short 0xe003, 0xe002, 0xe001, 0xe001 - -# insert an overflow frame -funimp_exc_ovfl: - bsr.l funimp_skew # check for special case - mov.w &0xe005,2+FP_SRC(%a6) - bra.b funimp_gen_exit2 - -# insert an underflow frame -funimp_exc_unfl: - bsr.l funimp_skew # check for special case - mov.w &0xe003,2+FP_SRC(%a6) - -# this is the general exit point for an enabled exception that will be -# restored into the machine for the instruction just emulated. -funimp_gen_exit2: - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # insert exceptional status - - bra.w funimp_gen_exit_cmp - -############################################################################ - -# -# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc> -# -# These instructions were implemented on the '881/2 and '040 in hardware but -# are emulated in software on the '060. -# -funimp_misc: - bfextu %d0{&10:&3},%d1 # extract mode field - cmpi.b %d1,&0x1 # is it an fdb<cc>? - beq.w funimp_fdbcc # yes - cmpi.b %d1,&0x7 # is it an fs<cc>? - bne.w funimp_fscc # yes - bfextu %d0{&13:&3},%d1 - cmpi.b %d1,&0x2 # is it an fs<cc>? - blt.w funimp_fscc # yes - -######################### -# ftrap<cc> # -# ftrap<cc>.w #<data> # -# ftrap<cc>.l #<data> # -######################### -funimp_ftrapcc: - - bsr.l _ftrapcc # FTRAP<cc>() - - cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? - beq.w funimp_bsun # yes - - cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur? - bne.w funimp_done # no - -# FP UNIMP FRAME TRAP FRAME -# ***************** ***************** -# ** ** ** Current PC ** -# ***************** ***************** -# * 0x2 * 0x02c * * 0x2 * 0x01c * -# ***************** ***************** -# ** Next PC ** ** Next PC ** -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# (6 words) (6 words) -# -# the ftrapcc instruction should take a trap. 
so, here we must create a -# trap stack frame from an unimplemented fp instruction stack frame and -# jump to the user supplied entry point for the trap exception -funimp_ftrapcc_tp: - mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC - mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - bra.l _real_trap - -######################### -# fdb<cc> Dn,