diff options
Diffstat (limited to 'include/asm-alpha/byteorder.h')
-rw-r--r-- | include/asm-alpha/byteorder.h | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/include/asm-alpha/byteorder.h b/include/asm-alpha/byteorder.h
index edc376a04..91b55ea3e 100644
--- a/include/asm-alpha/byteorder.h
+++ b/include/asm-alpha/byteorder.h
@@ -3,6 +3,44 @@
 
 #include <asm/types.h>
 
+#ifdef __GNUC__
+/* Alpha 32-bit byte swap: returns x with its four bytes in reverse order. */
+static __inline __u32 __attribute__((__const)) __arch__swab32(__u32 x)
+{
+	/*
+	 * Each letter pair in the diagrams is one byte of a 64-bit register,
+	 * with x = AABBCCDD on entry.  Unfortunately, we can't use the 6
+	 * instruction sequence on ev6 since the latency of the UNPKBW is 3,
+	 * which is pretty hard to hide.  Just in case a future implementation
+	 * has a lower latency, here's the sequence (also by Mike Burrows):
+	 * UNPKBW a0, v0       v0: 00AA00BB00CC00DD
+	 * SLL v0, 24, a0      a0: BB00CC00DD000000
+	 * BIS v0, a0, a0      a0: BBAACCBBDDCC00DD
+	 * EXTWL a0, 6, v0     v0: 000000000000BBAA
+	 * ZAP a0, 0xf3, a0    a0: 00000000DDCC0000
+	 * ADDL a0, v0, v0     v0: ssssssssDDCCBBAA
+	 */
+
+	__u64 t0, t1, t2, t3;		/* scratch temporaries */
+
+	__asm__("inslh %1, 7, %0"	/* t0 : 0000000000AABBCC */
+		: "=r"(t0) : "r"(x));	/* output t0, input x, both in registers */
+	__asm__("inswl %1, 3, %0"	/* t1 : 000000CCDD000000 */
+		: "=r"(t1) : "r"(x));	/* output t1, input x, both in registers */
+
+	t1 |= t0;			/* t1 : 000000CCDDAABBCC (merge both inserts) */
+	t2 = t1 >> 16;			/* t2 : 0000000000CCDDAA (shift down two bytes) */
+	t0 = t1 & 0xFF00FF00;		/* t0 : 00000000DD00BB00 (keep bytes 1 and 3) */
+	t3 = t2 & 0x00FF00FF;		/* t3 : 0000000000CC00AA (keep bytes 0 and 2) */
+	t1 = t0 + t3;			/* t1 : ssssssssDDCCBBAA ('s' presumably marks sign-extension bits - confirm) */
+
+	return t1;
+}
+
+#define __arch__swab32 __arch__swab32	/* NOTE(review): presumably lets generic byteorder code detect this override via #ifdef - confirm */
+
+#endif /* __GNUC__ */
+
 #define __BYTEORDER_HAS_U64__
 
 #include <linux/byteorder/little_endian.h>
|