/*
 * include/asm-alpha/fpu.h
 */
#ifndef __ASM_ALPHA_FPU_H
#define __ASM_ALPHA_FPU_H

/*
 * Bit layout of the Alpha floating-point control register (fpcr).
 */

/* Exception disable bits (all optional) */
#define FPCR_INVD	(1UL << 49)	/* invalid operation disable */
#define FPCR_DZED	(1UL << 50)	/* division by zero disable */
#define FPCR_OVFD	(1UL << 51)	/* overflow disable */
#define FPCR_UNFD	(1UL << 61)	/* underflow disable */
#define FPCR_INED	(1UL << 62)	/* inexact disable */

/* Exception bits */
#define FPCR_INV	(1UL << 52)	/* invalid operation */
#define FPCR_DZE	(1UL << 53)	/* division by zero */
#define FPCR_OVF	(1UL << 54)	/* overflow */
#define FPCR_UNF	(1UL << 55)	/* underflow */
#define FPCR_INE	(1UL << 56)	/* inexact */
#define FPCR_IOV	(1UL << 57)	/* integer overflow */

#define FPCR_UNDZ	(1UL << 60)	/* underflow to zero (optional) */
#define FPCR_SUM	(1UL << 63)	/* summary bit */

/* Dynamic rounding mode: two-bit field starting at FPCR_DYN_SHIFT */
#define FPCR_DYN_SHIFT		58
#define FPCR_DYN_CHOPPED	(0x0UL << FPCR_DYN_SHIFT)	/* round towards 0 */
#define FPCR_DYN_MINUS		(0x1UL << FPCR_DYN_SHIFT)	/* round towards -INF */
#define FPCR_DYN_NORMAL		(0x2UL << FPCR_DYN_SHIFT)	/* round towards nearest */
#define FPCR_DYN_PLUS		(0x3UL << FPCR_DYN_SHIFT)	/* round towards +INF */
#define FPCR_DYN_MASK		(0x3UL << FPCR_DYN_SHIFT)	/* both mode bits */

/* Bits 49 through 63, i.e. every fpcr bit named above */
#define FPCR_MASK	0xfffe000000000000UL

/*
 * The IEEE trap enables are a software construct: each thread keeps
 * them in the "flags" field of its "struct thread_struct".  The bit
 * positions are therefore chosen to stay clear of the architected
 * floating-point enable bit.  They are also kept compatible with
 * OSF/1, so that ieee_set_fp_control() etc. can be implemented easily
 * and compatibly; OSF/1 has the corresponding definitions in
 * /usr/include/machine/fpu.h.
 */
#define IEEE_TRAP_ENABLE_INV	(1UL << 1)	/* invalid operation */
#define IEEE_TRAP_ENABLE_DZE	(1UL << 2)	/* division by zero */
#define IEEE_TRAP_ENABLE_OVF	(1UL << 3)	/* overflow */
#define IEEE_TRAP_ENABLE_UNF	(1UL << 4)	/* underflow */
#define IEEE_TRAP_ENABLE_INE	(1UL << 5)	/* inexact */

/* Union of all software trap-enable bits */
#define IEEE_TRAP_ENABLE_MASK					\
	(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |		\
	 IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |		\
	 IEEE_TRAP_ENABLE_INE)

/* Status bits, which come from the fpcr */
#define IEEE_STATUS_INV		(1UL << 17)	/* invalid operation */
#define IEEE_STATUS_DZE		(1UL << 18)	/* division by zero */
#define IEEE_STATUS_OVF		(1UL << 19)	/* overflow */
#define IEEE_STATUS_UNF		(1UL << 20)	/* underflow */
#define IEEE_STATUS_INE		(1UL << 21)	/* inexact */

/* Union of all status bits */
#define IEEE_STATUS_MASK					\
	(IEEE_STATUS_INV | IEEE_STATUS_DZE |			\
	 IEEE_STATUS_OVF | IEEE_STATUS_UNF |			\
	 IEEE_STATUS_INE)

/* Every bit of the software control word defined above */
#define IEEE_SW_MASK	(IEEE_TRAP_ENABLE_MASK | IEEE_STATUS_MASK)

/* Shifting an IEEE_STATUS_* bit right by this amount lands on bits 1..5 */
#define IEEE_STATUS_TO_EXCSUM_SHIFT	16

#define IEEE_INHERIT	(1UL << 63)	/* inherit on thread create? */

/*
 * Translate a software IEEE control word (trap enables plus status
 * bits) into the hardware fpcr layout.
 *
 *   - IEEE_STATUS_* (bits 17..21) move up by 35 to FPCR_INV..FPCR_INE
 *     (bits 52..56); FPCR_SUM is set when any of them is set.
 *   - The sense of the enables is inverted: a trap that is NOT enabled
 *     in software becomes a set "disable" bit in the fpcr.  INV/DZE/OVF
 *     (bits 1..3) move up by 48 to FPCR_INVD/DZED/OVFD, and UNF/INE
 *     (bits 4..5) move up by 57 to FPCR_UNFD/INED.
 *
 * All other bits of the result are zero.
 */

static inline unsigned long
ieee_swcr_to_fpcr(unsigned long sw)
{
	const unsigned long low_enables = IEEE_TRAP_ENABLE_INV
					  | IEEE_TRAP_ENABLE_DZE
					  | IEEE_TRAP_ENABLE_OVF;
	const unsigned long high_enables = IEEE_TRAP_ENABLE_UNF
					   | IEEE_TRAP_ENABLE_INE;
	const unsigned long status = sw & IEEE_STATUS_MASK;
	const unsigned long not_enabled = ~sw;
	unsigned long fpcr = status << 35;

	if (status)
		fpcr |= FPCR_SUM;
	fpcr |= (not_enabled & low_enables) << 48;
	fpcr |= (not_enabled & high_enables) << 57;
	return fpcr;
}

/*
 * Translate a hardware fpcr value into the software IEEE control word;
 * the inverse mapping of ieee_swcr_to_fpcr().
 *
 * fpcr bits 52..56 are shifted down by 35 onto IEEE_STATUS_*.  The
 * fpcr disable bits are complemented so that a clear disable bit reads
 * back as a set IEEE_TRAP_ENABLE_* bit: bits 49..51 shift down by 48,
 * bits 61..62 shift down by 57.  No other fpcr bit is carried over.
 */
static inline unsigned long
ieee_fpcr_to_swcr(unsigned long fp)
{
	const unsigned long low_enables = IEEE_TRAP_ENABLE_INV
					  | IEEE_TRAP_ENABLE_DZE
					  | IEEE_TRAP_ENABLE_OVF;
	const unsigned long high_enables = IEEE_TRAP_ENABLE_UNF
					   | IEEE_TRAP_ENABLE_INE;
	const unsigned long not_disabled = ~fp;
	unsigned long swcr = (fp >> 35) & IEEE_STATUS_MASK;

	swcr |= (not_disabled >> 48) & low_enables;
	swcr |= (not_disabled >> 57) & high_enables;
	return swcr;
}

#ifdef __KERNEL__

/* The following two functions don't need trapb/excb instructions
   around the mf_fpcr/mt_fpcr instructions because (a) the kernel
   never generates arithmetic faults and (b) call_pal instructions
   are implied trap barriers.  */

/*
 * Read the floating-point control register and return its raw value.
 *
 * The value travels through $f0: the current $f0 is first saved to a
 * scratch slot, mf_fpcr places the fpcr in $f0, $f0 is stored to the
 * result slot, and finally the saved $f0 is reloaded.
 *
 * The asm has to be volatile.  It has outputs but no inputs, so a
 * non-volatile asm may be treated by the compiler as a pure
 * computation: repeated reads can be merged, hoisted out of loops, or
 * moved across the (volatile) asm in wrfpcr(), yielding a stale value.
 */
static inline unsigned long rdfpcr(void)
{
	unsigned long tmp, ret;
	__asm__ __volatile__ (
		"stt $f0,%0\n\t"
		"mf_fpcr $f0\n\t"
		"stt $f0,%1\n\t"
		"ldt $f0,%0"
		: "=m"(tmp), "=m"(ret));
	return ret;
}

/*
 * Write val to the floating-point control register.
 *
 * The value travels through $f0: the current $f0 is saved to the
 * scratch slot tmp, val is loaded into $f0 and handed to mt_fpcr, and
 * the saved $f0 is then reloaded from tmp.  The asm is volatile, so it
 * is not discarded even though its only output (tmp) is never read.
 */
static inline void wrfpcr(unsigned long val)
{
	unsigned long tmp;
	__asm__ __volatile__ (
		"stt $f0,%0\n\t"
		"ldt $f0,%1\n\t"
		"mt_fpcr $f0\n\t"
		"ldt $f0,%0"
		: "=m"(tmp) : "m"(val));
}

/*
 * Floating-point register accessors; only declared here, the
 * definitions live outside this header.
 * NOTE(review): presumably "reg" selects the FP register by number and
 * the value read/written is the raw 64-bit register contents -- confirm
 * against the implementation.
 */
extern unsigned long alpha_read_fp_reg (unsigned long reg);
extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);

#endif /* __KERNEL__ */

#endif /* __ASM_ALPHA_FPU_H */