1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
#ifndef __ALPHA_DELAY_H
#define __ALPHA_DELAY_H
#include <asm/smp.h>
/*
* Copyright (C) 1993 Linus Torvalds
*
* Delay routines, using a pre-computed "loops_per_second" value.
*/
/*
 * Spin for (approximately) `loops` iterations of a 2-instruction
 * subq/bge loop.  The ".align 3" puts the loop head on an 8-byte
 * boundary so the loop timing is consistent across placements.
 * The "0" input constraint ties the input to the output register,
 * and __volatile__ keeps the optimizer from deleting the busy-wait.
 */
extern __inline__ void
__delay(unsigned long loops)
{
__asm__ __volatile__(".align 3\n"
"1:\tsubq %0,1,%0\n\t"
"bge %0,1b": "=r" (loops) : "0" (loops));
}
/*
* division by multiplication: you don't have to worry about
* loss of precision.
*
* Use only for very small delays ( < 1 msec). Should probably use a
* lookup table, really, as the multiplications take much too long with
* short delays. This is a "reasonable" implementation, though (and the
* first constant multiplications gets optimized away if the delay is
* a constant).
*
* Optimize small constants further by exposing the second multiplication
* to the compiler. In addition, mulq is 2 cycles faster than umulh.
*/
/*
 * Delay for `usecs` microseconds, given the calibrated loops-per-second
 * value `lps`, by converting microseconds to __delay() loop counts with
 * a reciprocal multiplication (avoids a slow runtime division).
 *
 * usecs * (2**64 / 10**6) yields the fraction usecs/10**6 scaled by
 * 2**64; `umulh` then multiplies by lps and keeps the high 64 bits,
 * i.e. divides the 128-bit product by 2**64 — leaving usecs/10**6 * lps
 * loop iterations.  No __volatile__ needed: the result feeds __delay().
 */
extern __inline__ void
__udelay(unsigned long usecs, unsigned long lps)
{
/* compute (usecs * 2**64 / 10**6) * loops_per_sec / 2**64 */
usecs *= 0x000010c6f7a0b5edUL; /* 2**64 / 1000000 */
__asm__("umulh %1,%2,%0" :"=r" (usecs) :"r" (usecs),"r" (lps));
__delay(usecs);
}
/*
 * Delay for a small *constant* `usecs` (caller guarantees
 * usecs < 2**32), given loops-per-second `lps`.  Exposing both
 * multiplications to the compiler lets it fold the first one away
 * for constant arguments, and mulq is cheaper than umulh.
 *
 * The scale factor is 2**32 / 10**6 rounded UP (0x10c7, not the
 * truncated 0x10c6): a delay routine must wait at least as long as
 * requested, and rounding down made the loop count — and hence the
 * delay — come up short.  No overflow: usecs < 2**32 and
 * 0x10c7 * 2**32 < 2**45, well inside 64 bits.
 */
extern __inline__ void
__small_const_udelay(unsigned long usecs, unsigned long lps)
{
/* compute (usecs * 2**32 / 10**6) * loops_per_sec / 2**32 */
usecs *= 0x10c7; /* 2**32 / 10**6, rounded up */
usecs *= lps;
usecs >>= 32;
__delay(usecs);
}
/*
 * udelay(usecs): wait at least `usecs` microseconds.
 *
 * Small compile-time-constant arguments take the cheaper
 * __small_const_udelay() path; everything else goes through the
 * full 64-bit __udelay().  On SMP the calibration value comes from
 * the current CPU's cpu_data slot, on UP from the global
 * loops_per_sec.
 *
 * Every use of the macro parameter is parenthesized so that
 * expression arguments (e.g. `a + b` or `c ? x : y`) associate
 * correctly.  NOTE: `usecs` is still evaluated more than once —
 * do not pass expressions with side effects.
 */
#ifdef __SMP__
#define udelay(usecs) \
(__builtin_constant_p(usecs) && (usecs) < 0x100000000UL \
? __small_const_udelay((usecs), \
cpu_data[smp_processor_id()].loops_per_sec) \
: __udelay((usecs), \
cpu_data[smp_processor_id()].loops_per_sec))
#else
#define udelay(usecs) \
(__builtin_constant_p(usecs) && (usecs) < 0x100000000UL \
? __small_const_udelay((usecs), loops_per_sec) \
: __udelay((usecs), loops_per_sec))
#endif
#endif /* defined(__ALPHA_DELAY_H) */
|