#ifndef _M68K_STRING_H_
#define _M68K_STRING_H_

#include <linux/config.h>
#include <asm/page.h>

#define __HAVE_ARCH_STRCPY
extern inline char * strcpy(char * dest,const char *src)
{
  char *xdest = dest;

  __asm__ __volatile__
       ("1:\tmoveb %1@+,%0@+\n\t"
        "jne 1b"
	: "=a" (dest), "=a" (src)
        : "0" (dest), "1" (src) : "memory");
  return xdest;
}

#define __HAVE_ARCH_STRNCPY
extern inline char * strncpy(char *dest, const char *src, size_t n)
{
  char *xdest = dest;

  if (n == 0)
    return xdest;

  __asm__ __volatile__
       ("1:\tmoveb %1@+,%0@+\n\t"
	"jeq 2f\n\t"
        "subql #1,%2\n\t"
        "jne 1b\n\t"
        "2:"
        : "=a" (dest), "=a" (src), "=d" (n)
        : "0" (dest), "1" (src), "2" (n)
        : "memory");
  return xdest;
}

#define __HAVE_ARCH_STRCAT
extern inline char * strcat(char * dest, const char * src)
{
	char *tmp = dest;

	while (*dest)
		dest++;
	while ((*dest++ = *src++))
		;

	return tmp;
}

#define __HAVE_ARCH_STRNCAT
extern inline char * strncat(char *dest, const char *src, size_t count)
{
	char *tmp = dest;

	if (count) {
		while (*dest)
			dest++;
		while ((*dest++ = *src++)) {
			if (--count == 0) {
				*dest++='\0';
				break;
			}
		}
	}

	return tmp;
}

#define __HAVE_ARCH_STRCHR
extern inline char * strchr(const char * s, int c)
{
  const char ch = c;
  
  for(; *s != ch; ++s)
    if (*s == '\0')
      return( NULL );
  return( (char *) s);
}

#define __HAVE_ARCH_STRPBRK
extern inline char * strpbrk(const char * cs,const char * ct)
{
  const char *sc1,*sc2;
  
  for( sc1 = cs; *sc1 != '\0'; ++sc1)
    for( sc2 = ct; *sc2 != '\0'; ++sc2)
      if (*sc1 == *sc2)
	return((char *) sc1);
  return( NULL );
}

#define __HAVE_ARCH_STRSPN
extern inline size_t strspn(const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      for (a = accept; *a != '\0'; ++a)
        if (*p == *a)
          break;
      if (*a == '\0')
        return count;
      else
        ++count;
    }

  return count;
}

#define __HAVE_ARCH_STRTOK
extern inline char * strtok(char * s,const char * ct)
{
  char *sbegin, *send;
  
  sbegin  = s ? s : ___strtok;
  if (!sbegin) {
	  return NULL;
  }
  sbegin += strspn(sbegin,ct);
  if (*sbegin == '\0') {
    ___strtok = NULL;
    return( NULL );
  }
  send = strpbrk( sbegin, ct);
  if (send && *send != '\0')
    *send++ = '\0';
  ___strtok = send;
  return (sbegin);
}

/* strstr !! */

#define __HAVE_ARCH_STRLEN
extern inline size_t strlen(const char * s)
{
  const char *sc;
  for (sc = s; *sc != '\0'; ++sc) ;
  return(sc - s);
}

/* strnlen !! */

#define __HAVE_ARCH_STRCMP
extern inline int strcmp(const char * cs,const char * ct)
{
  char __res;

  __asm__
       ("1:\tmoveb %0@+,%2\n\t" /* get *cs */
        "cmpb %1@+,%2\n\t"      /* compare a byte */
        "jne  2f\n\t"           /* not equal, break out */
        "tstb %2\n\t"           /* at end of cs? */
        "jne  1b\n\t"           /* no, keep going */
        "jra  3f\n\t"		/* strings are equal */
        "2:\tsubb %1@-,%2\n\t"  /* *cs - *ct */
        "3:"
        : "=a" (cs), "=a" (ct), "=d" (__res)
        : "0" (cs), "1" (ct));
  return __res;
}

#define __HAVE_ARCH_STRNCMP
extern inline int strncmp(const char * cs,const char * ct,size_t count)
{
  char __res;

  if (!count)
    return 0;
  __asm__
       ("1:\tmovb %0@+,%3\n\t"          /* get *cs */
        "cmpb   %1@+,%3\n\t"            /* compare a byte */
        "jne    3f\n\t"                 /* not equal, break out */
        "tstb   %3\n\t"                 /* at end of cs? */
        "jeq    4f\n\t"                 /* yes, all done */
        "subql  #1,%2\n\t"              /* no, adjust count */
        "jne    1b\n\t"                 /* more to do, keep going */
        "2:\tmoveq #0,%3\n\t"           /* strings are equal */
        "jra    4f\n\t"
        "3:\tsubb %1@-,%3\n\t"          /* *cs - *ct */
        "4:"
        : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res)
        : "0" (cs), "1" (ct), "2" (count));
  return __res;
}

#define __HAVE_ARCH_MEMSET
/*
 * This is really ugly, but its highly optimizatiable by the
 * compiler and is meant as compensation for gcc's missing
 * __builtin_memset(). For the 680[23]0	it might be worth considering
 * the optimal number of misaligned writes compared to the number of
 * tests'n'branches needed to align the destination address. The
 * 680[46]0 doesn't really care due to their copy-back caches.
 *						10/09/96 - Jes Sorensen
 */
extern inline void * __memset_g(void * s, int c, size_t count)
{
  void *xs = s;
  size_t temp;

  if (!count)
    return xs;

  c &= 0xff;
  c |= c << 8;
  c |= c << 16;

  if (count < 36){
	  long *ls = s;

	  switch(count){
	  case 32: case 33: case 34: case 35:
		  *ls++ = c;
	  case 28: case 29: case 30: case 31:
		  *ls++ = c;
	  case 24: case 25: case 26: case 27:
		  *ls++ = c;
	  case 20: case 21: case 22: case 23:
		  *ls++ = c;
	  case 16: case 17: case 18: case 19:
		  *ls++ = c;
	  case 12: case 13: case 14: case 15:
		  *ls++ = c;
	  case 8: case 9: case 10: case 11:
		  *ls++ = c;
	  case 4: case 5: case 6: case 7:
		  *ls++ = c;
		  break;
	  default:
		  break;
	  }
	  s = ls;
	  if (count & 0x02){
		  short *ss = s;
		  *ss++ = c;
		  s = ss;
	  }
	  if (count & 0x01){
		  char *cs = s;
		  *cs++ = c;
		  s = cs;
	  }
	  return xs;
  }

  if ((long) s & 1)
    {
      char *cs = s;
      *cs++ = c;
      s = cs;
      count--;
    }
  if (count > 2 && (long) s & 2)
    {
      short *ss = s;
      *ss++ = c;
      s = ss;
      count -= 2;
    }
  temp = count >> 2;
  if (temp)
    {
      long *ls = s;
      temp--;
      do
	*ls++ = c;
      while (temp--);
      s = ls;
    }
  if (count & 2)
    {
      short *ss = s;
      *ss++ = c;
      s = ss;
    }
  if (count & 1)
    {
      char *cs = s;
      *cs = c;
    }
  return xs;
}

/*
 * __memset_page assumes that data is longword aligned. Most, if not
 * all, of these page sized memsets are performed on page aligned
 * areas, thus we do not need to check if the destination is longword
 * aligned. Of course we suffer a serious performance loss if this is
 * not the case but I think the risk of this ever happening is
 * extremely small. We spend a lot of time clearing pages in
 * get_empty_page() so I think it is worth it anyway. Besides, the
 * 680[46]0 do not really care about misaligned writes due to their
 * copy-back cache.
 *
 * The optimized case for the 680[46]0 is implemented using the move16
 * instruction. My tests showed that this implementation is 35-45%
 * faster than the original implementation using movel, the only
 * caveat is that the destination address must be 16-byte aligned.
 *                                            01/09/96 - Jes Sorensen
 */
extern inline void * __memset_page(void * s,int c,size_t count)
{
  unsigned long data, tmp;
  void *xs, *sp;

  xs = sp = s;

  c = c & 255;
  data = c | (c << 8);
  data |= data << 16;

#if defined(CONFIG_OPTIMIZE_040) || defined(CONFIG_OPTIMIZE_060)

  if (((unsigned long) s) & 0x0f)
	  memset(s, c, count);
  else{
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;

	  __asm__ __volatile__("1:\t"
			       "move16 %2@+,%0@+\n\t"
			       "subqw  #8,%2\n\t"
			       "subqw  #8,%2\n\t"
			       "dbra   %1,1b\n\t"
			       : "=a" (s), "=d" (tmp)
			       : "a" (sp), "0" (s), "1" ((count - 16) / 16 - 1)
			       );
  }

#else
  __asm__ __volatile__("1:\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "dbra  %1,1b\n\t"
		       : "=a" (s), "=d" (tmp)
		       : "d" (data), "0" (s), "1" (count / 32 - 1)
		       );
#endif

  return xs;
}

#define __memset_const(s,c,count) \
((count==PAGE_SIZE) ? \
  __memset_page((s),(c),(count)) : \
  __memset_g((s),(c),(count)))

#define memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __memset_const((s),(c),(count)) : \
 memset((s),(c),(count)))

#define __HAVE_ARCH_MEMCPY
/*
 * __builtin_memcpy() does not handle page-sized memcpys very well,
 * thus following the same assumptions as for page-sized memsets, this
 * function copies page-sized areas using an unrolled loop, without
 * considering alignment.
 *
 * For the 680[46]0 only kernels we use the move16 instruction instead
 * as it writes through the data-cache, invalidating the cache-lines
 * touched. In this way we do not use up the entire data-cache (well,
 * half of it on the 68060) by copying a page. An unrolled loop of two
 * move16 instructions seem to the fastest. The only caveat is that
 * both source and destination must be 16-byte aligned, if not we fall
 * back to the generic memcpy function.  - Jes
 */
extern inline void * __memcpy_page(void * to, const void * from, size_t count)
{
  unsigned long tmp;
  void *xto = to;

#if defined(CONFIG_OPTIMIZE_040) || defined(CONFIG_OPTIMIZE_060)

  if (((unsigned long) to | (unsigned long) from) & 0x0f)
	  return memcpy(to, from, count);

  __asm__ __volatile__("1:\t"
		       "move16 %1@+,%0@+\n\t"
		       "move16 %1@+,%0@+\n\t"
		       "dbra  %2,1b\n\t"
		       : "=a" (to), "=a" (from), "=d" (tmp)
		       : "0" (to), "1" (from) , "2" (count / 32 - 1)
		       );
#else
  __asm__ __volatile__("1:\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "dbra  %2,1b\n\t"
		       : "=a" (to), "=a" (from), "=d" (tmp)
		       : "0" (to), "1" (from) , "2" (count / 32 - 1)
		       );
#endif
  return xto;
}

#define __memcpy_const(to, from, n) \
((n==PAGE_SIZE) ? \
  __memcpy_page((to),(from),(n)) : \
  __builtin_memcpy((to),(from),(n)))

#define memcpy(to, from, n) \
(__builtin_constant_p(n) ? \
 __memcpy_const((to),(from),(n)) : \
 memcpy((to),(from),(n)))

#define __HAVE_ARCH_MEMMOVE
extern inline void * memmove(void * dest,const void * src, size_t n)
{
  void *xdest = dest;
  size_t temp;

  if (!n)
    return xdest;

  if (dest < src)
    {
      if ((long) dest & 1)
	{
	  char *cdest = dest;
	  const char *csrc = src;
	  *cdest++ = *csrc++;
	  dest = cdest;
	  src = csrc;
	  n--;
	}
      if (n > 2 && (long) dest & 2)
	{
	  short *sdest = dest;
	  const short *ssrc = src;
	  *sdest++ = *ssrc++;
	  dest = sdest;
	  src = ssrc;
	  n -= 2;
	}
      temp = n >> 2;
      if (temp)
	{
	  long *ldest = dest;
	  const long *lsrc = src;
	  temp--;
	  do
	    *ldest++ = *lsrc++;
	  while (temp--);
	  dest = ldest;
	  src = lsrc;
	}
      if (n & 2)
	{
	  short *sdest = dest;
	  const short *ssrc = src;
	  *sdest++ = *ssrc++;
	  dest = sdest;
	  src = ssrc;
	}
      if (n & 1)
	{
	  char *cdest = dest;
	  const char *csrc = src;
	  *cdest = *csrc;
	}
    }
  else
    {
      dest = (char *) dest + n;
      src = (const char *) src + n;
      if ((long) dest & 1)
	{
	  char *cdest = dest;
	  const char *csrc = src;
	  *--cdest = *--csrc;
	  dest = cdest;
	  src = csrc;
	  n--;
	}
      if (n > 2 && (long) dest & 2)
	{
	  short *sdest = dest;
	  const short *ssrc = src;
	  *--sdest = *--ssrc;
	  dest = sdest;
	  src = ssrc;
	  n -= 2;
	}
      temp = n >> 2;
      if (temp)
	{
	  long *ldest = dest;
	  const long *lsrc = src;
	  temp--;
	  do
	    *--ldest = *--lsrc;
	  while (temp--);
	  dest = ldest;
	  src = lsrc;
	}
      if (n & 2)
	{
	  short *sdest = dest;
	  const short *ssrc = src;
	  *--sdest = *--ssrc;
	  dest = sdest;
	  src = ssrc;
	}
      if (n & 1)
	{
	  char *cdest = dest;
	  const char *csrc = src;
	  *--cdest = *--csrc;
	}
    }
  return xdest;
}

#define __HAVE_ARCH_MEMCMP
#define memcmp(cs, ct, n) \
(__builtin_constant_p(n) ? \
 __builtin_memcmp((cs),(ct),(n)) : \
 memcmp((cs),(ct),(n)))

#endif /* _M68K_STRING_H_ */