1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
/*
* Copyright (c) 2011 embedded brains GmbH. All rights reserved.
*
* embedded brains GmbH
* Obere Lagerstr. 30
* 82178 Puchheim
* Germany
* <info@embedded-brains.de>
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
* http://www.rtems.com/license/LICENSE.
*
* $Id$
*/
#include <bspopts.h>
#include <rtems/powerpc/powerpc.h>
#if BSP_DATA_CACHE_ENABLED && PPC_CACHE_ALIGNMENT == 32
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <libcpu/powerpc-utility.h>
#define CACHE_LINE_SIZE 32
#define WORD_SIZE 4
#define WORD_MASK (WORD_SIZE - 1)
/* Returns true if and only if both pointers are word (4-byte) aligned. */
static bool aligned(const void *a, const void *b)
{
  /* OR the two addresses together: any low bit set in either one
   * means at least one pointer is misaligned. */
  uintptr_t combined = (uintptr_t) a | (uintptr_t) b;

  return (combined & WORD_MASK) == 0;
}
/*
 * Cache-line optimized memcpy for PowerPC with 32-byte data cache lines.
 *
 * Strategy: touch the first source line, then — when both pointers are
 * word aligned and at least one word must be copied — copy word-by-word
 * until the destination reaches a cache-line boundary, stream whole
 * cache lines using dcbz (allocate destination line without a read) and
 * dcbt (prefetch the next source line), copy remaining words, and
 * finish any tail bytes with a plain byte loop.
 *
 * NOTE(review): dcbz raises an alignment exception on cache-inhibited
 * or write-through memory — this routine assumes both buffers live in
 * normal write-back cacheable memory; confirm for the target BSP.
 *
 * The asm statements use pre-increment (lwzu/stwu) addressing, which is
 * why word_dst/word_src start one word *before* the data.
 *
 * Returns dst_ptr, as required by the C standard memcpy contract.
 * As with standard memcpy, the regions must not overlap.
 *
 * Fix over the previous revision: every asm statement reads *src and
 * writes *dst memory that is not listed as an operand, so each one
 * needs a "memory" clobber — without it the compiler may cache memory
 * values across the asm or reorder surrounding loads/stores.
 */
void *memcpy(void *dst_ptr, const void *src_ptr, size_t n)
{
  uint8_t *dst = dst_ptr;
  const uint8_t *src = src_ptr;

  /* Prefetch the first source cache line. */
  ppc_data_cache_block_touch(src);

  if (__builtin_expect(n >= WORD_SIZE && aligned(src, dst), 1)) {
    /* Bias pointers one word back for lwzu/stwu pre-increment. */
    uint32_t *word_dst = (uint32_t *) dst - 1;
    const uint32_t *word_src = (const uint32_t *) src - 1;

    /*
     * Only enter the cache-line loop when there is enough data to
     * align the destination (up to 7 words) and still copy at least
     * one full line: 2 * 32 - 4 = 60 bytes.
     */
    if (n >= 2 * CACHE_LINE_SIZE - WORD_SIZE) {
      /* Copy single words until the destination is line aligned. */
      while ((uintptr_t) (word_dst + 1) % CACHE_LINE_SIZE != 0) {
        uint32_t tmp;
        __asm__ volatile (
          "lwzu %[tmp], 0x4(%[src])\n"
          "stwu %[tmp], 0x4(%[dst])\n"
          : [src] "+b" (word_src),
            [dst] "+b" (word_dst),
            [tmp] "=&r" (tmp)
          :
          : "memory"
        );
        n -= WORD_SIZE;
      }

      /* Stream whole cache lines (8 words per iteration). */
      while (n >= CACHE_LINE_SIZE) {
        /* dst + 4 is the start of the destination line (pointers are
         * biased one word back); src + 36 is the *next* source line. */
        uint32_t dst_offset = 4;
        uint32_t src_offset = 32 + 4;
        uint32_t tmp0;
        uint32_t tmp1;
        uint32_t tmp2;
        uint32_t tmp3;
        __asm__ volatile (
          /* Allocate the destination line without reading it ... */
          "dcbz %[dst], %[dst_offset]\n"
          "lwz %[tmp0], 0x04(%[src])\n"
          /* ... and prefetch the next source line. */
          "dcbt %[src], %[src_offset]\n"
          "lwz %[tmp1], 0x08(%[src])\n"
          "lwz %[tmp2], 0x0c(%[src])\n"
          "lwz %[tmp3], 0x10(%[src])\n"
          "stw %[tmp0], 0x04(%[dst])\n"
          "stw %[tmp1], 0x08(%[dst])\n"
          "stw %[tmp2], 0x0c(%[dst])\n"
          "stw %[tmp3], 0x10(%[dst])\n"
          "lwz %[tmp0], 0x14(%[src])\n"
          "lwz %[tmp1], 0x18(%[src])\n"
          "lwz %[tmp2], 0x1c(%[src])\n"
          "lwzu %[tmp3], 0x20(%[src])\n"
          "stw %[tmp0], 0x14(%[dst])\n"
          "stw %[tmp1], 0x18(%[dst])\n"
          "stw %[tmp2], 0x1c(%[dst])\n"
          "stwu %[tmp3], 0x20(%[dst])\n"
          : [src] "+b" (word_src),
            [dst] "+b" (word_dst),
            [tmp0] "=&r" (tmp0),
            [tmp1] "=&r" (tmp1),
            [tmp2] "=&r" (tmp2),
            [tmp3] "=&r" (tmp3)
          : [src_offset] "r" (src_offset),
            [dst_offset] "r" (dst_offset)
          : "memory"
        );
        n -= CACHE_LINE_SIZE;
      }
    }

    /* Copy remaining whole words. */
    while (n >= WORD_SIZE) {
      uint32_t tmp;
      __asm__ volatile (
        "lwzu %[tmp], 0x4(%[src])\n"
        "stwu %[tmp], 0x4(%[dst])\n"
        : [src] "+b" (word_src),
          [dst] "+b" (word_dst),
          [tmp] "=&r" (tmp)
        :
        : "memory"
      );
      n -= WORD_SIZE;
    }

    /* Undo the one-word bias before the byte tail loop. */
    dst = (uint8_t *) word_dst + 4;
    src = (const uint8_t *) word_src + 4;
  }

  /* Byte-wise tail (also the whole copy for small or unaligned input). */
  while (n > 0) {
    *dst = *src;
    ++src;
    ++dst;
    --n;
  }

  return dst_ptr;
}
#endif /* BSP_DATA_CACHE_ENABLED && PPC_CACHE_ALIGNMENT == 32 */
|