在移植 u-boot 的过程中曾经看过 u-boot 重定位（relocation）时的实现，当时觉得很好理解，就没有对这个知识点做深入研究。最近在看华为鸿蒙 OS 在 Cortex-A 平台上的实现时再次遇到类似的代码，一时间却看不太懂了，所以花了点时间研究了一下，在这里做个记录；后续有时间再把 u-boot 的实现复盘一下以加深理解。具体的代码如下：
1 /*
2 * Copyright (c) 2013-2019, Huawei Technologies Co., Ltd. All rights reserved.
3 * Copyright (c) 2020, Huawei Device Co., Ltd. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this list of
9 * conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
12 * of conditions and the following disclaimer in the documentation and/or other materials
13 * provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
16 * to endorse or promote products derived from this software without specific prior written
17 * permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #define ASSEMBLY
33 #include "arch_config.h"
34 #include "los_vm_boot.h"
35 #include "los_vm_zone.h"
36 #include "los_mmu_descriptor_v6.h"
37 #undef ASSEMBLY
38
39
40 .global __exc_stack_top
41 .global __irq_stack_top
42 .global __fiq_stack_top
43 .global __svc_stack_top
44 .global __abt_stack_top
45 .global __undef_stack_top
46 .global __exc_stack
47 .global __irq_stack
48 .global __fiq_stack
49 .global __svc_stack
50 .global __abt_stack
51 .global __undef_stack
52
53 .extern __bss_start
54 .extern __bss_end
55 .extern hal_clock_initialize_start
56 .extern los_bss_init
57 .extern _osExceptFiqHdl
58 .extern _osExceptAddrAbortHdl
59 .extern _osExceptDataAbortHdl
60 .extern _osExceptPrefetchAbortHdl
61 .extern _osExceptSwiHdl
62 .extern _osExceptUndefInstrHdl
63 .extern __stack_chk_guard_setup
64 .extern g_firstPageTable
65 .extern g_mmuJumpPageTable
66
67 .equ MPIDR_CPUID_MASK, 0xffU
68
69 .fpu vfpv4
70 .arm
71
72 /*
 * EXC_SP_SET param0, param1
 * Loads the address param0 into r1 and the value param1 into r0, then calls
 * sp_set, which sets sp = r1 - r0 * r11 (r11 holds the cpu id).
 * The call sites in this file pass a __xxx_stack_top label as param0 and the
 * per-cpu stack size (as an immediate) as param1.
 * Clobbers r0-r3, lr and sp of the current processor mode.
 */
73 .macro EXC_SP_SET param0, param1
74 ldr r1, =\param0
75 mov r0, \param1
76 bl sp_set
77 .endm
78
79 /*
 * STACK_MAGIC_SET param0, param1, param2
 * Loads the address param0 into r0, the value param1 into r1 and the constant
 * param2 into r2, then calls excstack_magic, which stores r2 at r0,
 * r0 + r1, r0 + 2 * r1, ... CORE_NUM times.
 * The call sites in this file pass a __xxx_stack label (the start of the
 * reserved region) as param0, the per-cpu stack size as param1 and
 * OS_STACK_MAGIC_WORD as param2.
 * Clobbers r0-r3 and lr.
 */
80 .macro STACK_MAGIC_SET param0, param1, param2
81 ldr r0, =\param0
82 mov r1, \param1
83 ldr r2, =\param2
84 bl excstack_magic
85 .endm
86
87 /*
 * PAGE_TABLE_SET param0, param1, param2, param3
 * param0 is physical address, param1 virtual address, param2 is sizes, param3 is flag.
 * Loads the four values into r6, r7, r8 and r10 and calls page_table_build,
 * which writes the entries into the table whose base address is in r4
 * (r4 must be set by the caller and is left unchanged).
 * Clobbers r6-r10, r12 and lr.
 */
88 .macro PAGE_TABLE_SET param0, param1, param2, param3
89 ldr r6, =\param0
90 ldr r7, =\param1
91 ldr r8, =\param2
92 ldr r10, =\param3
93 bl page_table_build
94 .endm
95 .code 32
96 .section ".vectors","ax"
97
98 __exception_handlers:
99 /*
100 *Assumption: ROM code has these vectors at the hardware reset address.
101 *A simple jump removes any address-space dependencies [i.e. safer]
 *
 * Eight 4-byte branch instructions, one per exception vector slot.
 * warm_reset later writes the address of this table to CP15 c12, c0, 0.
102 */
103 b reset_vector /* +0x00: reset */
104 b _osExceptUndefInstrHdl /* +0x04: undefined instruction */
105 b _osExceptSwiHdl /* +0x08: software interrupt (svc) */
106 b _osExceptPrefetchAbortHdl /* +0x0c: prefetch abort */
107 b _osExceptDataAbortHdl /* +0x10: data abort */
108 b _osExceptAddrAbortHdl /* +0x14: address abort slot */
109 b OsIrqHandler /* +0x18: irq */
110 b _osExceptFiqHdl /* +0x1c: fiq */
111
112 /*
 * reset_vector: startup code, reached through slot 0 of __exception_handlers.
 * This first part
 *   1. clears the I-cache, D-cache and MMU enable bits of SCTLR,
 *   2. computes r11 = (run-time physical address) - (link/virtual address),
 *   3. copies the image down to SYS_MEM_BASE when it was loaded elsewhere and
 *      continues execution in the copy.
 * Register roles: r11 = pa/va delta, r4 = load base (copy source cursor),
 * r5 = SYS_MEM_BASE (copy destination cursor), r12 = load base - SYS_MEM_BASE,
 * r6 = end of the loaded image (load address of __bss_start), r0/r7 = scratch.
 * Annotations use C comments: in GNU as for ARM ';' separates statements and
 * does not start a comment.
 */
113 .global reset_vector
114 .type reset_vector,function
115 reset_vector:
116 /* do some early cpu setup: i/d cache disable, mmu disabled */
117 mrc p15, 0, r0, c1, c0, 0
118 bic r0, #(1<<12)
119 bic r0, #(1<<2 | 1<<0)
120 mcr p15, 0, r0, c1, c0, 0
121
122 /* r11: delta of physical address and virtual address */
123 adr r11, pa_va_offset /* r11 = run-time address of pa_va_offset; adr is pc-relative and the MMU is off, so this is a physical address */
124 /* adr only encodes the fixed distance between this instruction and the label, so the result follows wherever the image was loaded */
125 /* the word stored at pa_va_offset is ".word .", i.e. the label's own link-time (virtual) address, so r0 below is the virtual address */
126 ldr r0, [r11]
127 sub r11, r11, r0 /* r11 = physical address - link (virtual) address */
128
129 /* if we need to relocate to proper location or not */
130 adr r4, __exception_handlers /* r4: base of load address */
131 ldr r5, =SYS_MEM_BASE /* r5: base of physical address */
132 subs r12, r4, r5 /* r12: delta of load address and physical address */
133 beq reloc_img_to_bottom_done /* if we load image at the bottom of physical address */
134
135 /* we need to relocate image at the bottom of physical address */
136 ldr r7, =__exception_handlers /* r7: base of linked address (or vm address) */
137 ldr r6, =__bss_start /* r6: end of linked address (or vm address) */
138 sub r6, r7 /* r6: delta of linked address (or vm address) */
139 add r6, r4 /* r6: end of load address */
140
141 reloc_img_to_bottom_loop:
142 ldr r7, [r4], #4 /* copy one word from the load address ... */
143 str r7, [r5], #4 /* ... to SYS_MEM_BASE, both cursors advance by 4 */
144 cmp r4, r6
145 bne reloc_img_to_bottom_loop /* until the source cursor reaches the end of the loaded image */
146 sub pc, r12 /* pc reads as this instruction + 8, so execution continues at the copy of the "sub r11" below */
147 nop /* padding: skipped by the jump above */
148 sub r11, r11, r12 /* r11: eventual address offset */
149
150 reloc_img_to_bottom_done:
/*
 * Build the translation tables while still running at physical addresses.
 * In: r11 = physical address - link (virtual) address.
 *   1. Clear g_firstPageTable and fill it with the five static 1 MB-section
 *      mappings below; keep its TTBR0 value in r8 for mmu_setup.
 *   2. Clear the jump page table (its address is read from the variable
 *      g_mmuJumpPageTable) and map the 1 MB section that contains the current
 *      pc twice: once at index pa >> 20 and once at index va >> 20.
 *   3. Call mmu_setup with r4 = jump table, r8 = final TTBR0 value.
 * Clobbers r4-r10, r12 and lr.
 */
151 ldr r4, =g_firstPageTable /* r4: physical address of translation table and clear it */
152 add r4, r4, r11
153 bl page_table_clear
154
155 PAGE_TABLE_SET SYS_MEM_BASE, KERNEL_VMM_BASE, KERNEL_VMM_SIZE, MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
156 PAGE_TABLE_SET SYS_MEM_BASE, UNCACHED_VMM_BASE, UNCACHED_VMM_SIZE, MMU_INITIAL_MAP_STRONGLY_ORDERED
157 PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_DEVICE_BASE, PERIPH_DEVICE_SIZE, MMU_INITIAL_MAP_DEVICE
158 PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_CACHED_BASE, PERIPH_CACHED_SIZE, MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
159 PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_UNCACHED_BASE, PERIPH_UNCACHED_SIZE, MMU_INITIAL_MAP_STRONGLY_ORDERED
160
161 orr r8, r4, #MMU_TTBRx_FLAGS /* r8 = r4 and set cacheable attributes on translation walk */
162 ldr r4, =g_mmuJumpPageTable /* r4: jump pagetable vaddr */
163 add r4, r4, r11
164 ldr r4, [r4]
165 add r4, r4, r11 /* r4: jump pagetable paddr */
166 bl page_table_clear
167
168 /* build 1M section mapping, in order to jump va during turing on mmu:pa == pa, va == pa */
169 mov r6, pc
170 lsr r6, r6, #20 /* r6: pa l1 index */
171 lsl r7, r6, #20 /* r7: pa (MB aligned); a raw copy of pc would leave bits [19:18] in the table offset below */
172 ldr r10, =MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
173 add r12, r10, r6, lsl #20 /* r12: pa |flags */
174 str r12, [r4, r7, lsr #(20 - 2)] /* jumpTable[paIndex] = pt entry */
175 rsb r7, r11, r6, lsl #20 /* r7: va = pa - r11; NOTE(review): assumes r11 is a multiple of 1 MB - confirm */
176 str r12, [r4, r7, lsr #(20 - 2)] /* jumpTable[vaIndex] = pt entry */
177
178 bl mmu_setup /* set up the mmu */
179
180 /* get cpuid and keep it in r11 */
/* From here on r11 no longer holds the pa/va delta: it is overwritten with the low 8 bits of CP15 c0, c0, 5. */
181 mrc p15, 0, r11, c0, c0, 5
182 and r11, r11, #MPIDR_CPUID_MASK
183 cmp r11, #0
184 bne excstatck_loop_done /* only cpu 0 initialises the exception stacks */
185
186 excstatck_loop:
187 /* clear out the interrupt and exception stack and set magic num to check the overflow */
188 ldr r0, =__undef_stack /* r0: first label of the .int_stack section */
189 ldr r1, =__exc_stack_top /* r1: last label of the .int_stack section */
190 bl stack_init /* fill [r0, r1) with OS_STACK_INIT */
191
/* write OS_STACK_MAGIC_WORD at the start of every per-cpu slice of each stack region */
192 STACK_MAGIC_SET __undef_stack, #OS_EXC_UNDEF_STACK_SIZE, OS_STACK_MAGIC_WORD
193 STACK_MAGIC_SET __abt_stack, #OS_EXC_ABT_STACK_SIZE, OS_STACK_MAGIC_WORD
194 STACK_MAGIC_SET __irq_stack, #OS_EXC_IRQ_STACK_SIZE, OS_STACK_MAGIC_WORD
195 STACK_MAGIC_SET __fiq_stack, #OS_EXC_FIQ_STACK_SIZE, OS_STACK_MAGIC_WORD
196 STACK_MAGIC_SET __svc_stack, #OS_EXC_SVC_STACK_SIZE, OS_STACK_MAGIC_WORD
197 STACK_MAGIC_SET __exc_stack, #OS_EXC_STACK_SIZE, OS_STACK_MAGIC_WORD
198
199 excstatck_loop_done:
200 warm_reset:
/*
 * Per-cpu mode and stack setup. In: r11 = cpu id.
 * Switches through IRQ, UNDEF, ABT and FIQ mode (IRQ and FIQ masked) and gives
 * each mode its own stack pointer via EXC_SP_SET, then enters SVC mode, sets
 * the SVC stack, enables the FPU/NEON coprocessors, installs the vector base
 * and sends every cpu other than cpu 0 to cpu_start.
 * Also entered from reset_platform when A7SEM_HAL_ROM_MONITOR is defined.
 */
201 /* initialize interrupt/exception environments */
202 mov r0, #(CPSR_IRQ_DISABLE |CPSR_FIQ_DISABLE|CPSR_IRQ_MODE)
203 msr cpsr, r0
204 EXC_SP_SET __irq_stack_top, #OS_EXC_IRQ_STACK_SIZE
205
206 mov r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_UNDEF_MODE)
207 msr cpsr, r0
208 EXC_SP_SET __undef_stack_top, #OS_EXC_UNDEF_STACK_SIZE
209
210 mov r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_ABT_MODE)
211 msr cpsr, r0
212 EXC_SP_SET __abt_stack_top, #OS_EXC_ABT_STACK_SIZE
213
214 mov r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_FIQ_MODE)
215 msr cpsr, r0
216 EXC_SP_SET __fiq_stack_top, #OS_EXC_FIQ_STACK_SIZE
217
218 /* initialize CPSR (machine state register) */
219 mov r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_SVC_MODE)
220 msr cpsr, r0
221
222 /* Note: some functions in LIBGCC1 will cause a "restore from SPSR"!! */
223 msr spsr, r0
224
225 /* set svc stack, every cpu has OS_EXC_SVC_STACK_SIZE stack */
226 ldr r0, =__svc_stack_top
227 mov r2, #OS_EXC_SVC_STACK_SIZE
228 mul r2, r2, r11 /* r2 = stack size * cpu id */
229 sub r0, r0, r2 /* same formula as sp_set: top - size * cpu id */
230 mov sp, r0
231
232 /* enable fpu+neon */
233 MRC p15, 0, r0, c1, c1, 2 /* read CP15 c1, c1, 2 (NSACR) */
234 ORR r0, r0, #0xC00 /* set bits 10 and 11 (cp10/cp11) */
235 BIC r0, r0, #0xC000 /* clear bits 14 and 15 */
236 MCR p15, 0, r0, c1, c1, 2
237
238 LDR r0, =(0xF << 20) /* bits 20-23: cp10/cp11 access fields */
239 MCR p15, 0, r0, c1, c0, 2 /* write CP15 c1, c0, 2 (CPACR) */
240
241 MOV r3, #0x40000000 /* bit 30 of FPEXC */
242 VMSR FPEXC, r3
243
244 LDR r0, =__exception_handlers
245 MCR p15, 0, r0, c12, c0, 0 /* write the vector table address to CP15 c12, c0, 0 (VBAR) */
246
247 cmp r11, #0
248 bne cpu_start /* cpu 0 falls through to clear_bss, the others go to cpu_start */
249
250 clear_bss:
/* cpu 0 only: zero [__bss_start, __bss_end) one word at a time, run the optional hooks, then call main */
251 ldr r1, =__bss_start
252 ldr r2, =__bss_end
253 mov r0, #0
254
255 bss_loop:
256 cmp r1, r2
257 strlo r0, [r1], #4 /* store and advance only while r1 < r2 (unsigned) */
258 blo bss_loop
259
260 #if defined(LOSCFG_CC_STACKPROTECTOR_ALL) || \
261 defined(LOSCFG_CC_STACKPROTECTOR_STRONG) || \
262 defined(LOSCFG_CC_STACKPROTECTOR)
263 bl __stack_chk_guard_setup
264 #endif
265
266 #ifdef LOSCFG_GDB_DEBUG
267 /* GDB_START - generate a compiled break. This function will get GDB stubs started, with a proper environment */
268 bl GDB_START
269 .word 0xe7ffdeff
270 #endif
271
272 bl main
273
274 _start_hang:
275 b _start_hang /* spin forever if main returns */
276
276 mmu_setup:
/*
 * Turn on the MMU and caches and continue at virtual addresses.
 * In:  r4  = physical address of the temporary (jump) page table
 *      r8  = TTBR0 value for the final page table (prepared by the caller)
 *      r11 = physical address - virtual address
 *      lr  = return address as a physical address
 * Out: returns to lr - r11, i.e. the virtual address of the return site.
 * Clobbers r12 and lr. The statement order below is significant.
 */
277 mov r12, #0
278 mcr p15, 0, r12, c8, c7, 0 /* CP15 c8, c7, 0: invalidate the TLB */
279 isb
280
281 mcr p15, 0, r12, c2, c0, 2 /* CP15 c2, c0, 2 (TTBCR) = 0 */
282 isb
283
284 orr r12, r4, #MMU_TTBRx_FLAGS
285 mcr p15, 0, r12, c2, c0, 0 /* TTBR0 = temporary page table | attributes */
286 isb
287
288 mov r12, #0x7 /* 0b0111 */
289 mcr p15, 0, r12, c3, c0, 0 /* Set DACR with 0b0111: domain 0 = 0b11 (manager), domain 1 = 0b01 (client) */
290 isb
291
292 mrc p15, 0, r12, c1, c0, 0
293 bic r12, #(1 << 29 | 1 << 28) /* clear bits 29 and 28 (AFE, TRE) */
294 orr r12, #(1 << 0) /* set bit 0: MMU enable */
295 bic r12, #(1 << 1) /* clear bit 1 */
296 orr r12, #(1 << 2) /* set bit 2: data cache enable */
297 orr r12, #(1 << 12) /* set bit 12: instruction cache enable */
298 mcr p15, 0, r12, c1, c0, 0 /* Set SCTLR with r12: Turn on the MMU, I/D cache Disable TRE/AFE */
299 isb
300
301 ldr pc, =1f /* Convert to VA: the literal holds the link-time address of label 1 */
302 1:
303 mcr p15, 0, r8, c2, c0, 0 /* now running at the virtual address: switch TTBR0 to the final page table saved in r8 */
304 isb
305
306 mov r12, #0
307 mcr p15, 0, r12, c8, c7, 0 /* invalidate the TLB again after changing TTBR0 */
308 isb
309
310 sub lr, r11 /* adjust lr with delta of physical address and virtual address */
311 bx lr
313
314 .code 32
315
/*
 * reset_platform: software reset entry point; does not return.
 * With A7SEM_HAL_ROM_MONITOR defined it enters SVC mode with IRQ and FIQ
 * masked and branches to warm_reset; otherwise it jumps to address 0.
 */
316 .global reset_platform
317 .type reset_platform,function
318 reset_platform:
319 #ifdef A7SEM_HAL_ROM_MONITOR
320 /* initialize CPSR (machine state register) */
321 mov r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_SVC_MODE)
322 msr cpsr, r0
323 b warm_reset
324 #else
325 mov r0, #0
326 mov pc, r0 // Jump to reset vector (NOTE(review): presumes the reset vector is reachable at address 0 - confirm)
327 #endif
328 cpu_start:
/* entry for every cpu whose id is not 0 (branched to from warm_reset) */
329 bl secondary_cpu_start
330 b . /* spin here if secondary_cpu_start returns */
331
332
333
334 /*
335 * set sp for current cpu
336 * r1 is stack bottom, r0 is stack size, r11 hold cpu id
 * Computes sp = r1 - r0 * r11 for the current processor mode.
 * Callers in this file (EXC_SP_SET) pass a __xxx_stack_top label in r1.
 * Clobbers r2 and r3.
337 */
338 sp_set:
339 mul r3, r0, r11 /* r3 = stack size * cpu id */
340 sub r2, r1, r3 /* r2 = r1 - r3 */
341 mov sp, r2
342 bx lr /* set sp */
343
344 /*
 * Zero 0x1000 (4096) consecutive 32-bit entries starting at r4.
345 * r4: page table base address
346 * r5 and r6 will be used as variable
 * r4 is left unchanged; r5 (zero value) and r6 (entry index) are clobbered.
347 */
348 page_table_clear:
349 mov r5, #0
350 mov r6, #0
351 0:
352 str r5, [r4, r6, lsl #2] /* table[r6] = 0 */
353 add r6, #1
354 cmp r6, #0x1000 /* r6 < 4096 */
355 blt 0b
356 bx lr
357
358 /*
 * Write 1 MB section entries (physical section address | flags) into the
 * table at r4, starting at index (virtual address >> 20).
359 * r4: page table base address
360 * r6: physical address
361 * r7: virtual address
362 * r8: sizes
363 * r10: flags
364 * r9 and r12 will be used as variable
 * The number of entries is ((pa % 1 MB) + size + 1 MB - 1) >> 20.
 * r4 and r10 are preserved; r6-r9 and r12 are clobbered.
 * NOTE(review): if that count is 0 (size 0 and an MB-aligned pa) the
 * subs/bne loop does not stop at zero - callers must pass a non-zero size.
365 */
366 page_table_build:
367 mov r9, r6
368 bfc r9, #20, #12 /* r9: pa % MB */
369 add r8, r8, r9
370 add r8, r8, #(1 << 20)
371 sub r8, r8, #1
372 lsr r6, #20 /* r6 = physical address / MB */
373 lsr r7, #20 /* r7 = virtual address / MB */
374 lsr r8, #20 /* r8 = number of 1 MB sections to map (rounded up) */
375
376 page_table_build_loop:
377 orr r12, r10, r6, lsl #20 /* r12: flags | physAddr */
378 str r12, [r4, r7, lsl #2] /* gPgTable[l1Index] = physAddr | flags */
379 add r6, #1 /* next physical section */
380 add r7, #1 /* l1Index++ */
381 subs r8, #1 /* remaining sections-- */
382 bne page_table_build_loop
383 bx lr
384
385 /*
386 * init stack to initial value
387 * r0 is stack mem start, r1 is stack mem end
 * Fills memory from r0 upwards with OS_STACK_INIT, 32 bytes per iteration,
 * while r0 < r1 (unsigned address comparison, like bss_loop).
 * The body runs before the first comparison, so at least 32 bytes are always
 * written, and the fill runs past r1 when r1 - r0 is not a multiple of 32.
 * strd is used, so r0 is expected to be at least word aligned (the
 * .int_stack section in this file uses .align 3).
 * Clobbers r0, r2 and r3.
388 */
389 stack_init:
390 ldr r2, =OS_STACK_INIT
391 ldr r3, =OS_STACK_INIT
392 /* Main loop sets 32 bytes at a time. */
393 stack_init_loop:
394 .irp offset, #0, #8, #16, #24
395 strd r2, r3, [r0, \offset]
396 .endr
397 add r0, #32
398 cmp r0, r1
399 blo stack_init_loop /* addresses are unsigned: blo, not the signed blt */
400 bx lr
401
401 pa_va_offset:
402 .word . /* holds this location's own link-time address; reset_vector subtracts it from the run-time address of this word */
404
405 /*
406 * set magic num to stack top for all cpu
407 * r0 is stack top, r1 is stack size, r2 is magic num
408 */
409 excstack_magic:
410 mov r3, #0
411 excstack_magic_loop:
412 str r2, [r0]
413 add r0, r0, r1
414 add r3, r3, #1
415 cmp r3, #CORE_NUM
416 blt excstack_magic_loop
417 bx lr
418
419 /*
420 * 0xe51ff004 = "ldr pc, [pc, #-4]"
421 * next addr value will be the real booting addr
422 */
423 _bootaddr_setup:
424 mov r0, #0
425 ldr r1, =0xe51ff004
426 str r1, [r0]
427
428 add r0, r0, #4
429 ldr r1, =SYS_MEM_BASE
430 str r1, [r0]
431
432 dsb
433 isb
434
435 bx lr
436
436 init_done:
437 .long 0xDEADB00B /* constant marker word; not referenced elsewhere in the visible source */
439
440 .code 32
441 .data
442
442 init_flag:
443 .balign 4 /* NOTE(review): the label precedes the alignment directive, so it names the unpadded position */
444 .long 0 /* zero-initialised word; not referenced elsewhere in the visible source */
446
447 /*
448 * Temporary interrupt stack
449 */
450 .section ".int_stack", "wa", %nobits
451 .align 3
452
453 __undef_stack:
454 .space OS_EXC_UNDEF_STACK_SIZE * CORE_NUM
455 __undef_stack_top:
456
457 __abt_stack:
458 .space OS_EXC_ABT_STACK_SIZE * CORE_NUM
459 __abt_stack_top:
460
461 __irq_stack:
462 .space OS_EXC_IRQ_STACK_SIZE * CORE_NUM
463 __irq_stack_top:
464
465 __fiq_stack:
466 .space OS_EXC_FIQ_STACK_SIZE * CORE_NUM
467 __fiq_stack_top:
468
469 __svc_stack:
470 .space OS_EXC_SVC_STACK_SIZE * CORE_NUM
471 __svc_stack_top:
472
473 __exc_stack:
474 .space OS_EXC_STACK_SIZE * CORE_NUM
475 __exc_stack_top:
其中看不太明白的是复位操作里计算虚拟地址和物理地址之差的那部分实现，这里单独贴出来。
1 /* r11: delta of physical address and virtual address */
2 adr r11, pa_va_offset /* r11 = run-time address of pa_va_offset; adr is pc-relative and the MMU is off, so this is a physical address */
3 /* adr only encodes the fixed distance between this instruction and the label, so the result follows wherever the image was loaded */
4 /* the word stored at pa_va_offset is ".word .", i.e. the label's own link-time (virtual) address, so r0 below is the virtual address */
5 ldr r0, [r11]
6 sub r11, r11, r0 /* r11 = physical address - link (virtual) address */
这里主要是对 adr 这一句汇编理解得不够深入，详细了解之后才知道这是一条伪指令，汇编器在汇编时会把它转换成一条以 pc 为基址的 add 或 sub 指令（目标符号在该指令之后时用 add，在之前时用 sub；这里 pa_va_offset 在后面，所以实际是 add）。在程序链接完成后，pa_va_offset 的地址是固定的，这条伪指令自身的地址也是确定的，所以两者之间的偏移就是确定的——要记住的是：符号 pa_va_offset 的地址与 adr r11, pa_va_offset 这条语句之间的相对偏移是固定的。所以在汇编这条语句时实际上就已经知道两者的地址差 offset，这条伪指令在 Rn 不为 pc 时最后的效果就如同 add Rn, pc, #offset（或 sub Rn, pc, #offset）。所以这里 r11 保存的就是程序运行时 pa_va_offset 所在的物理地址。这一点比较绕，但是从硬件的行为考虑就很容易理解了——硬件启动之后从哪个物理地址开始运行，在具体的硬件启动模式确定后也就确定了，所以程序运行到这里时，PC 寄存器反映的就是这条伪指令真实的物理地址（严格地说，ARM 状态下读到的 pc 是当前指令地址加 8，汇编器在计算 offset 时已经把这一点考虑进去了），所以前面红色字体（即代码中的注释）的内容就能理解了。接下来的两句就得到了虚拟地址和物理地址的差。这里也比较费解，但是如果换一种说法就好理解多了——实际得到的是链接地址和物理地址的差。具体还要看 pa_va_offset 符号处的实现，下面贴出来：
1 pa_va_offset:
2 .word . /* "." is the current location counter: the word holds this label's own link-time address */
这简单的一句话其实就是在当前符号所链接的地址处放置一个 32 位的值，这个值是程序链接到这里时的链接地址（"." 表示当前位置计数器），如果熟悉链接脚本就很容易想到了。也就是说，在这个符号的地址处存放了这个符号自己的链接地址。还是有点绕，换个说法就是：在一个内存地址（链接地址）上存放了这块内存自己的链接地址。但是镜像实际被加载到哪里，程序自身是决定不了的，而这个字里保存的永远是链接时指定的地址，所以上面的代码得到的就是程序实际运行的物理地址与链接地址的差。再反过来说，如果把编译出来的可执行文件恰好放到了链接时指定的地址上，那么这个符号地址处的内容就等于该符号的地址，差值为 0。
之所以注释里说是虚拟地址和物理地址的差，是因为 OS 程序在链接时指定的链接地址，实际上就是开启 MMU 之后的虚拟地址，而镜像将来是要拷贝到 DRAM 中运行的。所以这里的虚拟地址实际上就是链接地址，必须如此，否则系统无法启动。最后这里还有一个知识点：链接地址和运行地址不同，为什么程序还能正常运行呢？这是因为这部分代码是位置无关（PIC）的，即不关心加载地址；也可以在编译时指定把部分代码编译为位置无关代码。相关内容可参考前面写的博客《编译过程中的链接地址和实际运行地址》。
手机扫一扫
移动阅读更方便
你可能感兴趣的文章