Tuesday, 20 June 2017

ARM Linux boot process analysis step 2



3 create a page table

At this time, some specific register values are shown as follows:

r8 = phys_offset        (physical address of the start of RAM, PHYS_OFFSET)

r9 = cpu id            (ID CPU obtained by CP15 co processor)

r10 = procinfo        (base address struct proc_info_list)

The page table is created by the function __create_page_tables.

Here we use the ARM L1 (first-level) page table, which is also called the section page table.

The L1 page table divides the 4 GB address space into 1 MB segments (sections), so it contains 4096 page table entries (section entries). Each page table entry is 32 bits (4 bytes), so the L1 page table occupies 4096 * 4 = 16 KB of memory.

 

 

Analysis of __create_page_tables function here:
         In arch/arm/kernel/head.S:

        152 /*

153  * Setup the initial page tables.  We only setup the barest

154  * amount which are required to get the kernel running, which

155  * generally means mapping in the kernel code.

156  *

157  * r8 = phys_offset, r9 = cpuid, r10 = procinfo

158  *

159  * Returns:

160  *  r0, r3, r5-r7 corrupted

161  *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)

162  */

163 __create_page_tables:

164         pgtbl   r4, r8                          @ page table address

165

166         /*

167          * Clear the swapper page table

168          */

169         mov     r0, r4

170         mov     r3, #0

171         add     r6, r0, #PG_DIR_SIZE

172 1:      str     r3, [r0], #4

173         str     r3, [r0], #4

174         str     r3, [r0], #4

175         str     r3, [r0], #4

176         teq     r0, r6

177         bne     1b

178

179 #ifdef CONFIG_ARM_LPAE

180         /*

181          * Build the PGD table (first level) to point to the PMD table. A PGD

182          * entry is 64-bit wide.

183          */

184         mov     r0, r4

185         add     r3, r4, #0x1000                 @ first PMD table address

186         orr     r3, r3, #3                      @ PGD block type

187         mov     r6, #4                          @ PTRS_PER_PGD

188         mov     r7, #1 << (55 - 32)             @ L_PGD_SWAPPER

189 1:

190 #ifdef CONFIG_CPU_ENDIAN_BE8

191         str     r7, [r0], #4                    @ set top PGD entry bits

192         str     r3, [r0], #4                    @ set bottom PGD entry bits

193 #else

194         str     r3, [r0], #4                    @ set bottom PGD entry bits

195         str     r7, [r0], #4                    @ set top PGD entry bits

196 #endif

197         add     r3, r3, #0x1000                 @ next PMD table

198         subs    r6, r6, #1

199         bne     1b

200

201         add     r4, r4, #0x1000                 @ point to the PMD tables

202 #ifdef CONFIG_CPU_ENDIAN_BE8

203         add     r4, r4, #4                      @ we only write the bottom word

204 #endif

205 #endif

206

207         ldr     r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

208

209         /*

210          * Create identity mapping to cater for __enable_mmu.

211          * This identity mapping will be removed by paging_init().

212          */

213         adr     r0, __turn_mmu_on_loc

214         ldmia   r0, {r3, r5, r6}

215         sub     r0, r0, r3                      @ virt->phys offset

216         add     r5, r5, r0                      @ phys __turn_mmu_on

217         add     r6, r6, r0                      @ phys __turn_mmu_on_end

218         mov     r5, r5, lsr #SECTION_SHIFT

219         mov     r6, r6, lsr #SECTION_SHIFT

220

221 1:      orr     r3, r7, r5, lsl #SECTION_SHIFT  @ flags + kernel base

222         str     r3, [r4, r5, lsl #PMD_ORDER]    @ identity mapping

223         cmp     r5, r6

224         addlo   r5, r5, #1                      @ next section

225         blo     1b

226

227         /*

228          * Map our RAM from the start to the end of the kernel .bss section.

229          */

230         add     r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)

231         ldr     r6, =(_end - 1)

232         orr     r3, r8, r7

233         add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)

234 1:      str     r3, [r0], #1 << PMD_ORDER

235         add     r3, r3, #1 << SECTION_SHIFT

236         cmp     r0, r6

237         bls     1b

238

239 #ifdef CONFIG_XIP_KERNEL

240         /*

241          * Map the kernel image separately as it is not located in RAM.

242          */

243 #define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)

244         mov     r3, pc

245         mov     r3, r3, lsr #SECTION_SHIFT

246         orr     r3, r7, r3, lsl #SECTION_SHIFT

247         add     r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)

248         str     r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!

249         ldr     r6, =(_edata_loc - 1)

250         add     r0, r0, #1 << PMD_ORDER

251         add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)

252 1:      cmp     r0, r6

253         add     r3, r3, #1 << SECTION_SHIFT

254         strls   r3, [r0], #1 << PMD_ORDER

255         bls     1b

256 #endif

257

258         /*

259          * Then map boot params address in r2 if specified.

260          * We map 2 sections in case the ATAGs/DTB crosses a section boundary.

261          */

262         mov     r0, r2, lsr #SECTION_SHIFT

263         movs    r0, r0, lsl #SECTION_SHIFT

264         subne   r3, r0, r8

265         addne   r3, r3, #PAGE_OFFSET

266         addne   r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)

267         orrne   r6, r7, r0

268         strne   r6, [r3], #1 << PMD_ORDER

269         addne   r6, r6, #1 << SECTION_SHIFT

270         strne   r6, [r3]

271

272 #if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)

273         sub     r4, r4, #4                      @ Fixup page table pointer

274                                                 @ for 64-bit descriptors

275 #endif

276

277 #ifdef CONFIG_DEBUG_LL

278 #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)

279         /*

280          * Map in IO space for serial debugging.

281          * This allows debug messages to be output

282          * via a serial console before paging_init.

283          */

284         addruart r7, r3, r0

285

286         mov     r3, r3, lsr #SECTION_SHIFT

287         mov     r3, r3, lsl #PMD_ORDER

288

289         add     r0, r4, r3

290         mov     r3, r7, lsr #SECTION_SHIFT

291         ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags

292         orr     r3, r7, r3, lsl #SECTION_SHIFT

293 #ifdef CONFIG_ARM_LPAE

294         mov     r7, #1 << (54 - 32)             @ XN

295 #ifdef CONFIG_CPU_ENDIAN_BE8

296         str     r7, [r0], #4

297         str     r3, [r0], #4

298 #else

299         str     r3, [r0], #4

300         str     r7, [r0], #4

301 #endif

302 #else

303         orr     r3, r3, #PMD_SECT_XN

304         str     r3, [r0], #4

305 #endif

306

307 #else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */

308         /* we don't need any serial debugging mappings */

309         ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags

310 #endif

311

312 #if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)

313         /*

314          * If we're using the NetWinder or CATS, we also need to map

315          * in the 16550-type serial port for the debug messages

316          */

317         add     r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)

318         orr     r3, r7, #0x7c000000

319         str     r3, [r0]

320 #endif

321 #ifdef CONFIG_ARCH_RPC

322         /*

323          * Map in screen at 0x02000000 & SCREEN2_BASE

324          * Similar reasons here - for debug.  This is

325          * only for Acorn RiscPC architectures.

326          */

327         add     r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)

328         orr     r3, r7, #0x02000000

329         str     r3, [r0]

330         add     r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)

331         str     r3, [r0]

332 #endif

333 #endif

334 #ifdef CONFIG_ARM_LPAE

335         sub     r4, r4, #0x1000         @ point to the PGD table

336         mov     r4, r4, lsr #ARCH_PGD_SHIFT

337 #endif

338         ret     lr

339 ENDPROC(__create_page_tables)

340         .ltorg

341         .align

// phys = 0x30000000, TEXT_OFFSET = 0x00008000

54         .macro  pgtbl, rd, phys

 55         add     \rd, \phys, #TEXT_OFFSET(0x00008000)

 56         sub     \rd, \rd, #PG_DIR_SIZE(0x4000)

 57         .endm

Lines 54 - 57 (the pgtbl macro): compute the base address of the 16K page table: phys + TEXT_OFFSET - PG_DIR_SIZE
Line 56: r4 = 0x30004000, so the page table base address is now in r4
Line 169: r0 = r4
Line 170: r3 is set to 0
Line 171: r6 (0x30008000) = page table base address + 16K, i.e. the end of the page table
Lines 172 - 177: a loop that fills the 16K page table, from r0 up to r6, with zeros (four words per iteration)
Line 207: load the __cpu_mm_mmu_flags value from proc_info_list and store it in r7.
213         adr     r0, __turn_mmu_on_loc

342 __turn_mmu_on_loc:
343         .long   .
344         .long   __turn_mmu_on
345         .long   __turn_mmu_on_end
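Line 213: adr is PC-relative and the MMU is still off, so r0 = the physical (run-time) address of __turn_mmu_on_loc. The first word that line 214 loads from it (r3, from ".long .") is its link-time virtual address (e.g. 0xC0008130), so line 215 leaves the virt->phys offset in r0.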
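(Note: the remaining line-by-line notes in this section appear to have been written for an older revision of head.S; their line numbers do not correspond to the listing above.)
Line 231: take the high 12 bits of the PC value (shift right by 20) to get the section number of the kernel, and store it in r6. Because the MMU is still off, the PC holds the physical address at which the kernel is running, so this is a physical section number
Line 232: r3 = r7 | (r6 << 20); flags + kernel base, giving the value to write into the page table entry
Line 233: write the page table entry: mem[r4 + r6 * 4] = r3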

        Because each page table entry is 32 bits (4 bytes), the section number has to be multiplied by 4 (<< 2).
The three lines above set up the page table entry for the first section of the kernel (the entry indexed by the kernel's physical address)
Lines 239, 240: TEXTADDR is the starting virtual address of the kernel (0xc0008000). These two lines set the page table entry for the kernel's starting virtual address (note that this entry is different from the one set by lines 231 - 233 above)
        After they execute, r0 points to the page table entry for the virtual address of the kernel's second section
        /* Note: these two lines look strange: why first take the high 8 bits of TEXTADDR (Bit[31:24], mask 0xff000000) and then the next 4 bits (Bit[23:20], mask 0x00f00000)? The likely reason is the ARM immediate encoding: an immediate operand is an 8-bit value rotated by an even amount, so the complete 12-bit-wide table offset cannot be encoded in a single add and is split into two immediates that each fit. */
Line 242: this line computes the size of the kernel image (in bytes).
        _end is defined on line 162 of vmlinux.lds.S and marks the end of the kernel (a virtual address):
        00158                .bss : {
        00159                __bss_start = .;        /* BSS                                */
        00160                *(.bss)
        00161                *(COMMON)
        00162                _end = .;
        00163        }
        Kernel size = _end - PAGE_OFFSET - 1. The 1 is subtracted because _end is the value of the location counter at that point, i.e. the address of the first byte after the kernel image
Line 243: shift the address right by 20 bits to work out how many sections the kernel occupies (that is, how many megabytes, since each section descriptor maps 1 MiB of virtual address space), and save the result in r6
Lines 245 - 248: a loop that fills in the page table entries for the virtual addresses of all of the kernel's sections
Line 253: set r0 to the page table entry for the virtual address of the first megabyte of RAM
Line 254: r7 holds the MMU flags; OR them with the physical start address of RAM to get the page table entry value for the first megabyte of RAM
Line 255: write the page table entry for the virtual address of the first megabyte of RAM
These three lines set up the page table entry for the virtual address of the first megabyte of RAM. The reason for mapping it is that the boot params may be stored in the first megabyte of RAM
Thus, the basic page table the kernel needs has been set up, as shown below:


/*
 * Setup the initial page tables.  We only setup the barest
 * amount which are required to get the kernel running, which
 * generally means mapping in the kernel code.
 *
 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
 *
 * Returns:
 *  r0, r3, r5-r7 corrupted
 *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
 */

 r8=PHYS_OFFSET =0x20000000
 TEXT_OFFSET    = 0x00008000

 #define PG_DIR_SIZE     0x4000
 #define PMD_ORDER       2

 @ pgtbl rd, phys: compute the address of the initial page table.
 @   \phys - register holding the physical offset the table is placed relative to
 @   \rd   - receives \phys + TEXT_OFFSET - PG_DIR_SIZE
 @ With the example values above (0x20000000, 0x00008000, 0x4000) this
 @ yields 0x20004000.
 .macro  pgtbl, rd, phys
        add     \rd, \phys, #TEXT_OFFSET
        sub     \rd, \rd, #PG_DIR_SIZE
 .endm

 r4 = (0x20000000 + #TEXT_OFFSET(0x00008000)) =0x20008000
 r4 = (0x20008000) - (PG_DIR_SIZE)(0x4000)    = 0x20004000

/*
 * __create_page_tables - build the initial (boot-time) page tables.
 *
 * In:   r2  = boot params (ATAGs/DTB) address, or 0 if none
 *       r8  = phys_offset
 *       r10 = procinfo (PROCINFO_MM_MMUFLAGS / PROCINFO_IO_MMUFLAGS are
 *             read from it)
 * Out:  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
 * Clobbers: r0, r3, r5-r7
 *
 * Steps: clear the table, identity-map __turn_mmu_on..__turn_mmu_on_end,
 * map RAM from PAGE_OFFSET up to _end, optionally map the XIP image, the
 * boot params and the debug I/O regions.
 *
 * The numeric values in the comments below are the worked example used in
 * this article (PHYS_OFFSET = 0x20000000, TEXT_OFFSET = 0x00008000).
 */
__create_page_tables:
        pgtbl   r4, r8                          @ page table address

        /*
         * Clear the swapper page table
         */
        mov     r0, r4                          @ r0 = r4 = 0x20004000
        mov     r3, #0                          @ r3 = 0
        add     r6, r0, #PG_DIR_SIZE            @ r6 = r0 + PG_DIR_SIZE = 0x20008000
1:      str     r3, [r0], #4                    @ four words are cleared per iteration
        str     r3, [r0], #4
        str     r3, [r0], #4
        str     r3, [r0], #4
        teq     r0, r6                          @ reached the end of the table?
        bne     1b                              @ no: keep clearing

#ifdef CONFIG_ARM_LPAE
        /*
         * Build the PGD table (first level) to point to the PMD table. A PGD
         * entry is 64-bit wide.
         */
        mov     r0, r4
        add     r3, r4, #0x1000                 @ first PMD table address
        orr     r3, r3, #3                      @ PGD block type
        mov     r6, #4                          @ PTRS_PER_PGD
        mov     r7, #1 << (55 - 32)             @ L_PGD_SWAPPER
1:
#ifdef CONFIG_CPU_ENDIAN_BE8
        str     r7, [r0], #4                    @ set top PGD entry bits
        str     r3, [r0], #4                    @ set bottom PGD entry bits
#else
        str     r3, [r0], #4                    @ set bottom PGD entry bits
        str     r7, [r0], #4                    @ set top PGD entry bits
#endif
        add     r3, r3, #0x1000                 @ next PMD table
        subs    r6, r6, #1
        bne     1b

        add     r4, r4, #0x1000                 @ point to the PMD tables
#ifdef CONFIG_CPU_ENDIAN_BE8
        add     r4, r4, #4                      @ we only write the bottom word
#endif
#endif

        ldr     r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

        /*
         * Create identity mapping to cater for __enable_mmu.
         * This identity mapping will be removed by paging_init().
         */
        adr     r0, __turn_mmu_on_loc           @ PC-relative: r0 = run-time address of the table
        ldmia   r0, {r3, r5, r6}                @ r3 = link-time address of the table (e.g. 0xc0008130),
                                                @ r5 = __turn_mmu_on, r6 = __turn_mmu_on_end
        sub     r0, r0, r3                      @ virt->phys offset
        add     r5, r5, r0                      @ phys __turn_mmu_on
        add     r6, r6, r0                      @ phys __turn_mmu_on_end
        mov     r5, r5, lsr #SECTION_SHIFT      @ r5 = first section number
        mov     r6, r6, lsr #SECTION_SHIFT      @ r6 = last section number

1:      orr     r3, r7, r5, lsl #SECTION_SHIFT  @ flags + kernel base
        str     r3, [r4, r5, lsl #PMD_ORDER]    @ identity mapping
        cmp     r5, r6
        addlo   r5, r5, #1                      @ next section
        blo     1b

        /*
         * Map our RAM from the start to the end of the kernel .bss section.
         */
        add     r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
        ldr     r6, =(_end - 1)
        orr     r3, r8, r7                      @ first entry: phys_offset + flags
        add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1:      str     r3, [r0], #1 << PMD_ORDER
        add     r3, r3, #1 << SECTION_SHIFT     @ next section's physical base
        cmp     r0, r6
        bls     1b

#ifdef CONFIG_XIP_KERNEL
        /*
         * Map the kernel image separately as it is not located in RAM.
         */
#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
        mov     r3, pc
        mov     r3, r3, lsr #SECTION_SHIFT
        orr     r3, r7, r3, lsl #SECTION_SHIFT
        add     r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
        str     r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
        ldr     r6, =(_edata_loc - 1)
        add     r0, r0, #1 << PMD_ORDER
        add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1:      cmp     r0, r6
        add     r3, r3, #1 << SECTION_SHIFT
        strls   r3, [r0], #1 << PMD_ORDER
        bls     1b
#endif

        /*
         * Then map boot params address in r2 if specified.
         * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
         */
        mov     r0, r2, lsr #SECTION_SHIFT
        movs    r0, r0, lsl #SECTION_SHIFT      @ section base of r2; Z set if it is 0
        subne   r3, r0, r8
        addne   r3, r3, #PAGE_OFFSET
        addne   r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
        orrne   r6, r7, r0
        strne   r6, [r3], #1 << PMD_ORDER
        addne   r6, r6, #1 << SECTION_SHIFT
        strne   r6, [r3]

#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
        sub     r4, r4, #4                      @ Fixup page table pointer
                                                @ for 64-bit descriptors
#endif

#ifdef CONFIG_DEBUG_LL
#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
        /*
         * Map in IO space for serial debugging.
         * This allows debug messages to be output
         * via a serial console before paging_init.
         */
        addruart r7, r3, r0

        mov     r3, r3, lsr #SECTION_SHIFT
        mov     r3, r3, lsl #PMD_ORDER

        add     r0, r4, r3
        mov     r3, r7, lsr #SECTION_SHIFT
        ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
        orr     r3, r7, r3, lsl #SECTION_SHIFT
#ifdef CONFIG_ARM_LPAE
        mov     r7, #1 << (54 - 32)             @ XN
#ifdef CONFIG_CPU_ENDIAN_BE8
        str     r7, [r0], #4
        str     r3, [r0], #4
#else
        str     r3, [r0], #4
        str     r7, [r0], #4
#endif
#else
        orr     r3, r3, #PMD_SECT_XN
        str     r3, [r0], #4
#endif

#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
        /* we don't need any serial debugging mappings */
        ldr     r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
#endif

#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
        /*
         * If we're using the NetWinder or CATS, we also need to map
         * in the 16550-type serial port for the debug messages
         */
        add     r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
        orr     r3, r7, #0x7c000000
        str     r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
        /*
         * Map in screen at 0x02000000 & SCREEN2_BASE
         * Similar reasons here - for debug.  This is
         * only for Acorn RiscPC architectures.
         */
        add     r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
        orr     r3, r7, #0x02000000
        str     r3, [r0]
        add     r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
        str     r3, [r0]
#endif
#endif
#ifdef CONFIG_ARM_LPAE
        sub     r4, r4, #0x1000         @ point to the PGD table
        mov     r4, r4, lsr #ARCH_PGD_SHIFT
#endif
        ret     lr
ENDPROC(__create_page_tables)
        .ltorg
        .align
@ Three link-time addresses that __create_page_tables loads with ldmia
@ (into r3, r5, r6) to build the identity mapping.
__turn_mmu_on_loc:
        .long   .                       @ address of this word itself
        .long   __turn_mmu_on           @ start of the range to identity-map
        .long   __turn_mmu_on_end       @ end of the range to identity-map
                               
                               
/*
 * Hardware-wise, we have a two level page table structure, where the first
 * level has 4096 entries, and the second level has 256 entries.  Each entry
 * is one 32-bit word.  Most of the bits in the second level entry are used
 * by hardware, and there aren't any "accessed" and "dirty" bits.
 *
 * Linux on the other hand has a three level page table structure, which can
 * be wrapped to fit a two level page table structure easily - using the PGD
 * and PTE only.  However, Linux also expects one "PTE" table per page, and
 * at least a "dirty" bit.
 *
 * Therefore, we tweak the implementation slightly - we tell Linux that we
 * have 2048 entries in the first level, each of which is 8 bytes (iow, two
 * hardware pointers to the second level.)  The second level contains two
 * hardware PTE tables arranged contiguously, preceded by Linux versions
 * which contain the state information Linux needs.  We, therefore, end up
 * with 512 entries in the "PTE" level.
 *
 * This leads to the page tables having the following layout:
 *
 *    pgd             pte
 * |        |
 * +--------+
 * |        |       +------------+ +0
 * +- - - - +       | Linux pt 0 |
 * |        |       +------------+ +1024
 * +--------+ +0    | Linux pt 1 |
 * |        |-----> +------------+ +2048
 * +- - - - + +4    |  h/w pt 0  |
 * |        |-----> +------------+ +3072
 * +--------+ +8    |  h/w pt 1  |
 * |        |       +------------+ +4096
 *
 * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
 * PTE_xxx for definitions of bits appearing in the "h/w pt".
 *
 * PMD_xxx definitions refer to bits in the first level page table.
 *
 * The "dirty" bit is emulated by only granting hardware write permission
 * iff the page is marked "writable" and "dirty" in the Linux PTE.  This
 * means that a write to a clean page will cause a permission fault, and
 * the Linux MM layer will mark the page dirty via handle_pte_fault().
 * For the hardware to notice the permission change, the TLB entry must
 * be flushed, and ptep_set_access_flags() does that for us.
 *
 * The "accessed" or "young" bit is emulated by a similar method; we only
 * allow accesses to the page if the "young" bit is set.  Accesses to the
 * page will cause a fault, and handle_pte_fault() will set the young bit
 * for us as long as the page is marked present in the corresponding Linux
 * PTE entry.  Again, ptep_set_access_flags() will ensure that the TLB is
 * up to date.
 *
 * However, when the "young" bit is cleared, we deny access to the page
 * by clearing the hardware PTE.  Currently Linux does not flush the TLB
 * for us in this case, which means the TLB will retain the transation
 * until either the TLB entry is evicted under pressure, or a context
 * switch which changes the user space mapping occurs.
 */
/*
 * Table sizes as presented to Linux (see the layout comment above):
 * 512 entries at the "PTE" level, a single-entry PMD level, and
 * 2048 entries at the first (PGD) level.
 */
#define PTRS_PER_PTE                 512
#define PTRS_PER_PMD                               1
#define PTRS_PER_PGD                2048

/*
 * The hardware PTE tables follow the Linux PTE tables in the same page:
 * PTE_HWTABLE_OFF is the byte offset of the hardware tables, and
 * PTE_HWTABLE_SIZE is their size in bytes.
 */
#define PTE_HWTABLE_PTRS     (PTRS_PER_PTE)
#define PTE_HWTABLE_OFF                       (PTE_HWTABLE_PTRS * sizeof(pte_t))
#define PTE_HWTABLE_SIZE       (PTRS_PER_PTE * sizeof(u32))

/*
 * PMD_SHIFT determines the size of the area a second-level page table can map
 * PGDIR_SHIFT determines what a third-level page table entry can map
 *
 * Both are 21, so PMD_SIZE and PGDIR_SIZE are each 1UL << 21 (2 MiB),
 * i.e. two 1 MiB sections (SECTION_SHIFT is 20).
 */
#define PMD_SHIFT                        21
#define PGDIR_SHIFT                     21

#define PMD_SIZE                           (1UL << PMD_SHIFT)
#define PMD_MASK                       (~(PMD_SIZE-1))
#define PGDIR_SIZE                        (1UL << PGDIR_SHIFT)
#define PGDIR_MASK                    (~(PGDIR_SIZE-1))

/*
 * section address mask and size definitions.
 * SECTION_SIZE is 1UL << 20 (1 MiB); SECTION_MASK clears the offset bits
 * within a section.
 */
#define SECTION_SHIFT                20
#define SECTION_SIZE                   (1UL << SECTION_SHIFT)
#define SECTION_MASK                (~(SECTION_SIZE-1))

When __create_page_tables returns
At this time, some specific register values are shown as follows:
r4 = pgtbl              (physical base address page table)
r8 = phys_offset        (physical address of the start of RAM, PHYS_OFFSET)
r9 = cpu id             (ID CPU obtained by CP15 co processor)
r10 = procinfo          (base address struct proc_info_list)
Before turning on the MMU we need to do some necessary work: invalidate the I-cache, invalidate the D-cache, drain the write buffer, and invalidate the TLB.
This is generally done through the CP15 coprocessor and is platform dependent. This is what __cpu_flush is for.

4. Turning on the MMU

The MMU is turned on by the __enable_mmu function.
        On entry to __enable_mmu, the configuration value for the c1 control register is already held in r0 (it was set up in the previous step), but the MMU has not actually been turned on yet.
        __enable_mmu is where we turn the MMU on.
        At this time, some specific register values are shown as follows:
       
   r0  = cp#15 control register
   r1  = machine ID
   r2  = atags or dtb pointer
   r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
   r9  = processor ID
   r13 = *virtual* address to jump to upon completion



In arch/arm/kernel/head.S:

130         /*
131          * The following calls CPU specific code in a position independent
132          * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
133          * xxx_proc_info structure selected by __lookup_processor_type
134          * above.  On return, the CPU will be ready for the MMU to be
135          * turned on, and r0 will hold the CPU control register value.
136          */
137         ldr     r13, =__mmap_switched           @ address to jump to after
138                                                 @ mmu has been enabled
139         adr     lr, BSYM(1f)                    @ return (PIC) address
140         mov     r8, r4                          @ set TTBR1 to swapper_pg_dir
141  ARM(   add     pc, r10, #PROCINFO_INITFUNC     )
142  THUMB( add     r12, r10, #PROCINFO_INITFUNC    )
143  THUMB( ret     r12                             )
144 1:      b       __enable_mmu
145 ENDPROC(stext)

137 row: Load __mmap_switched address into r13 register.
139 row: Load the 1 label forward address into link register.

In arch/arm/kernel/head.S:
409 /*
410  * Setup common bits before finally enabling the MMU.  Essentially
411  * this is just loading the page table pointer and domain access
412  * registers.
413  *
414  *  r0  = cp#15 control register
415  *  r1  = machine ID
416  *  r2  = atags or dtb pointer
417  *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
418  *  r9  = processor ID
419  *  r13 = *virtual* address to jump to upon completion
420  */
421 __enable_mmu:
422 #if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
423         orr     r0, r0, #CR_A
424 #else
425         bic     r0, r0, #CR_A
426 #endif
427 #ifdef CONFIG_CPU_DCACHE_DISABLE
428         bic     r0, r0, #CR_C
429 #endif
430 #ifdef CONFIG_CPU_BPREDICT_DISABLE
431         bic     r0, r0, #CR_Z
432 #endif
433 #ifdef CONFIG_CPU_ICACHE_DISABLE
434         bic     r0, r0, #CR_I
435 #endif
436 #ifndef CONFIG_ARM_LPAE
437         mov     r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
438                       domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
439                       domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
440                       domain_val(DOMAIN_IO, DOMAIN_CLIENT))
441         mcr     p15, 0, r5, c3, c0, 0           @ load domain access register
442         mcr     p15, 0, r4, c2, c0, 0           @ load page table pointer
443 #endif
444         b       __turn_mmu_on
445 ENDPROC(__enable_mmu)
446
447 /*
448  * Enable the MMU.  This completely changes the structure of the visible
449  * memory space.  You will not be able to trace execution through this.
450  * If you have an enquiry about this, *please* check the linux-arm-kernel
451  * mailing list archives BEFORE sending another post to the list.
452  *
453  *  r0  = cp#15 control register
454  *  r1  = machine ID
455  *  r2  = atags or dtb pointer
456  *  r9  = processor ID
457  *  r13 = *virtual* address to jump to upon completion
458  *
459  * other registers depend on the function called upon completion
460  */
461         .align  5
462         .pushsection    .idmap.text, "ax"
463 ENTRY(__turn_mmu_on)
464         mov     r0, r0
465         instr_sync
466         mcr     p15, 0, r0, c1, c0, 0           @ write control reg
467         mrc     p15, 0, r3, c0, c0, 0           @ read id reg
468         instr_sync
469         mov     r3, r3
470         mov     r3, r13
471         ret     r3
472 __turn_mmu_on_end:
473 ENDPROC(__turn_mmu_on)
474         .popsection

Lines 421 and 445: the start (label) and end (ENDPROC) of the __enable_mmu function
Lines 422 - 435: depending on the kernel configuration, set or clear the corresponding bits in r0 (r0 will be used to configure the c1 control register)
Lines 437 - 440: build the domain access value in r5 (r5 will be used to configure the domains)
Line 441: configure the domains (see the ARM architecture manual for details)
Line 442: set the location of the page table in memory (set TTB). The base address of the page table is in r4, and it is set by writing r4 to CP15 register c2
Line 444: jump to __turn_mmu_on. From the name, we can guess that this is where the MMU is really turned on
(Reading on, we find that __turn_mmu_on is the code directly below, so why jump to it? There is a reason for this; read on.)

Lines 446 - 460: blank lines and comments. From the comment we can see that r0 holds the value for the CP15 control register, and r13 holds the virtual address to jump to on completion (a virtual address, because the MMU is on by then)
Line 461: .align 5 aligns to 2^5 = 32 bytes; this is the cache line alignment. The next code line is __turn_mmu_on.
Lines 463 - 471: the body of __turn_mmu_on. Here we can see that __turn_mmu_on directly follows the branch instruction on line 444; the only thing in between is the cache line alignment on line 461
       The reason for this is that the instructions that actually turn on the MMU come next, and we want them to sit within a single cache line. As mentioned earlier under the "start conditions", the I-cache may be either on or off; the arrangement here ensures that the operation that turns on the MMU also executes correctly when the I-cache is on.
Line 464: this is a null operation, equivalent to a NOP. On ARM, a NOP is often written as mov Rd, Rd

Note: I thought for a long time about why there has to be a NOP here. The following is my guess and might not be correct:
        From setting the page table base address (set TTB) to the instruction that turns on the MMU (line 466), the sequence of instructions is:
        Set TTB
        Branch
        NOP
        Enable MMU
        For an ARM five-stage pipeline: fetch - decode - execute - memory - write
        Their execution is shown in the figure below:

      One thing needs explaining: the branch operation completes in 3 cycles, and causes the pipeline to fetch again
        From this figure we can see that when the enable-MMU instruction is being fetched, set TTB has just completed
Line 466: write r0 to the CP15 c1 control register. This is the operation that turns on the MMU; it also enables the caches (depending on the bits configured in r0)
Line 467: read the ID register
Lines 468 - 469: instr_sync followed by a NOP (mov r3, r3)
Lines 470 - 471: copy r13 to r3 and return to it. As we have seen, r13 holds the address of __mmap_switched (loaded on line 137 of arch/arm/kernel/head.S), so execution now jumps to __mmap_switched
The NOPs on lines 468 - 469 are very important: the MMU enable on line 466 only takes effect about 3 cycles later, which is a consequence of the ARM pipeline
Thus, the operation that turns on the MMU is followed by two NOP operations

5 Switching Data

71 /*
 72  * The following fragment of code is executed with the MMU on in MMU mode,
 73  * and uses absolute addresses; this is not position independent.
 74  *
 75  *  r0  = cp#15 control register
 76  *  r1  = machine ID
 77  *  r2  = atags/dtb pointer
 78  *  r9  = processor ID
 79  */
 80         __INIT
 81 __mmap_switched:
 82         adr     r3, __mmap_switched_data
 83
 84         ldmia   r3!, {r4, r5, r6, r7}
 85         cmp     r4, r5                          @ Copy data segment if needed
 86 1:      cmpne   r5, r6
 87         ldrne   fp, [r4], #4
 88         strne   fp, [r5], #4
 89         bne     1b
 90
 91         mov     fp, #0                          @ Clear BSS (and zero fp)
 92 1:      cmp     r6, r7
 93         strcc   fp, [r6],#4
 94         bcc     1b
 95
 96  ARM(   ldmia   r3, {r4, r5, r6, r7, sp})
 97  THUMB( ldmia   r3, {r4, r5, r6, r7}    )
 98  THUMB( ldr     sp, [r3, #16]           )
 99         str     r9, [r4]                        @ Save processor ID
100         str     r1, [r5]                        @ Save machine type
101         str     r2, [r6]                        @ Save atags pointer
102         cmp     r7, #0
103         strne   r0, [r7]                        @ Save control register values
104         b       start_kernel
105 ENDPROC(__mmap_switched)
106
107         .align  2
108         .type   __mmap_switched_data, %object
109 __mmap_switched_data:
110         .long   __data_loc                      @ r4
111         .long   _sdata                          @ r5
112         .long   __bss_start                     @ r6
113         .long   _end                            @ r7
114         .long   processor_id                    @ r4
115         .long   __machine_arch_type             @ r5
116         .long   __atags_pointer                 @ r6
117 #ifdef CONFIG_CPU_CP15
118         .long   cr_alignment                    @ r7
119 #else
120         .long   0                               @ r7
121 #endif
122         .long   init_thread_union + THREAD_START_SP @ sp
123         .size   __mmap_switched_data, . - __mmap_switched_data
124

Lines 80, 81: the function declaration (__INIT and the __mmap_switched label)
Lines 109 - 122: define some addresses (the __mmap_switched_data table); for example, line 110 stores the address of __data_loc, line 111 stores the address of _sdata, and so on
Lines 81 - 105: the function __mmap_switched
Line 82: load the address of __mmap_switched_data into r3. As you can see, this is the address of the table whose first entry is on line 110
Line 84: the four addresses stored on lines 110 to 113 are loaded into r4, r5, r6 and r7, and r3 is advanced past them (write-back). After this instruction has executed, r3 points to the entry on line 114
        Comparing with the listing above, we know that:
                r4 - __data_loc
                r5 - _sdata
                r6 - __bss_start
                r7 - _end
These symbols are defined in arch/arm/kernel/vmlinux.lds.S:
{
233 #ifdef CONFIG_XIP_KERNEL
234         __data_loc = ALIGN(4);          /* location in binary */
235         . = PAGE_OFFSET + TEXT_OFFSET;
236 #else
237         __init_end = .;
238 #ifdef CONFIG_ARM_KERNMEM_PERMS
239         . = ALIGN(1<<SECTION_SHIFT);
240 #else
241         . = ALIGN(THREAD_SIZE);
242 #endif
243         __data_loc = .;
244 #endif
245
246         .data : AT(__data_loc) {
247                 _data = .;              /* address in memory */
248                 _sdata = .;
249
250                 /*
251                  * first, the init task union, aligned
252                  * to an 8192 byte boundary.
253                  */
254                 INIT_TASK_DATA(THREAD_SIZE)
255
256 #ifdef CONFIG_XIP_KERNEL
257                 . = ALIGN(PAGE_SIZE);
258                 __init_begin = .;
259                 INIT_DATA
260                 ARM_EXIT_KEEP(EXIT_DATA)
261                 . = ALIGN(PAGE_SIZE);
262                 __init_end = .;
263 #endif
264
265                 NOSAVE_DATA
266                 CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
267                 READ_MOSTLY_DATA(L1_CACHE_BYTES)
268
269                 /*
270                  * and the usual data section
271                  */
272                 DATA_DATA
273                 CONSTRUCTORS
274
275                 _edata = .;
276         }
277         _edata_loc = __data_loc + SIZEOF(.data);
278
279 #ifdef CONFIG_HAVE_TCM
280         /*
281          * We align everything to a page boundary so we can
282          * free it after init has commenced and TCM contents have
283          * been copied to its destination.
284          */
285         .tcm_start : {
286                 . = ALIGN(PAGE_SIZE);
287                 __tcm_start = .;
288                 __itcm_start = .;
289         }
290
291         /*
292          * Link these to the ITCM RAM
293          * Put VMA to the TCM address and LMA to the common RAM
294          * and we'll upload the contents from RAM to TCM and free
295          * the used RAM after that.
296          */
297         .text_itcm ITCM_OFFSET : AT(__itcm_start)
298         {
299                 __sitcm_text = .;
300                 *(.tcm.text)
301                 *(.tcm.rodata)
302                 . = ALIGN(4);
303                 __eitcm_text = .;
304         }
305
306         /*
307          * Reset the dot pointer, this is needed to create the
308          * relative __dtcm_start below (to be used as extern in code).
309          */
310         . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
311
312         .dtcm_start : {
313                 __dtcm_start = .;
314         }
315
316         /* TODO: add remainder of ITCM as well, that can be used for data! */
317         .data_dtcm DTCM_OFFSET : AT(__dtcm_start)
318         {
319                 . = ALIGN(4);
320                 __sdtcm_data = .;
321                 *(.tcm.data)
322                 . = ALIGN(4);
323                 __edtcm_data = .;
324         }
325
326         /* Reset the dot pointer or the linker gets confused */
327         . = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
328
329         /* End marker for freeing TCM copy in linked object */
330         .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
331                 . = ALIGN(PAGE_SIZE);
332                 __tcm_end = .;
333         }
334 #endif
335
336         BSS_SECTION(0, 0, 0)
337         _end = .;
}
  Here is a brief introduction to these four symbols:
        __data_loc is the location where the data is stored
        _sdata is the location where the data starts at run time
        __bss_start is the start of the BSS
        _end is the end of the BSS, which is also the end of the kernel image
        Line 246 of the linker script deserves an explanation: it defines the .data section, and AT(__data_loc) means that the contents of this section are stored at __data_loc (note that the storage location and the link location are not the same)
        For detailed information on AT, please refer to ld.info
Line 85: compare __data_loc and _sdata
Lines 85 - 89: these lines check whether the location where the data is stored and the location where the data must live are the same; if they are not, the data has to be moved from __data_loc to _sdata
Here __bss_start is the start of the BSS and therefore also marks the end of the data, so it is used to decide when the copy is finished

Lines 91 - 94: clear the BSS contents, setting all of it to 0; _end is used here to detect the end of the BSS
Line 96: because line 84 updated r3 to point to the entry on line 114, this instruction loads r4, r5, r6, r7 and sp with the following values:
        r4 - processor_id
        r5 - __machine_arch_type
        r6 - __atags_pointer
        r7 - cr_alignment (0 if CONFIG_CPU_CP15 is not set)
        sp - init_thread_union + THREAD_START_SP
        The two variables processor_id and __machine_arch_type are defined in arch/arm/kernel/setup.c, on lines 62 and 63
       
        cr_alignment is defined in arch/arm/kernel/entry-armv.S:
       
1229         .globl  cr_alignment
1230         cr_alignment:
1231         .space  4

        init_thread_union is the base address of the init process's thread union, defined in init/init_task.c:
        00033: union thread_union init_thread_union
        00034:         __attribute__((__section__(".init.task"))) =
        00035:                 { INIT_THREAD_INFO(init_task) };        

Comparing with line 254 of vmlinux.lds.S (INIT_TASK_DATA(THREAD_SIZE)), we can see that the init task is stored at the start of the .data section, aligned to THREAD_SIZE (8K)

/arch/arm/include/asm/thread_info.h
#define THREAD_SIZE_ORDER    1
#define THREAD_SIZE        ( PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP        (THREAD_SIZE - 8)

104         b       start_kernel

Finally, jump to start_kernel.

1 comment:


  1. read this above post its very greatful for me thanks sharing this post ,great post.
    visit our website

    ReplyDelete