Thursday, 12 August 2021

Linux memory management fixmap of memory mapping in ARM64 bit

 Fixmap base address of memory map

After the early boot stage, the MMU has been enabled, which means that on the SoC we are currently analyzing, DRAM can from now on only be accessed through virtual addresses;
But at this point the range of addresses we can access is limited: only the idmap and swapper regions can be translated to physical addresses, and everything else is still inaccessible because no translation exists for it yet;
Fixmap is a small mechanism added before full paging is established: at this stage it provides the mappings for the resources that are strictly necessary.

1. Fixmap

In fixmap, "fix" means fixed and "map" means to establish a mapping. However, this should not be read as "a fixed mapping", but as "a mapping at a fixed virtual address";
This virtual address can be mapped to any physical address. Once the memory management unit (MMU) is enabled, it lets us freely access the content we need;

That is to say, the kernel fixes a set of virtual addresses at compile time, and each module uses them to access memory before the memory management system is fully initialized;
For example, outputting logs to the console during early debugging, reading the flattened device tree (FDT), accessing peripherals, and even building the real page tables in paging_init all rely on it.

2. Fixmap

Linux integrates the fixmap mechanism and supports the following regions:

  1. FDT is used to obtain device tree information
  2. console is used for early debugging needs, such as printing logs
  3. text is used to map read-only code (the RO segment) so that it can be patched dynamically
  4. BTMAP provides temporary boot-time mappings that each module can request
  5. the fixmap page-table slots are used to map page tables while they are being processed, and will be used in the paging_init part
https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/asm/fixmap.h#L35

/*
 * Indices of the fixmap slots.
 *
 * The permanent entries come first and are terminated by
 * __end_of_permanent_fixed_addresses. The temporary boot-time (BTMAP)
 * slots start at that same index, and the page-table slots
 * (FIX_PTE..FIX_PGD) follow them. Several helper macros are defined in the
 * middle of the enum, next to the entries they describe.
 */
enum fixed_addresses {
	FIX_HOLE,

	/*
	 * Reserve a virtual window for the FDT that is 2 MB larger than the
	 * maximum supported size, and put it at the top of the fixmap region.
	 * The additional space ensures that any FDT that does not exceed
	 * MAX_FDT_SIZE can be mapped regardless of whether it crosses any
	 * 2 MB alignment boundaries.
	 *
	 * Keep this at the top so it remains 2 MB aligned.
	 */
#define FIX_FDT_SIZE		(MAX_FDT_SIZE + SZ_2M)
	/* The FDT window spans FIX_FDT_SIZE / PAGE_SIZE slots: FIX_FDT_END..FIX_FDT. */
	FIX_FDT_END,
	FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,

	/* Per the article: early console slot and text (RO code) patching slot. */
	FIX_EARLYCON_MEM_BASE,
	FIX_TEXT_POKE0,

#ifdef CONFIG_ACPI_APEI_GHES
	/* Used for GHES mapping from assorted contexts */
	FIX_APEI_GHES_IRQ,
	FIX_APEI_GHES_SEA,
#ifdef CONFIG_ARM_SDE_INTERFACE
	FIX_APEI_GHES_SDEI_NORMAL,
	FIX_APEI_GHES_SDEI_CRITICAL,
#endif
#endif /* CONFIG_ACPI_APEI_GHES */

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
	FIX_ENTRY_TRAMP_DATA,
	FIX_ENTRY_TRAMP_TEXT,
	/* Virtual address of the FIX_ENTRY_TRAMP_TEXT slot. */
#define TRAMP_VALIAS		(__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
	/* Number of permanent entries; FIXADDR_SIZE is derived from this value. */
	__end_of_permanent_fixed_addresses,

	/*
	 * Temporary boot-time mappings, used by early_ioremap(),
	 * before ioremap() is functional.
	 */
	/* FIX_BTMAPS_SLOTS windows of NR_FIX_BTMAPS pages (256K) each. */
#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
#define FIX_BTMAPS_SLOTS	7
#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)

	/* The BTMAP range covers TOTAL_FIX_BTMAPS indices: FIX_BTMAP_END..FIX_BTMAP_BEGIN. */
	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,

	/*
	 * Used for kernel page table creation, so unmapped memory may be used
	 * for tables.
	 */
	FIX_PTE,
	FIX_PMD,
	FIX_PUD,
	FIX_PGD,

	__end_of_fixed_addresses
};

/* Size in bytes of the permanent fixmap entries: one page per permanent index. */
#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
/* FIXADDR_SIZE bytes below FIXADDR_TOP (FIXADDR_TOP is defined elsewhere). */
#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)

3. Fixmap

This part of the initialization (mainly needed for the FDT) is performed during the architecture-specific setup at kernel start-up, in early_fixmap_init. In essence,
it installs the page-table levels (bm_pud, bm_pmd, bm_pte) that cover the virtual address FIXADDR_START, so that fixmap entries can later be mapped to physical addresses.
Here is the code:

/*
 * Install the statically allocated fixmap tables (bm_pud, bm_pmd, bm_pte)
 * into the kernel page tables for the address FIXADDR_START.
 *
 * Only table entries are written here: the PGD/P4D entry is pointed at
 * bm_pud if it is empty, the PUD entry at bm_pmd if it is empty, and the
 * PMD entry at bm_pte unconditionally. No leaf (page) mapping is created.
 */
void __init early_fixmap_init(void)
{
	pgd_t *pgdp;
	p4d_t *p4dp, p4d;
	pud_t *pudp;
	pmd_t *pmdp;
	unsigned long addr = FIXADDR_START;

	/* Walk from the kernel PGD (init_mm.pgd) down to the P4D entry for addr. */
	pgdp = pgd_offset_k(addr);
	p4dp = p4d_offset(pgdp, addr);
	p4d = READ_ONCE(*p4dp);
	/* Entry already populated with something other than bm_pud (4+ levels only). */
	if (CONFIG_PGTABLE_LEVELS > 3 &&
	    !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
		/*
		 * We only end up here if the kernel mapping and the fixmap
		 * share the top level pgd entry, which should only happen on
		 * 16k/4 levels configurations.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		pudp = pud_offset_kimg(p4dp, addr);
	} else {
		/* Empty entry: hook bm_pud in as the next-level table. */
		if (p4d_none(p4d))
			__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
		pudp = fixmap_pud(addr);
	}
	/* Empty PUD entry: hook bm_pmd in as the next-level table. */
	if (pud_none(READ_ONCE(*pudp)))
		__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
	pmdp = fixmap_pmd(addr);
	/* The PMD entry is always (re)written to point at bm_pte. */
	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	/*
	 * Run-time sanity check: both ends of the BTMAP range must use the
	 * same PMD entry as FIXADDR_START; otherwise warn and dump the values.
	 */
	if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmdp %p != %p, %p\n",
			pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
}
With 4K pages, 3 translation levels and 39-bit virtual addresses, FIXADDR_START = 0xffffffbefe7fb000.
Splitting this address into its page-table indices gives:

  1. The offset within the page (bits 11:0) is 0000 0000 0000
  2. L3 (PTE) index (bits 20:12) is 1 1111 1011 [0x1FB]
  3. L2 (PMD) index (bits 29:21) is 1 1111 0011 [0x1F3]
  4. L1 (PGD) index (bits 38:30) is 0 1111 1011 [0xFB]
In other words, what the above function does is: write the address of bm_pmd into
swapper[0xFB], and then write the address of bm_pte into bm_pmd[0x1F3]

3.1 bm_pmd\bm_pte address:

    
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; //PTRS_PER_PTE << 9
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; //PTRS_PER_PMD << 9 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; //If there are 3 levels
                                                                    of PTRS_PER_PUD, there is no such level
System.map

ffffffc012eca000 b bm_pud
ffffffc012ecb000 b bm_pmd
ffffffc012ecc000 b bm_pte
ffffffc012480000 R swapper_pg_dir
ffffffc012481000 R swapper_pg_end

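Each of these arrays occupies one page and its entries are 64-bit (u64) values, so the address of an entry is base + index * sizeof(pmd_t), that is, base + index * 8, because each page-table entry is 8 bytes. So the entry to check is at 0xffffff80094a1000 + 0xFB * 8. (Note: 0xffffff80094a1000 is the swapper_pg_dir address used in this worked example; it differs from the swapper_pg_dir value in the System.map excerpt above, so the two presumably come from different builds.)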

3.2 pgd offset calculation

The address of the PGD entry is the swapper_pg_dir base address plus the offset derived from FIXADDR_START.

// Definition of init_mm, the kernel's initial mm_struct; the only field that matters here is pgd, which is set to swapper_pg_dir

/*
 * The kernel's initial mm_struct. For the fixmap discussion the relevant
 * field is .pgd, which is set to swapper_pg_dir: pgd_offset_k() walks start
 * from this table.
 */
struct mm_struct init_mm = {
	.mm_rb		= RB_ROOT,
	.pgd		= swapper_pg_dir,	/* root of the kernel page tables */
	.mm_users	= ATOMIC_INIT(2),
	.mm_count	= ATOMIC_INIT(1),
	.write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq),
	MMAP_LOCK_INITIALIZER(init_mm)
	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
	.user_ns	= &init_user_ns,
	.cpu_bitmap	= CPU_BITS_NONE,
	INIT_MM_CONTEXT(init_mm)
};

/* Kernel variant: look the address up in init_mm. */
#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
/* Use the mm's pgd field as the table base; for init_mm that is swapper_pg_dir. */
#define pgd_offset(mm, addr)	(pgd_offset_raw((mm)->pgd, (addr)))
/* Entry pointer = table base + index (pointer arithmetic on the pgd entry type). */
#define pgd_offset_raw(pgd, addr)	((pgd) + pgd_index(addr))
/* Index = address bits above PGDIR_SHIFT, masked to the table size; 0xFB for FIXADDR_START = 0xffffffbefe7fb000. */
#define pgd_index(addr)		(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
/* Value for the 3-level configuration discussed here. */
#define PGDIR_SHIFT		30
/* Number of PGD entries; 1 << 9 = 512 when VA_BITS is 39. */
#define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))

After the above calculation, pgd_index is 0xFB (the same result as the manual split of the address above).
With swapper_pg_dir at 0xffffff80094a1000, the PGD entry is therefore at 0xffffff80094a1000 + 0xFB * 8 = 0xffffff80094a17d8

3.3 pud offset calculation:


With 3 translation levels, the pud pointer is the same as the pgd pointer (see pud_offset_kimg below).
/*
 * Return a pointer to the PUD entry that covers the fixmap address addr.
 *
 * The walk starts from the kernel PGD; it is a BUG if the P4D entry is
 * empty or bad.
 */
static inline pud_t *fixmap_pud(unsigned long addr)
{
	pgd_t *pgdp = pgd_offset_k(addr);	/* first get the pgd entry's virtual address */
	p4d_t *p4dp = p4d_offset(pgdp, addr);
	p4d_t p4d = READ_ONCE(*p4dp);

	BUG_ON(p4d_none(p4d) || p4d_bad(p4d));

	return pud_offset_kimg(p4dp, addr);	/* calculate the pud position */
}
/* With only 3 translation levels the pud is the pgd: the incoming pointer is returned with a cast, addr is unused. */
#define pud_offset_kimg(dir,addr)	((pud_t *)dir)

3.4 pmd offset calculation

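PMD offset calculation:
  1. Get the virtual address of the PUD entry
  2. Compute the position of the PMD entry from the PUD entry and the address (FIXADDR_START here)
    1.     Read the physical address of the PMD table from the PUD entry
    2.     Calculate the pmd_index offset: shift addr right by 21 bits and keep the lower 9 bits (PTRS_PER_PMD = 512 with 4K pages)
    3.     Add the kimage_voffset offset to turn the physical address into a virtual address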

/*
 * Return a pointer to the PMD entry that covers the fixmap address addr.
 *
 * The PUD entry is located with fixmap_pud(); it is a BUG if that entry is
 * empty or bad.
 */
static inline pmd_t *fixmap_pmd(unsigned long addr)
{
	pud_t *pudp = fixmap_pud(addr);
	pud_t pud = READ_ONCE(*pudp);

	BUG_ON(pud_none(pud) || pud_bad(pud));

	/* PMD table address from the PUD entry + pmd_index(addr), as a kernel-image virtual address. */
	return pmd_offset_kimg(pudp, addr);
}
#define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))

//Add the offset between the physical address and the linear address u64 kimage_voffset __ro_after_init
#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset)) 
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t)) // pud + offset static inline phys_addr_t pud_page_paddr(pud_t pud) { return __pud_to_phys(pud); } static inline unsigned long pud_page_vaddr(pud_t pud) { return (unsigned long)__va(pud_page_paddr(pud)); } #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) static inline pte_t pud_pte(pud_t pud) { return __pte(pud_val(pud)); } #define pud_val(x) ((x).pud) #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define pte_val(x) ((x).pte) #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))//Shift 21bits to the right and take the lower 8bits of data #define PMD_SHIFT 21 #define PTRS_PER_PMD PTRS_PER_PTE #define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))

4 FDT

Call sequence during init: start_kernel -> setup_arch -> setup_machine_fdt(__fdt_pointer) -> fixmap_remap_fdt(dt_phys). Two things are done in fixmap_remap_fdt:
  1. remap
  2. reserved zone management

/*
 * Map the flattened device tree at the FIX_FDT fixmap address.
 *
 * @dt_phys: physical address of the FDT
 * @size:    out parameter, set to the FDT's total size once its header has
 *           been read
 * @prot:    page protection used for the mapping
 *
 * Returns the virtual address of the FDT, or NULL if dt_phys is zero or
 * misaligned, the FDT magic does not match, or the FDT is larger than
 * MAX_FDT_SIZE.
 */
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
	/* First find the virtual address of the FIX_FDT slot. */
	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
	int offset;
	void *dt_virt;

	/*
	 * Check whether the physical FDT address is set and meets the minimum
	 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
	 * at least 8 bytes so that we can always access the magic and size
	 * fields of the FDT header after mapping the first chunk, double check
	 * here if that is indeed the case.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
		return NULL;

	/*
	 * Make sure that the FDT region can be mapped without the need to
	 * allocate additional translation table pages, so that it is safe
	 * to call create_mapping_noalloc() this early.
	 *
	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
	 * be in the same PMD as the rest of the fixmap.
	 * On 4k pages, we'll use section mappings for the FDT so we only
	 * have to be in the same PUD.
	 */
	BUILD_BUG_ON(dt_virt_base % SZ_2M);

	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

	/* Offset of the FDT inside its SWAPPER_BLOCK_SIZE-aligned block. */
	offset = dt_phys % SWAPPER_BLOCK_SIZE;
	dt_virt = (void *)dt_virt_base + offset;

	/*
	 * map the first chunk so we can read the size from the header
	 *
	 * That is: map one SWAPPER_BLOCK_SIZE block first (2M in the
	 * configuration the article discusses), then read the FDT size from
	 * the header and continue from there.
	 */
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;

	*size = fdt_totalsize(dt_virt);
	if (*size > MAX_FDT_SIZE)
		return NULL;

	/*
	 * The FDT itself is small (under 1M in the author's case), but since
	 * the first mapping starts at a block-aligned address, the FDT may
	 * straddle two blocks. If so, map again with the full rounded-up size.
	 */
	if (offset + *size > SWAPPER_BLOCK_SIZE)
		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
			       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

	return dt_virt;
}
This is where the actual mapping between the known fixed virtual address and the physical address passed in is established;
Since the size of the FDT is not yet known at this point, the first 2 MB block is mapped first, and
if that does not cover the whole FDT, the mapping is created again with the full size;
create_mapping_noalloc:

/*
 * Map [virt, virt + size) to phys in the kernel page tables (init_mm.pgd)
 * without allocating any page-table pages: the allocator callback passed
 * down is NULL, so every intermediate table must already exist.
 *
 * Addresses in [PAGE_END, VMALLOC_START) are rejected with a warning and
 * nothing is mapped.
 */
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
				  phys_addr_t size, pgprot_t prot)
{
	if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	/* NULL allocator; NO_CONT_MAPPINGS is passed as the flags argument. */
	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);
}
/*
 * Map the virtual range [virt, virt + size) to phys in the page tables
 * rooted at pgdir, one PGD entry at a time.
 *
 * pgtable_alloc is passed through to the lower levels and may be NULL (as
 * it is for create_mapping_noalloc()); flags is passed through unchanged.
 * Returns without mapping anything (after a WARN) if phys and virt do not
 * share the same offset within a page.
 */
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
				 unsigned long virt, phys_addr_t size,
				 pgprot_t prot,
				 phys_addr_t (*pgtable_alloc)(int),
				 int flags)
{
	unsigned long addr, end, next;
	/* PGD entry for virt; pgdir is swapper_pg_dir when called with init_mm.pgd. */
	pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);

	/*
	 * If the virtual and physical address don't have the same offset
	 * within a page, we cannot map the region as the caller expects.
	 */
	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
		return;

	phys &= PAGE_MASK;
	addr = virt & PAGE_MASK;
	/* End of the range, rounded up to a page boundary. */
	end = PAGE_ALIGN(virt + size);

	do {
		/*
		 * End of the part of the range handled by the current PGD
		 * entry. With PGDIR_SHIFT = 30 one entry covers 1G, so for
		 * the small FDT mapping the loop normally runs once.
		 */
		next = pgd_addr_end(addr, end);
		/*
		 * Fill in the levels below this PGD entry. For fixmap
		 * addresses early_fixmap_init has already installed the
		 * bm_* tables, so nothing needs to be allocated here.
		 */
		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
			       flags);
		phys += next - addr;
	} while (pgdp++, addr = next, addr != end);
}
The core implementation here is divided into two parts:

  1. Determine whether the range to be mapped extends beyond what one pgd entry covers. Each pgd entry covers 1 GB, so normally the loop does not run a second time here
  2. For the selected pgd entry, call alloc_init_pud to set up the pud level below it

/*
 * Set up the PUD level for [addr, end) under the given PGD entry and map
 * the range to phys.
 *
 * If the P4D entry is empty, a new PUD table is obtained from
 * pgtable_alloc (which must then be non-NULL) and installed. Each PUD
 * entry in the range is then either written as a 1GB block or handed to
 * alloc_init_cont_pmd() to be mapped at the PMD level.
 */
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   phys_addr_t (*pgtable_alloc)(int),
			   int flags)
{
	unsigned long next;
	pud_t *pudp;
	p4d_t *p4dp = p4d_offset(pgdp, addr);
	p4d_t p4d = READ_ONCE(*p4dp);

	/*
	 * Only an EMPTY entry is handled here: a new PUD table is allocated
	 * and installed; nothing is released or cleared. An entry that is
	 * already populated (such as the one early_fixmap_init set up for
	 * FIXADDR_START) skips this branch and is reused as is. This is also
	 * why create_mapping_noalloc() can pass a NULL pgtable_alloc: the
	 * BUG_ON below only fires when a table would have to be allocated.
	 */
	if (p4d_none(p4d)) {
		p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
		phys_addr_t pud_phys;

		if (flags & NO_EXEC_MAPPINGS)
			p4dval |= P4D_TABLE_PXN;
		BUG_ON(!pgtable_alloc);
		pud_phys = pgtable_alloc(PUD_SHIFT);
		__p4d_populate(p4dp, pud_phys, p4dval);
		p4d = READ_ONCE(*p4dp);
	}
	BUG_ON(p4d_bad(p4d));

	/*
	 * Pointer to the PUD entry for addr, derived from the P4D entry and
	 * addr. NOTE(review): the helper is not shown here; presumably it
	 * maps the PUD table through a fixmap slot, which pud_clear_fixmap()
	 * at the end of the function undoes - confirm.
	 */
	pudp = pud_set_fixmap_offset(p4dp, addr);
	do {
		pud_t old_pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);

		/*
		 * For 4K granule only, attempt to put down a 1GB block
		 */
		if (use_1G_block(addr, next, phys) &&
		    (flags & NO_BLOCK_MAPPINGS) == 0) {
			pud_set_huge(pudp, phys, prot);

			/*
			 * After the PUD entry has been populated once, we
			 * only allow updates to the permission attributes.
			 */
			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
						      READ_ONCE(pud_val(*pudp))));
		} else {
			alloc_init_cont_pmd(pudp, addr, next, phys, prot,
					    pgtable_alloc, flags);

			/* A PUD entry that was already populated must not have changed. */
			BUG_ON(pud_val(old_pud) != 0 &&
			       pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
		}
		phys += next - addr;
	} while (pudp++, addr = next, addr != end);

	pud_clear_fixmap();
}

5.Summarize

Fixmap is mainly used for memory access in the window between the MMU being enabled and mem_init completing. From the current point of view, there are three main parts:

  1. FDT mapping: a fixed FDT virtual address is allocated for mapping the FDT's physical address
  2. IOMAP mapping: a section of addresses is allocated to I/O so that peripherals can be used at this stage. The difference from the FDT is that the FDT mapping is permanent, while an IOMAP mapping is removed once it has been used
  3. The fixmap PGD\PMD\PTE slots and related information are used during paging_init to facilitate the establishment of subsequent mappings









No comments:

Post a Comment