RFC: Major re-jig of the ARM page table handling

Author: Russell King - ARM Linux
Date:  
To: linux-arm-kernel
Subject: RFC: Major re-jig of the ARM page table handling
The 2.5.5 kernel that Linus released contains Ingo Molnar's highpte
changes, which slightly changed the way page tables are handled in
Linux. This unfortunately broke the way we handle ARM's 1K second-level
page tables, and in 2.5.5-rmk1 I bodged around the problem so there was
something usable I could release. However, it is a bodge, and it needs
to be cleaned up.

Not only do Ingo's highpte patches require some changes in this area;
Rik van Riel's rmap VM patches also benefit from changes here.

The Current Situation
---------------------

We have a page table slab cache specifically for allocating 2K page tables.
The first 1K of each object contains 256 32-bit page table entries that
the processor uses. The second 1K contains the page table that Linux
uses. Obviously there are two of these combined tables in a 4K page.

The first level page table contains 4096 32-bit entries.

The problem arises because the user-space PTE allocation functions now
expect a struct page * pointer to be returned. This is a pointer into
the array that the Linux VM uses to track page allocation, usage, and
so forth.

In 2.5.5-rmk1, we just cast our old 2K page table pointer to a struct
page pointer. Luckily, no one dereferences this pointer, so it works.
This situation, however, isn't guaranteed to last.

The Solution
------------

We pull a couple of tricks on the Linux VM model:

1. We tell the Linux VM that we have 2048 64-bit entries in the first
level page table.

2. We place two 256-entry second-level page tables side by side in
memory, forming (apparently) one 512-entry second-level page table.

Linux doesn't know any better since it leaves the content of the page
table entries up to us.
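
In header terms, the two tricks come down to just a few definitions.
Here they are, collected from the proc-armv/page.h and pgtable.h parts
of the patch below (the non-STRICT_MM_TYPECHECKS variants); only the
comments are new:

    /* What we tell Linux.  In reality each h/w pte table has 256
     * entries and the h/w first level table has 4096. */
    #define PTRS_PER_PTE    512        /* 2 x 256 h/w entries */
    #define PTRS_PER_PGD    2048       /* 4096 / 2 h/w entries */

    /* One "Linux" first level entry is a pair of h/w entries... */
    typedef unsigned long pgd_t[2];

    /* ...but Linux only ever looks at the first of the pair. */
    #define pgd_val(x)    ((x)[0])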

When Linux allocates a page table, we now allocate a real 4K page. We
do something similar to the old situation - we split the page up into
two sections - the hardware section and the Linux section:

  +------------+
  |  h/w pt 0  |
  +------------+
  |  h/w pt 1  |
  +------------+
  | Linux pt 0 |
  +------------+
  | Linux pt 1 |
  +------------+

Take some time to think about what the above changes mean. We've gone
from:

  1st level
 +---------+             2nd level          2nd level
 | entry 0 |----------------------------->+------------+
 +---------+                              |  h/w pt 0  |
 | entry 1 |--------->+------------+      +------------+
 +---------+          |  h/w pt 1  |      | Linux pt 0 |
 |  ...    |          +------------+      +------------+ (2K)
                      | Linux pt 1 |
                      +------------+ (2K)

to:

    Linux view    hardware view
 +-------------+-------------+
 |             | h/w entry 0 |--------->+------------+
 | Linux entry +-------------+          |  h/w pt 0  |
 |             | h/w entry 1 |--------->+------------+
 +-------------+-------------+          |  h/w pt 1  |
 |    ...      |     ...     |          +------------+
                                        | Linux pt 0 |
                                        +------------+
                                        | Linux pt 1 |
                                        +------------+ (4K)

When we link this second level page table into the first level, we set
both halves of the "64-bit" entry, one to point at h/w pt 0, the other
to point at h/w pt 1.
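
In code, this is what the patch's new pmd_populate() does for user
page tables (condensed from the pgalloc.h change below; 256 *
sizeof(pte_t) is 1K, i.e. one h/w table):

    static inline void
    pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
    {
        pmd_t pmd;

        pmd_val(pmd) = __pa(page_address(ptep)) | _PAGE_USER_TABLE;
        set_pmd(pmdp, pmd);        /* h/w entry 0 -> h/w pt 0 */
        pmd_val(pmd) += 256 * sizeof(pte_t);
        set_pmd(pmdp + 1, pmd);    /* h/w entry 1 -> h/w pt 1 */
    }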

When Linux walks the page tables, we only ever read the first half of
the 1st level page table entry, i.e. 'h/w entry 0'. This gives us the
base address of 'h/w page table 0', which then appears to extend for
512 entries.
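
Finding the Linux copy of a table is the other half of the trick. The
patch's pmd_page_kernel() masks the control bits off the pmd value and
steps over both h/w tables; with PTRS_PER_PTE now 512 and 4-byte
entries, the mask and the offset both work out to 2K:

    static inline pte_t *pmd_page_kernel(pmd_t pmd)
    {
        unsigned long ptr;

        /* strip the h/w control bits: 512 * 4 == 2048 */
        ptr = pmd_val(pmd) & ~(PTRS_PER_PTE * sizeof(void *) - 1);
        /* skip the two h/w tables to the Linux copies at +2K */
        ptr += PTRS_PER_PTE * sizeof(void *);

        return __va(ptr);
    }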

The net effect is that as far as Linux is concerned, we now have one
second-level (pte) page table per page rather than two, which means we
can now use a real struct page * to describe this second level page
table. [The Ingo problem solved!]
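
Allocation likewise becomes an ordinary page allocation. Condensed
from the new pte_alloc_one() below (the retry loop is omitted here):

    static inline struct page *
    pte_alloc_one(struct mm_struct *mm, unsigned long addr)
    {
        struct page *pte;

        pte = alloc_pages(GFP_KERNEL, 0);    /* one whole 4K page */
        if (pte) {
            void *page = page_address(pte);
            clear_page(page);
            /* write the h/w half back to RAM for the table walker */
            clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
        }
        return pte;        /* a real struct page * at last */
    }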

Ok, here's the patch that should apply _on top_ of 2.5.5-rmk1. It only
covers SA110 and SA11x0 processors, but the change for other processors
is obvious:

 ENTRY(cpu_sa110_set_pte)
-    str    r1, [r0], #-1024        @ linux version
+    tst    r0, #2048
+    streq    r0, [r0, -r0]            @ BUG_ON
+    str    r1, [r0], #-2048        @ linux version

The first two added lines there are a safety net that will be removed
once we're happy it's working properly.
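
To spell the safety net out: the Linux copies now live in the upper 2K
of the page, so any Linux PTE pointer handed to set_pte must have bit
11 of its address set. 'r0 - r0' is zero, so if that bit is ever clear
the conditional store writes to address 0 and faults. In C the new
code path would read roughly as follows (a sketch only - the real
thing stays in assembler):

    BUG_ON(!((unsigned long)ptep & 2048));  /* not in the Linux half? */
    *ptep = pte;             /* store the Linux version */
    ptep -= PTRS_PER_PTE;    /* back 2K to the h/w entry */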

Feel free to try this out - it's not been heavily tested, but my
Assabet does still load, unload and reload modules. It also still
plays my MP3 collection, so I'm reasonably happy that the patch is
mostly correct.

diff -ur ref/arch/arm/mm/init.c linux/arch/arm/mm/init.c
--- ref/arch/arm/mm/init.c    Thu Feb 28 21:50:57 2002
+++ linux/arch/arm/mm/init.c    Wed Mar  6 14:35:55 2002
@@ -319,7 +319,7 @@
      * and can only be in node 0.
      */
     reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
-                 PTRS_PER_PGD * sizeof(void *));
+                 PTRS_PER_PGD * sizeof(pgd_t));
 #endif
     /*
      * And don't forget to reserve the allocator bitmap,
diff -ur ref/arch/arm/mm/mm-armv.c linux/arch/arm/mm/mm-armv.c
--- ref/arch/arm/mm/mm-armv.c    Sun Mar  3 21:21:41 2002
+++ linux/arch/arm/mm/mm-armv.c    Wed Mar  6 14:35:31 2002
@@ -165,11 +165,14 @@
 static inline void
 alloc_init_section(unsigned long virt, unsigned long phys, int prot)
 {
-    pmd_t pmd;
+    pmd_t *pmdp, pmd;

-    pmd_val(pmd) = phys | prot;
+    pmdp = pmd_offset(pgd_offset_k(virt), virt);
+    if (virt & (1 << 20))
+        pmdp++;

-    set_pmd(pmd_offset(pgd_offset_k(virt), virt), pmd);
+    pmd_val(pmd) = phys | prot;
+    set_pmd(pmdp, pmd);
 }

 /*
@@ -182,18 +185,19 @@
 static inline void
 alloc_init_page(unsigned long virt, unsigned long phys, int domain, int prot)
 {
-    pmd_t *pmdp;
+    pmd_t *pmdp, pmd;
     pte_t *ptep;

     pmdp = pmd_offset(pgd_offset_k(virt), virt);

     if (pmd_none(*pmdp)) {
-        pte_t *ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
-                              sizeof(pte_t));
-
-        ptep += PTRS_PER_PTE;
+        ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
+                           sizeof(pte_t));

-        set_pmd(pmdp, __mk_pmd(ptep, PMD_TYPE_TABLE | PMD_DOMAIN(domain)));
+        pmd_val(pmd) = __pa(ptep) | PMD_TYPE_TABLE | PMD_DOMAIN(domain);
+        set_pmd(pmdp, pmd);
+        pmd_val(pmd) += 256 * sizeof(pte_t);
+        set_pmd(pmdp + 1, pmd);
     }
     ptep = pte_offset_kernel(pmdp, virt);

@@ -224,7 +228,7 @@

     if (md->prot_read && md->prot_write &&
         !md->cacheable && !md->bufferable) {
-        printk(KERN_WARNING "Security risk: creating user "
+        printk(KERN_WARNING "MM: Security risk: creating user "
                "accessible mapping for 0x%08lx at 0x%08lx\n",
                md->physical, md->virtual);
     }
@@ -259,11 +263,11 @@
         length -= PAGE_SIZE;
     }

-    while (length >= PGDIR_SIZE) {
+    while (length >= (PGDIR_SIZE / 2)) {
         alloc_init_section(virt, virt + off, prot_sect);

-        virt   += PGDIR_SIZE;
-        length -= PGDIR_SIZE;
+        virt   += (PGDIR_SIZE / 2);
+        length -= (PGDIR_SIZE / 2);
     }

     while (length >= PAGE_SIZE) {
@@ -455,39 +459,4 @@

     for (node = 0; node < numnodes; node++)
         free_unused_memmap_node(node, mi);
-}
-
-/*
- * PTE table allocation cache.
- *
- * This is a move away from our custom 2K page allocator.  We now use the
- * slab cache to keep track of these objects.
- *
- * With this, it is questionable as to whether the PGT cache gains us
- * anything.  We may be better off dropping the PTE stuff from our PGT
- * cache implementation.
- */
-kmem_cache_t *pte_cache;
-
-/*
- * The constructor gets called for each object within the cache when the
- * cache page is created.  Note that if slab tries to misalign the blocks,
- * we BUG() loudly.
- */
-static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
-{
-    if (((unsigned long)pte) & 2047)
-        BUG();
-
-    memzero(pte, 2 * PTRS_PER_PTE * sizeof(pte_t));
-    clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
-}
-
-void __init pgtable_cache_init(void)
-{
-    pte_cache = kmem_cache_create("pte-cache",
-                2 * PTRS_PER_PTE * sizeof(pte_t), 0, 0,
-                pte_cache_ctor, NULL);
-    if (!pte_cache)
-        BUG();
 }
diff -ur ref/arch/arm/mm/proc-sa110.S linux/arch/arm/mm/proc-sa110.S
--- ref/arch/arm/mm/proc-sa110.S    Sun Mar  3 15:33:18 2002
+++ linux/arch/arm/mm/proc-sa110.S    Tue Mar  5 22:25:15 2002
@@ -192,7 +192,9 @@
  */
     .align    5
 ENTRY(cpu_sa110_set_pte)
-    str    r1, [r0], #-1024        @ linux version
+    tst    r0, #2048
+    streq    r0, [r0, -r0]            @ BUG_ON
+    str    r1, [r0], #-2048        @ linux version

     eor    r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY

diff -ur ref/arch/arm/mm/proc-sa1100.S linux/arch/arm/mm/proc-sa1100.S
--- ref/arch/arm/mm/proc-sa1100.S    Sun Mar  3 22:04:23 2002
+++ linux/arch/arm/mm/proc-sa1100.S    Wed Mar  6 13:07:37 2002
@@ -207,7 +207,9 @@
  */
     .align    5
 ENTRY(cpu_sa1100_set_pte)
-    str    r1, [r0], #-1024        @ linux version
+    tst    r0, #2048
+    streq    r0, [r0, -r0]            @ BUG_ON
+    str    r1, [r0], #-2048        @ linux version

     eor    r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY

diff -ur ref/include/asm-arm/page.h linux/include/asm-arm/page.h
--- ref/include/asm-arm/page.h    Fri Mar  1 10:46:57 2002
+++ linux/include/asm-arm/page.h    Tue Mar  5 22:56:22 2002
@@ -2,18 +2,8 @@
 #define _ASMARM_PAGE_H

 #include <linux/config.h>
-#include <asm/proc/page.h>
-
-#define PAGE_SIZE        (1UL << PAGE_SHIFT)
-#define PAGE_MASK        (~(PAGE_SIZE-1))

 #ifdef __KERNEL__
-
-/*
- * to align the pointer to the (next) page boundary
- */
-#define PAGE_ALIGN(addr)    (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__

#include <asm/bug.h>
@@ -65,17 +55,14 @@
*/
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;

 #define pte_val(x)      ((x).pte)
 #define pmd_val(x)      ((x).pmd)
-#define pgd_val(x)      ((x).pgd)
 #define pgprot_val(x)   ((x).pgprot)

 #define __pte(x)        ((pte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
-#define __pgd(x)        ((pgd_t) { (x) } )
 #define __pgprot(x)     ((pgprot_t) { (x) } )

#else
@@ -84,20 +71,32 @@
*/
typedef unsigned long pte_t;
typedef unsigned long pmd_t;
-typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;

 #define pte_val(x)      (x)
 #define pmd_val(x)      (x)
-#define pgd_val(x)      (x)
 #define pgprot_val(x)   (x)

 #define __pte(x)        (x)
 #define __pmd(x)        (x)
-#define __pgd(x)        (x)
 #define __pgprot(x)     (x)

 #endif
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#include <asm/proc/page.h>
+
+#define PAGE_SIZE        (1UL << PAGE_SHIFT)
+#define PAGE_MASK        (~(PAGE_SIZE-1))
+
+/*
+ * to align the pointer to the (next) page boundary
+ */
+#define PAGE_ALIGN(addr)    (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__

 /* Pure 2^n version of get_order */
 static inline int get_order(unsigned long size)
diff -ur ref/include/asm-arm/pgtable.h linux/include/asm-arm/pgtable.h
--- ref/include/asm-arm/pgtable.h    Fri Mar  1 10:48:29 2002
+++ linux/include/asm-arm/pgtable.h    Tue Mar  5 22:33:09 2002
@@ -19,7 +19,11 @@
  * PGDIR_SHIFT determines what a third-level page table entry can map
  */
 #define PMD_SHIFT        20
+#ifdef CONFIG_CPU_32
+#define PGDIR_SHIFT        21
+#else
 #define PGDIR_SHIFT        20
+#endif

 #define LIBRARY_TEXT_START    0x0c000000

@@ -93,7 +97,6 @@

 #define pmd_none(pmd)        (!pmd_val(pmd))
 #define pmd_present(pmd)    (pmd_val(pmd))
-#define pmd_clear(pmdp)        set_pmd(pmdp, __pmd(0))

 /*
  * Permanent address of a page. We never have highmem, so this is trivial.
@@ -106,18 +109,11 @@
  */
 static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 {
-    pte_t pte;
-    pte_val(pte) = physpage | pgprot_val(pgprot);
-    return pte;
+    return __pte(physpage | pgprot_val(pgprot));
 }

-#define mk_pte(page,pgprot)                \
-({                            \
-    pte_t __pte;                    \
-    pte_val(__pte) = __pa(page_address(page)) +    \
-               pgprot_val(pgprot);        \
-    __pte;                        \
-})
+#define mk_pte(page,pgprot) \
+    __pte(__pa(page_address(page)) | pgprot_val(pgprot))

 /*
  * The "pgd_xxx()" functions here are trivial for a folded two-level
@@ -127,7 +123,7 @@
 #define pgd_none(pgd)        (0)
 #define pgd_bad(pgd)        (0)
 #define pgd_present(pgd)    (1)
-#define pgd_clear(pgdp)
+#define pgd_clear(pgdp)        do { } while (0)

 #define page_pte_prot(page,prot)    mk_pte(page, prot)
 #define page_pte(page)        mk_pte(page, __pgprot(0))
@@ -147,15 +143,6 @@
 /* Find an entry in the third-level page table.. */
 #define __pte_index(addr)    (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

-#define pmd_page(dir)        ((struct page *)__pmd_page(dir))
-
-#define __pte_offset(dir, addr)    ((pte_t *)__pmd_page(*(dir)) + __pte_index(addr))
-#define pte_offset_kernel    __pte_offset
-#define pte_offset_map        __pte_offset
-#define pte_offset_map_nested    __pte_offset
-#define pte_unmap(pte)        do { } while (0)
-#define pte_unmap_nested(pte)    do { } while (0)
-
 #include <asm/proc/pgtable.h>

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -182,7 +169,7 @@

#include <asm-generic/pgtable.h>

-extern void pgtable_cache_init(void);
+#define pgtable_cache_init() do { } while (0)

 /*
  * remap a physical address `phys' of size `size' with page protection `prot'
diff -ur ref/include/asm-arm/proc-armo/pgtable.h linux/include/asm-arm/proc-armo/pgtable.h
--- ref/include/asm-arm/proc-armo/pgtable.h    Fri Aug  3 00:13:34 2001
+++ linux/include/asm-arm/proc-armo/pgtable.h    Tue Mar  5 22:21:19 2002
@@ -32,6 +32,7 @@

 #define pmd_bad(pmd)        ((pmd_val(pmd) & 0xfc000002))
 #define set_pmd(pmdp,pmd)    ((*(pmdp)) = (pmd))
+#define pmd_clear(pmdp)        set_pmd(pmdp, __pmd(0))

 static inline pmd_t __mk_pmd(pte_t *ptep, unsigned long prot)
 {
@@ -47,6 +48,12 @@
 {
     return __phys_to_virt(pmd_val(pmd) & ~_PAGE_TABLE);
 }
+
+#define pte_offset_kernel(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_map(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_map_nested(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_unmap(pte)        do { } while (0)
+#define pte_unmap_nested(pte)    do { } while (0)

 #define set_pte(pteptr, pteval)    ((*(pteptr)) = (pteval))

Only in linux/include/asm-arm/proc-armv: _xlk
diff -ur ref/include/asm-arm/proc-armv/page.h linux/include/asm-arm/proc-armv/page.h
--- ref/include/asm-arm/proc-armv/page.h    Fri Aug  3 00:13:34 2001
+++ linux/include/asm-arm/proc-armv/page.h    Tue Mar  5 22:28:04 2002
@@ -15,4 +15,23 @@

#define EXEC_PAGESIZE 4096

+#ifndef __ASSEMBLY__
+#ifdef STRICT_MM_TYPECHECKS
+
+typedef struct {
+    unsigned long pgd0;
+    unsigned long pgd1;
+} pgd_t;
+
+#define pgd_val(x)    ((x).pgd0)
+
+#else
+
+typedef unsigned long pgd_t[2];
+
+#define pgd_val(x)    ((x)[0])
+
+#endif
+#endif /* !__ASSEMBLY__ */
+
 #endif /* __ASM_PROC_PAGE_H */
diff -ur ref/include/asm-arm/proc-armv/pgalloc.h linux/include/asm-arm/proc-armv/pgalloc.h
--- ref/include/asm-arm/proc-armv/pgalloc.h    Thu Feb 28 23:12:18 2002
+++ linux/include/asm-arm/proc-armv/pgalloc.h    Wed Mar  6 14:59:17 2002
@@ -1,43 +1,72 @@
 /*
  *  linux/include/asm-arm/proc-armv/pgalloc.h
  *
- *  Copyright (C) 2001 Russell King
+ *  Copyright (C) 2001-2002 Russell King
  *
  * Page table allocation/freeing primitives for 32-bit ARM processors.
  */
-
-/* unfortunately, this includes linux/mm.h and the rest of the universe. */
-#include <linux/slab.h>
-
-extern kmem_cache_t *pte_cache;
+#include "pgtable.h"

 /*
  * Allocate one PTE table.
  *
- * Note that we keep the processor copy of the PTE entries separate
- * from the Linux copy.  The processor copies are offset by -PTRS_PER_PTE
- * words from the Linux copy.
+ * This actually allocates two hardware PTE tables, but we wrap this up
+ * into one table thus:
+ *
+ *  +------------+
+ *  |  h/w pt 0  |
+ *  +------------+
+ *  |  h/w pt 1  |
+ *  +------------+
+ *  | Linux pt 0 |
+ *  +------------+
+ *  | Linux pt 1 |
+ *  +------------+
  */
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 {
+    int count = 0;
     pte_t *pte;

-    pte = kmem_cache_alloc(pte_cache, GFP_KERNEL);
-    if (pte)
+    do {
+        pte = (pte_t *)__get_free_page(GFP_KERNEL);
+        if (!pte) {
+            current->state = TASK_UNINTERRUPTIBLE;
+            schedule_timeout(HZ);
+        }
+    } while (!pte && (count++ < 10));
+
+    if (pte) {
+        clear_page(pte);
+        clean_dcache_area(pte, sizeof(pte_t) * PTRS_PER_PTE);
         pte += PTRS_PER_PTE;
+    }
+
     return pte;
 }

 static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-    pte_t *pte;
+    struct page *pte;
+    int count = 0;

-    pte = kmem_cache_alloc(pte_cache, GFP_KERNEL);
-    if (pte)
-        pte += PTRS_PER_PTE;
-    return (struct page *)pte;
+    do {
+        pte = alloc_pages(GFP_KERNEL, 0);
+        if (!pte) {
+            current->state = TASK_UNINTERRUPTIBLE;
+            schedule_timeout(HZ);
+        }
+    } while (!pte && (count++ < 10));
+
+    if (pte) {
+        void *page = page_address(pte);
+        clear_page(page);
+        clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE);
+    }
+
+    return pte;
 }

 /*
@@ -47,34 +76,49 @@
 {
     if (pte) {
         pte -= PTRS_PER_PTE;
-        kmem_cache_free(pte_cache, pte);
+        free_page((unsigned long)pte);
     }
 }

 static inline void pte_free(struct page *pte)
 {
-    pte_t *_pte = (pte_t *)pte;
-    if (pte) {
-        _pte -= PTRS_PER_PTE;
-        kmem_cache_free(pte_cache, _pte);
-    }
+    __free_page(pte);
 }

 /*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
  *
- * If 'mm' is the init tasks mm, then we are doing a vmalloc, and we
- * need to set stuff up correctly for it.
+ * Ensure that we always set both PMD entries.
  */
-#define pmd_populate_kernel(mm,pmdp,pte)            \
-    do {                            \
-        BUG_ON(mm != &init_mm);                \
-        set_pmd(pmdp, __mk_pmd(pte, _PAGE_KERNEL_TABLE));\
-    } while (0)
-
-#define pmd_populate(mm,pmdp,pte)                \
-    do {                            \
-        BUG_ON(mm == &init_mm);                \
-        set_pmd(pmdp, __mk_pmd(pte, _PAGE_USER_TABLE));    \
-    } while (0)
+static inline void
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+    unsigned long pte_ptr = (unsigned long)ptep;
+    pmd_t pmd;
+
+    BUG_ON(mm != &init_mm);
+
+    /*
+     * The pmd must be loaded with the physical
+     * address of the PTE table
+     */
+    pte_ptr -= PTRS_PER_PTE * sizeof(void *);
+    pmd_val(pmd) = __pa(pte_ptr) | _PAGE_KERNEL_TABLE;
+    set_pmd(pmdp, pmd);
+    pmd_val(pmd) += 256 * sizeof(pte_t);
+    set_pmd(pmdp + 1, pmd);
+}
+
+static inline void
+pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
+{
+    pmd_t pmd;
+
+    BUG_ON(mm == &init_mm);
+
+    pmd_val(pmd) = __pa(page_address(ptep)) | _PAGE_USER_TABLE;
+    set_pmd(pmdp, pmd);
+    pmd_val(pmd) += 256 * sizeof(pte_t);
+    set_pmd(pmdp + 1, pmd);
+}
diff -ur ref/include/asm-arm/proc-armv/pgtable.h linux/include/asm-arm/proc-armv/pgtable.h
--- ref/include/asm-arm/proc-armv/pgtable.h    Thu Feb 28 23:15:30 2002
+++ linux/include/asm-arm/proc-armv/pgtable.h    Tue Mar  5 23:20:52 2002
@@ -16,12 +16,17 @@
 #define __ASM_PROC_PGTABLE_H

 /*
- * entries per page directory level: they are two-level, so
- * we don't really have any PMD directory.
+ * We pull a couple of tricks here:
+ *  1. We wrap the PMD into the PGD.
+ *  2. We lie about the size of the PTE and PGD.
+ * Even though we have 256 PTE entries and 4096 PGD entries, we tell
+ * Linux that we actually have 512 PTE entries and 2048 PGD entries.
+ * Each "Linux" PGD entry is made up of two hardware PGD entries, and
+ * each PTE table is actually two hardware PTE tables.
  */
-#define PTRS_PER_PTE        256
+#define PTRS_PER_PTE        512
 #define PTRS_PER_PMD        1
-#define PTRS_PER_PGD        4096
+#define PTRS_PER_PGD        2048

 /*
  * Hardware page table definitions.
@@ -109,32 +114,29 @@
 #define pmd_bad(pmd)        (pmd_val(pmd) & 2)
 #define set_pmd(pmdp,pmd)    cpu_set_pmd(pmdp, pmd)

-static inline pmd_t __mk_pmd(pte_t *ptep, unsigned long prot)
+static inline void pmd_clear(pmd_t *pmdp)
 {
-    unsigned long pte_ptr = (unsigned long)ptep;
-    pmd_t pmd;
-
-    pte_ptr -= PTRS_PER_PTE * sizeof(void *);
-
-    /*
-     * The pmd must be loaded with the physical
-     * address of the PTE table
-     */
-    pmd_val(pmd) = __virt_to_phys(pte_ptr) | prot;
-
-    return pmd;
+    set_pmd(pmdp, __pmd(0));
+    set_pmd(pmdp + 1, __pmd(0));
 }

-static inline unsigned long __pmd_page(pmd_t pmd)
+static inline pte_t *pmd_page_kernel(pmd_t pmd)
 {
     unsigned long ptr;

     ptr = pmd_val(pmd) & ~(PTRS_PER_PTE * sizeof(void *) - 1);
-
     ptr += PTRS_PER_PTE * sizeof(void *);

-    return __phys_to_virt(ptr);
+    return __va(ptr);
 }
+
+#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd)))
+
+#define pte_offset_kernel(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_map(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_offset_map_nested(dir,addr)    (pmd_page_kernel(*(dir)) + __pte_index(addr))
+#define pte_unmap(pte)            do { } while (0)
+#define pte_unmap_nested(pte)        do { } while (0)

 #define set_pte(ptep, pte)    cpu_set_pte(ptep,pte)