setup_64.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. /*
  2. *
  3. * Common boot and setup code.
  4. *
  5. * Copyright (C) 2001 PPC64 Team, IBM Corp
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. */
  12. #define DEBUG
  13. #include <linux/export.h>
  14. #include <linux/string.h>
  15. #include <linux/sched.h>
  16. #include <linux/init.h>
  17. #include <linux/kernel.h>
  18. #include <linux/reboot.h>
  19. #include <linux/delay.h>
  20. #include <linux/initrd.h>
  21. #include <linux/seq_file.h>
  22. #include <linux/ioport.h>
  23. #include <linux/console.h>
  24. #include <linux/utsname.h>
  25. #include <linux/tty.h>
  26. #include <linux/root_dev.h>
  27. #include <linux/notifier.h>
  28. #include <linux/cpu.h>
  29. #include <linux/unistd.h>
  30. #include <linux/serial.h>
  31. #include <linux/serial_8250.h>
  32. #include <linux/bootmem.h>
  33. #include <linux/pci.h>
  34. #include <linux/lockdep.h>
  35. #include <linux/memblock.h>
  36. #include <linux/hugetlb.h>
  37. #include <linux/memory.h>
  38. #include <linux/nmi.h>
  39. #include <linux/debugfs.h>
  40. #include <asm/io.h>
  41. #include <asm/kdump.h>
  42. #include <asm/prom.h>
  43. #include <asm/processor.h>
  44. #include <asm/pgtable.h>
  45. #include <asm/smp.h>
  46. #include <asm/elf.h>
  47. #include <asm/machdep.h>
  48. #include <asm/paca.h>
  49. #include <asm/time.h>
  50. #include <asm/cputable.h>
  51. #include <asm/sections.h>
  52. #include <asm/btext.h>
  53. #include <asm/nvram.h>
  54. #include <asm/setup.h>
  55. #include <asm/rtas.h>
  56. #include <asm/iommu.h>
  57. #include <asm/serial.h>
  58. #include <asm/cache.h>
  59. #include <asm/page.h>
  60. #include <asm/mmu.h>
  61. #include <asm/firmware.h>
  62. #include <asm/xmon.h>
  63. #include <asm/udbg.h>
  64. #include <asm/kexec.h>
  65. #include <asm/mmu_context.h>
  66. #include <asm/code-patching.h>
  67. #include <asm/kvm_ppc.h>
  68. #include <asm/hugetlb.h>
  69. #include <asm/epapr_hcalls.h>
  /*
   * DBG() - early debug output routed through udbg_printf().
   *
   * Without DEBUG the macro still expands to a complete (empty) statement,
   * so "if (cond) DBG(...);" stays well formed.
   */
  #ifdef DEBUG
  #define DBG(fmt...) udbg_printf(fmt)
  #else
  #define DBG(fmt...) do { } while (0)
  #endif
  75. int spinning_secondaries;
  76. u64 ppc64_pft_size;
  77. /* Pick defaults since we might want to patch instructions
  78. * before we've read this from the device tree.
  79. */
  80. struct ppc64_caches ppc64_caches = {
  81. .dline_size = 0x40,
  82. .log_dline_size = 6,
  83. .iline_size = 0x40,
  84. .log_iline_size = 6
  85. };
  86. EXPORT_SYMBOL_GPL(ppc64_caches);
  /*
   * Cache block sizes.  binfmt_elf.c uses these to put aux entries on
   * the stack of each ELF executable being started.
   */
  int dcache_bsize, icache_bsize, ucache_bsize;
  #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
  /*
   * Point the paca of every possible CPU at the TLB core data of the
   * first thread of its core, and warn once when threads are enabled on
   * an MMU configuration that cannot keep the TLB handlers race-free.
   */
  static void setup_tlb_core_data(void)
  {
  	int cpu;

  	/* The lock has to be the first member of struct tlb_core_data. */
  	BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);

  	for_each_possible_cpu(cpu) {
  		int first = cpu_first_thread_sibling(cpu);

  		/*
  		 * If we boot via kdump on a non-primary thread,
  		 * make sure we point at the thread that actually
  		 * set up this TLB.
  		 */
  		if (cpu_first_thread_sibling(boot_cpuid) == first)
  			first = boot_cpuid;

  		paca[cpu].tcd_ptr = &paca[first].tcd;

  		/*
  		 * If we have threads, we need either tlbsrx.
  		 * or e6500 tablewalk mode, or else TLB handlers
  		 * will be racy and could produce duplicate entries.
  		 *
  		 * WARN_ONCE() takes the condition first and the format
  		 * string after it.  Should we panic instead?
  		 */
  		WARN_ONCE(smt_enabled_at_boot >= 2 &&
  			  !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
  			  book3e_htw_mode != PPC_HTW_E6500,
  			  "%s: unsupported MMU configuration -- expect problems\n",
  			  __func__);
  	}
  }
  #else
  /* Nothing to set up unless both Book3E and SMP are configured. */
  static void setup_tlb_core_data(void)
  {
  }
  #endif
  128. #ifdef CONFIG_SMP
  129. static char *smt_enabled_cmdline;
  130. /* Look for ibm,smt-enabled OF option */
  131. static void check_smt_enabled(void)
  132. {
  133. struct device_node *dn;
  134. const char *smt_option;
  135. /* Default to enabling all threads */
  136. smt_enabled_at_boot = threads_per_core;
  137. /* Allow the command line to overrule the OF option */
  138. if (smt_enabled_cmdline) {
  139. if (!strcmp(smt_enabled_cmdline, "on"))
  140. smt_enabled_at_boot = threads_per_core;
  141. else if (!strcmp(smt_enabled_cmdline, "off"))
  142. smt_enabled_at_boot = 0;
  143. else {
  144. int smt;
  145. int rc;
  146. rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
  147. if (!rc)
  148. smt_enabled_at_boot =
  149. min(threads_per_core, smt);
  150. }
  151. } else {
  152. dn = of_find_node_by_path("/options");
  153. if (dn) {
  154. smt_option = of_get_property(dn, "ibm,smt-enabled",
  155. NULL);
  156. if (smt_option) {
  157. if (!strcmp(smt_option, "on"))
  158. smt_enabled_at_boot = threads_per_core;
  159. else if (!strcmp(smt_option, "off"))
  160. smt_enabled_at_boot = 0;
  161. }
  162. of_node_put(dn);
  163. }
  164. }
  165. }
  166. /* Look for smt-enabled= cmdline option */
  167. static int __init early_smt_enabled(char *p)
  168. {
  169. smt_enabled_cmdline = p;
  170. return 0;
  171. }
  172. early_param("smt-enabled", early_smt_enabled);
  173. #else
  /* No SMT handling without SMP; still expands to a complete statement. */
  #define check_smt_enabled() do { } while (0)
  175. #endif /* CONFIG_SMP */
  176. /** Fix up paca fields required for the boot cpu */
  177. static void fixup_boot_paca(void)
  178. {
  179. /* The boot cpu is started */
  180. get_paca()->cpu_start = 1;
  181. /* Allow percpu accesses to work until we setup percpu data */
  182. get_paca()->data_offset = 0;
  183. }
  184. static void cpu_ready_for_interrupts(void)
  185. {
  186. /* Set IR and DR in PACA MSR */
  187. get_paca()->kernel_msr = MSR_KERNEL;
  188. /*
  189. * Enable AIL if supported, and we are in hypervisor mode. If we are
  190. * not in hypervisor mode, we enable relocation-on interrupts later
  191. * in pSeries_setup_arch() using the H_SET_MODE hcall.
  192. */
  193. if (cpu_has_feature(CPU_FTR_HVMODE) &&
  194. cpu_has_feature(CPU_FTR_ARCH_207S)) {
  195. unsigned long lpcr = mfspr(SPRN_LPCR);
  196. mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
  197. }
  198. /*
  199. * Fixup HFSCR:TM based on CPU features. The bit is set by our
  200. * early asm init because at that point we haven't updated our
  201. * CPU features from firmware and device-tree. Here we have,
  202. * so let's do it.
  203. */
  204. if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
  205. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
  206. }
  207. /*
  208. * Early initialization entry point. This is called by head.S
  209. * with MMU translation disabled. We rely on the "feature" of
  210. * the CPU that ignores the top 2 bits of the address in real
  211. * mode so we can access kernel globals normally provided we
  212. * only toy with things in the RMO region. From here, we do
  213. * some early parsing of the device-tree to setup out MEMBLOCK
  214. * data structures, and allocate & initialize the hash table
  215. * and segment tables so we can start running with translation
  216. * enabled.
  217. *
  218. * It is this function which will call the probe() callback of
  219. * the various platform types and copy the matching one to the
  220. * global ppc_md structure. Your platform can eventually do
  221. * some very early initializations from the probe() routine, but
  222. * this is not recommended, be very careful as, for example, the
  223. * device-tree is not accessible via normal means at this point.
  224. */
  225. void __init early_setup(unsigned long dt_ptr)
  226. {
  227. static __initdata struct paca_struct boot_paca;
  228. /* -------- printk is _NOT_ safe to use here ! ------- */
  229. /* Identify CPU type */
  230. identify_cpu(0, mfspr(SPRN_PVR));
  231. /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
  232. initialise_paca(&boot_paca, 0);
  233. setup_paca(&boot_paca);
  234. fixup_boot_paca();
  235. /* Initialize lockdep early or else spinlocks will blow */
  236. lockdep_init();
  237. /* -------- printk is now safe to use ------- */
  238. /* Enable early debugging if any specified (see udbg.h) */
  239. udbg_early_init();
  240. DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
  241. /*
  242. * Do early initialization using the flattened device
  243. * tree, such as retrieving the physical memory map or
  244. * calculating/retrieving the hash table size.
  245. */
  246. early_init_devtree(__va(dt_ptr));
  247. epapr_paravirt_early_init();
  248. /* Now we know the logical id of our boot cpu, setup the paca. */
  249. setup_paca(&paca[boot_cpuid]);
  250. fixup_boot_paca();
  251. /* Probe the machine type */
  252. probe_machine();
  253. setup_kdump_trampoline();
  254. DBG("Found, Initializing memory management...\n");
  255. /* Initialize the hash table or TLB handling */
  256. early_init_mmu();
  257. /*
  258. * At this point, we can let interrupts switch to virtual mode
  259. * (the MMU has been setup), so adjust the MSR in the PACA to
  260. * have IR and DR set and enable AIL if it exists
  261. */
  262. cpu_ready_for_interrupts();
  263. /* Reserve large chunks of memory for use by CMA for KVM */
  264. kvm_cma_reserve();
  265. /*
  266. * Reserve any gigantic pages requested on the command line.
  267. * memblock needs to have been initialized by the time this is
  268. * called since this will reserve memory.
  269. */
  270. reserve_hugetlb_gpages();
  271. DBG(" <- early_setup()\n");
  272. #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
  273. /*
  274. * This needs to be done *last* (after the above DBG() even)
  275. *
  276. * Right after we return from this function, we turn on the MMU
  277. * which means the real-mode access trick that btext does will
  278. * no longer work, it needs to switch to using a real MMU
  279. * mapping. This call will ensure that it does
  280. */
  281. btext_map();
  282. #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
  283. }
  284. #ifdef CONFIG_SMP
  285. void early_setup_secondary(void)
  286. {
  287. /* Mark interrupts enabled in PACA */
  288. get_paca()->soft_enabled = 0;
  289. /* Initialize the hash table or TLB handling */
  290. early_init_mmu_secondary();
  291. /*
  292. * At this point, we can let interrupts switch to virtual mode
  293. * (the MMU has been setup), so adjust the MSR in the PACA to
  294. * have IR and DR set.
  295. */
  296. cpu_ready_for_interrupts();
  297. }
  298. #endif /* CONFIG_SMP */
  299. #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
  300. static bool use_spinloop(void)
  301. {
  302. if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
  303. return true;
  304. /*
  305. * When book3e boots from kexec, the ePAPR spin table does
  306. * not get used.
  307. */
  308. return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
  309. }
  310. void smp_release_cpus(void)
  311. {
  312. unsigned long *ptr;
  313. int i;
  314. if (!use_spinloop())
  315. return;
  316. DBG(" -> smp_release_cpus()\n");
  317. /* All secondary cpus are spinning on a common spinloop, release them
  318. * all now so they can start to spin on their individual paca
  319. * spinloops. For non SMP kernels, the secondary cpus never get out
  320. * of the common spinloop.
  321. */
  322. ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
  323. - PHYSICAL_START);
  324. *ptr = ppc_function_entry(generic_secondary_smp_init);
  325. /* And wait a bit for them to catch up */
  326. for (i = 0; i < 100000; i++) {
  327. mb();
  328. HMT_low();
  329. if (spinning_secondaries == 0)
  330. break;
  331. udelay(1);
  332. }
  333. DBG("spinning_secondaries = %d\n", spinning_secondaries);
  334. DBG(" <- smp_release_cpus()\n");
  335. }
  336. #endif /* CONFIG_SMP || CONFIG_KEXEC */
  337. /*
  338. * Initialize some remaining members of the ppc64_caches and systemcfg
  339. * structures
  340. * (at least until we get rid of them completely). This is mostly some
  341. * cache informations about the CPU that will be used by cache flush
  342. * routines and/or provided to userland
  343. */
  344. static void __init initialize_cache_info(void)
  345. {
  346. struct device_node *np;
  347. unsigned long num_cpus = 0;
  348. DBG(" -> initialize_cache_info()\n");
  349. for_each_node_by_type(np, "cpu") {
  350. num_cpus += 1;
  351. /*
  352. * We're assuming *all* of the CPUs have the same
  353. * d-cache and i-cache sizes... -Peter
  354. */
  355. if (num_cpus == 1) {
  356. const __be32 *sizep, *lsizep;
  357. u32 size, lsize;
  358. size = 0;
  359. lsize = cur_cpu_spec->dcache_bsize;
  360. sizep = of_get_property(np, "d-cache-size", NULL);
  361. if (sizep != NULL)
  362. size = be32_to_cpu(*sizep);
  363. lsizep = of_get_property(np, "d-cache-block-size",
  364. NULL);
  365. /* fallback if block size missing */
  366. if (lsizep == NULL)
  367. lsizep = of_get_property(np,
  368. "d-cache-line-size",
  369. NULL);
  370. if (lsizep != NULL)
  371. lsize = be32_to_cpu(*lsizep);
  372. if (sizep == NULL || lsizep == NULL)
  373. DBG("Argh, can't find dcache properties ! "
  374. "sizep: %p, lsizep: %p\n", sizep, lsizep);
  375. ppc64_caches.dsize = size;
  376. ppc64_caches.dline_size = lsize;
  377. ppc64_caches.log_dline_size = __ilog2(lsize);
  378. ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
  379. size = 0;
  380. lsize = cur_cpu_spec->icache_bsize;
  381. sizep = of_get_property(np, "i-cache-size", NULL);
  382. if (sizep != NULL)
  383. size = be32_to_cpu(*sizep);
  384. lsizep = of_get_property(np, "i-cache-block-size",
  385. NULL);
  386. if (lsizep == NULL)
  387. lsizep = of_get_property(np,
  388. "i-cache-line-size",
  389. NULL);
  390. if (lsizep != NULL)
  391. lsize = be32_to_cpu(*lsizep);
  392. if (sizep == NULL || lsizep == NULL)
  393. DBG("Argh, can't find icache properties ! "
  394. "sizep: %p, lsizep: %p\n", sizep, lsizep);
  395. ppc64_caches.isize = size;
  396. ppc64_caches.iline_size = lsize;
  397. ppc64_caches.log_iline_size = __ilog2(lsize);
  398. ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
  399. }
  400. }
  401. DBG(" <- initialize_cache_info()\n");
  402. }
  403. /*
  404. * Do some initial setup of the system. The parameters are those which
  405. * were passed in from the bootloader.
  406. */
  407. void __init setup_system(void)
  408. {
  409. DBG(" -> setup_system()\n");
  410. /* Apply the CPUs-specific and firmware specific fixups to kernel
  411. * text (nop out sections not relevant to this CPU or this firmware)
  412. */
  413. do_feature_fixups(cur_cpu_spec->cpu_features,
  414. &__start___ftr_fixup, &__stop___ftr_fixup);
  415. do_feature_fixups(cur_cpu_spec->mmu_features,
  416. &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
  417. do_feature_fixups(powerpc_firmware_features,
  418. &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
  419. do_lwsync_fixups(cur_cpu_spec->cpu_features,
  420. &__start___lwsync_fixup, &__stop___lwsync_fixup);
  421. do_final_fixups();
  422. /*
  423. * Unflatten the device-tree passed by prom_init or kexec
  424. */
  425. unflatten_device_tree();
  426. /*
  427. * Fill the ppc64_caches & systemcfg structures with informations
  428. * retrieved from the device-tree.
  429. */
  430. initialize_cache_info();
  431. #ifdef CONFIG_PPC_RTAS
  432. /*
  433. * Initialize RTAS if available
  434. */
  435. rtas_initialize();
  436. #endif /* CONFIG_PPC_RTAS */
  437. /*
  438. * Check if we have an initrd provided via the device-tree
  439. */
  440. check_for_initrd();
  441. /*
  442. * Do some platform specific early initializations, that includes
  443. * setting up the hash table pointers. It also sets up some interrupt-mapping
  444. * related options that will be used by finish_device_tree()
  445. */
  446. if (ppc_md.init_early)
  447. ppc_md.init_early();
  448. /*
  449. * We can discover serial ports now since the above did setup the
  450. * hash table management for us, thus ioremap works. We do that early
  451. * so that further code can be debugged
  452. */
  453. find_legacy_serial_ports();
  454. /*
  455. * Register early console
  456. */
  457. register_early_udbg_console();
  458. /*
  459. * Initialize xmon
  460. */
  461. xmon_setup();
  462. smp_setup_cpu_maps();
  463. check_smt_enabled();
  464. setup_tlb_core_data();
  465. /*
  466. * Freescale Book3e parts spin in a loop provided by firmware,
  467. * so smp_release_cpus() does nothing for them
  468. */
  469. #if defined(CONFIG_SMP)
  470. /* Release secondary cpus out of their spinloops at 0x60 now that
  471. * we can map physical -> logical CPU ids
  472. */
  473. smp_release_cpus();
  474. #endif
  475. pr_info("Starting Linux %s %s\n", init_utsname()->machine,
  476. init_utsname()->version);
  477. pr_info("-----------------------------------------------------\n");
  478. pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
  479. pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size());
  480. if (ppc64_caches.dline_size != 0x80)
  481. pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size);
  482. if (ppc64_caches.iline_size != 0x80)
  483. pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size);
  484. pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
  485. pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE);
  486. pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS);
  487. pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
  488. cur_cpu_spec->cpu_user_features2);
  489. pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
  490. pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
  491. #ifdef CONFIG_PPC_STD_MMU_64
  492. if (htab_address)
  493. pr_info("htab_address = 0x%p\n", htab_address);
  494. pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
  495. #endif
  496. if (PHYSICAL_START > 0)
  497. pr_info("physical_start = 0x%llx\n",
  498. (unsigned long long)PHYSICAL_START);
  499. pr_info("-----------------------------------------------------\n");
  500. DBG(" <- setup_system()\n");
  501. }
  502. /* This returns the limit below which memory accesses to the linear
  503. * mapping are guarnateed not to cause a TLB or SLB miss. This is
  504. * used to allocate interrupt or emergency stacks for which our
  505. * exception entry path doesn't deal with being interrupted.
  506. */
  507. static u64 safe_stack_limit(void)
  508. {
  509. #ifdef CONFIG_PPC_BOOK3E
  510. /* Freescale BookE bolts the entire linear mapping */
  511. if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
  512. return linear_map_top;
  513. /* Other BookE, we assume the first GB is bolted */
  514. return 1ul << 30;
  515. #else
  516. /* BookS, the first segment is bolted */
  517. if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
  518. return 1UL << SID_SHIFT_1T;
  519. return 1UL << SID_SHIFT;
  520. #endif
  521. }
  522. static void __init irqstack_early_init(void)
  523. {
  524. u64 limit = safe_stack_limit();
  525. unsigned int i;
  526. /*
  527. * Interrupt stacks must be in the first segment since we
  528. * cannot afford to take SLB misses on them.
  529. */
  530. for_each_possible_cpu(i) {
  531. softirq_ctx[i] = (struct thread_info *)
  532. __va(memblock_alloc_base(THREAD_SIZE,
  533. THREAD_SIZE, limit));
  534. hardirq_ctx[i] = (struct thread_info *)
  535. __va(memblock_alloc_base(THREAD_SIZE,
  536. THREAD_SIZE, limit));
  537. }
  538. }
  539. #ifdef CONFIG_PPC_BOOK3E
  540. static void __init exc_lvl_early_init(void)
  541. {
  542. unsigned int i;
  543. unsigned long sp;
  544. for_each_possible_cpu(i) {
  545. sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
  546. critirq_ctx[i] = (struct thread_info *)__va(sp);
  547. paca[i].crit_kstack = __va(sp + THREAD_SIZE);
  548. sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
  549. dbgirq_ctx[i] = (struct thread_info *)__va(sp);
  550. paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
  551. sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
  552. mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
  553. paca[i].mc_kstack = __va(sp + THREAD_SIZE);
  554. }
  555. if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
  556. patch_exception(0x040, exc_debug_debug_book3e);
  557. }
  558. #else
  /* No exception-level stacks outside Book3E; still a complete statement. */
  #define exc_lvl_early_init() do { } while (0)
  560. #endif
  561. /*
  562. * Stack space used when we detect a bad kernel stack pointer, and
  563. * early in SMP boots before relocation is enabled. Exclusive emergency
  564. * stack for machine checks.
  565. */
  566. static void __init emergency_stack_init(void)
  567. {
  568. u64 limit;
  569. unsigned int i;
  570. /*
  571. * Emergency stacks must be under 256MB, we cannot afford to take
  572. * SLB misses on them. The ABI also requires them to be 128-byte
  573. * aligned.
  574. *
  575. * Since we use these as temporary stacks during secondary CPU
  576. * bringup, we need to get at them in real mode. This means they
  577. * must also be within the RMO region.
  578. */
  579. limit = min(safe_stack_limit(), ppc64_rma_size);
  580. for_each_possible_cpu(i) {
  581. unsigned long sp;
  582. sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
  583. sp += THREAD_SIZE;
  584. paca[i].emergency_sp = __va(sp);
  585. #ifdef CONFIG_PPC_BOOK3S_64
  586. /* emergency stack for machine check exception handling. */
  587. sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
  588. sp += THREAD_SIZE;
  589. paca[i].mc_emergency_sp = __va(sp);
  590. #endif
  591. }
  592. }
  593. /*
  594. * Called into from start_kernel this initializes memblock, which is used
  595. * to manage page allocation until mem_init is called.
  596. */
  597. void __init setup_arch(char **cmdline_p)
  598. {
  599. *cmdline_p = boot_command_line;
  600. /*
  601. * Set cache line size based on type of cpu as a default.
  602. * Systems with OF can look in the properties on the cpu node(s)
  603. * for a possibly more accurate value.
  604. */
  605. dcache_bsize = ppc64_caches.dline_size;
  606. icache_bsize = ppc64_caches.iline_size;
  607. if (ppc_md.panic)
  608. setup_panic();
  609. init_mm.start_code = (unsigned long)_stext;
  610. init_mm.end_code = (unsigned long) _etext;
  611. init_mm.end_data = (unsigned long) _edata;
  612. init_mm.brk = klimit;
  613. #ifdef CONFIG_PPC_64K_PAGES
  614. init_mm.context.pte_frag = NULL;
  615. #endif
  616. #ifdef CONFIG_SPAPR_TCE_IOMMU
  617. mm_iommu_init(&init_mm.context);
  618. #endif
  619. irqstack_early_init();
  620. exc_lvl_early_init();
  621. emergency_stack_init();
  622. initmem_init();
  623. #ifdef CONFIG_DUMMY_CONSOLE
  624. conswitchp = &dummy_con;
  625. #endif
  626. if (ppc_md.setup_arch)
  627. ppc_md.setup_arch();
  628. paging_init();
  629. /* Initialize the MMU context management stuff */
  630. mmu_context_init();
  631. /* Interrupt code needs to be 64K-aligned */
  632. if ((unsigned long)_stext & 0xffff)
  633. panic("Kernelbase not 64K-aligned (0x%lx)!\n",
  634. (unsigned long)_stext);
  635. }
  636. #ifdef CONFIG_SMP
  637. #define PCPU_DYN_SIZE ()
  638. static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
  639. {
  640. return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
  641. __pa(MAX_DMA_ADDRESS));
  642. }
  643. static void __init pcpu_fc_free(void *ptr, size_t size)
  644. {
  645. free_bootmem(__pa(ptr), size);
  646. }
  647. static int pcpu_cpu_distance(unsigned int from, unsigned int to)
  648. {
  649. if (cpu_to_node(from) == cpu_to_node(to))
  650. return LOCAL_DISTANCE;
  651. else
  652. return REMOTE_DISTANCE;
  653. }
  654. unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
  655. EXPORT_SYMBOL(__per_cpu_offset);
  656. void __init setup_per_cpu_areas(void)
  657. {
  658. const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
  659. size_t atom_size;
  660. unsigned long delta;
  661. unsigned int cpu;
  662. int rc;
  663. /*
  664. * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
  665. * to group units. For larger mappings, use 1M atom which
  666. * should be large enough to contain a number of units.
  667. */
  668. if (mmu_linear_psize == MMU_PAGE_4K)
  669. atom_size = PAGE_SIZE;
  670. else
  671. atom_size = 1 << 20;
  672. rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
  673. pcpu_fc_alloc, pcpu_fc_free);
  674. if (rc < 0)
  675. panic("cannot initialize percpu area (err=%d)", rc);
  676. delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
  677. for_each_possible_cpu(cpu) {
  678. __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
  679. paca[cpu].data_offset = __per_cpu_offset[cpu];
  680. }
  681. }
  682. #endif
  683. #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  684. unsigned long memory_block_size_bytes(void)
  685. {
  686. if (ppc_md.memory_block_size)
  687. return ppc_md.memory_block_size();
  688. return MIN_MEMORY_BLOCK_SIZE;
  689. }
  690. #endif
  691. #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
  692. struct ppc_pci_io ppc_pci_io;
  693. EXPORT_SYMBOL(ppc_pci_io);
  694. #endif
  695. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  696. u64 hw_nmi_get_sample_period(int watchdog_thresh)
  697. {
  698. return ppc_proc_freq * watchdog_thresh;
  699. }
  700. /*
  701. * The hardlockup detector breaks PMU event based branches and is likely
  702. * to get false positives in KVM guests, so disable it by default.
  703. */
  704. static int __init disable_hardlockup_detector(void)
  705. {
  706. hardlockup_detector_disable();
  707. return 0;
  708. }
  709. early_initcall(disable_hardlockup_detector);
  710. #endif
  711. #ifdef CONFIG_PPC_BOOK3S_64
  712. static enum l1d_flush_type enabled_flush_types;
  713. static void *l1d_flush_fallback_area;
  714. static bool no_rfi_flush;
  715. bool rfi_flush;
  716. static int __init handle_no_rfi_flush(char *p)
  717. {
  718. pr_info("rfi-flush: disabled on command line.");
  719. no_rfi_flush = true;
  720. return 0;
  721. }
  722. early_param("no_rfi_flush", handle_no_rfi_flush);
  723. /*
  724. * The RFI flush is not KPTI, but because users will see doco that says to use
  725. * nopti we hijack that option here to also disable the RFI flush.
  726. */
  727. static int __init handle_no_pti(char *p)
  728. {
  729. pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
  730. handle_no_rfi_flush(NULL);
  731. return 0;
  732. }
  733. early_param("nopti", handle_no_pti);
  734. static void do_nothing(void *unused)
  735. {
  736. /*
  737. * We don't need to do the flush explicitly, just enter+exit kernel is
  738. * sufficient, the RFI exit handlers will do the right thing.
  739. */
  740. }
  741. void rfi_flush_enable(bool enable)
  742. {
  743. if (rfi_flush == enable)
  744. return;
  745. if (enable) {
  746. do_rfi_flush_fixups(enabled_flush_types);
  747. on_each_cpu(do_nothing, NULL, 1);
  748. } else
  749. do_rfi_flush_fixups(L1D_FLUSH_NONE);
  750. rfi_flush = enable;
  751. }
  752. static void init_fallback_flush(void)
  753. {
  754. u64 l1d_size, limit;
  755. int cpu;
  756. l1d_size = ppc64_caches.dsize;
  757. limit = min(safe_stack_limit(), ppc64_rma_size);
  758. /*
  759. * Align to L1d size, and size it at 2x L1d size, to catch possible
  760. * hardware prefetch runoff. We don't have a recipe for load patterns to
  761. * reliably avoid the prefetcher.
  762. */
  763. l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
  764. memset(l1d_flush_fallback_area, 0, l1d_size * 2);
  765. for_each_possible_cpu(cpu) {
  766. /*
  767. * The fallback flush is currently coded for 8-way
  768. * associativity. Different associativity is possible, but it
  769. * will be treated as 8-way and may not evict the lines as
  770. * effectively.
  771. *
  772. * 128 byte lines are mandatory.
  773. */
  774. u64 c = l1d_size / 8;
  775. paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
  776. paca[cpu].l1d_flush_congruence = c;
  777. paca[cpu].l1d_flush_sets = c / 128;
  778. }
  779. }
  780. void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
  781. {
  782. if (types & L1D_FLUSH_FALLBACK) {
  783. pr_info("rfi-flush: Using fallback displacement flush\n");
  784. init_fallback_flush();
  785. }
  786. if (types & L1D_FLUSH_ORI)
  787. pr_info("rfi-flush: Using ori type flush\n");
  788. if (types & L1D_FLUSH_MTTRIG)
  789. pr_info("rfi-flush: Using mttrig type flush\n");
  790. enabled_flush_types = types;
  791. if (!no_rfi_flush)
  792. rfi_flush_enable(enable);
  793. }
  794. #ifdef CONFIG_DEBUG_FS
  795. static int rfi_flush_set(void *data, u64 val)
  796. {
  797. if (val == 1)
  798. rfi_flush_enable(true);
  799. else if (val == 0)
  800. rfi_flush_enable(false);
  801. else
  802. return -EINVAL;
  803. return 0;
  804. }
  805. static int rfi_flush_get(void *data, u64 *val)
  806. {
  807. *val = rfi_flush ? 1 : 0;
  808. return 0;
  809. }
  810. DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
  811. static __init int rfi_flush_debugfs_init(void)
  812. {
  813. debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
  814. return 0;
  815. }
  816. device_initcall(rfi_flush_debugfs_init);
  817. #endif
  818. ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
  819. {
  820. if (rfi_flush)
  821. return sprintf(buf, "Mitigation: RFI Flush\n");
  822. return sprintf(buf, "Vulnerable\n");
  823. }
  824. #endif /* CONFIG_PPC_BOOK3S_64 */