/* jfs_logmgr.h */

/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
  19. #ifndef _H_JFS_LOGMGR
  20. #define _H_JFS_LOGMGR
  21. #include "jfs_filsys.h"
  22. #include "jfs_lock.h"
  23. /*
  24. * log manager configuration parameters
  25. */
  26. /* log page size */
  27. #define LOGPSIZE 4096
  28. #define L2LOGPSIZE 12
  29. #define LOGPAGES 16 /* Log pages per mounted file system */
/*
 *	log logical volume
 *
 * a log is used to make the commit operation on journalled
 * files within the same logical volume group atomic.
 * a log is implemented with a logical volume.
 * there is one log per logical volume group.
 *
 * block 0 of the log logical volume is not used (ipl etc).
 * block 1 contains a log "superblock" and is used by logFormat(),
 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
 * of the log but is not otherwise used during normal processing.
 * blocks 2 - (N-1) are used to contain log records.
 *
 * when a volume group is varied-on-line, logRedo() must have
 * been executed before the file systems (logical volumes) in
 * the volume group can be mounted.
 */
  48. /*
  49. * log superblock (block 1 of logical volume)
  50. */
  51. #define LOGSUPER_B 1
  52. #define LOGSTART_B 2
  53. #define LOGMAGIC 0x87654321
  54. #define LOGVERSION 1
  55. #define MAX_ACTIVE 128 /* Max active file systems sharing log */
  56. struct logsuper {
  57. __le32 magic; /* 4: log lv identifier */
  58. __le32 version; /* 4: version number */
  59. __le32 serial; /* 4: log open/mount counter */
  60. __le32 size; /* 4: size in number of LOGPSIZE blocks */
  61. __le32 bsize; /* 4: logical block size in byte */
  62. __le32 l2bsize; /* 4: log2 of bsize */
  63. __le32 flag; /* 4: option */
  64. __le32 state; /* 4: state - see below */
  65. __le32 end; /* 4: addr of last log record set by logredo */
  66. char uuid[16]; /* 16: 128-bit journal uuid */
  67. char label[16]; /* 16: journal label */
  68. struct {
  69. char uuid[16];
  70. } active[MAX_ACTIVE]; /* 2048: active file systems list */
  71. };
  72. #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  73. /* log flag: commit option (see jfs_filsys.h) */
  74. /* log state */
  75. #define LOGMOUNT 0 /* log mounted by lmLogInit() */
  76. #define LOGREDONE 1 /* log shutdown by lmLogShutdown().
  77. * log redo completed by logredo().
  78. */
  79. #define LOGWRAP 2 /* log wrapped */
  80. #define LOGREADERR 3 /* log read error detected in logredo() */
/*
 *	log logical page
 *
 * (this comment should be rewritten !)
 * the header and trailer structures (h,t) will normally have
 * the same page and eor value.
 * An exception to this occurs when a complete page write is not
 * accomplished on a power failure. Since the hardware may "split write"
 * sectors in the page, any out of order sequence may occur during powerfail
 * and needs to be recognized during log replay.  The xor value is
 * an "exclusive or" of all log words in the page up to eor.  This
 * 32 bit xor value is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer.  logredo can easily recognize pages
 * that were not completed by reconstructing this xor value and checking
 * the log page.
 *
 * Previous versions of the operating system did not allow split
 * writes and detected partially written records in logredo by
 * ordering the updates to the header, trailer, and the move of data
 * into the logdata area.  The order: (1) data is moved (2) header
 * is updated (3) trailer is updated.  In logredo, when the header
 * differed from the trailer, the header and trailer were reconciled
 * as follows: if h.page != t.page they were set to the smaller of
 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
 * h.eor != t.eor they were set to the smaller of their two values.
 */
  107. struct logpage {
  108. struct { /* header */
  109. __le32 page; /* 4: log sequence page number */
  110. __le16 rsrvd; /* 2: */
  111. __le16 eor; /* 2: end-of-log offset of lasrt record write */
  112. } h;
  113. __le32 data[LOGPSIZE / 4 - 4]; /* log record area */
  114. struct { /* trailer */
  115. __le32 page; /* 4: normally the same as h.page */
  116. __le16 rsrvd; /* 2: */
  117. __le16 eor; /* 2: normally the same as h.eor */
  118. } t;
  119. };
  120. #define LOGPHDRSIZE 8 /* log page header size */
  121. #define LOGPTLRSIZE 8 /* log page trailer size */
/*
 *	log record
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space
 * if they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
 * record is written to the log on disk, undo discards the copy
 * in main-memory.)
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
 * records are packed one after the other in the data area of log pages.
 * (sometimes a DUMMY record is inserted so that at least one record ends
 * on every page or the longest record is placed on at most two pages).
 * the field eor in page header/trailer points to the byte following
 * the last record on a page.
 */
  146. /* log record types */
  147. #define LOG_COMMIT 0x8000
  148. #define LOG_SYNCPT 0x4000
  149. #define LOG_MOUNT 0x2000
  150. #define LOG_REDOPAGE 0x0800
  151. #define LOG_NOREDOPAGE 0x0080
  152. #define LOG_NOREDOINOEXT 0x0040
  153. #define LOG_UPDATEMAP 0x0008
  154. #define LOG_NOREDOFILE 0x0001
  155. /* REDOPAGE/NOREDOPAGE log record data type */
  156. #define LOG_INODE 0x0001
  157. #define LOG_XTREE 0x0002
  158. #define LOG_DTREE 0x0004
  159. #define LOG_BTROOT 0x0010
  160. #define LOG_EA 0x0020
  161. #define LOG_ACL 0x0040
  162. #define LOG_DATA 0x0080
  163. #define LOG_NEW 0x0100
  164. #define LOG_EXTEND 0x0200
  165. #define LOG_RELOCATE 0x0400
  166. #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
  167. /* UPDATEMAP log record descriptor type */
  168. #define LOG_ALLOCXADLIST 0x0080
  169. #define LOG_ALLOCPXDLIST 0x0040
  170. #define LOG_ALLOCXAD 0x0020
  171. #define LOG_ALLOCPXD 0x0010
  172. #define LOG_FREEXADLIST 0x0008
  173. #define LOG_FREEPXDLIST 0x0004
  174. #define LOG_FREEXAD 0x0002
  175. #define LOG_FREEPXD 0x0001
  176. struct lrd {
  177. /*
  178. * type independent area
  179. */
  180. __le32 logtid; /* 4: log transaction identifier */
  181. __le32 backchain; /* 4: ptr to prev record of same transaction */
  182. __le16 type; /* 2: record type */
  183. __le16 length; /* 2: length of data in record (in byte) */
  184. __le32 aggregate; /* 4: file system lv/aggregate */
  185. /* (16) */
  186. /*
  187. * type dependent area (20)
  188. */
  189. union {
  190. /*
  191. * COMMIT: commit
  192. *
  193. * transaction commit: no type-dependent information;
  194. */
  195. /*
  196. * REDOPAGE: after-image
  197. *
  198. * apply after-image;
  199. *
  200. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  201. */
  202. struct {
  203. __le32 fileset; /* 4: fileset number */
  204. __le32 inode; /* 4: inode number */
  205. __le16 type; /* 2: REDOPAGE record type */
  206. __le16 l2linesize; /* 2: log2 of line size */
  207. pxd_t pxd; /* 8: on-disk page pxd */
  208. } redopage; /* (20) */
  209. /*
  210. * NOREDOPAGE: the page is freed
  211. *
  212. * do not apply after-image records which precede this record
  213. * in the log with the same page block number to this page.
  214. *
  215. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  216. */
  217. struct {
  218. __le32 fileset; /* 4: fileset number */
  219. __le32 inode; /* 4: inode number */
  220. __le16 type; /* 2: NOREDOPAGE record type */
  221. __le16 rsrvd; /* 2: reserved */
  222. pxd_t pxd; /* 8: on-disk page pxd */
  223. } noredopage; /* (20) */
  224. /*
  225. * UPDATEMAP: update block allocation map
  226. *
  227. * either in-line PXD,
  228. * or out-of-line XADLIST;
  229. *
  230. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  231. */
  232. struct {
  233. __le32 fileset; /* 4: fileset number */
  234. __le32 inode; /* 4: inode number */
  235. __le16 type; /* 2: UPDATEMAP record type */
  236. __le16 nxd; /* 2: number of extents */
  237. pxd_t pxd; /* 8: pxd */
  238. } updatemap; /* (20) */
  239. /*
  240. * NOREDOINOEXT: the inode extent is freed
  241. *
  242. * do not apply after-image records which precede this
  243. * record in the log with the any of the 4 page block
  244. * numbers in this inode extent.
  245. *
  246. * NOTE: The fileset and pxd fields MUST remain in
  247. * the same fields in the REDOPAGE record format.
  248. *
  249. */
  250. struct {
  251. __le32 fileset; /* 4: fileset number */
  252. __le32 iagnum; /* 4: IAG number */
  253. __le32 inoext_idx; /* 4: inode extent index */
  254. pxd_t pxd; /* 8: on-disk page pxd */
  255. } noredoinoext; /* (20) */
  256. /*
  257. * SYNCPT: log sync point
  258. *
  259. * replay log up to syncpt address specified;
  260. */
  261. struct {
  262. __le32 sync; /* 4: syncpt address (0 = here) */
  263. } syncpt;
  264. /*
  265. * MOUNT: file system mount
  266. *
  267. * file system mount: no type-dependent information;
  268. */
  269. /*
  270. * ? FREEXTENT: free specified extent(s)
  271. *
  272. * free specified extent(s) from block allocation map
  273. * N.B.: nextents should be length of data/sizeof(xad_t)
  274. */
  275. struct {
  276. __le32 type; /* 4: FREEXTENT record type */
  277. __le32 nextent; /* 4: number of extents */
  278. /* data: PXD or XAD list */
  279. } freextent;
  280. /*
  281. * ? NOREDOFILE: this file is freed
  282. *
  283. * do not apply records which precede this record in the log
  284. * with the same inode number.
  285. *
  286. * NOREDOFILE must be the first to be written at commit
  287. * (last to be read in logredo()) - it prevents
  288. * replay of preceding updates of all preceding generations
  289. * of the inumber esp. the on-disk inode itself.
  290. */
  291. struct {
  292. __le32 fileset; /* 4: fileset number */
  293. __le32 inode; /* 4: inode number */
  294. } noredofile;
  295. /*
  296. * ? NEWPAGE:
  297. *
  298. * metadata type dependent
  299. */
  300. struct {
  301. __le32 fileset; /* 4: fileset number */
  302. __le32 inode; /* 4: inode number */
  303. __le32 type; /* 4: NEWPAGE record type */
  304. pxd_t pxd; /* 8: on-disk page pxd */
  305. } newpage;
  306. /*
  307. * ? DUMMY: filler
  308. *
  309. * no type-dependent information
  310. */
  311. } log;
  312. }; /* (36) */
  313. #define LOGRDSIZE (sizeof(struct lrd))
  314. /*
  315. * line vector descriptor
  316. */
  317. struct lvd {
  318. __le16 offset;
  319. __le16 length;
  320. };
  321. /*
  322. * log logical volume
  323. */
  324. struct jfs_log {
  325. struct list_head sb_list;/* This is used to sync metadata
  326. * before writing syncpt.
  327. */
  328. struct list_head journal_list; /* Global list */
  329. struct block_device *bdev; /* 4: log lv pointer */
  330. int serial; /* 4: log mount serial number */
  331. s64 base; /* @8: log extent address (inline log ) */
  332. int size; /* 4: log size in log page (in page) */
  333. int l2bsize; /* 4: log2 of bsize */
  334. unsigned long flag; /* 4: flag */
  335. struct lbuf *lbuf_free; /* 4: free lbufs */
  336. wait_queue_head_t free_wait; /* 4: */
  337. /* log write */
  338. int logtid; /* 4: log tid */
  339. int page; /* 4: page number of eol page */
  340. int eor; /* 4: eor of last record in eol page */
  341. struct lbuf *bp; /* 4: current log page buffer */
  342. struct mutex loglock; /* 4: log write serialization lock */
  343. /* syncpt */
  344. int nextsync; /* 4: bytes to write before next syncpt */
  345. int active; /* 4: */
  346. wait_queue_head_t syncwait; /* 4: */
  347. /* commit */
  348. uint cflag; /* 4: */
  349. struct list_head cqueue; /* FIFO commit queue */
  350. struct tblock *flush_tblk; /* tblk we're waiting on for flush */
  351. int gcrtc; /* 4: GC_READY transaction count */
  352. struct tblock *gclrt; /* 4: latest GC_READY transaction */
  353. spinlock_t gclock; /* 4: group commit lock */
  354. int logsize; /* 4: log data area size in byte */
  355. int lsn; /* 4: end-of-log */
  356. int clsn; /* 4: clsn */
  357. int syncpt; /* 4: addr of last syncpt record */
  358. int sync; /* 4: addr from last logsync() */
  359. struct list_head synclist; /* 8: logsynclist anchor */
  360. spinlock_t synclock; /* 4: synclist lock */
  361. struct lbuf *wqueue; /* 4: log pageout queue */
  362. int count; /* 4: count */
  363. char uuid[16]; /* 16: 128-bit uuid of log device */
  364. int no_integrity; /* 3: flag to disable journaling to disk */
  365. };
  366. /*
  367. * Log flag
  368. */
  369. #define log_INLINELOG 1
  370. #define log_SYNCBARRIER 2
  371. #define log_QUIESCE 3
  372. #define log_FLUSH 4
  373. /*
  374. * group commit flag
  375. */
  376. /* jfs_log */
  377. #define logGC_PAGEOUT 0x00000001
  378. /* tblock/lbuf */
  379. #define tblkGC_QUEUE 0x0001
  380. #define tblkGC_READY 0x0002
  381. #define tblkGC_COMMIT 0x0004
  382. #define tblkGC_COMMITTED 0x0008
  383. #define tblkGC_EOP 0x0010
  384. #define tblkGC_FREE 0x0020
  385. #define tblkGC_LEADER 0x0040
  386. #define tblkGC_ERROR 0x0080
  387. #define tblkGC_LAZY 0x0100 // D230860
  388. #define tblkGC_UNLOCKED 0x0200 // D230860
  389. /*
  390. * log cache buffer header
  391. */
  392. struct lbuf {
  393. struct jfs_log *l_log; /* 4: log associated with buffer */
  394. /*
  395. * data buffer base area
  396. */
  397. uint l_flag; /* 4: pageout control flags */
  398. struct lbuf *l_wqnext; /* 4: write queue link */
  399. struct lbuf *l_freelist; /* 4: freelistlink */
  400. int l_pn; /* 4: log page number */
  401. int l_eor; /* 4: log record eor */
  402. int l_ceor; /* 4: committed log record eor */
  403. s64 l_blkno; /* 8: log page block number */
  404. caddr_t l_ldata; /* 4: data page */
  405. struct page *l_page; /* The page itself */
  406. uint l_offset; /* Offset of l_ldata within the page */
  407. wait_queue_head_t l_ioevent; /* 4: i/o done event */
  408. };
  409. /* Reuse l_freelist for redrive list */
  410. #define l_redrive_next l_freelist
  411. /*
  412. * logsynclist block
  413. *
  414. * common logsyncblk prefix for jbuf_t and tblock
  415. */
  416. struct logsyncblk {
  417. u16 xflag; /* flags */
  418. u16 flag; /* only meaninful in tblock */
  419. lid_t lid; /* lock id */
  420. s32 lsn; /* log sequence number */
  421. struct list_head synclist; /* log sync list link */
  422. };
  423. /*
  424. * logsynclist serialization (per log)
  425. */
  426. #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
  427. #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
  428. #define LOGSYNC_UNLOCK(log, flags) \
  429. spin_unlock_irqrestore(&(log)->synclock, flags)
  430. /* compute the difference in bytes of lsn from sync point */
  431. #define logdiff(diff, lsn, log)\
  432. {\
  433. diff = (lsn) - (log)->syncpt;\
  434. if (diff < 0)\
  435. diff += (log)->logsize;\
  436. }
  437. extern int lmLogOpen(struct super_block *sb);
  438. extern int lmLogClose(struct super_block *sb);
  439. extern int lmLogShutdown(struct jfs_log * log);
  440. extern int lmLogInit(struct jfs_log * log);
  441. extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
  442. extern int lmGroupCommit(struct jfs_log *, struct tblock *);
  443. extern int jfsIOWait(void *);
  444. extern void jfs_flush_journal(struct jfs_log * log, int wait);
  445. extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
  446. #endif /* _H_JFS_LOGMGR */