aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_ialloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_ialloc.c')
-rw-r--r--fs/xfs/xfs_ialloc.c527
1 files changed, 280 insertions, 247 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 177a21a7ac4..a815412eab8 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -200,7 +200,8 @@ xfs_ialloc_inode_init(
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize * blks_per_cluster, 0);
+ mp->m_bsize * blks_per_cluster,
+ XBF_UNMAPPED);
if (!fbuf)
return ENOMEM;
/*
@@ -210,6 +211,7 @@ xfs_ialloc_inode_init(
* to log a whole cluster of inodes instead of all the
* individual transactions causing a lot of log traffic.
*/
+ fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
for (i = 0; i < ninodes; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
@@ -250,6 +252,7 @@ xfs_ialloc_ag_alloc(
/* boundary */
struct xfs_perag *pag;
+ memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = tp->t_mountp;
@@ -431,7 +434,7 @@ xfs_ialloc_next_ag(
spin_lock(&mp->m_agirotor_lock);
agno = mp->m_agirotor;
- if (++mp->m_agirotor == mp->m_maxagi)
+ if (++mp->m_agirotor >= mp->m_maxagi)
mp->m_agirotor = 0;
spin_unlock(&mp->m_agirotor_lock);
@@ -442,14 +445,13 @@ xfs_ialloc_next_ag(
* Select an allocation group to look for a free inode in, based on the parent
* inode and then mode. Return the allocation group buffer.
*/
-STATIC xfs_buf_t * /* allocation group buffer */
+STATIC xfs_agnumber_t
xfs_ialloc_ag_select(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t parent, /* parent directory inode number */
umode_t mode, /* bits set to indicate file type */
int okalloc) /* ok to allocate more space */
{
- xfs_buf_t *agbp; /* allocation group header buffer */
xfs_agnumber_t agcount; /* number of ag's in the filesystem */
xfs_agnumber_t agno; /* current ag number */
int flags; /* alloc buffer locking flags */
@@ -459,6 +461,7 @@ xfs_ialloc_ag_select(
int needspace; /* file mode implies space allocated */
xfs_perag_t *pag; /* per allocation group data */
xfs_agnumber_t pagno; /* parent (starting) ag number */
+ int error;
/*
* Files of these types need at least one block if length > 0
@@ -474,7 +477,9 @@ xfs_ialloc_ag_select(
if (pagno >= agcount)
pagno = 0;
}
+
ASSERT(pagno < agcount);
+
/*
* Loop through allocation groups, looking for one with a little
* free space in it. Note we don't look for free inodes, exactly.
@@ -486,51 +491,45 @@ xfs_ialloc_ag_select(
flags = XFS_ALLOC_FLAG_TRYLOCK;
for (;;) {
pag = xfs_perag_get(mp, agno);
+ if (!pag->pagi_inodeok) {
+ xfs_ialloc_next_ag(mp);
+ goto nextag;
+ }
+
if (!pag->pagi_init) {
- if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
+ error = xfs_ialloc_pagi_init(mp, tp, agno);
+ if (error)
goto nextag;
- }
- } else
- agbp = NULL;
+ }
- if (!pag->pagi_inodeok) {
- xfs_ialloc_next_ag(mp);
- goto unlock_nextag;
+ if (pag->pagi_freecount) {
+ xfs_perag_put(pag);
+ return agno;
}
- /*
- * Is there enough free space for the file plus a block
- * of inodes (if we need to allocate some)?
- */
- ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
- if (ineed && !pag->pagf_init) {
- if (agbp == NULL &&
- xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
+ if (!okalloc)
+ goto nextag;
+
+ if (!pag->pagf_init) {
+ error = xfs_alloc_pagf_init(mp, tp, agno, flags);
+ if (error)
goto nextag;
- }
- (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
}
- if (!ineed || pag->pagf_init) {
- if (ineed && !(longest = pag->pagf_longest))
- longest = pag->pagf_flcount > 0;
- if (!ineed ||
- (pag->pagf_freeblks >= needspace + ineed &&
- longest >= ineed &&
- okalloc)) {
- if (agbp == NULL &&
- xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
- goto nextag;
- }
- xfs_perag_put(pag);
- return agbp;
- }
+
+ /*
+ * Is there enough free space for the file plus a block of
+ * inodes? (if we need to allocate some)?
+ */
+ ineed = XFS_IALLOC_BLOCKS(mp);
+ longest = pag->pagf_longest;
+ if (!longest)
+ longest = pag->pagf_flcount > 0;
+
+ if (pag->pagf_freeblks >= needspace + ineed &&
+ longest >= ineed) {
+ xfs_perag_put(pag);
+ return agno;
}
-unlock_nextag:
- if (agbp)
- xfs_trans_brelse(tp, agbp);
nextag:
xfs_perag_put(pag);
/*
@@ -538,13 +537,13 @@ nextag:
* down.
*/
if (XFS_FORCED_SHUTDOWN(mp))
- return NULL;
+ return NULLAGNUMBER;
agno++;
if (agno >= agcount)
agno = 0;
if (agno == pagno) {
if (flags == 0)
- return NULL;
+ return NULLAGNUMBER;
flags = 0;
}
}
@@ -607,195 +606,39 @@ xfs_ialloc_get_rec(
}
/*
- * Visible inode allocation functions.
- */
-/*
- * Find a free (set) bit in the inode bitmask.
- */
-static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
-{
- return xfs_lowbit64(*fp);
-}
-
-/*
- * Allocate an inode on disk.
- * Mode is used to tell whether the new inode will need space, and whether
- * it is a directory.
+ * Allocate an inode.
*
- * The arguments IO_agbp and alloc_done are defined to work within
- * the constraint of one allocation per transaction.
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes. On the first call,
- * IO_agbp should be set to NULL. If an inode is available,
- * i.e., xfs_dialloc() did not need to do an allocation, an inode
- * number is returned. In this case, IO_agbp would be set to the
- * current ag_buf and alloc_done set to false.
- * If an allocation needed to be done, xfs_dialloc would return
- * the current ag_buf in IO_agbp and set alloc_done to true.
- * The caller should then commit the current transaction, allocate a new
- * transaction, and call xfs_dialloc() again, passing in the previous
- * value of IO_agbp. IO_agbp should be held across the transactions.
- * Since the agbp is locked across the two calls, the second call is
- * guaranteed to have a free inode available.
- *
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated. The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
*/
-int
-xfs_dialloc(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ino_t parent, /* parent inode (directory) */
- umode_t mode, /* mode bits for new inode */
- int okalloc, /* ok to allocate more space */
- xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
- boolean_t *alloc_done, /* true if we needed to replenish
- inode freelist */
- xfs_ino_t *inop) /* inode number allocated */
+STATIC int
+xfs_dialloc_ag(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_ino_t parent,
+ xfs_ino_t *inop)
{
- xfs_agnumber_t agcount; /* number of allocation groups */
- xfs_buf_t *agbp; /* allocation group header's buffer */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_agi_t *agi; /* allocation group header structure */
- xfs_btree_cur_t *cur; /* inode allocation btree cursor */
- int error; /* error return value */
- int i; /* result code */
- int ialloced; /* inode allocation status */
- int noroom = 0; /* no space for inode blk allocation */
- xfs_ino_t ino; /* fs-relative inode to be returned */
- /* REFERENCED */
- int j; /* result code */
- xfs_mount_t *mp; /* file system mount structure */
- int offset; /* index of inode in chunk */
- xfs_agino_t pagino; /* parent's AG relative inode # */
- xfs_agnumber_t pagno; /* parent's AG number */
- xfs_inobt_rec_incore_t rec; /* inode allocation record */
- xfs_agnumber_t tagno; /* testing allocation group number */
- xfs_btree_cur_t *tcur; /* temp cursor */
- xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
- struct xfs_perag *pag;
-
-
- if (*IO_agbp == NULL) {
- /*
- * We do not have an agbp, so select an initial allocation
- * group for inode allocation.
- */
- agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
- /*
- * Couldn't find an allocation group satisfying the
- * criteria, give up.
- */
- if (!agbp) {
- *inop = NULLFSINO;
- return 0;
- }
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- } else {
- /*
- * Continue where we left off before. In this case, we
- * know that the allocation group has free inodes.
- */
- agbp = *IO_agbp;
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
- }
- mp = tp->t_mountp;
- agcount = mp->m_sb.sb_agcount;
- agno = be32_to_cpu(agi->agi_seqno);
- tagno = agno;
- pagno = XFS_INO_TO_AGNO(mp, parent);
- pagino = XFS_INO_TO_AGINO(mp, parent);
-
- /*
- * If we have already hit the ceiling of inode blocks then clear
- * okalloc so we scan all available agi structures for a free
- * inode.
- */
-
- if (mp->m_maxicount &&
- mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
- noroom = 1;
- okalloc = 0;
- }
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
+ xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
+ struct xfs_perag *pag;
+ struct xfs_btree_cur *cur, *tcur;
+ struct xfs_inobt_rec_incore rec, trec;
+ xfs_ino_t ino;
+ int error;
+ int offset;
+ int i, j;
- /*
- * Loop until we find an allocation group that either has free inodes
- * or in which we can allocate some inodes. Iterate through the
- * allocation groups upward, wrapping at the end.
- */
- *alloc_done = B_FALSE;
- while (!agi->agi_freecount) {
- /*
- * Don't do anything if we're not supposed to allocate
- * any blocks, just go on to the next ag.
- */
- if (okalloc) {
- /*
- * Try to allocate some new inodes in the allocation
- * group.
- */
- if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
- xfs_trans_brelse(tp, agbp);
- if (error == ENOSPC) {
- *inop = NULLFSINO;
- return 0;
- } else
- return error;
- }
- if (ialloced) {
- /*
- * We successfully allocated some inodes, return
- * the current context to the caller so that it
- * can commit the current transaction and call
- * us again where we left off.
- */
- ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
- *alloc_done = B_TRUE;
- *IO_agbp = agbp;
- *inop = NULLFSINO;
- return 0;
- }
- }
- /*
- * If it failed, give up on this ag.
- */
- xfs_trans_brelse(tp, agbp);
- /*
- * Go on to the next ag: get its ag header.
- */
-nextag:
- if (++tagno == agcount)
- tagno = 0;
- if (tagno == agno) {
- *inop = NULLFSINO;
- return noroom ? ENOSPC : 0;
- }
- pag = xfs_perag_get(mp, tagno);
- if (pag->pagi_inodeok == 0) {
- xfs_perag_put(pag);
- goto nextag;
- }
- error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
- xfs_perag_put(pag);
- if (error)
- goto nextag;
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- }
- /*
- * Here with an allocation group that has a free inode.
- * Reset agno since we may have chosen a new ag in the
- * loop above.
- */
- agno = tagno;
- *IO_agbp = NULL;
pag = xfs_perag_get(mp, agno);
+ ASSERT(pag->pagi_init);
+ ASSERT(pag->pagi_inodeok);
+ ASSERT(pag->pagi_freecount > 0);
+
restart_pagno:
- cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -995,7 +838,7 @@ newino:
}
alloc_inode:
- offset = xfs_ialloc_find_free(&rec.ir_free);
+ offset = xfs_lowbit64(rec.ir_free);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1028,6 +871,165 @@ error0:
}
/*
+ * Allocate an inode on disk.
+ *
+ * Mode is used to tell whether the new inode will need space, and whether it
+ * is a directory.
+ *
+ * This function is designed to be called twice if it has to do an allocation
+ * to make more free inodes. On the first call, *IO_agbp should be set to NULL.
+ * If an inode is available without having to performn an allocation, an inode
+ * number is returned. In this case, *IO_agbp is set to NULL. If an allocation
+ * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
+ * The caller should then commit the current transaction, allocate a
+ * new transaction, and call xfs_dialloc() again, passing in the previous value
+ * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI
+ * buffer is locked across the two calls, the second call is guaranteed to have
+ * a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the on-disk
+ * data structures are updated. The inode itself is not read in, since doing so
+ * would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+ struct xfs_trans *tp,
+ xfs_ino_t parent,
+ umode_t mode,
+ int okalloc,
+ struct xfs_buf **IO_agbp,
+ xfs_ino_t *inop)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *agbp;
+ xfs_agnumber_t agno;
+ int error;
+ int ialloced;
+ int noroom = 0;
+ xfs_agnumber_t start_agno;
+ struct xfs_perag *pag;
+
+ if (*IO_agbp) {
+ /*
+ * If the caller passes in a pointer to the AGI buffer,
+ * continue where we left off before. In this case, we
+ * know that the allocation group has free inodes.
+ */
+ agbp = *IO_agbp;
+ goto out_alloc;
+ }
+
+ /*
+ * We do not have an agbp, so select an initial allocation
+ * group for inode allocation.
+ */
+ start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+ if (start_agno == NULLAGNUMBER) {
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+ /*
+ * If we have already hit the ceiling of inode blocks then clear
+ * okalloc so we scan all available agi structures for a free
+ * inode.
+ */
+ if (mp->m_maxicount &&
+ mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+ noroom = 1;
+ okalloc = 0;
+ }
+
+ /*
+ * Loop until we find an allocation group that either has free inodes
+ * or in which we can allocate some inodes. Iterate through the
+ * allocation groups upward, wrapping at the end.
+ */
+ agno = start_agno;
+ for (;;) {
+ pag = xfs_perag_get(mp, agno);
+ if (!pag->pagi_inodeok) {
+ xfs_ialloc_next_ag(mp);
+ goto nextag;
+ }
+
+ if (!pag->pagi_init) {
+ error = xfs_ialloc_pagi_init(mp, tp, agno);
+ if (error)
+ goto out_error;
+ }
+
+ /*
+ * Do a first racy fast path check if this AG is usable.
+ */
+ if (!pag->pagi_freecount && !okalloc)
+ goto nextag;
+
+ /*
+ * Then read in the AGI buffer and recheck with the AGI buffer
+ * lock held.
+ */
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error)
+ goto out_error;
+
+ if (pag->pagi_freecount) {
+ xfs_perag_put(pag);
+ goto out_alloc;
+ }
+
+ if (!okalloc)
+ goto nextag_relse_buffer;
+
+
+ error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
+ if (error) {
+ xfs_trans_brelse(tp, agbp);
+
+ if (error != ENOSPC)
+ goto out_error;
+
+ xfs_perag_put(pag);
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+ if (ialloced) {
+ /*
+ * We successfully allocated some inodes, return
+ * the current context to the caller so that it
+ * can commit the current transaction and call
+ * us again where we left off.
+ */
+ ASSERT(pag->pagi_freecount > 0);
+ xfs_perag_put(pag);
+
+ *IO_agbp = agbp;
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+nextag_relse_buffer:
+ xfs_trans_brelse(tp, agbp);
+nextag:
+ xfs_perag_put(pag);
+ if (++agno == mp->m_sb.sb_agcount)
+ agno = 0;
+ if (agno == start_agno) {
+ *inop = NULLFSINO;
+ return noroom ? ENOSPC : 0;
+ }
+ }
+
+out_alloc:
+ *IO_agbp = NULL;
+ return xfs_dialloc_ag(tp, agbp, parent, inop);
+out_error:
+ xfs_perag_put(pag);
+ return XFS_ERROR(error);
+}
+
+/*
* Free disk inode. Carefully avoids touching the incore inode, all
* manipulations incore are the caller's responsibility.
* The on-disk inode is not changed by this operation, only the
@@ -1472,6 +1474,57 @@ xfs_check_agi_unlinked(
#define xfs_check_agi_unlinked(agi)
#endif
+static void
+xfs_agi_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
+ int agi_ok;
+
+ /*
+ * Validate the magic number of the agi block.
+ */
+ agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
+ XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
+
+ /*
+ * during growfs operations, the perag is not fully initialised,
+ * so we can't use it for any useful checking. growfs ensures we can't
+ * use it by using uncached buffers that don't have the perag attached
+ * so we can detect and avoid this problem.
+ */
+ if (bp->b_pag)
+ agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) ==
+ bp->b_pag->pag_agno;
+
+ if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
+ XFS_RANDOM_IALLOC_READ_AGI))) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+ xfs_check_agi_unlinked(agi);
+}
+
+static void
+xfs_agi_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_agi_verify(bp);
+}
+
+static void
+xfs_agi_write_verify(
+ struct xfs_buf *bp)
+{
+ xfs_agi_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_agi_buf_ops = {
+ .verify_read = xfs_agi_read_verify,
+ .verify_write = xfs_agi_write_verify,
+};
+
/*
* Read in the allocation group header (inode allocation section)
*/
@@ -1482,38 +1535,18 @@ xfs_read_agi(
xfs_agnumber_t agno, /* allocation group number */
struct xfs_buf **bpp) /* allocation group hdr buf */
{
- struct xfs_agi *agi; /* allocation group header */
- int agi_ok; /* agi is consistent */
int error;
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, bpp);
+ XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
if (error)
return error;
ASSERT(!xfs_buf_geterror(*bpp));
- agi = XFS_BUF_TO_AGI(*bpp);
-
- /*
- * Validate the magic number of the agi block.
- */
- agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
- XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
- be32_to_cpu(agi->agi_seqno) == agno;
- if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
- XFS_RANDOM_IALLOC_READ_AGI))) {
- XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
- mp, agi);
- xfs_trans_brelse(tp, *bpp);
- return XFS_ERROR(EFSCORRUPTED);
- }
-
xfs_buf_set_ref(*bpp, XFS_AGI_REF);
-
- xfs_check_agi_unlinked(agi);
return 0;
}