summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_zone_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_zone_alloc.c')
-rw-r--r--fs/xfs/xfs_zone_alloc.c148
1 files changed, 60 insertions, 88 deletions
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 1147bacb2da8..23cdab4515bb 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -26,14 +26,22 @@
#include "xfs_trace.h"
#include "xfs_mru_cache.h"
+static void
+xfs_open_zone_free_rcu(
+ struct callback_head *cb)
+{
+ struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu);
+
+ xfs_rtgroup_rele(oz->oz_rtg);
+ kfree(oz);
+}
+
void
xfs_open_zone_put(
struct xfs_open_zone *oz)
{
- if (atomic_dec_and_test(&oz->oz_ref)) {
- xfs_rtgroup_rele(oz->oz_rtg);
- kfree(oz);
- }
+ if (atomic_dec_and_test(&oz->oz_ref))
+ call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu);
}
static inline uint32_t
@@ -614,14 +622,25 @@ static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip)
}
/*
- * Try to pack inodes that are written back after they were closed tight instead
- * of trying to open new zones for them or spread them to the least recently
- * used zone. This optimizes the data layout for workloads that untar or copy
- * a lot of small files. Right now this does not separate multiple such
+ * Try to tightly pack small files that are written back after they were closed
+ * instead of trying to open new zones for them or spread them to the least
+ * recently used zone. This optimizes the data layout for workloads that untar
+ * or copy a lot of small files. Right now this does not separate multiple such
* streams.
*/
static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
{
+ struct xfs_mount *mp = ip->i_mount;
+ size_t zone_capacity =
+ XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].blocks);
+
+ /*
+ * Do not pack write files that are already using a full zone to avoid
+ * fragmentation.
+ */
+ if (i_size_read(VFS_I(ip)) >= zone_capacity)
+ return false;
+
return !inode_is_open_for_write(VFS_I(ip)) &&
!(ip->i_diflags & XFS_DIFLAG_APPEND);
}
@@ -746,97 +765,54 @@ xfs_mark_rtg_boundary(
}
/*
- * Cache the last zone written to for an inode so that it is considered first
- * for subsequent writes.
- */
-struct xfs_zone_cache_item {
- struct xfs_mru_cache_elem mru;
- struct xfs_open_zone *oz;
-};
-
-static inline struct xfs_zone_cache_item *
-xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
-{
- return container_of(mru, struct xfs_zone_cache_item, mru);
-}
-
-static void
-xfs_zone_cache_free_func(
- void *data,
- struct xfs_mru_cache_elem *mru)
-{
- struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
-
- xfs_open_zone_put(item->oz);
- kfree(item);
-}
-
-/*
* Check if we have a cached last open zone available for the inode and
* if yes return a reference to it.
*/
static struct xfs_open_zone *
-xfs_cached_zone(
- struct xfs_mount *mp,
- struct xfs_inode *ip)
+xfs_get_cached_zone(
+ struct xfs_inode *ip)
{
- struct xfs_mru_cache_elem *mru;
- struct xfs_open_zone *oz;
+ struct xfs_open_zone *oz;
- mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
- if (!mru)
- return NULL;
- oz = xfs_zone_cache_item(mru)->oz;
+ rcu_read_lock();
+ oz = VFS_I(ip)->i_private;
if (oz) {
/*
* GC only steals open zones at mount time, so no GC zones
* should end up in the cache.
*/
ASSERT(!oz->oz_is_gc);
- ASSERT(atomic_read(&oz->oz_ref) > 0);
- atomic_inc(&oz->oz_ref);
+ if (!atomic_inc_not_zero(&oz->oz_ref))
+ oz = NULL;
}
- xfs_mru_cache_done(mp->m_zone_cache);
+ rcu_read_unlock();
+
return oz;
}
/*
- * Update the last used zone cache for a given inode.
+ * Stash our zone in the inode so that is is reused for future allocations.
*
- * The caller must have a reference on the open zone.
+ * The open_zone structure will be pinned until either the inode is freed or
+ * until the cached open zone is replaced with a different one because the
+ * current one was full when we tried to use it. This means we keep any
+ * open zone around forever as long as any inode that used it for the last
+ * write is cached, which slightly increases the memory use of cached inodes
+ * that were every written to, but significantly simplifies the cached zone
+ * lookup. Because the open_zone is clearly marked as full when all data
+ * in the underlying RTG was written, the caching is always safe.
*/
static void
-xfs_zone_cache_create_association(
- struct xfs_inode *ip,
- struct xfs_open_zone *oz)
+xfs_set_cached_zone(
+ struct xfs_inode *ip,
+ struct xfs_open_zone *oz)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_zone_cache_item *item = NULL;
- struct xfs_mru_cache_elem *mru;
+ struct xfs_open_zone *old_oz;
- ASSERT(atomic_read(&oz->oz_ref) > 0);
atomic_inc(&oz->oz_ref);
-
- mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
- if (mru) {
- /*
- * If we have an association already, update it to point to the
- * new zone.
- */
- item = xfs_zone_cache_item(mru);
- xfs_open_zone_put(item->oz);
- item->oz = oz;
- xfs_mru_cache_done(mp->m_zone_cache);
- return;
- }
-
- item = kmalloc(sizeof(*item), GFP_KERNEL);
- if (!item) {
- xfs_open_zone_put(oz);
- return;
- }
- item->oz = oz;
- xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+ old_oz = xchg(&VFS_I(ip)->i_private, oz);
+ if (old_oz)
+ xfs_open_zone_put(old_oz);
}
static void
@@ -880,15 +856,14 @@ xfs_zone_alloc_and_submit(
* the inode is still associated with a zone and use that if so.
*/
if (!*oz)
- *oz = xfs_cached_zone(mp, ip);
+ *oz = xfs_get_cached_zone(ip);
if (!*oz) {
select_zone:
*oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
-
- xfs_zone_cache_create_association(ip, *oz);
+ xfs_set_cached_zone(ip, *oz);
}
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -966,6 +941,12 @@ xfs_free_open_zones(
xfs_open_zone_put(oz);
}
spin_unlock(&zi->zi_open_zones_lock);
+
+ /*
+ * Wait for all open zones to be freed so that they drop the group
+ * references:
+ */
+ rcu_barrier();
}
struct xfs_init_zones {
@@ -1279,14 +1260,6 @@ xfs_mount_zones(
error = xfs_zone_gc_mount(mp);
if (error)
goto out_free_zone_info;
-
- /*
- * Set up a mru cache to track inode to open zone for data placement
- * purposes. The magic values for group count and life time is the
- * same as the defaults for file streams, which seems sane enough.
- */
- xfs_mru_cache_create(&mp->m_zone_cache, mp,
- 5000, 10, xfs_zone_cache_free_func);
return 0;
out_free_zone_info:
@@ -1300,5 +1273,4 @@ xfs_unmount_zones(
{
xfs_zone_gc_unmount(mp);
xfs_free_zone_info(mp->m_zone_info);
- xfs_mru_cache_destroy(mp->m_zone_cache);
}