
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <asm/semaphore.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include "gfs.h"
#include "dio.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "rgrp.h"
#include "trans.h"
#define buffer_busy(bh) ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
/**
* aspace_get_block - stub get_block for aspace mappings (should never be called)
* @inode: the aspace inode
* @lblock: the logical block number
* @bh_result: the buffer head to map
* @create: non-zero if the block may be created
*
* Returns: errno
*/
static int
aspace_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
gfs_assert_warn(get_v2sdp(inode->i_sb), FALSE);
return -ENOSYS;
}
/**
* gfs_aspace_writepage - write an aspace page
* @page: the page
* @wbc: the writeback control
*
* Returns: errno
*/
static int
gfs_aspace_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, aspace_get_block, wbc);
}
/**
* stuck_releasepage - We're stuck in gfs_releasepage(). Print stuff out.
* @bh: the buffer we're stuck on
*
*/
static void
stuck_releasepage(struct buffer_head *bh)
{
struct gfs_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb);
struct gfs_bufdata *bd = get_v2bd(bh);
printk("GFS: fsid=%s: stuck in gfs_releasepage()...\n", sdp->sd_fsname);
printk("GFS: fsid=%s: blkno = %"PRIu64", bh->b_count = %d\n",
sdp->sd_fsname,
(uint64_t)bh->b_blocknr,
atomic_read(&bh->b_count));
printk("GFS: fsid=%s: get_v2bd(bh) = %s\n",
sdp->sd_fsname,
(bd) ? "!NULL" : "NULL");
if (bd) {
struct gfs_glock *gl = bd->bd_gl;
printk("GFS: fsid=%s: gl = (%u, %"PRIu64")\n",
sdp->sd_fsname,
gl->gl_name.ln_type,
gl->gl_name.ln_number);
printk("GFS: fsid=%s: bd_new_le.le_trans = %s\n",
sdp->sd_fsname,
(bd->bd_new_le.le_trans) ? "!NULL" : "NULL");
printk("GFS: fsid=%s: bd_incore_le.le_trans = %s\n",
sdp->sd_fsname,
(bd->bd_incore_le.le_trans) ? "!NULL" : "NULL");
printk("GFS: fsid=%s: bd_frozen = %s\n",
sdp->sd_fsname,
(bd->bd_frozen) ? "!NULL" : "NULL");
printk("GFS: fsid=%s: bd_pinned = %u\n",
sdp->sd_fsname, bd->bd_pinned);
printk("GFS: fsid=%s: bd_ail_tr_list = %s\n",
sdp->sd_fsname,
(list_empty(&bd->bd_ail_tr_list)) ? "Empty" : "!Empty");
if (gl->gl_ops == &gfs_inode_glops) {
struct gfs_inode *ip = get_gl2ip(gl);
if (ip) {
unsigned int x;
printk("GFS: fsid=%s: ip = %"PRIu64"/%"PRIu64"\n",
sdp->sd_fsname,
ip->i_num.no_formal_ino,
ip->i_num.no_addr);
printk("GFS: fsid=%s: ip->i_count = %d, ip->i_vnode = %s\n",
sdp->sd_fsname,
atomic_read(&ip->i_count),
(ip->i_vnode) ? "!NULL" : "NULL");
for (x = 0; x < GFS_MAX_META_HEIGHT; x++)
printk("GFS: fsid=%s: ip->i_cache[%u] = %s\n",
sdp->sd_fsname, x,
(ip->i_cache[x]) ? "!NULL" : "NULL");
}
}
}
}
/**
* gfs_aspace_releasepage - free the metadata associated with a page
* @page: the page that's being released
* @gfp_mask: passed from Linux VFS, ignored by us
*
* Call try_to_free_buffers() if the buffers in this page can be
* released.
*
* Returns: 1 if the buffers were freed, 0 if not
*/
static int
gfs_aspace_releasepage(struct page *page, gfp_t gfp_mask)
{
struct inode *aspace = page->mapping->host;
struct gfs_sbd *sdp = get_v2sdp(aspace->i_sb);
struct buffer_head *bh, *head;
struct gfs_bufdata *bd;
unsigned long t;
if (!page_has_buffers(page))
goto out;
head = bh = page_buffers(page);
do {
t = jiffies;
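/* Someone still holds a reference; if a writer is active on this
   aspace, wait for the count to drop (and complain every
   gt_stall_secs), otherwise give up and keep the page */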
while (atomic_read(&bh->b_count)) {
if (atomic_read(&aspace->i_writecount)) {
if (time_after_eq(jiffies,
t +
gfs_tune_get(sdp, gt_stall_secs) * HZ)) {
stuck_releasepage(bh);
t = jiffies;
}
yield();
continue;
}
return 0;
}
bd = get_v2bd(bh);
if (bd) {
gfs_assert_warn(sdp, bd->bd_bh == bh);
gfs_assert_warn(sdp, !bd->bd_new_le.le_trans);
gfs_assert_warn(sdp, !bd->bd_incore_le.le_trans);
gfs_assert_warn(sdp, !bd->bd_frozen);
gfs_assert_warn(sdp, !bd->bd_pinned);
gfs_assert_warn(sdp, list_empty(&bd->bd_ail_tr_list));
kmem_cache_free(gfs_bufdata_cachep, bd);
atomic_dec(&sdp->sd_bufdata_count);
set_v2bd(bh, NULL);
}
bh = bh->b_this_page;
} while (bh != head);
out:
return try_to_free_buffers(page);
}
static struct address_space_operations aspace_aops = {
.writepage = gfs_aspace_writepage,
.releasepage = gfs_aspace_releasepage,
};
/**
* gfs_aspace_get - Create and initialize a struct inode structure
* @sdp: the filesystem the aspace is in
*
* Right now an aspace is just a struct inode. Maybe Linux
* will supply a more lightweight address space construct (that works)
* in the future.
*
* Make sure pages/buffers in this aspace aren't in high memory.
*
* Returns: the aspace
*/
struct inode *
gfs_aspace_get(struct gfs_sbd *sdp)
{
struct inode *aspace;
aspace = new_inode(sdp->sd_vfs);
if (aspace) {
mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
aspace->i_mapping->a_ops = &aspace_aops;
aspace->i_size = ~0ULL;
set_v2ip(aspace, NULL);
insert_inode_hash(aspace);
}
return aspace;
}
/**
* gfs_aspace_put - get rid of an aspace
* @aspace: the aspace to put
*
*/
void
gfs_aspace_put(struct inode *aspace)
{
remove_inode_hash(aspace);
iput(aspace);
}
/**
* gfs_ail_start_trans - Start I/O on a part of the AIL
* @sdp: the filesystem
* @tr: the part of the AIL
*
*/
void
gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
{
struct list_head *head, *tmp, *prev;
struct gfs_bufdata *bd;
struct buffer_head *bh;
int retry;
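/* Walk the transaction's AIL list back to front: drop buffers that
   have been written, start I/O on dirty ones (dropping the spinlock
   to do so, then rescanning), and skip pinned or in-flight buffers */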
do {
retry = FALSE;
spin_lock(&sdp->sd_ail_lock);
for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
tmp != head;
tmp = prev, prev = tmp->prev) {
bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
bh = bd->bd_bh;
if (gfs_trylock_buffer(bh))
continue;
if (bd->bd_pinned) {
gfs_unlock_buffer(bh);
continue;
}
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
list_del_init(&bd->bd_ail_tr_list);
list_del(&bd->bd_ail_gl_list);
gfs_unlock_buffer(bh);
brelse(bh);
continue;
}
if (buffer_dirty(bh)) {
list_move(&bd->bd_ail_tr_list, head);
spin_unlock(&sdp->sd_ail_lock);
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
spin_lock(&sdp->sd_ail_lock);
gfs_unlock_buffer(bh);
retry = TRUE;
break;
}
gfs_unlock_buffer(bh);
}
spin_unlock(&sdp->sd_ail_lock);
} while (retry);
}
/**
* gfs_ail_empty_trans - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
* @tr: the transaction
*
* Returns: TRUE if the transaction's AIL list is empty
*/
int
gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
{
struct list_head *head, *tmp, *prev;
struct gfs_bufdata *bd;
struct buffer_head *bh;
int ret;
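/* Reap any buffers that have made it to disk; whatever is still
   pinned or under I/O stays on the list */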
spin_lock(&sdp->sd_ail_lock);
for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
tmp != head;
tmp = prev, prev = tmp->prev) {
bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
bh = bd->bd_bh;
if (gfs_trylock_buffer(bh))
continue;
if (bd->bd_pinned || buffer_busy(bh)) {
gfs_unlock_buffer(bh);
continue;
}
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
list_del_init(&bd->bd_ail_tr_list);
list_del(&bd->bd_ail_gl_list);
gfs_unlock_buffer(bh);
brelse(bh);
}
ret = list_empty(head);
spin_unlock(&sdp->sd_ail_lock);
return ret;
}
/**
* ail_empty_gl - remove all buffers for a given lock from the AIL
* @gl: the glock
*
* None of the buffers should be dirty, locked, or pinned.
*/
static void
ail_empty_gl(struct gfs_glock *gl)
{
struct gfs_sbd *sdp = gl->gl_sbd;
struct gfs_bufdata *bd;
struct buffer_head *bh;
spin_lock(&sdp->sd_ail_lock);
while (!list_empty(&gl->gl_ail_bufs)) {
bd = list_entry(gl->gl_ail_bufs.next,
struct gfs_bufdata, bd_ail_gl_list);
bh = bd->bd_bh;
gfs_assert_withdraw(sdp, !bd->bd_pinned && !buffer_busy(bh));
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
list_del_init(&bd->bd_ail_tr_list);
list_del(&bd->bd_ail_gl_list);
brelse(bh);
}
spin_unlock(&sdp->sd_ail_lock);
}
/**
* gfs_inval_buf - Invalidate all buffers associated with a glock
* @gl: the glock
*
*/
void
gfs_inval_buf(struct gfs_glock *gl)
{
struct inode *aspace = gl->gl_aspace;
struct address_space *mapping = gl->gl_aspace->i_mapping;
ail_empty_gl(gl);
atomic_inc(&aspace->i_writecount);
truncate_inode_pages(mapping, 0);
atomic_dec(&aspace->i_writecount);
gfs_assert_withdraw(gl->gl_sbd, !mapping->nrpages);
}
/**
* gfs_sync_buf - Sync all buffers associated with a glock
* @gl: The glock
* @flags: DIO_START | DIO_WAIT | DIO_CHECK
*
*/
void
gfs_sync_buf(struct gfs_glock *gl, int flags)
{
struct address_space *mapping = gl->gl_aspace->i_mapping;
int error = 0;
if (flags & DIO_START)
error = filemap_fdatawrite(mapping);
if (!error && (flags & DIO_WAIT))
error = filemap_fdatawait(mapping);
if (!error && (flags & (DIO_INVISIBLE | DIO_CHECK)) == DIO_CHECK)
ail_empty_gl(gl);
if (error)
gfs_io_error(gl->gl_sbd);
}
/**
* getbuf - Get a buffer with a given address space
* @sdp: the filesystem
* @aspace: the address space
* @blkno: the block number (filesystem scope)
* @create: TRUE if the buffer should be created
*
* Returns: the buffer
*/
static struct buffer_head *
getbuf(struct gfs_sbd *sdp, struct inode *aspace, uint64_t blkno, int create)
{
struct page *page;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
unsigned int bufnum;
shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
RETRY_MALLOC(page = grab_cache_page(aspace->i_mapping, index), page);
} else {
page = find_lock_page(aspace->i_mapping, index);
if (!page)
return NULL;
}
if (!page_has_buffers(page))
create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
/* Locate header for our buffer within our page */
for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
/* Do nothing */;
get_bh(bh);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
else if (gfs_assert_warn(sdp, bh->b_bdev == sdp->sd_vfs->s_bdev &&
bh->b_blocknr == blkno))
map_bh(bh, sdp->sd_vfs, blkno);
unlock_page(page);
page_cache_release(page);
return bh;
}
/**
* gfs_dgetblk - Get a block
* @gl: The glock associated with this block
* @blkno: The block number
*
* Returns: The buffer
*/
struct buffer_head *
gfs_dgetblk(struct gfs_glock *gl, uint64_t blkno)
{
return getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
}
/**
* gfs_dread - Read a block from disk
* @gl: The glock covering the block
* @blkno: The block number
* @flags: flags to gfs_dreread()
* @bhp: the place where the buffer is returned (NULL on failure)
*
* Returns: errno
*/
int
gfs_dread(struct gfs_glock *gl, uint64_t blkno,
int flags, struct buffer_head **bhp)
{
int error;
*bhp = gfs_dgetblk(gl, blkno);
error = gfs_dreread(gl->gl_sbd, *bhp, flags);
if (error)
brelse(*bhp);
return error;
}
/**
* gfs_prep_new_buffer - Mark a new buffer we just gfs_dgetblk()ed uptodate
* @bh: the buffer
*
*/
void
gfs_prep_new_buffer(struct buffer_head *bh)
{
wait_on_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
}
/**
* gfs_dreread - Reread a block from disk
* @sdp: the filesystem
* @bh: The block to read
* @flags: Flags that control the read
*
* Returns: errno
*/
int
gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
{
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
/* Fill in meta-header if we have a cached copy, else read from disk */
if (flags & DIO_NEW) {
if (gfs_mhc_fish(sdp, bh))
return 0;
clear_buffer_uptodate(bh);
}
if (flags & DIO_FORCE)
clear_buffer_uptodate(bh);
if ((flags & DIO_START) && !buffer_uptodate(bh))
ll_rw_block(READ, 1, &bh);
if (flags & DIO_WAIT) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
gfs_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
}
return 0;
}
/**
* gfs_dwrite - Write a buffer to disk (and/or wait for write to complete)
* @sdp: the filesystem
* @bh: The buffer to write
* @flags: DIO_XXX The type of write/wait operation to do
*
* Returns: errno
*/
int
gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
{
if (gfs_assert_warn(sdp, !test_bit(SDF_ROFS, &sdp->sd_flags)))
return -EIO;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
if (flags & DIO_CLEAN) {
lock_buffer(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
}
if (flags & DIO_DIRTY) {
if (gfs_assert_warn(sdp, buffer_uptodate(bh)))
return -EIO;
mark_buffer_dirty(bh);
}
if ((flags & DIO_START) && buffer_dirty(bh)) {
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
}
if (flags & DIO_WAIT) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
gfs_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
}
return 0;
}
/**
* gfs_attach_bufdata - attach a struct gfs_bufdata structure to a buffer
* @bh: The buffer to be attached to
* @gl: the glock the buffer belongs to
*
*/
void
gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl)
{
struct gfs_bufdata *bd;
lock_page(bh->b_page);
/* If there's one attached already, we're done */
if (get_v2bd(bh)) {
unlock_page(bh->b_page);
return;
}
RETRY_MALLOC(bd = kmem_cache_alloc(gfs_bufdata_cachep, GFP_KERNEL), bd);
atomic_inc(&gl->gl_sbd->sd_bufdata_count);
memset(bd, 0, sizeof(struct gfs_bufdata));
bd->bd_bh = bh;
bd->bd_gl = gl;
INIT_LE(&bd->bd_new_le, &gfs_buf_lops);
INIT_LE(&bd->bd_incore_le, &gfs_buf_lops);
init_MUTEX(&bd->bd_lock);
INIT_LIST_HEAD(&bd->bd_ail_tr_list);
set_v2bd(bh, bd);
unlock_page(bh->b_page);
}
/**
* gfs_is_pinned - Figure out if a buffer is pinned or not
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to be pinned
*
* Returns: TRUE if the buffer is pinned, FALSE otherwise
*/
int
gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh)
{
struct gfs_bufdata *bd = get_v2bd(bh);
int ret = FALSE;
if (bd) {
gfs_lock_buffer(bh);
if (bd->bd_pinned)
ret = TRUE;
gfs_unlock_buffer(bh);
}
return ret;
}
/**
* gfs_dpin - Pin a metadata buffer in memory
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to be pinned
*
* "Pinning" means keeping buffer from being written to its in-place location.
* A buffer should be pinned from the time it is added to a new transaction,
* until after it has been written to the log.
* If an earlier change to this buffer is still pinned, waiting to be written
* to on-disk log, we need to keep a "frozen" copy of the old data while this
* transaction is modifying the real data. We keep the frozen copy until
* this transaction's incore_commit(), i.e. until the transaction has
* finished modifying the real data, at which point we can use the real
* buffer for logging, even if the frozen copy didn't get written to the log.
*
*/
void
gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh)
{
struct gfs_bufdata *bd = get_v2bd(bh);
char *data;
gfs_assert_withdraw(sdp, !test_bit(SDF_ROFS, &sdp->sd_flags));
gfs_lock_buffer(bh);
gfs_assert_warn(sdp, !bd->bd_frozen);
if (!bd->bd_pinned++) {
wait_on_buffer(bh);
/* If this buffer is in the AIL and it has already been written
to in-place disk block, remove it from the AIL. */
spin_lock(&sdp->sd_ail_lock);
if (!list_empty(&bd->bd_ail_tr_list) && !buffer_busy(bh)) {
list_del_init(&bd->bd_ail_tr_list);
list_del(&bd->bd_ail_gl_list);
brelse(bh);
}
spin_unlock(&sdp->sd_ail_lock);
clear_buffer_dirty(bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
} else {
gfs_unlock_buffer(bh);
gfs_assert_withdraw(sdp, buffer_uptodate(bh));
data = gmalloc(sdp->sd_sb.sb_bsize);
gfs_lock_buffer(bh);
/* Create frozen copy, if needed. */
if (bd->bd_pinned > 1) {
memcpy(data, bh->b_data, sdp->sd_sb.sb_bsize);
bd->bd_frozen = data;
} else
kfree(data);
}
gfs_unlock_buffer(bh);
get_bh(bh);
}
/**
* gfs_dunpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @tr: The transaction in the AIL that contains this buffer
* If NULL, don't attach buffer to any AIL list
* (i.e. when dropping a pin reference when merging a new transaction
* with an already existing incore transaction)
*
* Called for (meta) buffers, after they've been logged to on-disk journal.
* Make a (meta) buffer writeable to in-place location on-disk, if recursive
* pin count is 1 (i.e. no other, later transaction is modifying this buffer).
* Add buffer to AIL lists of 1) the latest transaction that's modified and
* logged (on-disk) the buffer, and of 2) the glock that protects the buffer.
* A single buffer might have been modified by more than one transaction
* since the buffer's previous write to disk (in-place location). We keep
* the buffer on only one transaction's AIL list, i.e. that of the latest
* transaction that's completed logging this buffer (no need to write it to
* in-place block multiple times for multiple transactions, only once with
* the most up-to-date data).
* A single buffer will be protected by one and only one glock. If buffer is
* already on a (previous) transaction's AIL, we know that we're already
* on buffer's glock's AIL.
*
*/
void
gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh, struct gfs_trans *tr)
{
struct gfs_bufdata *bd = get_v2bd(bh);
gfs_assert_withdraw(sdp, buffer_uptodate(bh));
gfs_lock_buffer(bh);
if (gfs_assert_warn(sdp, bd->bd_pinned)) {
gfs_unlock_buffer(bh);
return;
}
/* No other (later) transaction is modifying buffer; ready to write */
if (bd->bd_pinned == 1)
mark_buffer_dirty(bh);
bd->bd_pinned--;
gfs_unlock_buffer(bh);
if (tr) {
spin_lock(&sdp->sd_ail_lock);
if (list_empty(&bd->bd_ail_tr_list)) {
/* Buffer not attached to any earlier transaction. Add
it to glock's AIL, and this trans' AIL (below). */
list_add(&bd->bd_ail_gl_list, &bd->bd_gl->gl_ail_bufs);
} else {
/* Was part of earlier transaction.
Move from that trans' AIL to this newer one's AIL.
Buf is already on glock's AIL. */
list_del_init(&bd->bd_ail_tr_list);
brelse(bh);
}
list_add(&bd->bd_ail_tr_list, &tr->tr_ail_bufs);
spin_unlock(&sdp->sd_ail_lock);
} else
brelse(bh);
}
/**
* logbh_end_io - Called by OS at the end of a logbh ("fake" bh) write to log
* @bh: the buffer
* @uptodate: whether or not the write succeeded
*
* Interrupt context, no ENTER/RETURN
*
*/
static void
logbh_end_io(struct buffer_head *bh, int uptodate)
{
if (uptodate)
set_buffer_uptodate(bh);
else
clear_buffer_uptodate(bh);
unlock_buffer(bh);
}
/**
* gfs_logbh_init - Initialize a fake buffer head
* @sdp: the filesystem
* @bh: the buffer to initialize
* @blkno: the block address of the buffer
* @data: the data to be written
*
*/
void
gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh,
uint64_t blkno, char *data)
{
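/* Build a buffer head by hand, outside the page cache; it maps the
   caller's log data onto the journal block at blkno */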
memset(bh, 0, sizeof(struct buffer_head));
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
atomic_set(&bh->b_count, 1);
set_bh_page(bh, virt_to_page(data), ((unsigned long)data) & (PAGE_SIZE - 1));
bh->b_blocknr = blkno;
bh->b_size = sdp->sd_sb.sb_bsize;
bh->b_bdev = sdp->sd_vfs->s_bdev;
init_buffer(bh, logbh_end_io, NULL);
INIT_LIST_HEAD(&bh->b_assoc_buffers);
}
/**
* gfs_logbh_uninit - Clean up a fake buffer head
* @sdp: the filesystem
* @bh: the buffer to clean
*
*/
void
gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh)
{
gfs_assert_warn(sdp, test_bit(SDF_SHUTDOWN, &sdp->sd_flags) ||
!buffer_busy(bh));
gfs_assert_warn(sdp, atomic_read(&bh->b_count) == 1);
}
/**
* gfs_logbh_start - Start writing a fake buffer head
* @sdp: the filesystem
* @bh: the buffer to write
*
* This starts a block write to our journal.
*/
void
gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh)
{
submit_bh(WRITE, bh);
}
/**
* gfs_logbh_wait - Wait for the write of a fake buffer head to complete
* @sdp: the filesystem
* @bh: the buffer to write
*
* This waits for a block write to our journal to complete.
*
* Returns: errno
*/
int
gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh)
{
wait_on_buffer(bh);
if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
gfs_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
return 0;
}
/**
* gfs_replay_buf - write a log buffer to its inplace location
* @gl: the journal's glock
* @bh: the buffer
*
* Returns: errno
*/
int
gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh)
{
struct gfs_sbd *sdp = gl->gl_sbd;
struct gfs_bufdata *bd;
bd = get_v2bd(bh);
if (!bd) {
gfs_attach_bufdata(bh, gl);
bd = get_v2bd(bh);
}
mark_buffer_dirty(bh);
if (list_empty(&bd->bd_ail_tr_list)) {
get_bh(bh);
list_add(&bd->bd_ail_tr_list, &sdp->sd_recovery_bufs);
}
return 0;
}
/**
* gfs_replay_check - Check up on journal replay
* @sdp: the filesystem
*
*/
void
gfs_replay_check(struct gfs_sbd *sdp)
{
struct buffer_head *bh;
struct gfs_bufdata *bd;
while (!list_empty(&sdp->sd_recovery_bufs)) {
bd = list_entry(sdp->sd_recovery_bufs.prev,
struct gfs_bufdata, bd_ail_tr_list);
bh = bd->bd_bh;
if (buffer_busy(bh)) {
list_move(&bd->bd_ail_tr_list,
&sdp->sd_recovery_bufs);
break;
} else {
list_del_init(&bd->bd_ail_tr_list);
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
brelse(bh);
}
}
}
/**
* gfs_replay_wait - Wait for all replayed buffers to hit the disk
* @sdp: the filesystem
*
*/
void
gfs_replay_wait(struct gfs_sbd *sdp)
{
struct list_head *head, *tmp, *prev;
struct buffer_head *bh;
struct gfs_bufdata *bd;
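/* First pass: reap buffers already on disk and start I/O on dirty
   ones; the second pass below waits for the stragglers to complete */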
for (head = &sdp->sd_recovery_bufs, tmp = head->prev, prev = tmp->prev;
tmp != head;
tmp = prev, prev = tmp->prev) {
bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
bh = bd->bd_bh;
if (!buffer_busy(bh)) {
list_del_init(&bd->bd_ail_tr_list);
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
brelse(bh);
continue;
}
if (buffer_dirty(bh)) {
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
}
}
while (!list_empty(head)) {
bd = list_entry(head->prev, struct gfs_bufdata, bd_ail_tr_list);
bh = bd->bd_bh;
wait_on_buffer(bh);
gfs_assert_withdraw(sdp, !buffer_busy(bh));
list_del_init(&bd->bd_ail_tr_list);
if (!buffer_uptodate(bh))
gfs_io_error_bh(sdp, bh);
brelse(bh);
}
}
/**
* gfs_wipe_buffers - make sure an inode's buffers aren't dirty or AILed anymore
* @ip: the inode who owns the buffers
* @rgd: the resource group
* @bstart: the first buffer in the run
* @blen: the number of buffers in the run
*
* Called when de-allocating a contiguous run of meta blocks within an rgrp.
* Make sure all buffers for de-alloc'd blocks are removed from the AIL, if
* they can be. Dirty or pinned blocks are left alone. Add relevant
* meta-headers to meta-header cache, so we don't need to read disk
* if we re-allocate blocks.
*/
void
gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd,
uint64_t bstart, uint32_t blen)
{
struct gfs_sbd *sdp = ip->i_sbd;
struct inode *aspace = ip->i_gl->gl_aspace;
struct buffer_head *bh;
struct gfs_bufdata *bd;
int busy;
int add = FALSE;
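/* For each freed block that still has an up-to-date buffer in the
   aspace: cache its meta-header for possible re-allocation, and
   detach it from the AIL unless it's still pinned or under I/O */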
while (blen) {
bh = getbuf(sdp, aspace, bstart, NO_CREATE);
if (bh) {
bd = get_v2bd(bh);
if (buffer_uptodate(bh)) {
if (bd) {
gfs_lock_buffer(bh);
gfs_mhc_add(rgd, &bh, 1);
busy = bd->bd_pinned || buffer_busy(bh);
gfs_unlock_buffer(bh);
if (busy)
add = TRUE;
else {
spin_lock(&sdp->sd_ail_lock);
if (!list_empty(&bd->bd_ail_tr_list)) {
list_del_init(&bd->bd_ail_tr_list);
list_del(&bd->bd_ail_gl_list);
brelse(bh);
}
spin_unlock(&sdp->sd_ail_lock);
}
} else {
gfs_assert_withdraw(sdp, !buffer_dirty(bh));
wait_on_buffer(bh);
gfs_assert_withdraw(sdp, !buffer_busy(bh));
gfs_mhc_add(rgd, &bh, 1);
}
} else {
gfs_assert_withdraw(sdp, !bd || !bd->bd_pinned);
gfs_assert_withdraw(sdp, !buffer_dirty(bh));
wait_on_buffer(bh);
gfs_assert_withdraw(sdp, !buffer_busy(bh));
}
brelse(bh);
}
bstart++;
blen--;
}
if (add)
gfs_depend_add(rgd, ip->i_num.no_formal_ino);
}
/**
* gfs_sync_meta - sync all the buffers in a filesystem
* @sdp: the filesystem
*
* Flush metadata blocks to on-disk journal, then
* Flush metadata blocks (now in AIL) to on-disk in-place locations
* Periodically keep checking until done (AIL empty)
*/
void
gfs_sync_meta(struct gfs_sbd *sdp)
{
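/* Flush everything to the on-disk journal, then keep pushing the AIL
   until all in-place writes complete, polling every HZ/10 jiffies */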
gfs_log_flush(sdp);
for (;;) {
gfs_ail_start(sdp, DIO_ALL);
if (gfs_ail_empty(sdp))
break;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ / 10);
}
}
/**
* gfs_flush_meta_cache - get rid of any references on buffers for this inode
* @ip: The GFS inode
*
* This releases buffers that are in the most-recently-used array of
* blocks used for indirect block addressing for this inode.
* Don't confuse this with the meta-HEADER cache (mhc)!
*/
void
gfs_flush_meta_cache(struct gfs_inode *ip)
{
struct buffer_head **bh_slot;
unsigned int x;
spin_lock(&ip->i_spin);
for (x = 0; x < GFS_MAX_META_HEIGHT; x++) {
bh_slot = &ip->i_cache[x];
if (*bh_slot) {
brelse(*bh_slot);
*bh_slot = NULL;
}
}
spin_unlock(&ip->i_spin);
}
/**
* gfs_get_meta_buffer - Get a metadata buffer
* @ip: The GFS inode
* @height: The level of this buf in the metadata (indir addr) tree (if any)
* @num: The block number (device relative) of the buffer
* @new: Non-zero if we may create a new buffer
* @bhp: the buffer is returned here
*
* Returns: errno
*/
int
gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new,
struct buffer_head **bhp)
{
struct buffer_head *bh, **bh_slot = &ip->i_cache[height];
int flags = ((new) ? DIO_NEW : 0) | DIO_START | DIO_WAIT;
int error;
/* Try to use the gfs_inode's MRU metadata tree cache */
spin_lock(&ip->i_spin);
bh = *bh_slot;
if (bh) {
if (bh->b_blocknr == num)
get_bh(bh);
else
bh = NULL;
}
spin_unlock(&ip->i_spin);
if (bh) {
error = gfs_dreread(ip->i_sbd, bh, flags);
if (error) {
brelse(bh);
return error;
}
} else {
error = gfs_dread(ip->i_gl, num, flags, &bh);
if (error)
return error;
spin_lock(&ip->i_spin);
if (*bh_slot != bh) {
if (*bh_slot)
brelse(*bh_slot);
*bh_slot = bh;
get_bh(bh);
}
spin_unlock(&ip->i_spin);
}
if (new) {
if (gfs_assert_warn(ip->i_sbd, height)) {
brelse(bh);
return -EIO;
}
gfs_trans_add_bh(ip->i_gl, bh);
gfs_metatype_set(bh, GFS_METATYPE_IN, GFS_FORMAT_IN);
gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
} else if (gfs_metatype_check(ip->i_sbd, bh,
(height) ? GFS_METATYPE_IN : GFS_METATYPE_DI)) {
brelse(bh);
return -EIO;
}
*bhp = bh;
return 0;
}
/**
* gfs_get_data_buffer - Get a data buffer
* @ip: The GFS inode
* @block: The block number (device relative) of the data block
* @new: Non-zero if this is a new allocation
* @bhp: the buffer is returned here
*
* Returns: errno
*/
int
gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new,
struct buffer_head **bhp)
{
struct buffer_head *bh;
int error = 0;
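/* The dinode block itself, journaled-data blocks, and ordinary data
   blocks each need slightly different handling */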
if (block == ip->i_num.no_addr) {
if (gfs_assert_warn(ip->i_sbd, !new))
return -EIO;
error = gfs_dread(ip->i_gl, block, DIO_START | DIO_WAIT, &bh);
if (error)
return error;
if (gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_DI)) {
brelse(bh);
return -EIO;
}
} else if (gfs_is_jdata(ip)) {
if (new) {
error = gfs_dread(ip->i_gl, block,
DIO_NEW | DIO_START | DIO_WAIT, &bh);
if (error)
return error;
gfs_trans_add_bh(ip->i_gl, bh);
gfs_metatype_set(bh, GFS_METATYPE_JD, GFS_FORMAT_JD);
gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
} else {
error = gfs_dread(ip->i_gl, block,
DIO_START | DIO_WAIT, &bh);
if (error)
return error;
if (gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_JD)) {
brelse(bh);
return -EIO;
}
}
} else {
if (new) {
bh = gfs_dgetblk(ip->i_gl, block);
gfs_prep_new_buffer(bh);
} else {
error = gfs_dread(ip->i_gl, block,
DIO_START | DIO_WAIT, &bh);
if (error)
return error;
}
}
*bhp = bh;
return 0;
}
/**
* gfs_start_ra - start readahead on an extent of a file
* @gl: the glock the blocks belong to
* @dblock: the starting disk block
* @extlen: the number of blocks in the extent
*
*/
void
gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen)
{
struct gfs_sbd *sdp = gl->gl_sbd;
struct inode *aspace = gl->gl_aspace;
struct buffer_head *first_bh, *bh;
uint32_t max_ra = gfs_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift;
int error;
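/* Clamp the extent to the max_readahead tunable, then start async
   reads on each block; stop early once the first block completes */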
if (!extlen)
return;
if (!max_ra)
return;
if (extlen > max_ra)
extlen = max_ra;
first_bh = getbuf(sdp, aspace, dblock, CREATE);
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh)) {
error = gfs_dreread(sdp, first_bh, DIO_START);
if (error)
goto out;
}
dblock++;
extlen--;
while (extlen) {
bh = getbuf(sdp, aspace, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
error = gfs_dreread(sdp, bh, DIO_START);
brelse(bh);
if (error)
goto out;
} else
brelse(bh);
dblock++;
extlen--;
if (buffer_uptodate(first_bh))
break;
}
out:
brelse(first_bh);
}
