summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-13 14:35:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-13 14:35:37 -0700
commitb86faee6d111294fa95a2e89b5f771b2da3c9782 (patch)
tree1518742b6f7fabd4e3b3875fc31aeddcae11ceb8
parent48ea2cedde3507941f4549b0d27ed46ed29e39ff (diff)
parentb4f937cffa66b3d56eb8f586e620d0b223a281a3 (diff)
Merge tag 'nfs-for-4.13-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "Stable bugfixes: - Fix -EACCESS on commit to DS handling - Fix initialization of nfs_page_array->npages - Only invalidate dentries that are actually invalid Features: - Enable NFSoRDMA transparent state migration - Add support for lookup-by-filehandle - Add support for nfs re-exporting Other bugfixes and cleanups: - Christoph cleaned up the way we declare NFS operations - Clean up various internal structures - Various cleanups to commits - Various improvements to error handling - Set the dt_type of . and .. entries in NFS v4 - Make slot allocation more reliable - Fix fscache stat printing - Fix uninitialized variable warnings - Fix potential list overrun in nfs_atomic_open() - Fix a race in NFSoRDMA RPC reply handler - Fix return size for nfs42_proc_copy() - Fix against MAC forgery timing attacks" * tag 'nfs-for-4.13-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (68 commits) NFS: Don't run wake_up_bit() when nobody is waiting... nfs: add export operations nfs4: add NFSv4 LOOKUPP handlers nfs: add a nfs_ilookup helper nfs: replace d_add with d_splice_alias in atomic_open sunrpc: use constant time memory comparison for mac NFSv4.2 fix size storage for nfs42_proc_copy xprtrdma: Fix documenting comments in frwr_ops.c xprtrdma: Replace PAGE_MASK with offset_in_page() xprtrdma: FMR does not need list_del_init() xprtrdma: Demote "connect" log messages NFSv4.1: Use seqid returned by EXCHANGE_ID after state migration NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration xprtrdma: Don't defer MR recovery if ro_map fails xprtrdma: Fix FRWR invalidation error recovery xprtrdma: Fix client lock-up after application signal fires xprtrdma: Rename rpcrdma_req::rl_free xprtrdma: Pass only the list of registered MRs to ro_unmap_sync xprtrdma: Pre-mark remotely invalidated MRs xprtrdma: On invalidation failure, remove MWs from rl_registered ...
-rw-r--r--fs/nfs/Makefile2
-rw-r--r--fs/nfs/dir.c42
-rw-r--r--fs/nfs/export.c177
-rw-r--r--fs/nfs/filelayout/filelayout.c31
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c33
-rw-r--r--fs/nfs/inode.c36
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs3xdr.c2
-rw-r--r--fs/nfs/nfs42proc.c2
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4idmap.c3
-rw-r--r--fs/nfs/nfs4proc.c109
-rw-r--r--fs/nfs/nfs4state.c16
-rw-r--r--fs/nfs/nfs4trace.h29
-rw-r--r--fs/nfs/nfs4xdr.c77
-rw-r--r--fs/nfs/pagelist.c23
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfs/unlink.c13
-rw-r--r--fs/nfs/write.c59
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/nfs_fs_sb.h2
-rw-r--r--include/linux/nfs_page.h4
-rw-r--r--include/linux/nfs_xdr.h31
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c3
-rw-r--r--net/sunrpc/xprt.c8
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c47
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c69
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c125
-rw-r--r--net/sunrpc/xprtrdma/transport.c3
-rw-r--r--net/sunrpc/xprtrdma/verbs.c55
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h40
35 files changed, 777 insertions, 289 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 98f4e5728a67..1fb118902d57 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
io.o direct.o pagelist.o read.o symlink.o unlink.o \
- write.o namespace.o mount_clnt.o nfstrace.o
+ write.o namespace.o mount_clnt.o nfstrace.o export.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ac00bf4ecf1..5ac484fe0dee 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -151,7 +151,7 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
-typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
@@ -165,8 +165,8 @@ typedef struct {
unsigned long timestamp;
unsigned long gencount;
unsigned int cache_entry_index;
- unsigned int plus:1;
- unsigned int eof:1;
+ bool plus;
+ bool eof;
} nfs_readdir_descriptor_t;
/*
@@ -355,7 +355,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
if (error == -ENOTSUPP && desc->plus) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
- desc->plus = 0;
+ desc->plus = false;
goto again;
}
goto error;
@@ -557,7 +557,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
count++;
- if (desc->plus != 0)
+ if (desc->plus)
nfs_prime_dcache(file_dentry(desc->file), entry);
status = nfs_readdir_add_to_array(entry, page);
@@ -860,7 +860,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->ctx = ctx;
desc->dir_cookie = &dir_ctx->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
- desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
+ desc->plus = nfs_use_readdirplus(inode, ctx);
if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -885,8 +885,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
nfs_zap_caches(inode);
desc->page_index = 0;
- desc->plus = 0;
- desc->eof = 0;
+ desc->plus = false;
+ desc->eof = false;
continue;
}
if (res < 0)
@@ -1115,11 +1115,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
/* Force a full look up iff the parent directory has changed */
if (!nfs_is_exclusive_create(dir, flags) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
-
- if (nfs_lookup_verify_inode(inode, flags)) {
+ error = nfs_lookup_verify_inode(inode, flags);
+ if (error) {
if (flags & LOOKUP_RCU)
return -ECHILD;
- goto out_zap_parent;
+ if (error == -ESTALE)
+ goto out_zap_parent;
+ goto out_error;
}
nfs_advise_use_readdirplus(dir);
goto out_valid;
@@ -1144,8 +1146,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
- if (error)
+ if (error == -ESTALE || error == -ENOENT)
goto out_bad;
+ if (error)
+ goto out_error;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out_bad;
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
@@ -1427,8 +1431,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
err = finish_open(file, dentry, do_open, opened);
if (err)
goto out;
- nfs_file_set_open_context(file, ctx);
-
+ if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ nfs_file_set_open_context(file, ctx);
+ else
+ err = -ESTALE;
out:
return err;
}
@@ -1512,7 +1518,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
d_drop(dentry);
switch (err) {
case -ENOENT:
- d_add(dentry, NULL);
+ d_splice_alias(NULL, dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case -EISDIR:
@@ -2035,7 +2041,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
error = rpc_wait_for_completion_task(task);
- if (error == 0)
+ if (error != 0) {
+ ((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
+ /* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
+ smp_wmb();
+ } else
error = task->tk_status;
rpc_put_task(task);
nfs_mark_for_revalidate(old_inode);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
new file mode 100644
index 000000000000..249cb96cc5b5
--- /dev/null
+++ b/fs/nfs/export.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015, Primary Data, Inc. All rights reserved.
+ *
+ * Tao Peng <bergwolf@primarydata.com>
+ */
+#include <linux/dcache.h>
+#include <linux/exportfs.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+enum {
+ FILEID_HIGH_OFF = 0, /* inode fileid high */
+ FILEID_LOW_OFF, /* inode fileid low */
+ FILE_I_TYPE_OFF, /* inode type */
+ EMBED_FH_OFF /* embeded server fh */
+};
+
+
+static struct nfs_fh *nfs_exp_embedfh(__u32 *p)
+{
+ return (struct nfs_fh *)(p + EMBED_FH_OFF);
+}
+
+/*
+ * Let's break subtree checking for now... otherwise we'll have to embed parent fh
+ * but there might not be enough space.
+ */
+static int
+nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
+{
+ struct nfs_fh *server_fh = NFS_FH(inode);
+ struct nfs_fh *clnt_fh = nfs_exp_embedfh(p);
+ size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+
+ dprintk("%s: max fh len %d inode %p parent %p",
+ __func__, *max_len, inode, parent);
+
+ if (*max_len < len || IS_AUTOMOUNT(inode)) {
+ dprintk("%s: fh len %d too small, required %d\n",
+ __func__, *max_len, len);
+ *max_len = len;
+ return FILEID_INVALID;
+ }
+ if (IS_AUTOMOUNT(inode)) {
+ *max_len = FILEID_INVALID;
+ goto out;
+ }
+
+ p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
+ p[FILEID_LOW_OFF] = NFS_FILEID(inode);
+ p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
+ p[len - 1] = 0; /* Padding */
+ nfs_copy_fh(clnt_fh, server_fh);
+ *max_len = len;
+out:
+ dprintk("%s: result fh fileid %llu mode %u size %d\n",
+ __func__, NFS_FILEID(inode), inode->i_mode, *max_len);
+ return *max_len;
+}
+
+static struct dentry *
+nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct nfs4_label *label = NULL;
+ struct nfs_fattr *fattr = NULL;
+ struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
+ size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ const struct nfs_rpc_ops *rpc_ops;
+ struct dentry *dentry;
+ struct inode *inode;
+ int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+ u32 *p = fid->raw;
+ int ret;
+
+ /* NULL translates to ESTALE */
+ if (fh_len < len || fh_type != len)
+ return NULL;
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL) {
+ dentry = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF];
+ fattr->mode = p[FILE_I_TYPE_OFF];
+ fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE;
+
+ dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode);
+
+ inode = nfs_ilookup(sb, fattr, server_fh);
+ if (inode)
+ goto out_found;
+
+ label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
+ if (IS_ERR(label)) {
+ dentry = ERR_CAST(label);
+ goto out_free_fattr;
+ }
+
+ rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
+ ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label);
+ if (ret) {
+ dprintk("%s: getattr failed %d\n", __func__, ret);
+ dentry = ERR_PTR(ret);
+ goto out_free_label;
+ }
+
+ inode = nfs_fhget(sb, server_fh, fattr, label);
+
+out_found:
+ dentry = d_obtain_alias(inode);
+
+out_free_label:
+ nfs4_label_free(label);
+out_free_fattr:
+ nfs_free_fattr(fattr);
+out:
+ return dentry;
+}
+
+static struct dentry *
+nfs_get_parent(struct dentry *dentry)
+{
+ int ret;
+ struct inode *inode = d_inode(dentry), *pinode;
+ struct super_block *sb = inode->i_sb;
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_fattr *fattr = NULL;
+ struct nfs4_label *label = NULL;
+ struct dentry *parent;
+ struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
+ struct nfs_fh fh;
+
+ if (!ops->lookupp)
+ return ERR_PTR(-EACCES);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL) {
+ parent = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ label = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(label)) {
+ parent = ERR_CAST(label);
+ goto out_free_fattr;
+ }
+
+ ret = ops->lookupp(inode, &fh, fattr, label);
+ if (ret) {
+ parent = ERR_PTR(ret);
+ goto out_free_label;
+ }
+
+ pinode = nfs_fhget(sb, &fh, fattr, label);
+ parent = d_obtain_alias(pinode);
+out_free_label:
+ nfs4_label_free(label);
+out_free_fattr:
+ nfs_free_fattr(fattr);
+out:
+ return parent;
+}
+
+const struct export_operations nfs_export_ops = {
+ .encode_fh = nfs_encode_fh,
+ .fh_to_dentry = nfs_fh_to_dentry,
+ .get_parent = nfs_get_parent,
+};
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d65b748..080fc6b278bd 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
- struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
- struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
if (task->tk_status >= 0)
return 0;
switch (task->tk_status) {
- /* MDS state errors */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OPENMODE:
- if (state == NULL)
- break;
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- goto wait_on_recovery;
- case -NFS4ERR_EXPIRED:
- if (state != NULL) {
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- }
- nfs4_schedule_lease_recovery(mds_client);
- goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -172,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
/* Invalidate Layout errors */
+ case -NFS4ERR_ACCESS:
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
@@ -202,26 +184,17 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
+ pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
- pnfs_set_lo_fail(lseg);
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
-out:
task->tk_status = 0;
return -EAGAIN;
-out_bad_stateid:
- task->tk_status = -EIO;
- return 0;
-wait_on_recovery:
- rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
- if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
- rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
- goto out;
}
/* NFS_PROTO call done callback routines */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc44a25..1f2ac3dd0fe5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
- struct nfs_server *mds_server = NFS_SERVER(inode);
-
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
- struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) {
- /* MDS state errors */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- nfs_remove_bad_delegation(state->inode, NULL);
- case -NFS4ERR_OPENMODE:
- if (state == NULL)
- break;
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- goto wait_on_recovery;
- case -NFS4ERR_EXPIRED:
- if (state != NULL) {
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- }
- nfs4_schedule_lease_recovery(mds_client);
- goto wait_on_recovery;
- /* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@@ -1137,17 +1113,8 @@ reset:
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
-out:
task->tk_status = 0;
return -EAGAIN;
-out_bad_stateid:
- task->tk_status = -EIO;
- return 0;
-wait_on_recovery:
- rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
- if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
- rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
- goto out;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1de93ba78dc9..109279d6d91b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -386,6 +386,28 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
#endif
EXPORT_SYMBOL_GPL(nfs_setsecurity);
+/* Search for inode identified by fh, fileid and i_mode in inode cache. */
+struct inode *
+nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+ struct nfs_find_desc desc = {
+ .fh = fh,
+ .fattr = fattr,
+ };
+ struct inode *inode;
+ unsigned long hash;
+
+ if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) ||
+ !(fattr->valid & NFS_ATTR_FATTR_TYPE))
+ return NULL;
+
+ hash = nfs_fattr_to_ino_t(fattr);
+ inode = ilookup5(sb, hash, nfs_find_actor, &desc);
+
+ dprintk("%s: returning %p\n", __func__, inode);
+ return inode;
+}
+
/*
* This is our front-end to iget that looks up inodes by file handle
* instead of inode number.
@@ -525,8 +547,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfs_fscache_init_inode(inode);
unlock_new_inode(inode);
- } else
- nfs_refresh_inode(inode, fattr);
+ } else {
+ int err = nfs_refresh_inode(inode, fattr);
+ if (err < 0) {
+ iput(inode);
+ inode = ERR_PTR(err);
+ goto out_no_inode;
+ }
+ }
dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode),
@@ -1315,9 +1343,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return 0;
/* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
- return -EIO;
+ return -ESTALE;
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
- return -EIO;
+ return -ESTALE;
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c7ebba9eb611..dc456416d2be 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -11,6 +11,8 @@
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+extern const struct export_operations nfs_export_ops;
+
struct nfs_string;
/* Maximum number of readahead requests
@@ -273,17 +275,17 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
/* nfs2xdr.c */
extern const struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
- struct nfs_entry *, int);
+ struct nfs_entry *, bool);
/* nfs3xdr.c */
extern const struct rpc_procinfo nfs3_procedures[];
extern int nfs3_decode_dirent(struct xdr_stream *,
- struct nfs_entry *, int);
+ struct nfs_entry *, bool);
/* nfs4xdr.c */
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs4_decode_dirent(struct xdr_stream *,
- struct nfs_entry *, int);
+ struct nfs_entry *, bool);
#endif
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c8a7e98c1371..fe68dabfbde6 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -939,7 +939,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
* };
*/
int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
- int plus)
+ bool plus)
{
__be32 *p;
int error;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0c07b567118d..df4a7d3ab915 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -621,7 +621,7 @@ out:
*/
static int
nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
- u64 cookie, struct page **pages, unsigned int count, int plus)
+ u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
__be32 *verf = NFS_I(dir)->cookieverf;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 670eddb3ae36..e82c9e553224 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1991,7 +1991,7 @@ out_status:
* };
*/
int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
- int plus)
+ bool plus)
{
struct nfs_entry old = *entry;
__be32 *p;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 319a47db218d..6c2db51e67a7 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -146,7 +146,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
size_t count = args->count;
- int status;
+ ssize_t status;
status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 66776f022111..50566acb5469 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -414,6 +414,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
if (clp != old)
clp->cl_preserve_clid = true;
nfs_put_client(clp);
+ clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
return old;
error:
@@ -852,6 +853,8 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (server->options & NFS_OPTION_MIGRATION)
set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+ if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
+ set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@@ -1212,9 +1215,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
return -EAFNOSUPPORT;
nfs_server_remove_lists(server);
+ set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion, net);
+ clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 835c163f61af..dd5d27da8c0c 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -364,7 +364,8 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
ret = -EINVAL;
} else {
ret = kstrtol(id_str, 10, &id_long);
- *id = (__u32)id_long;
+ if (!ret)
+ *id = (__u32)id_long;
}
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 98b0b662af09..a0b4e1091340 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -275,6 +275,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
struct nfs4_readdir_arg *readdir)
{
+ unsigned int attrs = FATTR4_WORD0_FILEID | FATTR4_WORD0_TYPE;
__be32 *start, *p;
if (cookie > 2) {
@@ -305,8 +306,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
memcpy(p, ".\0\0\0", 4); /* entry */
p++;
*p++ = xdr_one; /* bitmap length */
- *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
- *p++ = htonl(8); /* attribute buffer length */
+ *p++ = htonl(attrs); /* bitmap */
+ *p++ = htonl(12); /* attribute buffer length */
+ *p++ = htonl(NF4DIR);
p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
}
@@ -317,8 +319,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
memcpy(p, "..\0\0", 4); /* entry */
p++;
*p++ = xdr_one; /* bitmap length */
- *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
- *p++ = htonl(8); /* attribute buffer length */
+ *p++ = htonl(attrs); /* bitmap */
+ *p++ = htonl(12); /* attribute buffer length */
+ *p++ = htonl(NF4DIR);
p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
readdir->pgbase = (char *)p - (char *)start;
@@ -1034,11 +1037,11 @@ struct nfs4_opendata {
struct nfs4_state *state;
struct iattr attrs;
unsigned long timestamp;
- unsigned int rpc_done : 1;
- unsigned int file_created : 1;
- unsigned int is_recover : 1;
+ bool rpc_done;
+ bool file_created;
+ bool is_recover;
+ bool cancelled;
int rpc_status;
- int cancelled;
};
static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
@@ -1962,7 +1965,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
- data->rpc_done = 1;
+ data->rpc_done = true;
}
}
@@ -1972,7 +1975,7 @@ static void nfs4_open_confirm_release(void *calldata)
struct nfs4_state *state = NULL;
/* If this request hasn't been cancelled, do nothing */
- if (data->cancelled == 0)
+ if (!data->cancelled)
goto out_free;
/* In case of error, no cleanup! */
if (!data->rpc_done)
@@ -2015,7 +2018,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
kref_get(&data->kref);
- data->rpc_done = 0;
+ data->rpc_done = false;
data->rpc_status = 0;
data->timestamp = jiffies;
if (data->is_recover)
@@ -2025,7 +2028,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
return PTR_ERR(task);
status = rpc_wait_for_completion_task(task);
if (status != 0) {
- data->cancelled = 1;
+ data->cancelled = true;
smp_wmb();
} else
status = data->rpc_status;
@@ -2124,7 +2127,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
nfs_confirm_seqid(&data->owner->so_seqid, 0);
}
- data->rpc_done = 1;
+ data->rpc_done = true;
}
static void nfs4_open_release(void *calldata)
@@ -2133,7 +2136,7 @@ static void nfs4_open_release(void *calldata)
struct nfs4_state *state = NULL;
/* If this request hasn't been cancelled, do nothing */
- if (data->cancelled == 0)
+ if (!data->cancelled)
goto out_free;
/* In case of error, no cleanup! */
if (data->rpc_status != 0 || !data->rpc_done)
@@ -2179,20 +2182,20 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
kref_get(&data->kref);
- data->rpc_done = 0;
+ data->rpc_done = false;
data->rpc_status = 0;
- data->cancelled = 0;
- data->is_recover = 0;
+ data->cancelled = false;
+ data->is_recover = false;
if (isrecover) {
nfs4_set_sequence_privileged(&o_arg->seq_args);
- data->is_recover = 1;
+ data->is_recover = true;
}
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = rpc_wait_for_completion_task(task);
if (status != 0) {
- data->cancelled = 1;
+ data->cancelled = true;
smp_wmb();
} else
status = data->rpc_status;
@@ -2287,9 +2290,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
if (o_arg->open_flags & O_CREAT) {
if (o_arg->open_flags & O_EXCL)
- data->file_created = 1;
+ data->file_created = true;
else if (o_res->cinfo.before != o_res->cinfo.after)
- data->file_created = 1;
+ data->file_created = true;
if (data->file_created || dir->i_version != o_res->cinfo.after)
update_changeattr(dir, &o_res->cinfo,
o_res->f_attr->time_start);
@@ -3803,6 +3806,54 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
}
+static int _nfs4_proc_lookupp(struct inode *inode,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
+{
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_server *server = NFS_SERVER(inode);
+ int status;
+ struct nfs4_lookupp_arg args = {
+ .bitmask = server->attr_bitmask,
+ .fh = NFS_FH(inode),
+ };
+ struct nfs4_lookupp_res res = {
+ .server = server,
+ .fattr = fattr,
+ .label = label,
+ .fh = fhandle,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+
+ args.bitmask = nfs4_bitmask(server, label);
+
+ nfs_fattr_init(fattr);
+
+ dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino);
+ status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
+ dprintk("NFS reply lookupp: %d\n", status);
+ return status;
+}
+
+static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
+ trace_nfs4_lookupp(inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+ return err;
+}
+
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs_server *server = NFS_SERVER(inode);
@@ -4273,7 +4324,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
}
static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
- u64 cookie, struct page **pages, unsigned int count, int plus)
+ u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
struct nfs4_readdir_arg args = {
@@ -4311,7 +4362,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
}
static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
- u64 cookie, struct page **pages, unsigned int count, int plus)
+ u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct nfs4_exception exception = { };
int err;
@@ -6135,7 +6186,7 @@ static void nfs4_lock_release(void *calldata)
dprintk("%s: begin!\n", __func__);
nfs_free_seqid(data->arg.open_seqid);
- if (data->cancelled != 0) {
+ if (data->cancelled) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
@@ -6218,7 +6269,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
nfs4_handle_setlk_error(data->server, data->lsp,
data->arg.new_lock_owner, ret);
} else
- data->cancelled = 1;
+ data->cancelled = true;
rpc_put_task(task);
dprintk("%s: done, ret = %d!\n", __func__, ret);
trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
@@ -7376,12 +7427,11 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
if (status == 0) {
clp->cl_clientid = cdata->res.clientid;
clp->cl_exchange_flags = cdata->res.flags;
+ clp->cl_seqid = cdata->res.seqid;
/* Client ID is not confirmed */
- if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+ if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R))
clear_bit(NFS4_SESSION_ESTABLISHED,
- &clp->cl_session->session_state);
- clp->cl_seqid = cdata->res.seqid;
- }
+ &clp->cl_session->session_state);
kfree(clp->cl_serverowner);
clp->cl_serverowner = cdata->res.server_owner;
@@ -9313,6 +9363,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
+ .lookupp = nfs4_proc_lookupp,
.access = nfs4_proc_access,
.readlink = nfs4_proc_readlink,
.create = nfs4_proc_create,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cbf82b0d4467..0378e2257ca7 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -352,11 +352,17 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
if (clp != *result)
return 0;
- /* Purge state if the client id was established in a prior instance */
- if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
- set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
- else
- set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ /*
+ * Purge state if the client id was established in a prior
+ * instance and the client id could not have arrived on the
+ * server via Transparent State Migration.
+ */
+ if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
+ if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
+ set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ else
+ set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ }
nfs4_schedule_state_manager(clp);
status = nfs_wait_client_init_complete(clp);
if (status < 0)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 845d0eadefc9..be1da19c65d6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -891,6 +891,35 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
+TRACE_EVENT(nfs4_lookupp,
+ TP_PROTO(
+ const struct inode *inode,
+ int error
+ ),
+
+ TP_ARGS(inode, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, ino)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = NFS_FILEID(inode);
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) inode=%02x:%02x:%llu",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->ino
+ )
+);
+
TRACE_EVENT(nfs4_rename,
TP_PROTO(
const struct inode *olddir,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0f1f290c97cd..fa3eb361d4f8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -159,6 +159,8 @@ static int nfs4_stat_to_errno(int);
(op_decode_hdr_maxsz)
#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_lookup_maxsz (op_decode_hdr_maxsz)
+#define encode_lookupp_maxsz (op_encode_hdr_maxsz)
+#define decode_lookupp_maxsz (op_decode_hdr_maxsz)
#define encode_share_access_maxsz \
(2)
#define encode_createmode_maxsz (1 + encode_attrs_maxsz + encode_verifier_maxsz)
@@ -618,6 +620,18 @@ static int nfs4_stat_to_errno(int);
decode_lookup_maxsz + \
decode_getattr_maxsz + \
decode_getfh_maxsz)
+#define NFS4_enc_lookupp_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_lookupp_maxsz + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_lookupp_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_lookupp_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putrootfh_maxsz + \
@@ -1368,6 +1382,11 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
encode_string(xdr, name->len, name->name);
}
+static void encode_lookupp(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_LOOKUPP, decode_lookupp_maxsz, hdr);
+}
+
static void encode_share_access(struct xdr_stream *xdr, u32 share_access)
{
__be32 *p;
@@ -2123,6 +2142,26 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * Encode LOOKUPP request
+ */
+static void nfs4_xdr_enc_lookupp(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs4_lookupp_arg *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_lookupp(xdr, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode LOOKUP_ROOT request
*/
static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
@@ -5058,6 +5097,11 @@ static int decode_lookup(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_LOOKUP);
}
+static int decode_lookupp(struct xdr_stream *xdr)
+{
+ return decode_op_hdr(xdr, OP_LOOKUPP);
+}
+
/* This is too sick! */
static int decode_space_limit(struct xdr_stream *xdr,
unsigned long *pagemod_limit)
@@ -6238,6 +6282,36 @@ out:
}
/*
+ * Decode LOOKUPP response
+ */
+static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs4_lookupp_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_lookupp(xdr);
+ if (status)
+ goto out;
+ status = decode_getfh(xdr, res->fh);
+ if (status)
+ goto out;
+ status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+out:
+ return status;
+}
+
+/*
* Decode LOOKUP_ROOT response
*/
static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
@@ -7447,7 +7521,7 @@ out:
* on a directory already in our cache.
*/
int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
- int plus)
+ bool plus)
{
unsigned int savep;
uint32_t bitmap[3] = {0};
@@ -7614,6 +7688,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(ACCESS, enc_access, dec_access),
PROC(GETATTR, enc_getattr, dec_getattr),
PROC(LOOKUP, enc_lookup, dec_lookup),
+ PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
PROC(REMOVE, enc_remove, dec_remove),
PROC(RENAME, enc_rename, dec_rename),
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad92b401326c..de9066a92c0d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -50,8 +50,8 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->cred = hdr->req->wb_context->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
+ hdr->io_completion = desc->pg_io_completion;
hdr->dreq = desc->pg_dreq;
- hdr->layout_private = desc->pg_layout_private;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
return 0;
- if (!nonblock)
+ if (!nonblock) {
+ set_bit(PG_CONTENDED1, &head->wb_flags);
+ smp_mb__after_atomic();
return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE);
+ }
return -EAGAIN;
}
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
WARN_ON_ONCE(head != head->wb_head);
+ if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+ return;
+ set_bit(PG_CONTENDED1, &head->wb_flags);
+ smp_mb__after_atomic();
wait_on_bit(&head->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE);
}
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
smp_mb__before_atomic();
clear_bit(PG_HEADLOCK, &head->wb_flags);
smp_mb__after_atomic();
+ if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+ return;
wake_up_bit(&head->wb_flags, PG_HEADLOCK);
}
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
smp_mb__before_atomic();
clear_bit(PG_BUSY, &req->wb_flags);
smp_mb__after_atomic();
+ if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+ return;
wake_up_bit(&req->wb_flags, PG_BUSY);
}
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
int
nfs_wait_on_request(struct nfs_page *req)
{
+ if (!test_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+ set_bit(PG_CONTENDED2, &req->wb_flags);
+ smp_mb__after_atomic();
return wait_on_bit_io(&req->wb_flags, PG_BUSY,
TASK_UNINTERRUPTIBLE);
}
@@ -710,8 +725,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
+ desc->pg_io_completion = NULL;
desc->pg_dreq = NULL;
- desc->pg_layout_private = NULL;
desc->pg_bsize = bsize;
desc->pg_mirror_count = 1;
@@ -779,6 +794,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
gfp_t gfp_flags = GFP_KERNEL;
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+ pg_array->npages = pagecount;
if (pagecount <= ARRAY_SIZE(pg_array->page_array))
pg_array->pagevec = pg_array->page_array;
@@ -1233,6 +1249,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
{
LIST_HEAD(failed);
+ desc->pg_io_completion = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 9872cf676a50..7962e49097c3 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -485,7 +485,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
*/
static int
nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
- u64 cookie, struct page **pages, unsigned int count, int plus)
+ u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
struct nfs_readdirargs arg = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c5334c0e23a1..d828ef88e7db 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -879,7 +879,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
if (nfss->options & NFS_OPTION_FSCACHE) {
seq_printf(m, "\n\tfsc:\t");
for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
- seq_printf(m, "%Lu ", totals.bytes[i]);
+ seq_printf(m, "%Lu ", totals.fscache[i]);
}
#endif
seq_printf(m, "\n");
@@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
*/
sb->s_flags |= MS_POSIXACL;
sb->s_time_gran = 1;
+ sb->s_export_op = &nfs_export_ops;
}
nfs_initialise_sb(sb);
@@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb,
sb->s_xattr = old_sb->s_xattr;
sb->s_op = old_sb->s_op;
sb->s_time_gran = 1;
+ sb->s_export_op = old_sb->s_export_op;
if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 191aa577dd1f..e3949d93085c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -288,6 +288,19 @@ static void nfs_async_rename_release(void *calldata)
if (d_really_is_positive(data->old_dentry))
nfs_mark_for_revalidate(d_inode(data->old_dentry));
+ /* The result of the rename is unknown. Play it safe by
+ * forcing a new lookup */
+ if (data->cancelled) {
+ spin_lock(&data->old_dir->i_lock);
+ nfs_force_lookup_revalidate(data->old_dir);
+ spin_unlock(&data->old_dir->i_lock);
+ if (data->new_dir != data->old_dir) {
+ spin_lock(&data->new_dir->i_lock);
+ nfs_force_lookup_revalidate(data->new_dir);
+ spin_unlock(&data->new_dir->i_lock);
+ }
+ }
+
dput(data->old_dentry);
dput(data->new_dentry);
iput(data->old_dir);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index db7ba542559e..b1af5dee5e0a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,6 +40,12 @@
#define MIN_POOL_WRITE (32)
#define MIN_POOL_COMMIT (4)
+struct nfs_io_completion {
+ void (*complete)(void *data);
+ void *data;
+ struct kref refcount;
+};
+
/*
* Local function declarations
*/
@@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
mempool_free(hdr, nfs_wdata_mempool);
}
+static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
+{
+ return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
+}
+
+static void nfs_io_completion_init(struct nfs_io_completion *ioc,
+ void (*complete)(void *), void *data)
+{
+ ioc->complete = complete;
+ ioc->data = data;
+ kref_init(&ioc->refcount);
+}
+
+static void nfs_io_completion_release(struct kref *kref)
+{
+ struct nfs_io_completion *ioc = container_of(kref,
+ struct nfs_io_completion, refcount);
+ ioc->complete(ioc->data);
+ kfree(ioc);
+}
+
+static void nfs_io_completion_get(struct nfs_io_completion *ioc)
+{
+ if (ioc != NULL)
+ kref_get(&ioc->refcount);
+}
+
+static void nfs_io_completion_put(struct nfs_io_completion *ioc)
+{
+ if (ioc != NULL)
+ kref_put(&ioc->refcount, nfs_io_completion_release);
+}
+
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
ctx->error = error;
@@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
return ret;
}
+static void nfs_io_completion_commit(void *inode)
+{
+ nfs_commit_inode(inode, 0);
+}
+
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
+ struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+ if (ioc)
+ nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
&nfs_async_write_completion_ops);
+ pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
+ nfs_io_completion_put(ioc);
if (err < 0)
goto out_err;
@@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
return hdr->verf.committed != NFS_FILE_SYNC;
}
+static void nfs_async_write_init(struct nfs_pgio_header *hdr)
+{
+ nfs_io_completion_get(hdr->io_completion);
+}
+
static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
@@ -973,6 +1028,7 @@ next:
nfs_release_request(req);
}
out:
+ nfs_io_completion_put(hdr->io_completion);
hdr->release(hdr);
}
@@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+ .init_hdr = nfs_async_write_init,
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
.reschedule_io = nfs_async_write_reschedule_io,
@@ -1884,7 +1941,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
- if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
+ if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
goto out_mark_dirty;
/* don't wait for the COMMIT response */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b1ca04820a3..47239c336688 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -479,6 +479,7 @@ enum {
NFSPROC4_CLNT_ACCESS,
NFSPROC4_CLNT_GETATTR,
NFSPROC4_CLNT_LOOKUP,
+ NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LOOKUP_ROOT,
NFSPROC4_CLNT_REMOVE,
NFSPROC4_CLNT_RENAME,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bb0eb2c9acca..e52cc55ac300 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
extern void nfs_invalidate_atime(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
struct nfs_fattr *, struct nfs4_label *);
+struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e418a1096662..74c44665e6d3 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,6 +42,7 @@ struct nfs_client {
#define NFS_CS_MIGRATION 2 /* - transparent state migr */
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
#define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */
+#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -210,6 +211,7 @@ struct nfs_server {
unsigned long mig_status;
#define NFS_MIG_IN_TRANSITION (1)
#define NFS_MIG_FAILED (2)
+#define NFS_MIG_TSM_POSSIBLE (3)
void (*destroy)(struct nfs_server *);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d3498f..d67b67ae6c8b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
PG_UPTODATE, /* page group sync bit in read path */
PG_WB_END, /* page group sync bit in write path */
PG_REMOVE, /* page group sync bit in write path */
+ PG_CONTENDED1, /* Is someone waiting for a lock? */
+ PG_CONTENDED2, /* Is someone waiting for a lock? */
};
struct nfs_inode;
@@ -93,8 +95,8 @@ struct nfs_pageio_descriptor {
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
+ struct nfs_io_completion *pg_io_completion;
struct nfs_direct_req *pg_dreq;
- void *pg_layout_private;
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c83475ee8..ca3bcc4ed4e5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -878,7 +878,7 @@ struct nfs3_readdirargs {
struct nfs_fh * fh;
__u64 cookie;
__be32 verf[2];
- int plus;
+ bool plus;
unsigned int count;
struct page ** pages;
};
@@ -909,7 +909,7 @@ struct nfs3_linkres {
struct nfs3_readdirres {
struct nfs_fattr * dir_attr;
__be32 * verf;
- int plus;
+ bool plus;
};
struct nfs3_getaclres {
@@ -1012,7 +1012,6 @@ struct nfs4_link_res {
struct nfs_fattr * dir_attr;
};
-
struct nfs4_lookup_arg {
struct nfs4_sequence_args seq_args;
const struct nfs_fh * dir_fh;
@@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
struct nfs4_label *label;
};
+struct nfs4_lookupp_arg {
+ struct nfs4_sequence_args seq_args;
+ const struct nfs_fh *fh;
+ const u32 *bitmask;
+};
+
+struct nfs4_lookupp_res {
+ struct nfs4_sequence_res seq_res;
+ const struct nfs_server *server;
+ struct nfs_fattr *fattr;
+ struct nfs_fh *fh;
+ struct nfs4_label *label;
+};
+
struct nfs4_lookup_root_arg {
struct nfs4_sequence_args seq_args;
const u32 * bitmask;
@@ -1053,7 +1066,7 @@ struct nfs4_readdir_arg {
struct page ** pages; /* zero-copy data */
unsigned int pgbase; /* zero-copy data */
const u32 * bitmask;
- int plus;
+ bool plus;
};
struct nfs4_readdir_res {
@@ -1422,6 +1435,7 @@ enum {
NFS_IOHDR_STAT,
};
+struct nfs_io_completion;
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
@@ -1435,8 +1449,8 @@ struct nfs_pgio_header {
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
const struct nfs_rw_ops *rw_ops;
+ struct nfs_io_completion *io_completion;
struct nfs_direct_req *dreq;
- void *layout_private;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
@@ -1533,6 +1547,7 @@ struct nfs_renamedata {
struct nfs_fattr new_fattr;
void (*complete)(struct rpc_task *, struct nfs_renamedata *);
long timeout;
+ bool cancelled;
};
struct nfs_access_entry;
@@ -1567,6 +1582,8 @@ struct nfs_rpc_ops {
int (*lookup) (struct inode *, const struct qstr *,
struct nfs_fh *, struct nfs_fattr *,
struct nfs4_label *);
+ int (*lookupp) (struct inode *, struct nfs_fh *,
+ struct nfs_fattr *, struct nfs4_label *);
int (*access) (struct inode *, struct nfs_access_entry *);
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
@@ -1585,7 +1602,7 @@ struct nfs_rpc_ops {
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct dentry *, struct rpc_cred *,
- u64, struct page **, unsigned int, int);
+ u64, struct page **, unsigned int, bool);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
dev_t);
int (*statfs) (struct nfs_server *, struct nfs_fh *,
@@ -1595,7 +1612,7 @@ struct nfs_rpc_ops {
int (*pathconf) (struct nfs_server *, struct nfs_fh *,
struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
- int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+ int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
int (*pgio_rpc_prepare)(struct rpc_task *,
struct nfs_pgio_header *);
void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fb39284ec174..12649c9fedab 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
@@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
if (ret)
goto out_err;
- if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+ if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3e63c5e97ebe..4654a9934269 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1047,13 +1047,15 @@ out:
return ret;
}
-static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
goto out;
- req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+ spin_unlock(&xprt->reserve_lock);
+ req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+ spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
atomic_dec(&xprt->num_reqs);
@@ -1081,7 +1083,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
list_del(&req->rq_list);
goto out_init_req;
}
- req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
+ req = xprt_dynamic_alloc_slot(xprt);
if (!IS_ERR(req))
goto out_init_req;
switch (PTR_ERR(req)) {
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 59e64025ed96..d3f84bb1d443 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -91,7 +91,7 @@ __fmr_unmap(struct rpcrdma_mw *mw)
list_add(&mw->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
- list_del_init(&mw->fmr.fm_mr->list);
+ list_del(&mw->fmr.fm_mr->list);
return rc;
}
@@ -213,13 +213,11 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_nents = i;
mw->mw_dir = rpcrdma_data_dir(writing);
- if (i == 0)
- goto out_dmamap_err;
- if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir))
+ mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+ mw->mw_sg, i, mw->mw_dir);
+ if (!mw->mw_nents)
goto out_dmamap_err;
for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
@@ -237,16 +235,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return mw->mw_nents;
out_dmamap_err:
- pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
- mw->mw_sg, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+ mw->mw_sg, i);
+ rpcrdma_put_mw(r_xprt, mw);
return -EIO;
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc);
- rpcrdma_defer_mr_recovery(mw);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rpcrdma_put_mw(r_xprt, mw);
return -EIO;
}
@@ -255,24 +255,26 @@ out_maperr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
*/
static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
{
- struct rpcrdma_mw *mw, *tmp;
+ struct rpcrdma_mw *mw;
LIST_HEAD(unmap_list);
int rc;
- dprintk("RPC: %s: req %p\n", __func__, req);
-
/* ORDER: Invalidate all of the req's MRs first
*
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
- list_for_each_entry(mw, &req->rl_registered, mw_list)
+ list_for_each_entry(mw, mws, mw_list) {
+ dprintk("RPC: %s: unmapping fmr %p\n",
+ __func__, &mw->fmr);
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ }
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
@@ -281,9 +283,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
- list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
- list_del_init(&mw->mw_list);
- list_del_init(&mw->fmr.fm_mr->list);
+ while (!list_empty(mws)) {
+ mw = rpcrdma_pop_mw(mws);
+ dprintk("RPC: %s: DMA unmapping fmr %p\n",
+ __func__, &mw->fmr);
+ list_del(&mw->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
@@ -294,8 +298,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
- list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
- list_del_init(&mw->fmr.fm_mr->list);
+ while (!list_empty(mws)) {
+ mw = rpcrdma_pop_mw(mws);
+ list_del(&mw->fmr.fm_mr->list);
fmr_op_recover_mr(mw);
}
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index f81dd93176c0..6aea36a38bfd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -277,7 +277,7 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
}
/**
- * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@@ -298,7 +298,7 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
}
/**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@@ -319,7 +319,7 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
}
/**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
@@ -355,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct ib_mr *mr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
- int rc, i, n, dma_nents;
+ int rc, i, n;
u8 key;
mw = NULL;
@@ -391,14 +391,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_nents = i;
mw->mw_dir = rpcrdma_data_dir(writing);
- if (i == 0)
- goto out_dmamap_err;
- dma_nents = ib_dma_map_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- if (!dma_nents)
+ mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
+ if (!mw->mw_nents)
goto out_dmamap_err;
n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
@@ -436,13 +432,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return mw->mw_nents;
out_dmamap_err:
- pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
- mw->mw_sg, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+ mw->mw_sg, i);
+ frmr->fr_state = FRMR_IS_INVALID;
+ rpcrdma_put_mw(r_xprt, mw);
return -EIO;
out_mapmr_err:
- pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
+ pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
return -EIO;
@@ -458,21 +455,19 @@ out_senderr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
- struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frmr *f;
struct rpcrdma_mw *mw;
int count, rc;
- dprintk("RPC: %s: req %p\n", __func__, req);
-
- /* ORDER: Invalidate all of the req's MRs first
+ /* ORDER: Invalidate all of the MRs first
*
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
@@ -480,11 +475,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
f = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, &req->rl_registered, mw_list) {
+ list_for_each_entry(mw, mws, mw_list) {
mw->frmr.fr_state = FRMR_IS_INVALID;
- if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
- (mw->mw_handle == rep->rr_inv_rkey))
+ if (mw->mw_flags & RPCRDMA_MW_F_RI)
continue;
f = &mw->frmr;
@@ -524,18 +518,19 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* unless ri_id->qp is a valid pointer.
*/
r_xprt->rx_stats.local_inv_needed++;
+ bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+ if (bad_wr != first)
+ wait_for_completion(&f->fr_linv_done);
if (rc)
goto reset_mrs;
- wait_for_completion(&f->fr_linv_done);
-
- /* ORDER: Now DMA unmap all of the req's MRs, and return
+ /* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MW list.
*/
unmap:
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
+ while (!list_empty(mws)) {
+ mw = rpcrdma_pop_mw(mws);
dprintk("RPC: %s: DMA unmapping frmr %p\n",
__func__, &mw->frmr);
ib_dma_unmap_sg(ia->ri_device,
@@ -546,17 +541,19 @@ unmap:
reset_mrs:
pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
- rdma_disconnect(ia->ri_id);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
- * get posted. This is synchronous, and slow.
+ * get posted.
*/
- list_for_each_entry(mw, &req->rl_registered, mw_list) {
- f = &mw->frmr;
- if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
- __frwr_reset_mr(ia, mw);
- bad_wr = bad_wr->next;
- }
+ rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
+ while (bad_wr) {
+ f = container_of(bad_wr, struct rpcrdma_frmr,
+ fr_invwr);
+ mw = container_of(f, struct rpcrdma_mw, frmr);
+
+ __frwr_reset_mr(ia, mw);
+
+ bad_wr = bad_wr->next;
}
goto unmap;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 694e9b13ecf0..ca4d6e4528f3 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -141,7 +141,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
if (xdr->page_len) {
remaining = xdr->page_len;
- offset = xdr->page_base & ~PAGE_MASK;
+ offset = offset_in_page(xdr->page_base);
count = 0;
while (remaining) {
remaining -= min_t(unsigned int,
@@ -222,7 +222,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
- page_base = xdrbuf->page_base & ~PAGE_MASK;
+ page_base = offset_in_page(xdrbuf->page_base);
p = 0;
while (len && n < RPCRDMA_MAX_SEGS) {
if (!ppages[p]) {
@@ -540,7 +540,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
goto out;
page = virt_to_page(xdr->tail[0].iov_base);
- page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ page_base = offset_in_page(xdr->tail[0].iov_base);
/* If the content in the page list is an odd length,
* xdr_write_pages() has added a pad at the beginning
@@ -557,7 +557,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
*/
if (xdr->page_len) {
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
- page_base = xdr->page_base & ~PAGE_MASK;
+ page_base = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining) {
sge_no++;
@@ -587,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
*/
if (xdr->tail[0].iov_len) {
page = virt_to_page(xdr->tail[0].iov_base);
- page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ page_base = offset_in_page(xdr->tail[0].iov_base);
len = xdr->tail[0].iov_len;
map_tail:
@@ -734,6 +734,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
rpclen = 0;
}
+ req->rl_xid = rqst->rq_xid;
+ rpcrdma_insert_req(&r_xprt->rx_buf, req);
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -875,9 +878,9 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
srcp += curlen;
copy_len -= curlen;
- page_base = rqst->rq_rcv_buf.page_base;
- ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
- page_base &= ~PAGE_MASK;
+ ppages = rqst->rq_rcv_buf.pages +
+ (rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
+ page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
fixup_copy_count = 0;
if (copy_len && rqst->rq_rcv_buf.page_len) {
int pagelist_len;
@@ -928,6 +931,24 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
+/* Caller must guarantee @rep remains stable during this call.
+ */
+static void
+rpcrdma_mark_remote_invalidation(struct list_head *mws,
+ struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_mw *mw;
+
+ if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
+ return;
+
+ list_for_each_entry(mw, mws, mw_list)
+ if (mw->mw_handle == rep->rr_inv_rkey) {
+ mw->mw_flags = RPCRDMA_MW_F_RI;
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
@@ -969,14 +990,16 @@ rpcrdma_reply_handler(struct work_struct *work)
{
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
- struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
__be32 *iptr;
int rdmalen, status, rmerr;
unsigned long cwnd;
+ struct list_head mws;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
@@ -994,27 +1017,45 @@ rpcrdma_reply_handler(struct work_struct *work)
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
- spin_lock_bh(&xprt->transport_lock);
- rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
- if (!rqst)
+ spin_lock(&buf->rb_lock);
+ req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
+ headerp->rm_xid);
+ if (!req)
goto out_nomatch;
-
- req = rpcr_to_rdmar(rqst);
if (req->rl_reply)
goto out_duplicate;
- /* Sanity checking has passed. We are now committed
- * to complete this transaction.
+ list_replace_init(&req->rl_registered, &mws);
+ rpcrdma_mark_remote_invalidation(&mws, rep);
+
+ /* Avoid races with signals and duplicate replies
+ * by marking this req as matched.
*/
- list_del_init(&rqst->rq_list);
- spin_unlock_bh(&xprt->transport_lock);
+ req->rl_reply = rep;
+ spin_unlock(&buf->rb_lock);
+
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
- /* from here on, the reply is no longer an orphan */
- req->rl_reply = rep;
- xprt->reestablish_timeout = 0;
+ /* Invalidate and unmap the data payloads before waking the
+ * waiting application. This guarantees the memory regions
+ * are properly fenced from the server before the application
+ * accesses the data. It also ensures proper send flow control:
+ * waking the next RPC waits until this RPC has relinquished
+ * all its Send Queue entries.
+ */
+ if (!list_empty(&mws))
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
+ /* Perform XID lookup, reconstruction of the RPC reply, and
+ * RPC completion while holding the transport lock to ensure
+ * the rep, rqst, and rq_task pointers remain stable.
+ */
+ spin_lock_bh(&xprt->transport_lock);
+ rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+ if (!rqst)
+ goto out_norqst;
+ xprt->reestablish_timeout = 0;
if (headerp->rm_vers != rpcrdma_version)
goto out_badversion;
@@ -1024,12 +1065,9 @@ rpcrdma_reply_handler(struct work_struct *work)
case rdma_msg:
/* never expect read chunks */
/* never expect reply chunks (two ways to check) */
- /* never expect write chunks without having offered RDMA */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
(headerp->rm_body.rm_chunks[1] == xdr_zero &&
- headerp->rm_body.rm_chunks[2] != xdr_zero) ||
- (headerp->rm_body.rm_chunks[1] != xdr_zero &&
- list_empty(&req->rl_registered)))
+ headerp->rm_body.rm_chunks[2] != xdr_zero))
goto badheader;
if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
/* count any expected write chunks in read reply */
@@ -1066,8 +1104,7 @@ rpcrdma_reply_handler(struct work_struct *work)
/* never expect read or write chunks, always reply chunks */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
- headerp->rm_body.rm_chunks[2] != xdr_one ||
- list_empty(&req->rl_registered))
+ headerp->rm_body.rm_chunks[2] != xdr_one)
goto badheader;
iptr = (__be32 *)((unsigned char *)headerp +
RPCRDMA_HDRLEN_MIN);
@@ -1093,17 +1130,6 @@ badheader:
}
out:
- /* Invalidate and flush the data payloads before waking the
- * waiting application. This guarantees the memory region is
- * properly fenced from the server before the application
- * accesses the data. It also ensures proper send flow
- * control: waking the next RPC waits until this RPC has
- * relinquished all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
-
- spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
@@ -1112,7 +1138,7 @@ out:
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
- __func__, xprt, rqst, status);
+ __func__, xprt, rqst, status);
return;
out_badstatus:
@@ -1161,26 +1187,37 @@ out_rdmaerr:
r_xprt->rx_stats.bad_reply_count++;
goto out;
-/* If no pending RPC transaction was matched, post a replacement
- * receive buffer before returning.
+/* The req was still available, but by the time the transport_lock
+ * was acquired, the rqst and task had been released. Thus the RPC
+ * has already been terminated.
*/
+out_norqst:
+ spin_unlock_bh(&xprt->transport_lock);
+ rpcrdma_buffer_put(req);
+ dprintk("RPC: %s: race, no rqst left for req %p\n",
+ __func__, req);
+ return;
+
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
out_nomatch:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
__func__, be32_to_cpu(headerp->rm_xid),
rep->rr_len);
goto repost;
out_duplicate:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: "
"duplicate reply %p to RPC request %p: xid 0x%08x\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
repost:
r_xprt->rx_stats.bad_reply_count++;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 62ecbccd9748..d1c458e5ec4d 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -684,7 +684,8 @@ xprt_rdma_free(struct rpc_task *task)
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- if (unlikely(!list_empty(&req->rl_registered)))
+ rpcrdma_remove_req(&r_xprt->rx_buf, req);
+ if (!list_empty(&req->rl_registered))
ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3dbce9ac4327..e4171f2abe37 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -243,8 +243,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
- struct ib_qp_attr *attr = &ia->ri_qp_attr;
- struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
int connstate = 0;
switch (event->event) {
@@ -267,7 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: removing device for %pIS:%u\n",
+ pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+ ia->ri_device->name,
sap, rpc_get_port(sap));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -282,13 +281,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
connstate = 1;
- ib_query_qp(ia->ri_id->qp, attr,
- IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
- iattr);
- dprintk("RPC: %s: %d responder resources"
- " (%d initiator)\n",
- __func__, attr->max_dest_rd_atomic,
- attr->max_rd_atomic);
rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
case RDMA_CM_EVENT_CONNECT_ERROR:
@@ -298,11 +290,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
- sap, rpc_get_port(sap), ia->ri_device->name,
+ dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
+ sap, rpc_get_port(sap),
rdma_reject_msg(id, event->status));
-#endif
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
connstate = -EAGAIN;
@@ -310,37 +300,19 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
connected:
- dprintk("RPC: %s: %sconnected\n",
- __func__, connstate > 0 ? "" : "dis");
atomic_set(&xprt->rx_buf.rb_credits, 1);
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
- dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
- __func__, sap, rpc_get_port(sap), ep,
- rdma_event_msg(event->event));
+ dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
+ __func__, sap, rpc_get_port(sap),
+ ia->ri_device->name, ia->ri_ops->ro_displayname,
+ ep, rdma_event_msg(event->event));
break;
}
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- if (connstate == 1) {
- int ird = attr->max_dest_rd_atomic;
- int tird = ep->rep_remote_cma.responder_resources;
-
- pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
- sap, rpc_get_port(sap),
- ia->ri_device->name,
- ia->ri_ops->ro_displayname,
- xprt->rx_buf.rb_max_requests,
- ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
- } else if (connstate < 0) {
- pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
- sap, rpc_get_port(sap), connstate);
- }
-#endif
-
return 0;
}
@@ -971,7 +943,6 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
if (req == NULL)
return ERR_PTR(-ENOMEM);
- INIT_LIST_HEAD(&req->rl_free);
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
@@ -1033,6 +1004,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
+ INIT_LIST_HEAD(&buf->rb_pending);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
@@ -1055,7 +1027,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
goto out;
}
req->rl_backchannel = false;
- list_add(&req->rl_free, &buf->rb_send_bufs);
+ list_add(&req->rl_list, &buf->rb_send_bufs);
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
@@ -1084,8 +1056,8 @@ rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
struct rpcrdma_req *req;
req = list_first_entry(&buf->rb_send_bufs,
- struct rpcrdma_req, rl_free);
- list_del(&req->rl_free);
+ struct rpcrdma_req, rl_list);
+ list_del_init(&req->rl_list);
return req;
}
@@ -1187,6 +1159,7 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
if (!mw)
goto out_nomws;
+ mw->mw_flags = 0;
return mw;
out_nomws:
@@ -1267,7 +1240,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
spin_lock(&buffers->rb_lock);
buffers->rb_send_count--;
- list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
+ list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
if (rep) {
buffers->rb_recv_count--;
list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1d66acf1a723..b282d3f8cdd8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -271,6 +271,7 @@ struct rpcrdma_mw {
struct scatterlist *mw_sg;
int mw_nents;
enum dma_data_direction mw_dir;
+ unsigned long mw_flags;
union {
struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
@@ -282,6 +283,11 @@ struct rpcrdma_mw {
struct list_head mw_all;
};
+/* mw_flags */
+enum {
+ RPCRDMA_MW_F_RI = 1,
+};
+
/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
@@ -334,7 +340,8 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
- struct list_head rl_free;
+ struct list_head rl_list;
+ __be32 rl_xid;
unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
@@ -396,6 +403,7 @@ struct rpcrdma_buffer {
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
+ struct list_head rb_pending;
u32 rb_max_requests;
atomic_t rb_credits; /* most recent credit grant */
@@ -461,7 +469,7 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
- struct rpcrdma_req *);
+ struct list_head *);
void (*ro_unmap_safe)(struct rpcrdma_xprt *,
struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *);
@@ -544,6 +552,34 @@ void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+static inline void
+rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+ spin_lock(&buffers->rb_lock);
+ if (list_empty(&req->rl_list))
+ list_add_tail(&req->rl_list, &buffers->rb_pending);
+ spin_unlock(&buffers->rb_lock);
+}
+
+static inline struct rpcrdma_req *
+rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
+{
+ struct rpcrdma_req *pos;
+
+ list_for_each_entry(pos, &buffers->rb_pending, rl_list)
+ if (pos->rl_xid == xid)
+ return pos;
+ return NULL;
+}
+
+static inline void
+rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+ spin_lock(&buffers->rb_lock);
+ list_del(&req->rl_list);
+ spin_unlock(&buffers->rb_lock);
+}
+
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);