NFS client updates for Linux 3.20

Highlights include:
 
 Features:
 - Removing the forced serialisation of open()/close() calls in NFSv4.x (x>0)
   makes for a significant performance improvement in metadata intensive
   workloads.
 - Full support for the pNFS "flexible files" layout type
 - Further RPC/RDMA client improvements from Chuck
 
 Bugfixes:
 - Stable fix: NFSv4.1 backchannel calls blocking operations with !TASK_RUNNING
 - Stable fix: pnfs_generic_pg_init_read/write can be called with lseg == NULL
 - Stable fix: Fix an Oopsable condition when nsm_mon_unmon is called as part
   of the namespace cleanup
 - Stable fix: Ensure we reference the inode for return-on-close in delegreturn
 
 - Use SO_REUSEPORT to ensure that NFSv3 TCP connections can rebind to the
   same source address/port combination during a disconnect/reconnect event.
   This is a requirement imposed by most NFSv3 server duplicate reply cache
   implementations.
 
 Optimisations:
 - Ask for no NFSv4.1 delegations on OPEN if using O_DIRECT
 
 Other:
 - Add Anna Schumaker as co-maintainer for the NFS client
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJU2swgAAoJEGcL54qWCgDyCWoP/1bxN8PesqaiwsBm3fsEqcra
 WZtMirDIpJYpHwgysdv9t5otBQrb7GrLlNyGZ9NBOVNakifoyj2tHe+/XGDx7Qny
 iYxXam0QdyjLU+bi4QoG4bdFncwQ/NmC6fqoG0rc25Il96Oggnc6LeSwL6Koc3CD
 QitRLLi/PaU5qtuaV80+tYMJiqZbpBdVjB+xfSpu7rhyWzm1QNdEeQYor5CozzMi
 6cRJuvHgjoZ1xriCWdxQHjqOiEaKNLwfm3uZ3XVaaUAIDhStXugdhIihj3J6Wi7k
 MKNuY+AKJiy3yOdFfhYALyq+TPundDbYoM9x1foigjgP8zxXVfIU3VS6l33TSlzX
 zH+/lcnXAHFWjFYoAijG1gv1H+OYcTuDlKaYAShQ/cOkTfWFrmlWv+pZs3SSkmPY
 4Aeu97YYOkB5ZZ7wTWKksQMeAu/LYNRSA3h+ANvEIR+NLlTSQTcaChlvBmS0IY5D
 qMmko1Xgmsxv+B8UeIY7PLfGBGrUdFho1JiDTfL8Xk7fGOfM7iBtMeaMAfdyOSUq
 AMqH9EDUUOWaFDggO2iisLtMCY6kJ0iFGKRTwzR38jAqm3bjWaIDitUqshNrNbC+
 mbwvAVxn0IFSCJGFsVd3kD2rTLGDElZ25GLFW9JMalarE6nlLG7e4p65g209Q9bT
 HYKiyinJJM2Zji07kmG/
 =c47U
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-3.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Features:
   - Removing the forced serialisation of open()/close() calls in
     NFSv4.x (x>0) makes for a significant performance improvement in
     metadata intensive workloads.
   - Full support for the pNFS "flexible files" layout type
   - Further RPC/RDMA client improvements from Chuck

  Bugfixes:
   - Stable fix: NFSv4.1 backchannel calls blocking operations with !TASK_RUNNING
   - Stable fix: pnfs_generic_pg_init_read/write can be called with lseg == NULL
   - Stable fix: Fix an Oopsable condition when nsm_mon_unmon is called
     as part of the namespace cleanup
   - Stable fix: Ensure we reference the inode for return-on-close in
     delegreturn
   - Use SO_REUSEPORT to ensure that NFSv3 TCP connections can rebind to
     the same source address/port combination during a disconnect/
     reconnect event.  This is a requirement imposed by most NFSv3
     server duplicate reply cache implementations.

  Optimisations:
   - Ask for no NFSv4.1 delegations on OPEN if using O_DIRECT

  Other:
   - Add Anna Schumaker as co-maintainer for the NFS client"

* tag 'nfs-for-3.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (119 commits)
  SUNRPC: Cleanup to remove xs_tcp_close()
  pnfs: delete an unintended goto
  pnfs/flexfiles: Do not dprintk after the free
  SUNRPC: Fix stupid typo in xs_sock_set_reuseport
  SUNRPC: Define xs_tcp_fin_timeout only if CONFIG_SUNRPC_DEBUG
  SUNRPC: Handle connection reset more efficiently.
  SUNRPC: Remove the redundant XPRT_CONNECTION_CLOSE flag
  SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release
  SUNRPC: Ensure xs_tcp_shutdown() requests a full close of the connection
  SUNRPC: Cleanup to remove remaining uses of XPRT_CONNECTION_ABORT
  SUNRPC: Remove TCP socket linger code
  SUNRPC: Remove TCP client connection reset hack
  SUNRPC: TCP/UDP always close the old socket before reconnecting
  SUNRPC: Add helpers to prevent socket create from racing
  SUNRPC: Ensure xs_reset_transport() resets the close connection flags
  SUNRPC: Do not clear the source port in xs_reset_transport
  SUNRPC: Handle EADDRINUSE on connect
  SUNRPC: Set SO_REUSEPORT socket option for TCP connections
  NFSv4.1: Fix pnfs_put_lseg races
  NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS
  ...
This commit is contained in:
Linus Torvalds 2015-02-11 17:14:54 -08:00
commit 6f83e5bd3e
60 changed files with 5233 additions and 1726 deletions

View file

@ -516,6 +516,7 @@ enum pnfs_layouttype {
LAYOUT_NFSV4_1_FILES = 1,
LAYOUT_OSD2_OBJECTS = 2,
LAYOUT_BLOCK_VOLUME = 3,
LAYOUT_FLEX_FILES = 4,
};
/* used for both layout return and recall */

View file

@ -77,10 +77,6 @@ struct nfs_client {
/* Client owner identifier */
const char * cl_owner_id;
/* Our own IP address, as a null-terminated string.
* This is used to generate the mv0 callback address.
*/
char cl_ipaddr[48];
u32 cl_cb_ident; /* v4.0 callback identifier */
const struct nfs4_minor_version_ops *cl_mvops;
unsigned long cl_mig_gen;
@ -108,6 +104,11 @@ struct nfs_client {
#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */
#endif /* CONFIG_NFS_V4 */
/* Our own IP address, as a null-terminated string.
* This is used to generate the mv0 callback address.
*/
char cl_ipaddr[48];
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
#endif

View file

@ -73,5 +73,7 @@ int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t
int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
extern unsigned int nfs_idmap_cache_timeout;
#endif /* NFS_IDMAP_H */

View file

@ -58,6 +58,9 @@ struct nfs_pageio_ops {
size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
struct nfs_page *);
int (*pg_doio)(struct nfs_pageio_descriptor *);
unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *,
struct nfs_page *);
void (*pg_cleanup)(struct nfs_pageio_descriptor *);
};
struct nfs_rw_ops {
@ -69,18 +72,21 @@ struct nfs_rw_ops {
struct inode *);
void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *,
const struct nfs_rpc_ops *,
struct rpc_task_setup *, int);
};
struct nfs_pageio_descriptor {
struct nfs_pgio_mirror {
struct list_head pg_list;
unsigned long pg_bytes_written;
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
unsigned char pg_moreio : 1,
pg_recoalesce : 1;
unsigned char pg_recoalesce : 1;
};
struct nfs_pageio_descriptor {
unsigned char pg_moreio : 1;
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
const struct nfs_rw_ops *pg_rw_ops;
@ -91,8 +97,18 @@ struct nfs_pageio_descriptor {
struct pnfs_layout_segment *pg_lseg;
struct nfs_direct_req *pg_dreq;
void *pg_layout_private;
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;
struct nfs_pgio_mirror *pg_mirrors;
struct nfs_pgio_mirror pg_mirrors_static[1];
struct nfs_pgio_mirror *pg_mirrors_dynamic;
u32 pg_mirror_idx; /* current mirror */
};
/* arbitrarily selected limit to number of mirrors */
#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,

View file

@ -285,6 +285,7 @@ struct nfs4_layoutcommit_data {
struct nfs_fattr fattr;
struct list_head lseg_list;
struct rpc_cred *cred;
struct inode *inode;
struct nfs4_layoutcommit_args args;
struct nfs4_layoutcommit_res res;
};
@ -293,6 +294,7 @@ struct nfs4_layoutreturn_args {
struct nfs4_sequence_args seq_args;
struct pnfs_layout_hdr *layout;
struct inode *inode;
struct pnfs_layout_range range;
nfs4_stateid stateid;
__u32 layout_type;
};
@ -308,6 +310,7 @@ struct nfs4_layoutreturn {
struct nfs4_layoutreturn_res res;
struct rpc_cred *cred;
struct nfs_client *clp;
struct inode *inode;
int rpc_status;
};
@ -325,6 +328,7 @@ struct nfs_openargs {
struct nfs_seqid * seqid;
int open_flags;
fmode_t fmode;
u32 share_access;
u32 access;
__u64 clientid;
struct stateowner_id id;
@ -389,9 +393,10 @@ struct nfs_open_confirmres {
struct nfs_closeargs {
struct nfs4_sequence_args seq_args;
struct nfs_fh * fh;
nfs4_stateid * stateid;
nfs4_stateid stateid;
struct nfs_seqid * seqid;
fmode_t fmode;
u32 share_access;
const u32 * bitmask;
};
@ -416,12 +421,13 @@ struct nfs_lock_args {
struct nfs_fh * fh;
struct file_lock * fl;
struct nfs_seqid * lock_seqid;
nfs4_stateid * lock_stateid;
nfs4_stateid lock_stateid;
struct nfs_seqid * open_seqid;
nfs4_stateid * open_stateid;
nfs4_stateid open_stateid;
struct nfs_lowner lock_owner;
unsigned char block : 1;
unsigned char reclaim : 1;
unsigned char new_lock : 1;
unsigned char new_lock_owner : 1;
};
@ -437,7 +443,7 @@ struct nfs_locku_args {
struct nfs_fh * fh;
struct file_lock * fl;
struct nfs_seqid * seqid;
nfs4_stateid * stateid;
nfs4_stateid stateid;
};
struct nfs_locku_res {
@ -513,6 +519,7 @@ struct nfs_pgio_res {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
__u32 count;
__u32 op_status;
int eof; /* used by read */
struct nfs_writeverf * verf; /* used by write */
const struct nfs_server *server; /* used by write */
@ -532,6 +539,7 @@ struct nfs_commitargs {
struct nfs_commitres {
struct nfs4_sequence_res seq_res;
__u32 op_status;
struct nfs_fattr *fattr;
struct nfs_writeverf *verf;
const struct nfs_server *server;
@ -1325,7 +1333,8 @@ struct nfs_pgio_header {
__u64 mds_offset; /* Filelayout dense stripe */
struct nfs_page_array page_array;
struct nfs_client *ds_clp; /* pNFS data server */
int ds_idx; /* ds index if ds_clp is set */
int ds_commit_idx; /* ds index if ds_clp is set */
int pgio_mirror_idx;/* mirror index in pgio layer */
};
struct nfs_mds_commit_info {

View file

@ -57,7 +57,7 @@ struct rpc_clnt {
const struct rpc_timeout *cl_timeout; /* Timeout strategy */
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME];
char cl_nodename[UNX_MAXNODENAME+1];
struct rpc_pipe_dir_head cl_pipedir_objects;
struct rpc_clnt * cl_parent; /* Points to parent of clones */
struct rpc_rtt cl_rtt_default;
@ -112,6 +112,7 @@ struct rpc_create_args {
struct sockaddr *saddress;
const struct rpc_timeout *timeout;
const char *servername;
const char *nodename;
const struct rpc_program *program;
u32 prognumber; /* overrides program->number */
u32 version;

View file

@ -79,6 +79,8 @@ struct rpc_clnt;
struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *);
void rpc_count_iostats(const struct rpc_task *,
struct rpc_iostats *);
void rpc_count_iostats_metrics(const struct rpc_task *,
struct rpc_iostats *);
void rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
void rpc_free_iostats(struct rpc_iostats *);
@ -87,6 +89,8 @@ void rpc_free_iostats(struct rpc_iostats *);
static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; }
static inline void rpc_count_iostats(const struct rpc_task *task,
struct rpc_iostats *stats) {}
/*
 * Empty inline variant of rpc_count_iostats_metrics(): accepts the same
 * arguments as the real function and does nothing with them.
 *
 * The parameters are named because a function *definition* with unnamed
 * parameters is not valid C before C23 and fails to compile whenever this
 * stub is the variant selected by the preprocessor.
 */
static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
	struct rpc_iostats *stats) {}
static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
static inline void rpc_free_iostats(struct rpc_iostats *stats) {}

View file

@ -42,6 +42,9 @@
#include <linux/types.h>
#define RPCRDMA_VERSION 1
#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION)
struct rpcrdma_segment {
__be32 rs_handle; /* Registered memory handle */
__be32 rs_length; /* Length of the chunk in bytes */
@ -95,7 +98,10 @@ struct rpcrdma_msg {
} rm_body;
};
#define RPCRDMA_HDRLEN_MIN 28
/*
* Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
*/
#define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7)
enum rpcrdma_errcode {
ERR_VERS = 1,
@ -115,4 +121,10 @@ enum rpcrdma_proc {
RDMA_ERROR = 4 /* An RPC RDMA encoding error */
};
#define rdma_msg cpu_to_be32(RDMA_MSG)
#define rdma_nomsg cpu_to_be32(RDMA_NOMSG)
#define rdma_msgp cpu_to_be32(RDMA_MSGP)
#define rdma_done cpu_to_be32(RDMA_DONE)
#define rdma_error cpu_to_be32(RDMA_ERROR)
#endif /* _LINUX_SUNRPC_RPC_RDMA_H */

View file

@ -63,8 +63,6 @@ extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod;
#define RPCRDMA_VERSION 1
/*
* Contexts are built when an RDMA request is created and are a
* record of the resources that can be recovered when the request

View file

@ -347,6 +347,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt);
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
int xs_swapper(struct rpc_xprt *xprt, int enable);
bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *);
void xprt_unlock_connect(struct rpc_xprt *, void *);
/*
* Reserved bit positions in xprt->state
*/
@ -357,10 +360,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable);
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_CONGESTED (9)
#define XPRT_CONNECTION_REUSE (10)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{