The big ticket items are:

- support for rbd "fancy" striping (myself).  The striping feature bit
   is now fully implemented, allowing mapping v2 images with non-default
   striping patterns.  This completes support for --image-format 2.
 
 - CephFS quota support (Luis Henriques and Zheng Yan).  This set is
   based on the new SnapRealm code in the upcoming v13.y.z ("Mimic")
   release.  Quota handling will be rejected on older filesystems.
 
 - memory usage improvements in CephFS (Chengguang Xu).  Directory
   specific bits have been split out of ceph_file_info and some effort
   went into improving cap reservation code to avoid OOM crashes.
 
 Also included a bunch of assorted fixes all over the place from
 Chengguang and others.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJazOI/AAoJEEp/3jgCEfOLOu0IAKGFkcCo0UdQDGHHJZHn2rAm
 CSWMMwyYGAhoWI6Gva0jx1A2omZLFSeq/MC8dWLL/MNAKt8i/qo8bTsTrwCHMR2Q
 D0FsvMWIhkWRS1/FcD1uVDhn0a/DFm5Kfy8kzz3v695TDCt+BYWrCqyHTB/wSdRR
 VpO3KdpHQ9h3ojNBRgIniOCNPeQP+QzLXy+P0h0oKbP2Y03mwJlsWG4L6zakkkwT
 e2I+RVdlOMUDJ7rZxiXESBr6BuLI4oOkPe8roQGmZPy1Xe17xa9M5iWVNuM6RUhO
 Z9bS2aLMhbDyeCPqvzgAnsUtFT0PAQjB5NYw2yqisbHs/wrU5kMOOpcLqz/Ls/s=
 =v1I9
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-4.17-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket items are:

   - support for rbd "fancy" striping (myself).

     The striping feature bit is now fully implemented, allowing mapping
     v2 images with non-default striping patterns. This completes
     support for --image-format 2.

   - CephFS quota support (Luis Henriques and Zheng Yan).

     This set is based on the new SnapRealm code in the upcoming v13.y.z
     ("Mimic") release. Quota handling will be rejected on older
     filesystems.

   - memory usage improvements in CephFS (Chengguang Xu).

     Directory specific bits have been split out of ceph_file_info and
     some effort went into improving cap reservation code to avoid OOM
     crashes.

  Also included a bunch of assorted fixes all over the place from
  Chengguang and others"

* tag 'ceph-for-4.17-rc1' of git://github.com/ceph/ceph-client: (67 commits)
  ceph: quota: report root dir quota usage in statfs
  ceph: quota: add counter for snaprealms with quota
  ceph: quota: cache inode pointer in ceph_snap_realm
  ceph: fix root quota realm check
  ceph: don't check quota for snap inode
  ceph: quota: update MDS when max_bytes is approaching
  ceph: quota: support for ceph.quota.max_bytes
  ceph: quota: don't allow cross-quota renames
  ceph: quota: support for ceph.quota.max_files
  ceph: quota: add initial infrastructure to support cephfs quotas
  rbd: remove VLA usage
  rbd: fix spelling mistake: "reregisteration" -> "reregistration"
  ceph: rename function drop_leases() to a more descriptive name
  ceph: fix invalid pointer dereference for error case in mdsc destroy
  ceph: return proper bool type to caller instead of pointer
  ceph: optimize memory usage
  ceph: optimize mds session register
  libceph, ceph: add __init attribution to init functions
  ceph: filter out used flags when printing unused open flags
  ceph: don't wait on writeback when there are no more dirty pages
  ...
This commit is contained in:
Linus Torvalds 2018-04-10 12:25:30 -07:00
commit b284d4d5a6
35 changed files with 2669 additions and 2031 deletions

View file

@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
CEPH_FEATURE_OSD_POOLRESEND | \
CEPH_FEATURE_MDS_QUOTA | \
CEPH_FEATURE_CRUSH_V4 | \
CEPH_FEATURE_NEW_OSDOP_ENCODING | \
CEPH_FEATURE_SERVER_JEWEL | \

View file

@ -134,6 +134,7 @@ struct ceph_dir_layout {
#define CEPH_MSG_CLIENT_LEASE 0x311
#define CEPH_MSG_CLIENT_SNAP 0x312
#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
#define CEPH_MSG_CLIENT_QUOTA 0x314
/* pool ops */
#define CEPH_MSG_POOLOP_REPLY 48
@ -807,4 +808,20 @@ struct ceph_mds_snap_realm {
} __attribute__ ((packed));
/* followed by my snap list, then prior parent snap list */
/*
* quotas
*
* On-wire payload of the client quota message (CEPH_MSG_CLIENT_QUOTA,
* defined above). All multi-byte fields are little-endian and the
* struct is packed, so the in-memory layout matches the wire format.
*/
struct ceph_mds_quota {
__le64 ino; /* ino */
struct ceph_timespec rctime;
__le64 rbytes; /* dir stats */
__le64 rfiles;
__le64 rsubdirs;
__u8 struct_v; /* compat */
__u8 struct_compat;
__le32 struct_len;
__le64 max_bytes; /* quota max. bytes */
__le64 max_files; /* quota max. files */
} __attribute__ ((packed));
#endif

View file

@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_cap_flush_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
extern struct kmem_cache *ceph_dir_file_cachep;
/* ceph_common.c */
extern bool libceph_compatible(void *data);

View file

@ -76,6 +76,7 @@ enum ceph_msg_data_type {
#ifdef CONFIG_BLOCK
CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
#endif /* CONFIG_BLOCK */
CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */
};
static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
@ -87,22 +88,106 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
#ifdef CONFIG_BLOCK
case CEPH_MSG_DATA_BIO:
#endif /* CONFIG_BLOCK */
case CEPH_MSG_DATA_BVECS:
return true;
default:
return false;
}
}
#ifdef CONFIG_BLOCK
/*
* Position within a chain of bios: the current bio plus a bvec_iter
* into it. Advancing past the end of one bio moves to bio->bi_next.
*/
struct ceph_bio_iter {
struct bio *bio;
struct bvec_iter iter;
};
/*
* Advance @it by @n bytes, walking the bio chain as needed. On each
* pass, __cur_n is the number of bytes consumed from the current bio
* (min of remaining request and remaining bio size); STEP is evaluated
* with __cur_n in scope before the iterator is advanced. BUG()s if the
* chain is exhausted before @n bytes have been consumed.
*/
#define __ceph_bio_iter_advance_step(it, n, STEP) do { \
unsigned int __n = (n), __cur_n; \
\
while (__n) { \
BUG_ON(!(it)->iter.bi_size); \
__cur_n = min((it)->iter.bi_size, __n); \
(void)(STEP); \
bio_advance_iter((it)->bio, &(it)->iter, __cur_n); \
if (!(it)->iter.bi_size && (it)->bio->bi_next) { \
dout("__ceph_bio_iter_advance_step next bio\n"); \
(it)->bio = (it)->bio->bi_next; \
(it)->iter = (it)->bio->bi_iter; \
} \
__n -= __cur_n; \
} \
} while (0)
/*
* Advance @it by @n bytes.
*/
#define ceph_bio_iter_advance(it, n) \
__ceph_bio_iter_advance_step(it, n, 0)
/*
* Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
*/
#define ceph_bio_iter_advance_step(it, n, BVEC_STEP) \
__ceph_bio_iter_advance_step(it, n, ({ \
struct bio_vec bv; \
struct bvec_iter __cur_iter; \
\
__cur_iter = (it)->iter; \
__cur_iter.bi_size = __cur_n; \
__bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
(void)(BVEC_STEP); \
}))
#endif /* CONFIG_BLOCK */
/*
* Position within a flat bio_vec array: base pointer plus a bvec_iter.
* Unlike ceph_bio_iter there is no chaining -- advancing beyond
* iter.bi_size is a BUG().
*/
struct ceph_bvec_iter {
struct bio_vec *bvecs;
struct bvec_iter iter;
};
/*
* Advance @it by @n bytes, evaluating STEP first (before the iterator
* moves). @n must not exceed the bytes remaining in @it.
*/
#define __ceph_bvec_iter_advance_step(it, n, STEP) do { \
BUG_ON((n) > (it)->iter.bi_size); \
(void)(STEP); \
bvec_iter_advance((it)->bvecs, &(it)->iter, (n)); \
} while (0)
/*
* Advance @it by @n bytes.
*/
#define ceph_bvec_iter_advance(it, n) \
__ceph_bvec_iter_advance_step(it, n, 0)
/*
* Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
*/
#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP) \
__ceph_bvec_iter_advance_step(it, n, ({ \
struct bio_vec bv; \
struct bvec_iter __cur_iter; \
\
__cur_iter = (it)->iter; \
__cur_iter.bi_size = (n); \
for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter) \
(void)(BVEC_STEP); \
}))
/*
* Truncate @it to @n bytes; @n must not exceed the current size.
*/
#define ceph_bvec_iter_shorten(it, n) do { \
BUG_ON((n) > (it)->iter.bi_size); \
(it)->iter.bi_size = (n); \
} while (0)
struct ceph_msg_data {
struct list_head links; /* ceph_msg->data */
enum ceph_msg_data_type type;
union {
#ifdef CONFIG_BLOCK
struct {
struct bio *bio;
size_t bio_length;
struct ceph_bio_iter bio_pos;
u32 bio_length;
};
#endif /* CONFIG_BLOCK */
struct ceph_bvec_iter bvec_pos;
struct {
struct page **pages; /* NOT OWNER. */
size_t length; /* total # bytes */
@ -122,11 +207,9 @@ struct ceph_msg_data_cursor {
bool need_crc; /* crc update needed */
union {
#ifdef CONFIG_BLOCK
struct { /* bio */
struct bio *bio; /* bio from list */
struct bvec_iter bvec_iter;
};
struct ceph_bio_iter bio_iter;
#endif /* CONFIG_BLOCK */
struct bvec_iter bvec_iter;
struct { /* pages */
unsigned int page_offset; /* offset in page */
unsigned short page_index; /* index in array */
@ -290,9 +373,11 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
size_t length);
void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
u32 length);
#endif /* CONFIG_BLOCK */
void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
struct ceph_bvec_iter *bvec_pos);
extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail);

View file

@ -57,6 +57,7 @@ enum ceph_osd_data_type {
#ifdef CONFIG_BLOCK
CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
CEPH_OSD_DATA_TYPE_BVECS,
};
struct ceph_osd_data {
@ -72,10 +73,11 @@ struct ceph_osd_data {
struct ceph_pagelist *pagelist;
#ifdef CONFIG_BLOCK
struct {
struct bio *bio; /* list of bios */
size_t bio_length; /* total in list */
struct ceph_bio_iter bio_pos;
u32 bio_length;
};
#endif /* CONFIG_BLOCK */
struct ceph_bvec_iter bvec_pos;
};
};
@ -405,10 +407,14 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
unsigned int which,
struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
unsigned int which,
struct bio *bio, size_t bio_length);
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bio_iter *bio_pos,
u32 bio_length);
#endif /* CONFIG_BLOCK */
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
unsigned int which,
@ -418,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
struct page **pages, u64 length,
u32 alignment, bool pages_from_pool,
bool own_pages);
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
unsigned int which,
struct bio_vec *bvecs, u32 bytes);
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
unsigned int which,
struct page **pages, u64 length,

View file

@ -5,7 +5,6 @@
#include <linux/rbtree.h>
#include <linux/ceph/types.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/ceph_fs.h>
#include <linux/crush/crush.h>
/*
@ -280,11 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
const struct ceph_osds *new_acting,
bool any_change);
/* calculate mapping of a file extent to an object */
extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 off, u64 len,
u64 *bno, u64 *oxoff, u64 *oxlen);
int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,

View file

@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_CEPH_STRIPER_H
#define _LINUX_CEPH_STRIPER_H
#include <linux/list.h>
#include <linux/types.h>
struct ceph_file_layout;
/*
* Map the file extent starting at @off onto a single object: returns
* the object number in @objno, the byte offset within that object in
* @objoff, and in @xlen the number of bytes of [@off, @off + @len)
* that land in that object (may be less than @len for striped
* layouts; note @xlen is only u32).
*/
void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
u64 off, u64 len,
u64 *objno, u64 *objoff, u32 *xlen);
/*
* An extent within a single object: object number plus the byte range
* (@oe_off, @oe_len) inside that object, linked into a caller-owned
* list via @oe_item.
*/
struct ceph_object_extent {
struct list_head oe_item;
u64 oe_objno;
u64 oe_off;
u64 oe_len;
};
/* Initialize the list linkage of a freshly allocated extent. */
static inline void ceph_object_extent_init(struct ceph_object_extent *ex)
{
INIT_LIST_HEAD(&ex->oe_item);
}
/*
* Called for each mapped stripe unit.
*
* @bytes: number of bytes mapped, i.e. the minimum of the full length
* requested (file extent length) or the remainder of the stripe
* unit within an object
*/
typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex,
u32 bytes, void *arg);
/*
* Map the file range [@off, @off + @len) to per-object extents on
* @object_extents, obtaining new entries from @alloc_fn(@alloc_arg)
* and invoking @action_fn(@action_arg) for each mapped stripe unit
* (see ceph_object_extent_fn_t). Returns 0 on success; presumably a
* negative errno on failure -- confirm against the implementation.
*/
int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
struct list_head *object_extents,
struct ceph_object_extent *alloc_fn(void *arg),
void *alloc_arg,
ceph_object_extent_fn_t action_fn,
void *action_arg);
/*
* Like ceph_file_to_extents() but without an allocation callback --
* NOTE(review): looks like it walks an already-populated
* @object_extents list; confirm in the implementation.
*/
int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
struct list_head *object_extents,
ceph_object_extent_fn_t action_fn,
void *action_arg);
/* A byte range of the file itself (as opposed to within an object). */
struct ceph_file_extent {
u64 fe_off;
u64 fe_len;
};
/* Total number of bytes covered by an array of file extents. */
static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents,
u32 num_file_extents)
{
u64 bytes = 0;
u32 i;
for (i = 0; i < num_file_extents; i++)
bytes += file_extents[i].fe_len;
return bytes;
}
/*
* Reverse mapping: convert the object extent (@objno, @objoff,
* @objlen) into file extents, returned via *@file_extents and
* *@num_file_extents. NOTE(review): the double pointer suggests the
* callee allocates *@file_extents -- confirm ownership/who frees it.
*/
int ceph_extent_to_file(struct ceph_file_layout *l,
u64 objno, u64 objoff, u64 objlen,
struct ceph_file_extent **file_extents,
u32 *num_file_extents);
#endif