This time around we have:
- support for rbd data-pool feature, which enables rbd images on
erasure-coded pools (myself). CEPH_PG_MAX_SIZE has been bumped to
allow erasure-coded profiles with k+m up to 32.
- a patch for ceph_d_revalidate() performance regression introduced in
4.9, along with some cleanups in the area (Jeff Layton)
- a set of fixes for unsafe ->d_parent accesses in CephFS (Jeff Layton)
- buffered reads are now processed in rsize windows instead of rasize
windows (Andreas Gerstmayr). The new default for rsize mount option
is 64M.
- ack vs commit distinction is gone, greatly simplifying ->fsync() and
MOSDOpReply handling code (myself)
Also a few filesystem bug fixes from Zheng, a CRUSH sync up (CRUSH
computations are still serialized though) and several minor fixes and
cleanups all over.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2
iQEcBAABCAAGBQJYtY0rAAoJEEp/3jgCEfOLQioH/36QKsalquY1FCdJnJve9qj0
q19OohamIedhv76AYvXhJzBBHlVwerjicE51/bSzuUhxV+ApdATrPPcLC22oLd3i
h0R9NAUMYjiris1yN/Z9JRiPCSdsxvHuRycsUMRSRbxZhnyP9XdTxFD1A+fLfisU
Z4osyTzadabVL5Um9maRBbAtXCWh3d9JZzPa5xIvWTEO4CWWk87GtEIIQDcgx+Y6
8ZSMmrVFDNtskUp9js+LnFYW7/xBsEXyqgsqKaecf5uQqwu1WKRXSKtv9PUmGAIb
HBrlUdV1PQaCzTYtaoztJshNdYcphM5L7gePzxRG0nXrTNsq8J5eCzI8en5qS8w=
=CPL/
-----END PGP SIGNATURE-----
Merge tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"This time around we have:
- support for rbd data-pool feature, which enables rbd images on
erasure-coded pools (myself). CEPH_PG_MAX_SIZE has been bumped to
allow erasure-coded profiles with k+m up to 32.
- a patch for ceph_d_revalidate() performance regression introduced
in 4.9, along with some cleanups in the area (Jeff Layton)
- a set of fixes for unsafe ->d_parent accesses in CephFS (Jeff
Layton)
- buffered reads are now processed in rsize windows instead of rasize
windows (Andreas Gerstmayr). The new default for rsize mount option
is 64M.
- ack vs commit distinction is gone, greatly simplifying ->fsync()
and MOSDOpReply handling code (myself)
... also a few filesystem bug fixes from Zheng, a CRUSH sync up (CRUSH
computations are still serialized though) and several minor fixes and
cleanups all over"
* tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client: (52 commits)
libceph, rbd, ceph: WRITE | ONDISK -> WRITE
libceph: get rid of ack vs commit
ceph: remove special ack vs commit behavior
ceph: tidy some white space in get_nonsnap_parent()
crush: fix dprintk compilation
crush: do is_out test only if we do not collide
ceph: remove req from unsafe list when unregistering it
rbd: constify device_type structure
rbd: kill obj_request->object_name and rbd_segment_name_cache
rbd: store and use obj_request->object_no
rbd: RBD_V{1,2}_DATA_FORMAT macros
rbd: factor out __rbd_osd_req_create()
rbd: set offset and length outside of rbd_obj_request_create()
rbd: support for data-pool feature
rbd: introduce rbd_init_layout()
rbd: use rbd_obj_bytes() more
rbd: remove now unused rbd_obj_request_wait() and helpers
rbd: switch rbd_obj_method_sync() to ceph_osdc_call()
libceph: pass reply buffer length through ceph_osdc_call()
rbd: do away with obj_request in rbd_obj_read_sync()
...
This commit is contained in:
commit
b2deee2dc0
28 changed files with 835 additions and 932 deletions
|
|
@ -22,7 +22,6 @@ struct ceph_osd_client;
|
|||
* completion callback for async writepages
|
||||
*/
|
||||
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
|
||||
typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
|
||||
|
||||
#define CEPH_HOMELESS_OSD -1
|
||||
|
||||
|
|
@ -170,15 +169,12 @@ struct ceph_osd_request {
|
|||
unsigned int r_num_ops;
|
||||
|
||||
int r_result;
|
||||
bool r_got_reply;
|
||||
|
||||
struct ceph_osd_client *r_osdc;
|
||||
struct kref r_kref;
|
||||
bool r_mempool;
|
||||
struct completion r_completion;
|
||||
struct completion r_done_completion; /* fsync waiter */
|
||||
struct completion r_completion; /* private to osd_client.c */
|
||||
ceph_osdc_callback_t r_callback;
|
||||
ceph_osdc_unsafe_callback_t r_unsafe_callback;
|
||||
struct list_head r_unsafe_item;
|
||||
|
||||
struct inode *r_inode; /* for use by callbacks */
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
|
|||
case CEPH_POOL_TYPE_EC:
|
||||
return false;
|
||||
default:
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest,
|
|||
const struct ceph_object_locator *src);
|
||||
void ceph_oloc_destroy(struct ceph_object_locator *oloc);
|
||||
|
||||
/*
|
||||
* Maximum object name length supported by the kernel client
|
||||
*
|
||||
* (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
|
||||
*/
|
||||
#define CEPH_MAX_OID_NAME_LEN 100
|
||||
|
||||
/*
|
||||
* 51-char inline_name is long enough for all cephfs and all but one
|
||||
* rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
|
||||
|
|
@ -173,8 +166,8 @@ struct ceph_osdmap {
|
|||
* the list of osds that store+replicate them. */
|
||||
struct crush_map *crush;
|
||||
|
||||
struct mutex crush_scratch_mutex;
|
||||
int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
|
||||
struct mutex crush_workspace_mutex;
|
||||
void *crush_workspace;
|
||||
};
|
||||
|
||||
static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ struct ceph_timespec {
|
|||
#define CEPH_PG_LAYOUT_LINEAR 2
|
||||
#define CEPH_PG_LAYOUT_HYBRID 3
|
||||
|
||||
#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
|
||||
#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */
|
||||
|
||||
/*
|
||||
* placement group.
|
||||
|
|
|
|||
|
|
@ -135,13 +135,6 @@ struct crush_bucket {
|
|||
__u32 size; /* num items */
|
||||
__s32 *items;
|
||||
|
||||
/*
|
||||
* cached random permutation: used for uniform bucket and for
|
||||
* the linear search fallback for the other bucket types.
|
||||
*/
|
||||
__u32 perm_x; /* @x for which *perm is defined */
|
||||
__u32 perm_n; /* num elements of *perm that are permuted/defined */
|
||||
__u32 *perm;
|
||||
};
|
||||
|
||||
struct crush_bucket_uniform {
|
||||
|
|
@ -211,6 +204,21 @@ struct crush_map {
|
|||
* device fails. */
|
||||
__u8 chooseleaf_stable;
|
||||
|
||||
/*
|
||||
* This value is calculated after decode or construction by
|
||||
* the builder. It is exposed here (rather than having a
|
||||
* 'build CRUSH working space' function) so that callers can
|
||||
* reserve a static buffer, allocate space on the stack, or
|
||||
* otherwise avoid calling into the heap allocator if they
|
||||
* want to. The size of the working space depends on the map,
|
||||
* while the size of the scratch vector passed to the mapper
|
||||
* depends on the size of the desired result set.
|
||||
*
|
||||
* Nothing stops the caller from allocating both in one swell
|
||||
* foop and passing in two pointers, though.
|
||||
*/
|
||||
size_t working_size;
|
||||
|
||||
#ifndef __KERNEL__
|
||||
/*
|
||||
* version 0 (original) of straw_calc has various flaws. version 1
|
||||
|
|
@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i)
|
|||
return ((i+1) << 1)-1;
|
||||
}
|
||||
|
||||
/*
|
||||
* These data structures are private to the CRUSH implementation. They
|
||||
* are exposed in this header file because builder needs their
|
||||
* definitions to calculate the total working size.
|
||||
*
|
||||
* Moving this out of the crush map allows us to treat the CRUSH map as
|
||||
* immutable within the mapper and removes the requirement for a CRUSH
|
||||
* map lock.
|
||||
*/
|
||||
struct crush_work_bucket {
|
||||
__u32 perm_x; /* @x for which *perm is defined */
|
||||
__u32 perm_n; /* num elements of *perm that are permuted/defined */
|
||||
__u32 *perm; /* Permutation of the bucket's items */
|
||||
};
|
||||
|
||||
struct crush_work {
|
||||
struct crush_work_bucket **work; /* Per-bucket working store */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -15,6 +15,20 @@ extern int crush_do_rule(const struct crush_map *map,
|
|||
int ruleno,
|
||||
int x, int *result, int result_max,
|
||||
const __u32 *weights, int weight_max,
|
||||
int *scratch);
|
||||
void *cwin);
|
||||
|
||||
/*
|
||||
* Returns the exact amount of workspace that will need to be used
|
||||
* for a given combination of crush_map and result_max. The caller can
|
||||
* then allocate this much on its own, either on the stack, in a
|
||||
* per-thread long-lived buffer, or however it likes.
|
||||
*/
|
||||
static inline size_t crush_work_size(const struct crush_map *map,
|
||||
int result_max)
|
||||
{
|
||||
return map->working_size + result_max * 3 * sizeof(__u32);
|
||||
}
|
||||
|
||||
void crush_init_workspace(const struct crush_map *map, void *v);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue