From a790c2584c02c5ce60db07ab601d55b7f539db34 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Feb 2023 15:11:42 +0000 Subject: afs: Remove whitespace before most ')' from the trace header checkpatch objects to whitespace before ')', so remove most of it from the afs trace header. Signed-off-by: David Howells cc: Marc Dionne cc: Jeff Layton cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org --- include/trace/events/afs.h | 242 ++++++++++++++++++++++----------------------- 1 file changed, 121 insertions(+), 121 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index e9d412d19dbb..cfcd6452c156 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -654,12 +654,12 @@ TRACE_EVENT(afs_receive_data, TP_ARGS(call, iter, want_more, ret), TP_STRUCT__entry( - __field(loff_t, remain ) - __field(unsigned int, call ) - __field(enum afs_call_state, state ) - __field(unsigned short, unmarshall ) - __field(bool, want_more ) - __field(int, ret ) + __field(loff_t, remain) + __field(unsigned int, call) + __field(enum afs_call_state, state) + __field(unsigned short, unmarshall) + __field(bool, want_more) + __field(int, ret) ), TP_fast_assign( @@ -686,9 +686,9 @@ TRACE_EVENT(afs_notify_call, TP_ARGS(rxcall, call), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_call_state, state ) - __field(unsigned short, unmarshall ) + __field(unsigned int, call) + __field(enum afs_call_state, state) + __field(unsigned short, unmarshall) ), TP_fast_assign( @@ -708,9 +708,9 @@ TRACE_EVENT(afs_cb_call, TP_ARGS(call), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(u32, op ) - __field(u16, service_id ) + __field(unsigned int, call) + __field(u32, op) + __field(u16, service_id) ), TP_fast_assign( @@ -733,11 +733,11 @@ TRACE_EVENT(afs_call, TP_ARGS(call_debug_id, op, ref, outstanding, where), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(int, op ) - __field(int, ref ) - __field(int, outstanding ) - __field(const void *, where ) + __field(unsigned int, call) + __field(int, op) + __field(int, ref) + __field(int, outstanding) + __field(const void *, where) ), TP_fast_assign( @@ -762,9 +762,9 @@ TRACE_EVENT(afs_make_fs_call, TP_ARGS(call, fid), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_fs_operation, op ) - __field_struct(struct afs_fid, fid ) + __field(unsigned int, call) + __field(enum afs_fs_operation, op) + __field_struct(struct afs_fid, fid) ), TP_fast_assign( @@ -794,10 +794,10 @@ TRACE_EVENT(afs_make_fs_calli, TP_ARGS(call, fid, i), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(unsigned int, i ) - __field(enum afs_fs_operation, op ) - __field_struct(struct afs_fid, fid ) + __field(unsigned int, call) + __field(unsigned int, i) + __field(enum afs_fs_operation, op) + __field_struct(struct afs_fid, fid) ), TP_fast_assign( @@ -829,10 +829,10 @@ TRACE_EVENT(afs_make_fs_call1, TP_ARGS(call, fid, name), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_fs_operation, op ) - __field_struct(struct afs_fid, fid ) - __array(char, name, 24 ) + __field(unsigned int, call) + __field(enum afs_fs_operation, op) + __field_struct(struct afs_fid, fid) + __array(char, name, 24) ), TP_fast_assign( @@ -866,11 +866,11 @@ TRACE_EVENT(afs_make_fs_call2, TP_ARGS(call, fid, name, name2), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_fs_operation, op ) - __field_struct(struct afs_fid, fid ) - __array(char, name, 24 ) - __array(char, name2, 24 ) + __field(unsigned int, call) + __field(enum afs_fs_operation, op) + __field_struct(struct afs_fid, fid) + __array(char, name, 24) + __array(char, name2, 24) ), TP_fast_assign( @@ -907,8 +907,8 @@ TRACE_EVENT(afs_make_vl_call, TP_ARGS(call), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_vl_operation, op ) + __field(unsigned int, call) + __field(enum afs_vl_operation, op) ), TP_fast_assign( @@ -927,10 +927,10 @@ TRACE_EVENT(afs_call_done, TP_ARGS(call), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(struct rxrpc_call *, rx_call ) - __field(int, ret ) - __field(u32, abort_code ) + __field(unsigned int, call) + __field(struct rxrpc_call *, rx_call) + __field(int, ret) + __field(u32, abort_code) ), TP_fast_assign( @@ -953,10 +953,10 @@ TRACE_EVENT(afs_send_data, TP_ARGS(call, msg), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(unsigned int, flags ) - __field(loff_t, offset ) - __field(loff_t, count ) + __field(unsigned int, call) + __field(unsigned int, flags) + __field(loff_t, offset) + __field(loff_t, count) ), TP_fast_assign( @@ -977,10 +977,10 @@ TRACE_EVENT(afs_sent_data, TP_ARGS(call, msg, ret), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(int, ret ) - __field(loff_t, offset ) - __field(loff_t, count ) + __field(unsigned int, call) + __field(int, ret) + __field(loff_t, offset) + __field(loff_t, count) ), TP_fast_assign( @@ -1001,9 +1001,9 @@ TRACE_EVENT(afs_dir_check_failed, TP_ARGS(vnode, off, i_size), TP_STRUCT__entry( - __field(struct afs_vnode *, vnode ) - __field(loff_t, off ) - __field(loff_t, i_size ) + __field(struct afs_vnode *, vnode) + __field(loff_t, off) + __field(loff_t, i_size) ), TP_fast_assign( @@ -1022,11 +1022,11 @@ TRACE_EVENT(afs_folio_dirty, TP_ARGS(vnode, where, folio), TP_STRUCT__entry( - __field(struct afs_vnode *, vnode ) - __field(const char *, where ) - __field(pgoff_t, index ) - __field(unsigned long, from ) - __field(unsigned long, to ) + __field(struct afs_vnode *, vnode) + __field(const char *, where) + __field(pgoff_t, index) + __field(unsigned long, from) + __field(unsigned long, to) ), TP_fast_assign( @@ -1056,11 +1056,11 @@ TRACE_EVENT(afs_call_state, TP_ARGS(call, from, to, ret, remote_abort), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_call_state, from ) - __field(enum afs_call_state, to ) - __field(int, ret ) - __field(u32, abort ) + __field(unsigned int, call) + __field(enum afs_call_state, from) + __field(enum afs_call_state, to) + __field(int, ret) + __field(u32, abort) ), TP_fast_assign( @@ -1084,9 +1084,9 @@ TRACE_EVENT(afs_lookup, TP_ARGS(dvnode, name, fid), TP_STRUCT__entry( - __field_struct(struct afs_fid, dfid ) - __field_struct(struct afs_fid, fid ) - __array(char, name, 24 ) + __field_struct(struct afs_fid, dfid) + __field_struct(struct afs_fid, fid) + __array(char, name, 24) ), TP_fast_assign( @@ -1116,15 +1116,15 @@ TRACE_EVENT(afs_edit_dir, TP_ARGS(dvnode, why, op, block, slot, f_vnode, f_unique, name), TP_STRUCT__entry( - __field(unsigned int, vnode ) - __field(unsigned int, unique ) - __field(enum afs_edit_dir_reason, why ) - __field(enum afs_edit_dir_op, op ) - __field(unsigned int, block ) - __field(unsigned short, slot ) - __field(unsigned int, f_vnode ) - __field(unsigned int, f_unique ) - __array(char, name, 24 ) + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(enum afs_edit_dir_reason, why) + __field(enum afs_edit_dir_op, op) + __field(unsigned int, block) + __field(unsigned short, slot) + __field(unsigned int, f_vnode) + __field(unsigned int, f_unique) + __array(char, name, 24) ), TP_fast_assign( @@ -1157,8 +1157,8 @@ TRACE_EVENT(afs_protocol_error, TP_ARGS(call, cause), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(enum afs_eproto_cause, cause ) + __field(unsigned int, call) + __field(enum afs_eproto_cause, cause) ), TP_fast_assign( @@ -1177,9 +1177,9 @@ TRACE_EVENT(afs_io_error, TP_ARGS(call, error, where), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(int, error ) - __field(enum afs_io_error, where ) + __field(unsigned int, call) + __field(int, error) + __field(enum afs_io_error, where) ), TP_fast_assign( @@ -1199,9 +1199,9 @@ TRACE_EVENT(afs_file_error, TP_ARGS(vnode, error, where), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(int, error ) - __field(enum afs_file_error, where ) + __field_struct(struct afs_fid, fid) + __field(int, error) + __field(enum afs_file_error, where) ), TP_fast_assign( @@ -1222,9 +1222,9 @@ TRACE_EVENT(afs_cm_no_server, TP_ARGS(call, srx), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(unsigned int, op_id ) - __field_struct(struct sockaddr_rxrpc, srx ) + __field(unsigned int, call) + __field(unsigned int, op_id) + __field_struct(struct sockaddr_rxrpc, srx) ), TP_fast_assign( @@ -1243,9 +1243,9 @@ TRACE_EVENT(afs_cm_no_server_u, TP_ARGS(call, uuid), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(unsigned int, op_id ) - __field_struct(uuid_t, uuid ) + __field(unsigned int, call) + __field(unsigned int, op_id) + __field_struct(uuid_t, uuid) ), TP_fast_assign( @@ -1265,11 +1265,11 @@ TRACE_EVENT(afs_flock_ev, TP_ARGS(vnode, fl, event, error), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(enum afs_flock_event, event ) - __field(enum afs_lock_state, state ) - __field(int, error ) - __field(unsigned int, debug_id ) + __field_struct(struct afs_fid, fid) + __field(enum afs_flock_event, event) + __field(enum afs_lock_state, state) + __field(int, error) + __field(unsigned int, debug_id) ), TP_fast_assign( @@ -1295,13 +1295,13 @@ TRACE_EVENT(afs_flock_op, TP_ARGS(vnode, fl, op), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(loff_t, from ) - __field(loff_t, len ) - __field(enum afs_flock_operation, op ) - __field(unsigned char, type ) - __field(unsigned int, flags ) - __field(unsigned int, debug_id ) + __field_struct(struct afs_fid, fid) + __field(loff_t, from) + __field(loff_t, len) + __field(enum afs_flock_operation, op) + __field(unsigned char, type) + __field(unsigned int, flags) + __field(unsigned int, debug_id) ), TP_fast_assign( @@ -1328,7 +1328,7 @@ TRACE_EVENT(afs_reload_dir, TP_ARGS(vnode), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) + __field_struct(struct afs_fid, fid) ), TP_fast_assign( @@ -1345,8 +1345,8 @@ TRACE_EVENT(afs_silly_rename, TP_ARGS(vnode, done), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(bool, done ) + __field_struct(struct afs_fid, fid) + __field(bool, done) ), TP_fast_assign( @@ -1365,9 +1365,9 @@ TRACE_EVENT(afs_get_tree, TP_ARGS(cell, volume), TP_STRUCT__entry( - __field(u64, vid ) - __array(char, cell, 24 ) - __array(char, volume, 24 ) + __field(u64, vid) + __array(char, cell, 24) + __array(char, volume, 24) ), TP_fast_assign( @@ -1392,10 +1392,10 @@ TRACE_EVENT(afs_cb_break, TP_ARGS(fid, cb_break, reason, skipped), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(unsigned int, cb_break ) - __field(enum afs_cb_break_reason, reason ) - __field(bool, skipped ) + __field_struct(struct afs_fid, fid) + __field(unsigned int, cb_break) + __field(enum afs_cb_break_reason, reason) + __field(bool, skipped) ), TP_fast_assign( @@ -1418,8 +1418,8 @@ TRACE_EVENT(afs_cb_miss, TP_ARGS(fid, reason), TP_STRUCT__entry( - __field_struct(struct afs_fid, fid ) - __field(enum afs_cb_break_reason, reason ) + __field_struct(struct afs_fid, fid) + __field(enum afs_cb_break_reason, reason) ), TP_fast_assign( @@ -1439,10 +1439,10 @@ TRACE_EVENT(afs_server, TP_ARGS(server_debug_id, ref, active, reason), TP_STRUCT__entry( - __field(unsigned int, server ) - __field(int, ref ) - __field(int, active ) - __field(int, reason ) + __field(unsigned int, server) + __field(int, ref) + __field(int, active) + __field(int, reason) ), TP_fast_assign( @@ -1465,9 +1465,9 @@ TRACE_EVENT(afs_volume, TP_ARGS(vid, ref, reason), TP_STRUCT__entry( - __field(afs_volid_t, vid ) - __field(int, ref ) - __field(enum afs_volume_trace, reason ) + __field(afs_volid_t, vid) + __field(int, ref) + __field(enum afs_volume_trace, reason) ), TP_fast_assign( @@ -1489,10 +1489,10 @@ TRACE_EVENT(afs_cell, TP_ARGS(cell_debug_id, ref, active, reason), TP_STRUCT__entry( - __field(unsigned int, cell ) - __field(int, ref ) - __field(int, active ) - __field(int, reason ) + __field(unsigned int, cell) + __field(int, ref) + __field(int, active) + __field(int, reason) ), TP_fast_assign( -- cgit v1.2.3 From 2daa6404fd2f00985d5bfeb3c161f4630b46b6bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Feb 2023 15:24:24 +0000 Subject: afs: Automatically generate trace tag enums Automatically generate trace tag enums from the symbol -> string mapping tables rather than having the enums as well, thereby reducing duplicated data. Signed-off-by: David Howells cc: Marc Dionne cc: Jeff Layton cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org --- include/trace/events/afs.h | 233 ++++++--------------------------------------- 1 file changed, 27 insertions(+), 206 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index cfcd6452c156..597677acc6b1 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -18,97 +18,6 @@ #ifndef __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY -enum afs_call_trace { - afs_call_trace_alloc, - afs_call_trace_free, - afs_call_trace_get, - afs_call_trace_put, - afs_call_trace_wake, - afs_call_trace_work, -}; - -enum afs_server_trace { - afs_server_trace_alloc, - afs_server_trace_callback, - afs_server_trace_destroy, - afs_server_trace_free, - afs_server_trace_gc, - afs_server_trace_get_by_addr, - afs_server_trace_get_by_uuid, - afs_server_trace_get_caps, - afs_server_trace_get_install, - afs_server_trace_get_new_cbi, - afs_server_trace_get_probe, - afs_server_trace_give_up_cb, - afs_server_trace_purging, - afs_server_trace_put_call, - afs_server_trace_put_cbi, - afs_server_trace_put_find_rsq, - afs_server_trace_put_probe, - afs_server_trace_put_slist, - afs_server_trace_put_slist_isort, - afs_server_trace_put_uuid_rsq, - afs_server_trace_update, -}; - - -enum afs_volume_trace { - afs_volume_trace_alloc, - afs_volume_trace_free, - afs_volume_trace_get_alloc_sbi, - afs_volume_trace_get_cell_insert, - afs_volume_trace_get_new_op, - afs_volume_trace_get_query_alias, - afs_volume_trace_put_cell_dup, - afs_volume_trace_put_cell_root, - afs_volume_trace_put_destroy_sbi, - afs_volume_trace_put_free_fc, - afs_volume_trace_put_put_op, - afs_volume_trace_put_query_alias, - afs_volume_trace_put_validate_fc, - afs_volume_trace_remove, -}; - -enum afs_cell_trace { - afs_cell_trace_alloc, - afs_cell_trace_free, - afs_cell_trace_get_queue_dns, - afs_cell_trace_get_queue_manage, - afs_cell_trace_get_queue_new, - afs_cell_trace_get_vol, - afs_cell_trace_insert, - afs_cell_trace_manage, - afs_cell_trace_put_candidate, - afs_cell_trace_put_destroy, - afs_cell_trace_put_queue_fail, - afs_cell_trace_put_queue_work, - afs_cell_trace_put_vol, - afs_cell_trace_see_source, - afs_cell_trace_see_ws, - afs_cell_trace_unuse_alias, - afs_cell_trace_unuse_check_alias, - afs_cell_trace_unuse_delete, - afs_cell_trace_unuse_fc, - afs_cell_trace_unuse_lookup, - afs_cell_trace_unuse_mntpt, - afs_cell_trace_unuse_no_pin, - afs_cell_trace_unuse_parse, - afs_cell_trace_unuse_pin, - afs_cell_trace_unuse_probe, - afs_cell_trace_unuse_sbi, - afs_cell_trace_unuse_ws, - afs_cell_trace_use_alias, - afs_cell_trace_use_check_alias, - afs_cell_trace_use_fc, - afs_cell_trace_use_fc_alias, - afs_cell_trace_use_lookup, - afs_cell_trace_use_mntpt, - afs_cell_trace_use_pin, - afs_cell_trace_use_probe, - afs_cell_trace_use_sbi, - afs_cell_trace_wait, -}; - enum afs_fs_operation { afs_FS_FetchData = 130, /* AFS Fetch file data */ afs_FS_FetchACL = 131, /* AFS Fetch file ACL */ @@ -202,121 +111,6 @@ enum yfs_cm_operation { yfs_CB_CallBack = 64204, }; -enum afs_edit_dir_op { - afs_edit_dir_create, - afs_edit_dir_create_error, - afs_edit_dir_create_inval, - afs_edit_dir_create_nospc, - afs_edit_dir_delete, - afs_edit_dir_delete_error, - afs_edit_dir_delete_inval, - afs_edit_dir_delete_noent, -}; - -enum afs_edit_dir_reason { - afs_edit_dir_for_create, - afs_edit_dir_for_link, - afs_edit_dir_for_mkdir, - afs_edit_dir_for_rename_0, - afs_edit_dir_for_rename_1, - afs_edit_dir_for_rename_2, - afs_edit_dir_for_rmdir, - afs_edit_dir_for_silly_0, - afs_edit_dir_for_silly_1, - afs_edit_dir_for_symlink, - afs_edit_dir_for_unlink, -}; - -enum afs_eproto_cause { - afs_eproto_bad_status, - afs_eproto_cb_count, - afs_eproto_cb_fid_count, - afs_eproto_cellname_len, - afs_eproto_file_type, - afs_eproto_ibulkst_cb_count, - afs_eproto_ibulkst_count, - afs_eproto_motd_len, - afs_eproto_offline_msg_len, - afs_eproto_volname_len, - afs_eproto_yvl_fsendpt4_len, - afs_eproto_yvl_fsendpt6_len, - afs_eproto_yvl_fsendpt_num, - afs_eproto_yvl_fsendpt_type, - afs_eproto_yvl_vlendpt4_len, - afs_eproto_yvl_vlendpt6_len, - afs_eproto_yvl_vlendpt_type, -}; - -enum afs_io_error { - afs_io_error_cm_reply, - afs_io_error_extract, - afs_io_error_fs_probe_fail, - afs_io_error_vl_lookup_fail, - afs_io_error_vl_probe_fail, -}; - -enum afs_file_error { - afs_file_error_dir_bad_magic, - afs_file_error_dir_big, - afs_file_error_dir_missing_page, - afs_file_error_dir_name_too_long, - afs_file_error_dir_over_end, - afs_file_error_dir_small, - afs_file_error_dir_unmarked_ext, - afs_file_error_mntpt, - afs_file_error_writeback_fail, -}; - -enum afs_flock_event { - afs_flock_acquired, - afs_flock_callback_break, - afs_flock_defer_unlock, - afs_flock_extend_fail, - afs_flock_fail_other, - afs_flock_fail_perm, - afs_flock_no_lockers, - afs_flock_release_fail, - afs_flock_silly_delete, - afs_flock_timestamp, - afs_flock_try_to_lock, - afs_flock_vfs_lock, - afs_flock_vfs_locking, - afs_flock_waited, - afs_flock_waiting, - afs_flock_work_extending, - afs_flock_work_retry, - afs_flock_work_unlocking, - afs_flock_would_block, -}; - -enum afs_flock_operation { - afs_flock_op_copy_lock, - afs_flock_op_flock, - afs_flock_op_grant, - afs_flock_op_lock, - afs_flock_op_release_lock, - afs_flock_op_return_ok, - afs_flock_op_return_eagain, - afs_flock_op_return_edeadlk, - afs_flock_op_return_error, - afs_flock_op_set_lock, - afs_flock_op_unlock, - afs_flock_op_wake, -}; - -enum afs_cb_break_reason { - afs_cb_break_no_break, - afs_cb_break_no_promise, - afs_cb_break_for_callback, - afs_cb_break_for_deleted, - afs_cb_break_for_lapsed, - afs_cb_break_for_s_reinit, - afs_cb_break_for_unlink, - afs_cb_break_for_v_break, - afs_cb_break_for_volume_callback, - afs_cb_break_for_zap, -}; - #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -391,6 +185,7 @@ enum afs_cb_break_reason { EM(afs_cell_trace_unuse_fc, "UNU fc ") \ EM(afs_cell_trace_unuse_lookup, "UNU lookup") \ EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \ + EM(afs_cell_trace_unuse_no_pin, "UNU no-pin") \ EM(afs_cell_trace_unuse_parse, "UNU parse ") \ EM(afs_cell_trace_unuse_pin, "UNU pin ") \ EM(afs_cell_trace_unuse_probe, "UNU probe ") \ @@ -614,6 +409,32 @@ enum afs_cb_break_reason { EM(afs_cb_break_for_volume_callback, "break-v-cb") \ E_(afs_cb_break_for_zap, "break-zap") +/* + * Generate enums for tracing information. + */ +#ifndef __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY +#define __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY + +#undef EM +#undef E_ +#define EM(a, b) a, +#define E_(a, b) a + +enum afs_call_trace { afs_call_traces } __mode(byte); +enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte); +enum afs_cell_trace { afs_cell_traces } __mode(byte); +enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte); +enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte); +enum afs_eproto_cause { afs_eproto_causes } __mode(byte); +enum afs_file_error { afs_file_errors } __mode(byte); +enum afs_flock_event { afs_flock_events } __mode(byte); +enum afs_flock_operation { afs_flock_operations } __mode(byte); +enum afs_io_error { afs_io_errors } __mode(byte); +enum afs_server_trace { afs_server_traces } __mode(byte); +enum afs_volume_trace { afs_volume_traces } __mode(byte); + +#endif /* end __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY */ + /* * Export enum symbols via userspace. */ -- cgit v1.2.3 From 72904d7b9bfbf2dd146254edea93958bc35bbbfe Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Oct 2023 12:55:11 +0100 Subject: rxrpc, afs: Allow afs to pin rxrpc_peer objects Change rxrpc's API such that: (1) A new function, rxrpc_kernel_lookup_peer(), is provided to look up an rxrpc_peer record for a remote address and a corresponding function, rxrpc_kernel_put_peer(), is provided to dispose of it again. (2) When setting up a call, the rxrpc_peer object used during a call is now passed in rather than being set up by rxrpc_connect_call(). For afs, this meenat passing it to rxrpc_kernel_begin_call() rather than the full address (the service ID then has to be passed in as a separate parameter). (3) A new function, rxrpc_kernel_remote_addr(), is added so that afs can get a pointer to the transport address for display purposed, and another, rxrpc_kernel_remote_srx(), to gain a pointer to the full rxrpc address. (4) The function to retrieve the RTT from a call, rxrpc_kernel_get_srtt(), is then altered to take a peer. This now returns the RTT or -1 if there are insufficient samples. (5) Rename rxrpc_kernel_get_peer() to rxrpc_kernel_call_get_peer(). (6) Provide a new function, rxrpc_kernel_get_peer(), to get a ref on a peer the caller already has. This allows the afs filesystem to pin the rxrpc_peer records that it is using, allowing faster lookups and pointer comparisons rather than comparing sockaddr_rxrpc contents. It also makes it easier to get hold of the RTT. The following changes are made to afs: (1) The addr_list struct's addrs[] elements now hold a peer struct pointer and a service ID rather than a sockaddr_rxrpc. (2) When displaying the transport address, rxrpc_kernel_remote_addr() is used. (3) The port arg is removed from afs_alloc_addrlist() since it's always overridden. (4) afs_merge_fs_addr4() and afs_merge_fs_addr6() do peer lookup and may now return an error that must be handled. (5) afs_find_server() now takes a peer pointer to specify the address. (6) afs_find_server(), afs_compare_fs_alists() and afs_merge_fs_addr[46]{} now do peer pointer comparison rather than address comparison. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/addr_list.c | 125 +++++++++++++++++++++++-------------------- fs/afs/cmservice.c | 5 +- fs/afs/fs_probe.c | 11 ++-- fs/afs/internal.h | 26 ++++----- fs/afs/proc.c | 9 ++-- fs/afs/rotate.c | 6 +-- fs/afs/rxrpc.c | 10 ++-- fs/afs/server.c | 41 +++----------- fs/afs/vl_alias.c | 55 ++----------------- fs/afs/vl_list.c | 15 ++++-- fs/afs/vl_probe.c | 12 +++-- fs/afs/vl_rotate.c | 6 +-- fs/afs/vlclient.c | 22 +++++--- include/net/af_rxrpc.h | 15 ++++-- include/trace/events/rxrpc.h | 3 ++ net/rxrpc/af_rxrpc.c | 62 ++++++++++++++++++--- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_object.c | 17 +++--- net/rxrpc/peer_object.c | 58 ++++++++++++-------- net/rxrpc/sendmsg.c | 11 +++- 20 files changed, 273 insertions(+), 238 deletions(-) (limited to 'include') diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index ac05a59e9d46..519821f5aedc 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -13,26 +13,33 @@ #include "internal.h" #include "afs_fs.h" +static void afs_free_addrlist(struct rcu_head *rcu) +{ + struct afs_addr_list *alist = container_of(rcu, struct afs_addr_list, rcu); + unsigned int i; + + for (i = 0; i < alist->nr_addrs; i++) + rxrpc_kernel_put_peer(alist->addrs[i].peer); +} + /* * Release an address list. */ void afs_put_addrlist(struct afs_addr_list *alist) { if (alist && refcount_dec_and_test(&alist->usage)) - kfree_rcu(alist, rcu); + call_rcu(&alist->rcu, afs_free_addrlist); } /* * Allocate an address list. */ -struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, - unsigned short service, - unsigned short port) +struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, u16 service_id) { struct afs_addr_list *alist; unsigned int i; - _enter("%u,%u,%u", nr, service, port); + _enter("%u,%u", nr, service_id); if (nr > AFS_MAX_ADDRESSES) nr = AFS_MAX_ADDRESSES; @@ -44,16 +51,8 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, refcount_set(&alist->usage, 1); alist->max_addrs = nr; - for (i = 0; i < nr; i++) { - struct sockaddr_rxrpc *srx = &alist->addrs[i].srx; - srx->srx_family = AF_RXRPC; - srx->srx_service = service; - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin6); - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_port = htons(port); - } - + for (i = 0; i < nr; i++) + alist->addrs[i].service_id = service_id; return alist; } @@ -126,7 +125,7 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, if (!vllist->servers[0].server) goto error_vl; - alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); + alist = afs_alloc_addrlist(nr, service); if (!alist) goto error; @@ -197,9 +196,11 @@ struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, } if (family == AF_INET) - afs_merge_fs_addr4(alist, x[0], xport); + ret = afs_merge_fs_addr4(net, alist, x[0], xport); else - afs_merge_fs_addr6(alist, x, xport); + ret = afs_merge_fs_addr6(net, alist, x, xport); + if (ret < 0) + goto error; } while (p < end); @@ -271,25 +272,33 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry /* * Merge an IPv4 entry into a fileserver address list. */ -void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) +int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist, + __be32 xdr, u16 port) { - struct sockaddr_rxrpc *srx; - u32 addr = ntohl(xdr); + struct sockaddr_rxrpc srx; + struct rxrpc_peer *peer; int i; if (alist->nr_addrs >= alist->max_addrs) - return; + return 0; - for (i = 0; i < alist->nr_ipv4; i++) { - struct sockaddr_in *a = &alist->addrs[i].srx.transport.sin; - u32 a_addr = ntohl(a->sin_addr.s_addr); - u16 a_port = ntohs(a->sin_port); + srx.srx_family = AF_RXRPC; + srx.transport_type = SOCK_DGRAM; + srx.transport_len = sizeof(srx.transport.sin); + srx.transport.sin.sin_family = AF_INET; + srx.transport.sin.sin_port = htons(port); + srx.transport.sin.sin_addr.s_addr = xdr; - if (addr == a_addr && port == a_port) - return; - if (addr == a_addr && port < a_port) - break; - if (addr < a_addr) + peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); + if (!peer) + return -ENOMEM; + + for (i = 0; i < alist->nr_ipv4; i++) { + if (peer == alist->addrs[i].peer) { + rxrpc_kernel_put_peer(peer); + return 0; + } + if (peer <= alist->addrs[i].peer) break; } @@ -298,38 +307,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - srx = &alist->addrs[i].srx; - srx->srx_family = AF_RXRPC; - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.sin.sin_family = AF_INET; - srx->transport.sin.sin_port = htons(port); - srx->transport.sin.sin_addr.s_addr = xdr; + alist->addrs[i].peer = peer; alist->nr_ipv4++; alist->nr_addrs++; + return 0; } /* * Merge an IPv6 entry into a fileserver address list. */ -void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) +int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist, + __be32 *xdr, u16 port) { - struct sockaddr_rxrpc *srx; - int i, diff; + struct sockaddr_rxrpc srx; + struct rxrpc_peer *peer; + int i; if (alist->nr_addrs >= alist->max_addrs) - return; + return 0; - for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { - struct sockaddr_in6 *a = &alist->addrs[i].srx.transport.sin6; - u16 a_port = ntohs(a->sin6_port); + srx.srx_family = AF_RXRPC; + srx.transport_type = SOCK_DGRAM; + srx.transport_len = sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(port); + memcpy(&srx.transport.sin6.sin6_addr, xdr, 16); - diff = memcmp(xdr, &a->sin6_addr, 16); - if (diff == 0 && port == a_port) - return; - if (diff == 0 && port < a_port) - break; - if (diff < 0) + peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); + if (!peer) + return -ENOMEM; + + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + if (peer == alist->addrs[i].peer) { + rxrpc_kernel_put_peer(peer); + return 0; + } + if (peer <= alist->addrs[i].peer) break; } @@ -337,15 +350,9 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) memmove(alist->addrs + i + 1, alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - - srx = &alist->addrs[i].srx; - srx->srx_family = AF_RXRPC; - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin6); - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_port = htons(port); - memcpy(&srx->transport.sin6.sin6_addr, xdr, 16); + alist->addrs[i].peer = peer; alist->nr_addrs++; + return 0; } /* diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index d4ddb20d6732..99a3f20bc786 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -146,10 +146,11 @@ static int afs_find_cm_server_by_peer(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; + struct rxrpc_peer *peer; - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + peer = rxrpc_kernel_get_call_peer(call->net->socket, call->rxcall); - server = afs_find_server(call->net, &srx); + server = afs_find_server(call->net, peer); if (!server) { trace_afs_cm_no_server(call, &srx); return 0; diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 3dd24842f277..58d28b82571e 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -101,6 +101,7 @@ static void afs_fs_probe_not_done(struct afs_net *net, void afs_fileserver_probe_result(struct afs_call *call) { struct afs_addr_list *alist = call->alist; + struct afs_address *addr = &alist->addrs[call->addr_ix]; struct afs_server *server = call->server; unsigned int index = call->addr_ix; unsigned int rtt_us = 0, cap0; @@ -153,12 +154,12 @@ responded: if (call->service_id == YFS_FS_SERVICE) { server->probe.is_yfs = true; set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); - alist->addrs[index].srx.srx_service = call->service_id; + addr->service_id = call->service_id; } else { server->probe.not_yfs = true; if (!server->probe.is_yfs) { clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); - alist->addrs[index].srx.srx_service = call->service_id; + addr->service_id = call->service_id; } cap0 = ntohl(call->tmp); if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES) @@ -167,7 +168,7 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + rtt_us = rxrpc_kernel_get_srtt(addr->peer); if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; @@ -181,8 +182,8 @@ responded: out: spin_unlock(&server->probe_lock); - _debug("probe %pU [%u] %pISpc rtt=%u ret=%d", - &server->uuid, index, &alist->addrs[index].srx.transport, + _debug("probe %pU [%u] %pISpc rtt=%d ret=%d", + &server->uuid, index, rxrpc_kernel_remote_addr(alist->addrs[index].peer), rtt_us, ret); return afs_done_one_fs_probe(call->net, server); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e2adb314ab6a..ec08b4a7e499 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -72,6 +72,11 @@ enum afs_call_state { AFS_CALL_COMPLETE, /* Completed or failed */ }; +struct afs_address { + struct rxrpc_peer *peer; + u16 service_id; +}; + /* * List of server addresses. */ @@ -87,9 +92,7 @@ struct afs_addr_list { enum dns_lookup_status status:8; unsigned long failed; /* Mask of addrs that failed locally/ICMP */ unsigned long responded; /* Mask of addrs that responded */ - struct { - struct sockaddr_rxrpc srx; - } addrs[] __counted_by(max_addrs); + struct afs_address addrs[] __counted_by(max_addrs); #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) }; @@ -420,7 +423,7 @@ struct afs_vlserver { atomic_t probe_outstanding; spinlock_t probe_lock; struct { - unsigned int rtt; /* RTT in uS */ + unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */ u32 abort_code; short error; unsigned short flags; @@ -537,7 +540,7 @@ struct afs_server { atomic_t probe_outstanding; spinlock_t probe_lock; struct { - unsigned int rtt; /* RTT in uS */ + unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */ u32 abort_code; short error; bool responded:1; @@ -964,9 +967,7 @@ static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist refcount_inc(&alist->usage); return alist; } -extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, - unsigned short, - unsigned short); +extern struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, u16 service_id); extern void afs_put_addrlist(struct afs_addr_list *); extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, const char *, size_t, char, @@ -977,8 +978,10 @@ extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); -extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); -extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); +extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr, + __be32 xdr, u16 port); +extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr, + __be32 *xdr, u16 port); /* * callback.c @@ -1405,8 +1408,7 @@ extern void __exit afs_clean_up_permit_cache(void); */ extern spinlock_t afs_server_peer_lock; -extern struct afs_server *afs_find_server(struct afs_net *, - const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server(struct afs_net *, const struct rxrpc_peer *); extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32); extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index ab9cd986cfd9..8a65a06908d2 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -307,7 +307,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) for (i = 0; i < alist->nr_addrs; i++) seq_printf(m, " %c %pISpc\n", alist->preferred == i ? '>' : '-', - &alist->addrs[i].srx.transport); + rxrpc_kernel_remote_addr(alist->addrs[i].peer)); } seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt); seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n", @@ -398,9 +398,10 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n", alist->version, alist->responded, alist->failed); for (i = 0; i < alist->nr_addrs; i++) - seq_printf(m, " [%x] %pISpc%s\n", - i, &alist->addrs[i].srx.transport, - alist->preferred == i ? "*" : ""); + seq_printf(m, " [%x] %pISpc%s rtt=%d\n", + i, rxrpc_kernel_remote_addr(alist->addrs[i].peer), + alist->preferred == i ? "*" : "", + rxrpc_kernel_get_srtt(alist->addrs[i].peer)); return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 46081e5da6f5..59aed7a6dd11 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -113,7 +113,7 @@ bool afs_select_fileserver(struct afs_operation *op) struct afs_server *server; struct afs_vnode *vnode = op->file[0].vnode; struct afs_error e; - u32 rtt; + unsigned int rtt; int error = op->ac.error, i; _enter("%lx[%d],%lx[%d],%d,%d", @@ -420,7 +420,7 @@ pick_server: } op->index = -1; - rtt = U32_MAX; + rtt = UINT_MAX; for (i = 0; i < op->server_list->nr_servers; i++) { struct afs_server *s = op->server_list->servers[i].server; @@ -488,7 +488,7 @@ iterate_address: _debug("address [%u] %u/%u %pISp", op->index, op->ac.index, op->ac.alist->nr_addrs, - &op->ac.alist->addrs[op->ac.index].srx.transport); + rxrpc_kernel_remote_addr(op->ac.alist->addrs[op->ac.index].peer)); _leave(" = t"); return true; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 181317126e43..2603db03b7ff 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -296,7 +296,8 @@ static void afs_notify_end_request_tx(struct sock *sock, */ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) { - struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index].srx; + struct afs_address *addr = &ac->alist->addrs[ac->index]; + struct rxrpc_peer *peer = addr->peer; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; @@ -304,7 +305,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) s64 tx_total_len; int ret; - _enter(",{%pISp},", &srx->transport); + _enter(",{%pISp},", rxrpc_kernel_remote_addr(addr->peer)); ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); @@ -333,7 +334,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) } /* create a call */ - rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, + rxcall = rxrpc_kernel_begin_call(call->net->socket, peer, call->key, (unsigned long)call, tx_total_len, call->max_lifespan, @@ -341,6 +342,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) (call->async ? afs_wake_up_async_call : afs_wake_up_call_waiter), + addr->service_id, call->upgrade, (call->intr ? RXRPC_PREINTERRUPTIBLE : RXRPC_UNINTERRUPTIBLE), @@ -461,7 +463,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort) max = m + 1; pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n", msg, call->type->name, - &call->alist->addrs[call->addr_ix].srx.transport); + rxrpc_kernel_remote_addr(call->alist->addrs[call->addr_ix].peer)); } } diff --git a/fs/afs/server.c b/fs/afs/server.c index b8e2d211d4a1..5b5fa94005c9 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -21,13 +21,12 @@ static void __afs_put_server(struct afs_net *, struct afs_server *); /* * Find a server by one of its addresses. */ -struct afs_server *afs_find_server(struct afs_net *net, - const struct sockaddr_rxrpc *srx) +struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer *peer) { const struct afs_addr_list *alist; struct afs_server *server = NULL; unsigned int i; - int seq = 1, diff; + int seq = 1; rcu_read_lock(); @@ -38,37 +37,11 @@ struct afs_server *afs_find_server(struct afs_net *net, seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_addr_lock, &seq); - if (srx->transport.family == AF_INET6) { - const struct sockaddr_in6 *a = &srx->transport.sin6, *b; - hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { - alist = rcu_dereference(server->addresses); - for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { - b = &alist->addrs[i].srx.transport.sin6; - diff = ((u16 __force)a->sin6_port - - (u16 __force)b->sin6_port); - if (diff == 0) - diff = memcmp(&a->sin6_addr, - &b->sin6_addr, - sizeof(struct in6_addr)); - if (diff == 0) - goto found; - } - } - } else { - const struct sockaddr_in *a = &srx->transport.sin, *b; - hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { - alist = rcu_dereference(server->addresses); - for (i = 0; i < alist->nr_ipv4; i++) { - b = &alist->addrs[i].srx.transport.sin; - diff = ((u16 __force)a->sin_port - - (u16 __force)b->sin_port); - if (diff == 0) - diff = ((u32 __force)a->sin_addr.s_addr - - (u32 __force)b->sin_addr.s_addr); - if (diff == 0) - goto found; - } - } + hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + alist = rcu_dereference(server->addresses); + for (i = 0; i < alist->nr_addrs; i++) + if (alist->addrs[i].peer == peer) + goto found; } server = NULL; diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index d3c0df70a1a5..6fdf9f1bedc0 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -32,55 +32,6 @@ static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *k return volume; } -/* - * Compare two addresses. - */ -static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a, - const struct sockaddr_rxrpc *srx_b) -{ - short port_a, port_b; - int addr_a, addr_b, diff; - - diff = (short)srx_a->transport_type - (short)srx_b->transport_type; - if (diff) - goto out; - - switch (srx_a->transport_type) { - case AF_INET: { - const struct sockaddr_in *a = &srx_a->transport.sin; - const struct sockaddr_in *b = &srx_b->transport.sin; - addr_a = ntohl(a->sin_addr.s_addr); - addr_b = ntohl(b->sin_addr.s_addr); - diff = addr_a - addr_b; - if (diff == 0) { - port_a = ntohs(a->sin_port); - port_b = ntohs(b->sin_port); - diff = port_a - port_b; - } - break; - } - - case AF_INET6: { - const struct sockaddr_in6 *a = &srx_a->transport.sin6; - const struct sockaddr_in6 *b = &srx_b->transport.sin6; - diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16); - if (diff == 0) { - port_a = ntohs(a->sin6_port); - port_b = ntohs(b->sin6_port); - diff = port_a - port_b; - } - break; - } - - default: - WARN_ON(1); - diff = 1; - } - -out: - return diff; -} - /* * Compare the address lists of a pair of fileservers. */ @@ -94,9 +45,9 @@ static int afs_compare_fs_alists(const struct afs_server *server_a, lb = rcu_dereference(server_b->addresses); while (a < la->nr_addrs && b < lb->nr_addrs) { - const struct sockaddr_rxrpc *srx_a = &la->addrs[a].srx; - const struct sockaddr_rxrpc *srx_b = &lb->addrs[b].srx; - int diff = afs_compare_addrs(srx_a, srx_b); + unsigned long pa = (unsigned long)la->addrs[a].peer; + unsigned long pb = (unsigned long)lb->addrs[b].peer; + long diff = pa - pb; if (diff < 0) { a++; diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index acc48216136a..ba89140eee9e 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -83,14 +83,15 @@ static u16 afs_extract_le16(const u8 **_b) /* * Build a VL server address list from a DNS queried server list. */ -static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, +static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net, + const u8 **_b, const u8 *end, u8 nr_addrs, u16 port) { struct afs_addr_list *alist; const u8 *b = *_b; int ret = -EINVAL; - alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port); + alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE); if (!alist) return ERR_PTR(-ENOMEM); if (nr_addrs == 0) @@ -109,7 +110,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, goto error; } memcpy(x, b, 4); - afs_merge_fs_addr4(alist, x[0], port); + ret = afs_merge_fs_addr4(net, alist, x[0], port); + if (ret < 0) + goto error; b += 4; break; @@ -119,7 +122,9 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, goto error; } memcpy(x, b, 16); - afs_merge_fs_addr6(alist, x, port); + ret = afs_merge_fs_addr6(net, alist, x, port); + if (ret < 0) + goto error; b += 16; break; @@ -247,7 +252,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, /* Extract the addresses - note that we can't skip this as we * have to advance the payload pointer. */ - addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port); + addrs = afs_extract_vl_addrs(cell->net, &b, end, bs.nr_addrs, bs.port); if (IS_ERR(addrs)) { ret = PTR_ERR(addrs); goto error_2; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index bdd9372e3fb2..9551aef07cee 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -48,6 +48,7 @@ void afs_vlserver_probe_result(struct afs_call *call) { struct afs_addr_list *alist = call->alist; struct afs_vlserver *server = call->vlserver; + struct afs_address *addr = &alist->addrs[call->addr_ix]; unsigned int server_index = call->server_index; unsigned int rtt_us = 0; unsigned int index = call->addr_ix; @@ -106,16 +107,16 @@ responded: if (call->service_id == YFS_VL_SERVICE) { server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS; set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); - alist->addrs[index].srx.srx_service = call->service_id; + addr->service_id = call->service_id; } else { server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS; if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) { clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); - alist->addrs[index].srx.srx_service = call->service_id; + addr->service_id = call->service_id; } } - rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + rtt_us = rxrpc_kernel_get_srtt(addr->peer); if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; @@ -130,8 +131,9 @@ responded: out: spin_unlock(&server->probe_lock); - _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", - server_index, index, &alist->addrs[index].srx.transport, rtt_us, ret); + _debug("probe [%u][%u] %pISpc rtt=%d ret=%d", + server_index, index, rxrpc_kernel_remote_addr(addr->peer), + rtt_us, ret); afs_done_one_vl_probe(server, have_result); } diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index e52b9d4c8a0a..f8f255c966ae 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -92,7 +92,7 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) struct afs_addr_list *alist; struct afs_vlserver *vlserver; struct afs_error e; - u32 rtt; + unsigned int rtt; int error = vc->ac.error, i; _enter("%lx[%d],%lx[%d],%d,%d", @@ -194,7 +194,7 @@ pick_server: goto selected_server; vc->index = -1; - rtt = U32_MAX; + rtt = UINT_MAX; for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; @@ -249,7 +249,7 @@ iterate_address: _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); - _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].srx.transport); + _leave(" = t %pISpc", rxrpc_kernel_remote_addr(vc->ac.alist->addrs[vc->ac.index].peer)); return true; next_server: diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 00fca3c66ba6..41e7932d75c6 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -208,7 +208,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) count = ntohl(*bp); nentries = min(nentries, count); - alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT); + alist = afs_alloc_addrlist(nentries, FS_SERVICE); if (!alist) return -ENOMEM; alist->version = uniquifier; @@ -230,9 +230,13 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) alist = call->ret_alist; bp = call->buffer; count = min(call->count, 4U); - for (i = 0; i < count; i++) - if (alist->nr_addrs < call->count2) - afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); + for (i = 0; i < count; i++) { + if (alist->nr_addrs < call->count2) { + ret = afs_merge_fs_addr4(call->net, alist, *bp++, AFS_FS_PORT); + if (ret < 0) + return ret; + } + } call->count -= count; if (call->count > 0) @@ -450,7 +454,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (call->count > YFS_MAXENDPOINTS) return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num); - alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); + alist = afs_alloc_addrlist(call->count, FS_SERVICE); if (!alist) return -ENOMEM; alist->version = uniquifier; @@ -488,14 +492,18 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (ntohl(bp[0]) != sizeof(__be32) * 2) return afs_protocol_error( call, afs_eproto_yvl_fsendpt4_len); - afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); + ret = afs_merge_fs_addr4(call->net, alist, bp[1], ntohl(bp[2])); + if (ret < 0) + return ret; bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) return afs_protocol_error( call, afs_eproto_yvl_fsendpt6_len); - afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); + ret = afs_merge_fs_addr6(call->net, alist, bp + 1, ntohl(bp[5])); + if (ret < 0) + return ret; bp += 6; break; default: diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 5531dd08061e..0754c463224a 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +struct rxrpc_peer; enum rxrpc_abort_reason; enum rxrpc_interruptibility { @@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *, rxrpc_notify_new_call_t, rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, - struct sockaddr_rxrpc *srx, + struct rxrpc_peer *peer, struct key *key, unsigned long user_call_ID, s64 tx_total_len, u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, + u16 service_id, bool upgrade, enum rxrpc_interruptibility interruptibility, unsigned int debug_id); @@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call); void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call); -void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *); -bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *); +struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, + struct sockaddr_rxrpc *srx, gfp_t gfp); +void rxrpc_kernel_put_peer(struct rxrpc_peer *peer); +struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer); +struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call); +const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer); +const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer); +unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index f7e537f64db4..4c1ef7b3705c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -178,7 +178,9 @@ #define rxrpc_peer_traces \ EM(rxrpc_peer_free, "FREE ") \ EM(rxrpc_peer_get_accept, "GET accept ") \ + EM(rxrpc_peer_get_application, "GET app ") \ EM(rxrpc_peer_get_bundle, "GET bundle ") \ + EM(rxrpc_peer_get_call, "GET call ") \ EM(rxrpc_peer_get_client_conn, "GET cln-conn") \ EM(rxrpc_peer_get_input, "GET input ") \ EM(rxrpc_peer_get_input_error, "GET inpt-err") \ @@ -187,6 +189,7 @@ EM(rxrpc_peer_get_service_conn, "GET srv-conn") \ EM(rxrpc_peer_new_client, "NEW client ") \ EM(rxrpc_peer_new_prealloc, "NEW prealloc") \ + EM(rxrpc_peer_put_application, "PUT app ") \ EM(rxrpc_peer_put_bundle, "PUT bundle ") \ EM(rxrpc_peer_put_call, "PUT call ") \ EM(rxrpc_peer_put_conn, "PUT conn ") \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fa8aec78f63d..465bfe5eb061 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -258,16 +258,62 @@ static int rxrpc_listen(struct socket *sock, int backlog) return ret; } +/** + * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address + * @sock: The socket through which it will be accessed + * @srx: The network address + * @gfp: Allocation flags + * + * Lookup or create a remote transport endpoint record for the specified + * address and return it with a ref held. + */ +struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, + struct sockaddr_rxrpc *srx, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); + if (ret < 0) + return ERR_PTR(ret); + + return rxrpc_lookup_peer(rx->local, srx, gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_lookup_peer); + +/** + * rxrpc_kernel_get_peer - Get a reference on a peer + * @peer: The peer to get a reference on. + * + * Get a record for the remote peer in a call. + */ +struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer) +{ + return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference + * @peer: The peer to drop a ref on + */ +void rxrpc_kernel_put_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_peer(peer, rxrpc_peer_put_application); +} +EXPORT_SYMBOL(rxrpc_kernel_put_peer); + /** * rxrpc_kernel_begin_call - Allow a kernel service to begin a call * @sock: The socket on which to make the call - * @srx: The address of the peer to contact + * @peer: The peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use * @tx_total_len: Total length of data to transmit during the call (or -1) * @hard_timeout: The maximum lifespan of the call in sec * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue + * @service_id: The ID of the service to contact * @upgrade: Request service upgrade for call * @interruptibility: The call is interruptible, or can be canceled. * @debug_id: The debug ID for tracing to be assigned to the call @@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog) * supplying @srx and @key. */ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, - struct sockaddr_rxrpc *srx, + struct rxrpc_peer *peer, struct key *key, unsigned long user_call_ID, s64 tx_total_len, u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, + u16 service_id, bool upgrade, enum rxrpc_interruptibility interruptibility, unsigned int debug_id) @@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; _enter(",,%x,%lx", key_serial(key), user_call_ID); - ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); - if (ret < 0) - return ERR_PTR(ret); + if (WARN_ON_ONCE(peer->local != rx->local)) + return ERR_PTR(-EIO); lock_sock(&rx->sk); @@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = key; cp.security_level = rx->min_sec_level; cp.exclusive = false; cp.upgrade = upgrade; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); + cp.service_id = service_id; + call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e8e14c6f904d..8eea7a487380 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -364,6 +364,7 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Representation of remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -867,7 +868,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); void rxrpc_start_call_timer(struct rxrpc_call *call); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 773eecd1e979..beea25ac88f5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, * Allocate a new client call. */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, struct rxrpc_conn_parameters *cp, struct rxrpc_call_params *p, gfp_t gfp, @@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->dest_srx = *srx; + call->dest_srx = cp->peer->srx; + call->dest_srx.srx_service = cp->service_id; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; call->key = key_get(cp->key); + call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call); call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); call->security_level = cp->security_level; if (p->kernel) @@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); - if (!call->peer) - goto error; - ret = rxrpc_look_up_bundle(call, gfp); if (ret < 0) goto error; @@ -334,7 +331,6 @@ error: */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) @@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); + if (WARN_ON_ONCE(!cp->peer)) { + release_sock(&rx->sk); + return ERR_PTR(-EIO); + } + limiter = rxrpc_get_call_slot(p, gfp); if (!limiter) { release_sock(&rx->sk); return ERR_PTR(-ERESTARTSYS); } - call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id); + call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 8d7a715a0bb1..49dcda67a0d5 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -22,6 +22,8 @@ #include #include "ar-internal.h" +static const struct sockaddr_rxrpc rxrpc_null_addr; + /* * Hash a peer key. */ @@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) } /** - * rxrpc_kernel_get_peer - Get the peer address of a call + * rxrpc_kernel_get_call_peer - Get the peer address of a call * @sock: The socket on which the call is in progress. * @call: The call to query - * @_srx: Where to place the result * - * Get the address of the remote peer in a call. + * Get a record for the remote peer in a call. */ -void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *_srx) +struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { - *_srx = call->peer->srx; + return call->peer; } -EXPORT_SYMBOL(rxrpc_kernel_get_peer); +EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); /** * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT - * @sock: The socket on which the call is in progress. - * @call: The call to query - * @_srtt: Where to store the SRTT value. + * @peer: The peer to query * - * Get the call's peer smoothed RTT in uS. + * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples. */ -bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call, - u32 *_srtt) +unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { - struct rxrpc_peer *peer = call->peer; + return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX; +} +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); - if (peer->rtt_count == 0) { - *_srtt = 1000000; /* 1S */ - return false; - } +/** + * rxrpc_kernel_remote_srx - Get the address of a peer + * @peer: The peer to query + * + * Get a pointer to the address from a peer record. The caller is responsible + * for making sure that the address is not deallocated. + */ +const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer) +{ + return peer ? &peer->srx : &rxrpc_null_addr; +} +EXPORT_SYMBOL(rxrpc_kernel_remote_srx); - *_srtt = call->peer->srtt_us >> 3; - return true; +/** + * rxrpc_kernel_remote_addr - Get the peer transport address of a call + * @peer: The peer to query + * + * Get a pointer to the transport address from a peer record. The caller is + * responsible for making sure that the address is not deallocated. + */ +const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) +{ + return (const struct sockaddr *) + (peer ? &peer->srx.transport : &rxrpc_null_addr.transport); } -EXPORT_SYMBOL(rxrpc_kernel_get_srtt); +EXPORT_SYMBOL(rxrpc_kernel_remote_addr); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8e0b94714e84..5677d5690a02 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; + struct rxrpc_peer *peer; struct rxrpc_call *call; struct key *key; @@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, return ERR_PTR(-EDESTADDRREQ); } + peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL); + if (!peer) { + release_sock(&rx->sk); + return ERR_PTR(-ENOMEM); + } + key = rx->key; if (key && !rx->key->payload.data[0]) key = NULL; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = rx->key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ + rxrpc_put_peer(peer, rxrpc_peer_put_application); _leave(" = %p\n", call); return call; } -- cgit v1.2.3 From 1e5d8493254db9b28d4dce4fed87e56d9a2fefa5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Oct 2023 13:59:03 +0100 Subject: afs: Add a tracepoint for struct afs_addr_list Add a tracepoint to track the lifetime of the afs_addr_list struct. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/addr_list.c | 33 ++++++++++++++++++++++++++++---- fs/afs/fs_probe.c | 4 ++-- fs/afs/internal.h | 10 +++------- fs/afs/rotate.c | 4 ++-- fs/afs/rxrpc.c | 4 ++-- fs/afs/server.c | 9 +++++---- fs/afs/vl_list.c | 11 ++++++----- fs/afs/vl_rotate.c | 9 ++------- fs/afs/vlclient.c | 4 ++-- include/trace/events/afs.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 100 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index a1f3c995e328..41ef0c879239 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -20,17 +20,39 @@ static void afs_free_addrlist(struct rcu_head *rcu) for (i = 0; i < alist->nr_addrs; i++) rxrpc_kernel_put_peer(alist->addrs[i].peer); + trace_afs_alist(alist->debug_id, refcount_read(&alist->usage), afs_alist_trace_free); + kfree(alist); } /* * Release an address list. */ -void afs_put_addrlist(struct afs_addr_list *alist) +void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason) { - if (alist && refcount_dec_and_test(&alist->usage)) + unsigned int debug_id; + bool dead; + int r; + + if (!alist) + return; + debug_id = alist->debug_id; + dead = __refcount_dec_and_test(&alist->usage, &r); + trace_afs_alist(debug_id, r - 1, reason); + if (dead) call_rcu(&alist->rcu, afs_free_addrlist); } +struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason) +{ + int r; + + if (alist) { + __refcount_inc(&alist->usage, &r); + trace_afs_alist(alist->debug_id, r + 1, reason); + } + return alist; +} + /* * Allocate an address list. */ @@ -38,6 +60,7 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, u16 service_id) { struct afs_addr_list *alist; unsigned int i; + static atomic_t debug_id; _enter("%u,%u", nr, service_id); @@ -50,9 +73,11 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, u16 service_id) refcount_set(&alist->usage, 1); alist->max_addrs = nr; + alist->debug_id = atomic_inc_return(&debug_id); for (i = 0; i < nr; i++) alist->addrs[i].service_id = service_id; + trace_afs_alist(alist->debug_id, 1, afs_alist_trace_alloc); return alist; } @@ -217,7 +242,7 @@ bad_address: problem, p - text, (int)len, (int)len, text); ret = -EINVAL; error: - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_parse_error); error_vl: afs_put_vlserverlist(net, vllist); return ERR_PTR(ret); @@ -403,7 +428,7 @@ void afs_end_cursor(struct afs_addr_cursor *ac) ac->index != alist->preferred && test_bit(ac->alist->preferred, &ac->tried)) WRITE_ONCE(alist->preferred, ac->index); - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_end_cursor); ac->alist = NULL; } } diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index fbb91ad775b9..18891492c0b4 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -205,7 +205,7 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, read_lock(&server->fs_lock); ac.alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&server->fs_lock)); - afs_get_addrlist(ac.alist); + afs_get_addrlist(ac.alist, afs_alist_trace_get_probe); read_unlock(&server->fs_lock); server->probed_at = jiffies; @@ -226,7 +226,7 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, afs_fs_probe_not_done(net, server, &ac); } - afs_put_addrlist(ac.alist); + afs_put_addrlist(ac.alist, afs_alist_trace_put_probe); } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d67c75d4d2bd..d00fda99f401 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -85,6 +85,7 @@ struct afs_addr_list { struct rcu_head rcu; refcount_t usage; u32 version; /* Version */ + unsigned int debug_id; unsigned char max_addrs; unsigned char nr_addrs; unsigned char preferred; /* Preferred address */ @@ -969,14 +970,9 @@ static inline bool afs_is_folio_dirty_mmapped(unsigned long priv) /* * addr_list.c */ -static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist) -{ - if (alist) - refcount_inc(&alist->usage); - return alist; -} +struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason); extern struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, u16 service_id); -extern void afs_put_addrlist(struct afs_addr_list *); +extern void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason); extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, const char *, size_t, char, unsigned short, unsigned short); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index a778d53681fe..fa2ba45a5941 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -484,7 +484,7 @@ selected_server: read_lock(&server->fs_lock); alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&server->fs_lock)); - afs_get_addrlist(alist); + afs_get_addrlist(alist, afs_alist_trace_get_fsrotate_set); read_unlock(&server->fs_lock); retry_server: @@ -493,7 +493,7 @@ retry_server: if (!op->ac.alist) op->ac.alist = alist; else - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_retry_server); op->ac.index = -1; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 0b3e2f20b0e0..5bbf5a23af85 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -187,7 +187,7 @@ void afs_put_call(struct afs_call *call) call->type->destructor(call); afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); - afs_put_addrlist(call->alist); + afs_put_addrlist(call->alist, afs_alist_trace_put_call); kfree(call->request); trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, @@ -315,7 +315,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) atomic_read(&call->net->nr_outstanding_calls)); call->addr_ix = ac->index; - call->alist = afs_get_addrlist(ac->alist); + call->alist = afs_get_addrlist(ac->alist, afs_alist_trace_get_make_call); /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data diff --git a/fs/afs/server.c b/fs/afs/server.c index f7791ef13618..6c13f00b10d8 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -275,13 +275,13 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, candidate = afs_alloc_server(cell, uuid, alist); if (!candidate) { - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_server_oom); return ERR_PTR(-ENOMEM); } server = afs_install_server(cell, candidate); if (server != candidate) { - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_server_dup); kfree(candidate); } else { /* Immediately dispatch an asynchronous probe to each interface @@ -421,7 +421,8 @@ static void afs_server_rcu(struct rcu_head *rcu) trace_afs_server(server->debug_id, refcount_read(&server->ref), atomic_read(&server->active), afs_server_trace_free); - afs_put_addrlist(rcu_access_pointer(server->addresses)); + afs_put_addrlist(rcu_access_pointer(server->addresses), + afs_alist_trace_put_server); kfree(server); } @@ -643,7 +644,7 @@ static noinline bool afs_update_server_record(struct afs_operation *op, write_unlock(&server->fs_lock); } - afs_put_addrlist(discard); + afs_put_addrlist(discard, afs_alist_trace_put_server_update); _leave(" = t"); return true; } diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index ba89140eee9e..3a2875933261 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -33,7 +33,8 @@ static void afs_vlserver_rcu(struct rcu_head *rcu) { struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu); - afs_put_addrlist(rcu_access_pointer(vlserver->addresses)); + afs_put_addrlist(rcu_access_pointer(vlserver->addresses), + afs_alist_trace_put_vlserver); kfree_rcu(vlserver, rcu); } @@ -145,7 +146,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net, error: *_b = b; - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_parse_error); return ERR_PTR(ret); } @@ -260,7 +261,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, if (vllist->nr_servers >= nr_servers) { _debug("skip %u >= %u", vllist->nr_servers, nr_servers); - afs_put_addrlist(addrs); + afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty); afs_put_vlserver(cell->net, server); continue; } @@ -269,7 +270,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, addrs->status = bs.status; if (addrs->nr_addrs == 0) { - afs_put_addrlist(addrs); + afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty); if (!rcu_access_pointer(server->addresses)) { afs_put_vlserver(cell->net, server); continue; @@ -281,7 +282,7 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, old = rcu_replace_pointer(server->addresses, old, lockdep_is_held(&server->lock)); write_unlock(&server->lock); - afs_put_addrlist(old); + afs_put_addrlist(old, afs_alist_trace_put_vlserver_old); } diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 7ae73418697d..e8fbbeb551bb 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -231,16 +231,11 @@ selected_server: read_lock(&vlserver->lock); alist = rcu_dereference_protected(vlserver->addresses, lockdep_is_held(&vlserver->lock)); - afs_get_addrlist(alist); + afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set); read_unlock(&vlserver->lock); memset(&vc->ac, 0, sizeof(vc->ac)); - - if (!vc->ac.alist) - vc->ac.alist = alist; - else - afs_put_addrlist(alist); - + vc->ac.alist = alist; vc->ac.index = -1; iterate_address: diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index db7e94584e87..8dea7b56b75a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -314,7 +314,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, alist = call->ret_alist; afs_put_call(call); if (vc->call_error) { - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_getaddru); return ERR_PTR(vc->call_error); } return alist; @@ -668,7 +668,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, alist = call->ret_alist; afs_put_call(call); if (vc->call_error) { - afs_put_addrlist(alist); + afs_put_addrlist(alist, afs_alist_trace_put_getaddru); return ERR_PTR(vc->call_error); } return alist; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 597677acc6b1..ed91666ca4cc 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -202,6 +202,27 @@ enum yfs_cm_operation { EM(afs_cell_trace_use_sbi, "USE sbi ") \ E_(afs_cell_trace_wait, "WAIT ") +#define afs_alist_traces \ + EM(afs_alist_trace_alloc, "ALLOC ") \ + EM(afs_alist_trace_get_fsrotate_set, "GET fs-rot") \ + EM(afs_alist_trace_get_make_call, "GET mkcall") \ + EM(afs_alist_trace_get_probe, "GET probe ") \ + EM(afs_alist_trace_get_vlrotate_set, "GET vl-rot") \ + EM(afs_alist_trace_put_call, "PUT call ") \ + EM(afs_alist_trace_put_end_cursor, "PUT endcur") \ + EM(afs_alist_trace_put_getaddru, "PUT GtAdrU") \ + EM(afs_alist_trace_put_parse_empty, "PUT p-empt") \ + EM(afs_alist_trace_put_parse_error, "PUT p-err ") \ + EM(afs_alist_trace_put_probe, "PUT probe ") \ + EM(afs_alist_trace_put_retry_server, "PUT retry ") \ + EM(afs_alist_trace_put_server, "PUT server") \ + EM(afs_alist_trace_put_server_dup, "PUT sv-dup") \ + EM(afs_alist_trace_put_server_oom, "PUT sv-oom") \ + EM(afs_alist_trace_put_server_update, "PUT sv-upd") \ + EM(afs_alist_trace_put_vlserver, "PUT vlsrvr") \ + EM(afs_alist_trace_put_vlserver_old, "PUT vs-old") \ + E_(afs_alist_trace_free, "FREE ") + #define afs_fs_operations \ EM(afs_FS_FetchData, "FS.FetchData") \ EM(afs_FS_FetchStatus, "FS.FetchStatus") \ @@ -420,6 +441,7 @@ enum yfs_cm_operation { #define EM(a, b) a, #define E_(a, b) a +enum afs_alist_trace { afs_alist_traces } __mode(byte); enum afs_call_trace { afs_call_traces } __mode(byte); enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte); enum afs_cell_trace { afs_cell_traces } __mode(byte); @@ -443,6 +465,7 @@ enum afs_volume_trace { afs_volume_traces } __mode(byte); #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +afs_alist_traces; afs_call_traces; afs_server_traces; afs_cell_traces; @@ -1330,6 +1353,30 @@ TRACE_EVENT(afs_cell, __entry->active) ); +TRACE_EVENT(afs_alist, + TP_PROTO(unsigned int alist_debug_id, int ref, enum afs_alist_trace reason), + + TP_ARGS(alist_debug_id, ref, reason), + + TP_STRUCT__entry( + __field(unsigned int, alist) + __field(int, ref) + __field(int, active) + __field(int, reason) + ), + + TP_fast_assign( + __entry->alist = alist_debug_id; + __entry->ref = ref; + __entry->reason = reason; + ), + + TP_printk("AL=%08x %s r=%d", + __entry->alist, + __print_symbolic(__entry->reason, afs_alist_traces), + __entry->ref) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 98f9fda2057ba34b720c4d353351024d6dcee90f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Oct 2023 16:13:03 +0100 Subject: afs: Fold the afs_addr_cursor struct in Fold the afs_addr_cursor struct into the afs_operation struct and the afs_vl_cursor struct and fold its operations into their callers also. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/addr_list.c | 53 ---------------------------- fs/afs/fs_operation.c | 21 ++++++++--- fs/afs/fs_probe.c | 41 ++++++++++----------- fs/afs/fsclient.c | 31 ++++++++++------ fs/afs/internal.h | 58 +++++++++++++++--------------- fs/afs/rotate.c | 71 +++++++++++++++++++++++-------------- fs/afs/rxrpc.c | 13 ++----- fs/afs/server.c | 6 +--- fs/afs/vl_probe.c | 23 ++++++------ fs/afs/vl_rotate.c | 88 ++++++++++++++++++++++++++++++++++------------ fs/afs/vlclient.c | 34 ++++++++++-------- include/trace/events/afs.h | 18 +++++++--- 12 files changed, 243 insertions(+), 214 deletions(-) (limited to 'include') diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 032e6963c5d8..18c286efa3a5 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -375,56 +375,3 @@ int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist, alist->nr_addrs++; return 0; } - -/* - * Get an address to try. - */ -bool afs_iterate_addresses(struct afs_addr_cursor *ac) -{ - unsigned long set, failed; - int index; - - if (!ac->alist) - return false; - - set = ac->alist->responded; - failed = ac->alist->probe_failed; - _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index); - - ac->nr_iterations++; - - set &= ~(failed | ac->tried); - - if (!set) - return false; - - index = READ_ONCE(ac->alist->preferred); - if (test_bit(index, &set)) - goto selected; - - index = __ffs(set); - -selected: - ac->index = index; - set_bit(index, &ac->tried); - ac->call_responded = false; - return true; -} - -/* - * Release an address list cursor. - */ -void afs_end_cursor(struct afs_addr_cursor *ac) -{ - struct afs_addr_list *alist; - - alist = ac->alist; - if (alist) { - if (ac->call_responded && - ac->index != alist->preferred && - test_bit(ac->alist->preferred, &ac->tried)) - WRITE_ONCE(alist->preferred, ac->index); - afs_put_addrlist(alist, afs_alist_trace_put_end_cursor); - ac->alist = NULL; - } -} diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index cebe4fad8192..00e22259be36 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -179,6 +179,7 @@ void afs_wait_for_operation(struct afs_operation *op) _enter(""); while (afs_select_fileserver(op)) { + op->call_responded = false; op->call_error = 0; op->call_abort_code = 0; op->cb_s_break = op->server->cb_s_break; @@ -191,17 +192,19 @@ void afs_wait_for_operation(struct afs_operation *op) op->call_error = -ENOTSUPP; if (op->call) { - afs_wait_for_call_to_complete(op->call, &op->ac); + afs_wait_for_call_to_complete(op->call); op->call_abort_code = op->call->abort_code; op->call_error = op->call->error; op->call_responded = op->call->responded; - op->ac.call_responded = true; - WRITE_ONCE(op->ac.alist->addrs[op->ac.index].last_error, + WRITE_ONCE(op->alist->addrs[op->addr_index].last_error, op->call_error); afs_put_call(op->call); } } + if (op->call_responded) + set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags); + if (!afs_op_error(op)) { _debug("success"); op->ops->success(op); @@ -227,6 +230,7 @@ void afs_wait_for_operation(struct afs_operation *op) */ int afs_put_operation(struct afs_operation *op) { + struct afs_addr_list *alist; int i, ret = afs_op_error(op); _enter("op=%08x,%d", op->debug_id, ret); @@ -249,7 +253,16 @@ int afs_put_operation(struct afs_operation *op) kfree(op->more_files); } - afs_end_cursor(&op->ac); + alist = op->alist; + if (alist) { + if (op->call_responded && + op->addr_index != alist->preferred && + test_bit(alist->preferred, &op->addr_tried)) + WRITE_ONCE(alist->preferred, op->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_operation); + op->alist = NULL; + } + afs_put_serverlist(op->net, op->server_list); afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op); key_put(op->key); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 337673e65f87..aef16ac3f577 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -74,11 +74,9 @@ static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server */ static void afs_fs_probe_not_done(struct afs_net *net, struct afs_server *server, - struct afs_addr_cursor *ac) + struct afs_addr_list *alist, + int index) { - struct afs_addr_list *alist = ac->alist; - unsigned int index = ac->index; - _enter(""); trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); @@ -100,10 +98,10 @@ static void afs_fs_probe_not_done(struct afs_net *net, */ void afs_fileserver_probe_result(struct afs_call *call) { - struct afs_addr_list *alist = call->alist; - struct afs_address *addr = &alist->addrs[call->addr_ix]; + struct afs_addr_list *alist = call->probe_alist; + struct afs_address *addr = &alist->addrs[call->probe_index]; struct afs_server *server = call->server; - unsigned int index = call->addr_ix; + unsigned int index = call->probe_index; unsigned int rtt_us = 0, cap0; int ret = call->error; @@ -196,37 +194,36 @@ out: void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, struct key *key, bool all) { - struct afs_addr_cursor ac = { - .index = 0, - }; + struct afs_addr_list *alist; + unsigned int index; _enter("%pU", &server->uuid); read_lock(&server->fs_lock); - ac.alist = rcu_dereference_protected(server->addresses, - lockdep_is_held(&server->fs_lock)); - afs_get_addrlist(ac.alist, afs_alist_trace_get_probe); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(alist, afs_alist_trace_get_probe); read_unlock(&server->fs_lock); server->probed_at = jiffies; - atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); + atomic_set(&server->probe_outstanding, all ? alist->nr_addrs : 1); memset(&server->probe, 0, sizeof(server->probe)); server->probe.rtt = UINT_MAX; - ac.index = ac.alist->preferred; - if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) + index = alist->preferred; + if (index < 0 || index >= alist->nr_addrs) all = true; if (all) { - for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) - if (!afs_fs_get_capabilities(net, server, &ac, key)) - afs_fs_probe_not_done(net, server, &ac); + for (index = 0; index < alist->nr_addrs; index++) + if (!afs_fs_get_capabilities(net, server, alist, index, key)) + afs_fs_probe_not_done(net, server, alist, index); } else { - if (!afs_fs_get_capabilities(net, server, &ac, key)) - afs_fs_probe_not_done(net, server, &ac); + if (!afs_fs_get_capabilities(net, server, alist, index, key)) + afs_fs_probe_not_done(net, server, alist, index); } - afs_put_addrlist(ac.alist, afs_alist_trace_put_probe); + afs_put_addrlist(alist, afs_alist_trace_put_probe); } /* diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 2b64641b20a4..4f98b43b0dde 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1605,10 +1605,8 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { /* * Flush all the callbacks we have on a server. */ -int afs_fs_give_up_all_callbacks(struct afs_net *net, - struct afs_server *server, - struct afs_addr_cursor *ac, - struct key *key) +int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server, + struct afs_address *addr, struct key *key) { struct afs_call *call; __be32 *bp; @@ -1621,7 +1619,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, return -ENOMEM; call->key = key; - call->peer = rxrpc_kernel_get_peer(ac->alist->addrs[ac->index].peer); + call->peer = rxrpc_kernel_get_peer(addr->peer); call->service_id = server->service_id; /* marshall the parameters */ @@ -1629,9 +1627,11 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, *bp++ = htonl(FSGIVEUPALLCALLBACKS); call->server = afs_use_server(server, afs_server_trace_give_up_cb); - afs_make_call(ac, call, GFP_NOFS); - afs_wait_for_call_to_complete(call, ac); + afs_make_call(call, GFP_NOFS); + afs_wait_for_call_to_complete(call); ret = call->error; + if (call->responded) + set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); afs_put_call(call); return ret; } @@ -1695,6 +1695,12 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) return 0; } +static void afs_fs_get_capabilities_destructor(struct afs_call *call) +{ + afs_put_addrlist(call->probe_alist, afs_alist_trace_put_getcaps); + afs_flat_call_destructor(call); +} + /* * FS.GetCapabilities operation type */ @@ -1703,7 +1709,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, .done = afs_fileserver_probe_result, - .destructor = afs_flat_call_destructor, + .destructor = afs_fs_get_capabilities_destructor, }; /* @@ -1713,7 +1719,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { * ->done() - otherwise we return false to indicate we didn't even try. */ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, - struct afs_addr_cursor *ac, struct key *key) + struct afs_addr_list *alist, unsigned int addr_index, + struct key *key) { struct afs_call *call; __be32 *bp; @@ -1726,7 +1733,9 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, call->key = key; call->server = afs_use_server(server, afs_server_trace_get_caps); - call->peer = rxrpc_kernel_get_peer(ac->alist->addrs[ac->index].peer); + call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer); + call->probe_alist = afs_get_addrlist(alist, afs_alist_trace_get_getcaps); + call->probe_index = addr_index; call->service_id = server->service_id; call->upgrade = true; call->async = true; @@ -1737,7 +1746,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, *bp++ = htonl(FSGETCAPABILITIES); trace_afs_make_fs_call(call, NULL); - afs_make_call(ac, call, GFP_NOFS); + afs_make_call(call, GFP_NOFS); afs_put_call(call); return true; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3a2aa2af072a..ae33dd8ae49b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -102,7 +102,6 @@ struct afs_addr_list { */ struct afs_call { const struct afs_call_type *type; /* type of call */ - struct afs_addr_list *alist; /* Address is alist[addr_ix] */ wait_queue_head_t waitq; /* processes awaiting completion */ struct work_struct async_work; /* async I/O processor */ struct work_struct work; /* actual work processor */ @@ -123,6 +122,10 @@ struct afs_call { }; void *buffer; /* reply receive buffer */ union { + struct { + struct afs_addr_list *probe_alist; + unsigned char probe_index; /* Address in ->probe_alist */ + }; struct afs_addr_list *ret_alist; struct afs_vldb_entry *ret_vldb; char *ret_str; @@ -139,7 +142,6 @@ struct afs_call { unsigned reply_max; /* maximum size of reply */ unsigned count2; /* count used in unmarshalling */ unsigned char unmarshall; /* unmarshalling phase */ - unsigned char addr_ix; /* Address in ->alist */ bool drop_ref; /* T if need to drop ref for incoming call */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ @@ -729,31 +731,23 @@ struct afs_error { bool aborted; /* T if ->error is from an abort */ }; -/* - * Cursor for iterating over a server's address list. - */ -struct afs_addr_cursor { - struct afs_addr_list *alist; /* Current address list (pins ref) */ - unsigned long tried; /* Tried addresses */ - signed char index; /* Current address */ - unsigned short nr_iterations; /* Number of address iterations */ - bool call_responded; -}; - /* * Cursor for iterating over a set of volume location servers. */ struct afs_vl_cursor { - struct afs_addr_cursor ac; struct afs_cell *cell; /* The cell we're querying */ struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ struct afs_vlserver *server; /* Server on which this resides */ + struct afs_addr_list *alist; /* Current address list (pins ref) */ struct key *key; /* Key for the server */ unsigned long untried_servers; /* Bitmask of untried servers */ + unsigned long addr_tried; /* Tried addresses */ struct afs_error cumul_error; /* Cumulative error */ + unsigned int debug_id; s32 call_abort_code; short call_error; /* Error from single call */ short server_index; /* Current server */ + signed char addr_index; /* Current address */ unsigned short flags; #define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ #define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ @@ -812,8 +806,6 @@ struct afs_operation { struct timespec64 ctime; /* Change time to set */ struct afs_error cumul_error; /* Cumulative error */ short nr_files; /* Number of entries in file[], more_files */ - short call_error; /* Error from single call */ - s32 call_abort_code; /* Abort code from single call */ unsigned int debug_id; unsigned int cb_v_break; /* Volume break counter before op */ @@ -862,16 +854,19 @@ struct afs_operation { }; /* Fileserver iteration state */ - struct afs_addr_cursor ac; struct afs_server_list *server_list; /* Current server list (pins ref) */ struct afs_server *server; /* Server we're using (ref pinned by server_list) */ + struct afs_addr_list *alist; /* Current address list (pins ref) */ struct afs_call *call; unsigned long untried_servers; /* Bitmask of untried servers */ + unsigned long addr_tried; /* Tried addresses */ + s32 call_abort_code; /* Abort code from single call */ + short call_error; /* Error from single call */ short server_index; /* Current server */ short nr_iterations; /* Number of server iterations */ + signed char addr_index; /* Current address */ bool call_responded; /* T if the current address responded */ - unsigned int flags; #define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */ #define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */ @@ -981,8 +976,6 @@ extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, bool afs_addr_list_same(const struct afs_addr_list *a, const struct afs_addr_list *b); extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); -extern bool afs_iterate_addresses(struct afs_addr_cursor *); -extern void afs_end_cursor(struct afs_addr_cursor *ac); extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr, __be32 xdr, u16 port); @@ -1123,10 +1116,11 @@ extern void afs_fs_get_volume_status(struct afs_operation *); extern void afs_fs_set_lock(struct afs_operation *); extern void afs_fs_extend_lock(struct afs_operation *); extern void afs_fs_release_lock(struct afs_operation *); -extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, - struct afs_addr_cursor *, struct key *); -extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *, - struct afs_addr_cursor *, struct key *); +int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server, + struct afs_address *addr, struct key *key); +bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, + struct afs_addr_list *alist, unsigned int addr_index, + struct key *key); extern void afs_fs_inline_bulk_status(struct afs_operation *); struct afs_acl { @@ -1306,8 +1300,8 @@ extern int __net_init afs_open_socket(struct afs_net *); extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); -extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t); -void afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor *ac); +void afs_make_call(struct afs_call *call, gfp_t gfp); +void afs_wait_for_call_to_complete(struct afs_call *call); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, size_t, size_t); @@ -1325,9 +1319,9 @@ static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *c call->op = op; call->key = op->key; call->intr = !(op->flags & AFS_OPERATION_UNINTR); - call->peer = rxrpc_kernel_get_peer(op->ac.alist->addrs[op->ac.index].peer); + call->peer = rxrpc_kernel_get_peer(op->alist->addrs[op->addr_index].peer); call->service_id = op->server->service_id; - afs_make_call(&op->ac, call, gfp); + afs_make_call(call, gfp); } static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) @@ -1493,8 +1487,12 @@ extern void afs_fs_exit(void); extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, const char *, int); extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); -extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, - struct key *, struct afs_vlserver *, unsigned int); +struct afs_call *afs_vl_get_capabilities(struct afs_net *net, + struct afs_addr_list *alist, + unsigned int addr_index, + struct key *key, + struct afs_vlserver *server, + unsigned int server_index); extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 6c1aa9bafc82..a6bda8f44c0f 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -109,19 +109,20 @@ static bool afs_sleep_and_retry(struct afs_operation *op) */ bool afs_select_fileserver(struct afs_operation *op) { - struct afs_addr_list *alist; + struct afs_addr_list *alist = op->alist; struct afs_server *server; struct afs_vnode *vnode = op->file[0].vnode; + unsigned long set; unsigned int rtt; s32 abort_code = op->call_abort_code; - int error = op->call_error, i; + int error = op->call_error, addr_index, i; op->nr_iterations++; - _enter("OP=%x+%x,%llx,%lx[%d],%lx[%d],%d,%d", + _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d", op->debug_id, op->nr_iterations, op->volume->vid, - op->untried_servers, op->server_index, - op->ac.tried, op->ac.index, + op->server_index, op->untried_servers, + op->addr_index, op->addr_tried, error, abort_code); if (op->flags & AFS_OPERATION_STOP) { @@ -398,12 +399,14 @@ bool afs_select_fileserver(struct afs_operation *op) restart_from_beginning: _debug("restart"); - afs_end_cursor(&op->ac); + afs_put_addrlist(alist, afs_alist_trace_put_restart_rotate); + alist = op->alist = NULL; op->server = NULL; afs_put_serverlist(op->net, op->server_list); op->server_list = NULL; start: _debug("start"); + ASSERTCMP(alist, ==, NULL); /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ @@ -420,6 +423,7 @@ start: pick_server: _debug("pick [%lx]", op->untried_servers); + ASSERTCMP(alist, ==, NULL); error = afs_wait_for_fs_probes(op->server_list, op->untried_servers); if (error < 0) { @@ -463,7 +467,7 @@ selected_server: * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. */ - ASSERTCMP(op->ac.alist, ==, NULL); + ASSERTCMP(alist, ==, NULL); server = op->server_list->servers[op->server_index].server; if (!afs_check_server_record(op, server)) @@ -484,32 +488,34 @@ selected_server: read_lock(&server->fs_lock); alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&server->fs_lock)); - afs_get_addrlist(alist, afs_alist_trace_get_fsrotate_set); + op->alist = afs_get_addrlist(alist, afs_alist_trace_get_fsrotate_set); read_unlock(&server->fs_lock); retry_server: - memset(&op->ac, 0, sizeof(op->ac)); - - if (!op->ac.alist) - op->ac.alist = alist; - else - afs_put_addrlist(alist, afs_alist_trace_put_retry_server); - - op->ac.index = -1; + op->addr_tried = 0; + op->addr_index = -1; iterate_address: - ASSERT(op->ac.alist); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - if (!afs_iterate_addresses(&op->ac)) + set = READ_ONCE(alist->responded); + set &= ~(READ_ONCE(alist->probe_failed) | op->addr_tried); + if (!set) goto out_of_addresses; - _debug("address [%u] %u/%u %pISp", - op->server_index, op->ac.index, op->ac.alist->nr_addrs, - rxrpc_kernel_remote_addr(op->ac.alist->addrs[op->ac.index].peer)); + addr_index = READ_ONCE(alist->preferred); + if (!test_bit(addr_index, &set)) + addr_index = __ffs(set); + + op->addr_index = addr_index; + set_bit(addr_index, &op->addr_tried); + op->alist = alist; op->call_responded = false; + _debug("address [%u] %u/%u %pISp", + op->server_index, addr_index, alist->nr_addrs, + rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer)); _leave(" = t"); return true; @@ -519,7 +525,6 @@ out_of_addresses: */ afs_probe_fileserver(op->net, op->server); if (op->flags & AFS_OPERATION_RETRY_SERVER) { - alist = op->ac.alist; error = afs_wait_for_one_fs_probe( op->server, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { @@ -537,7 +542,13 @@ out_of_addresses: next_server: _debug("next"); - afs_end_cursor(&op->ac); + ASSERT(alist); + if (op->call_responded && + op->addr_index != READ_ONCE(alist->preferred) && + test_bit(alist->preferred, &op->addr_tried)) + WRITE_ONCE(alist->preferred, op->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_next_server); + alist = op->alist = NULL; goto pick_server; no_more_servers: @@ -557,7 +568,14 @@ no_more_servers: failed: op->flags |= AFS_OPERATION_STOP; - afs_end_cursor(&op->ac); + if (alist) { + if (op->call_responded && + op->addr_index != READ_ONCE(alist->preferred) && + test_bit(alist->preferred, &op->addr_tried)) + WRITE_ONCE(alist->preferred, op->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_op_failed); + op->alist = NULL; + } _leave(" = f [failed %d]", afs_op_error(op)); return false; } @@ -602,13 +620,12 @@ void afs_dump_edestaddrreq(const struct afs_operation *op) a->preferred); pr_notice("FC: - R=%lx F=%lx\n", a->responded, a->probe_failed); - if (a == op->ac.alist) + if (a == op->alist) pr_notice("FC: - current\n"); } } } - pr_notice("AC: t=%lx ax=%u ni=%u\n", - op->ac.tried, op->ac.index, op->ac.nr_iterations); + pr_notice("AC: t=%lx ax=%u\n", op->addr_tried, op->addr_index); rcu_read_unlock(); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index e8490b3e9d37..81013bc8bbfd 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -189,7 +189,6 @@ void afs_put_call(struct afs_call *call) call->type->destructor(call); afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); - afs_put_addrlist(call->alist, afs_alist_trace_put_call); kfree(call->request); trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, @@ -296,7 +295,7 @@ static void afs_notify_end_request_tx(struct sock *sock, * Initiate a call and synchronously queue up the parameters for dispatch. Any * error is stored into the call struct, which the caller must check for. */ -void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) +void afs_make_call(struct afs_call *call, gfp_t gfp) { struct rxrpc_call *rxcall; struct msghdr msg; @@ -314,9 +313,6 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) call, call->type->name, key_serial(call->key), atomic_read(&call->net->nr_outstanding_calls)); - call->addr_ix = ac->index; - call->alist = afs_get_addrlist(ac->alist, afs_alist_trace_get_make_call); - /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data * after the initial fixed part. @@ -392,7 +388,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) /* Note that at this point, we may have received the reply or an abort * - and an asynchronous call may already have completed. * - * afs_wait_for_call_to_complete(call, ac) + * afs_wait_for_call_to_complete(call) * must be called to synchronously clean up. */ return; @@ -577,7 +573,7 @@ call_complete: /* * Wait synchronously for a call to complete. */ -void afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor *ac) +void afs_wait_for_call_to_complete(struct afs_call *call) { bool rxrpc_complete = false; @@ -627,9 +623,6 @@ void afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor afs_set_call_complete(call, -EINTR, 0); } } - - if (call->error == 0 || call->error == -ECONNABORTED) - call->responded = true; } /* diff --git a/fs/afs/server.c b/fs/afs/server.c index e2c7f65eea33..62d453365689 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -436,12 +436,8 @@ static void __afs_put_server(struct afs_net *net, struct afs_server *server) static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server) { struct afs_addr_list *alist = rcu_access_pointer(server->addresses); - struct afs_addr_cursor ac = { - .alist = alist, - .index = alist->preferred, - }; - afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + afs_fs_give_up_all_callbacks(net, server, &alist->addrs[alist->preferred], NULL); } /* diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index d9a99ba9fc78..f868ae5d40e5 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -46,12 +46,12 @@ static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up) */ void afs_vlserver_probe_result(struct afs_call *call) { - struct afs_addr_list *alist = call->alist; + struct afs_addr_list *alist = call->probe_alist; struct afs_vlserver *server = call->vlserver; - struct afs_address *addr = &alist->addrs[call->addr_ix]; + struct afs_address *addr = &alist->addrs[call->probe_index]; unsigned int server_index = call->server_index; unsigned int rtt_us = 0; - unsigned int index = call->addr_ix; + unsigned int index = call->probe_index; bool have_result = false; int ret = call->error; @@ -148,25 +148,25 @@ static bool afs_do_probe_vlserver(struct afs_net *net, unsigned int server_index, struct afs_error *_e) { - struct afs_addr_cursor ac = { - .index = 0, - }; + struct afs_addr_list *alist; struct afs_call *call; + unsigned int index; bool in_progress = false; _enter("%s", server->name); read_lock(&server->lock); - ac.alist = rcu_dereference_protected(server->addresses, - lockdep_is_held(&server->lock)); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->lock)); + afs_get_addrlist(alist, afs_alist_trace_get_vlprobe); read_unlock(&server->lock); - atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + atomic_set(&server->probe_outstanding, alist->nr_addrs); memset(&server->probe, 0, sizeof(server->probe)); server->probe.rtt = UINT_MAX; - for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { - call = afs_vl_get_capabilities(net, &ac, key, server, + for (index = 0; index < alist->nr_addrs; index++) { + call = afs_vl_get_capabilities(net, alist, index, key, server, server_index); if (!IS_ERR(call)) { afs_prioritise_error(_e, call->error, call->abort_code); @@ -178,6 +178,7 @@ static bool afs_do_probe_vlserver(struct afs_net *net, } } + afs_put_addrlist(alist, afs_alist_trace_put_vlprobe); return in_progress; } diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index f895eb94129e..91168528179c 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -17,6 +17,8 @@ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, struct key *key) { + static atomic_t debug_ids; + memset(vc, 0, sizeof(*vc)); vc->cell = cell; vc->key = key; @@ -29,6 +31,7 @@ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cel return false; } + vc->debug_id = atomic_inc_return(&debug_ids); return true; } @@ -89,17 +92,18 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) */ bool afs_select_vlserver(struct afs_vl_cursor *vc) { - struct afs_addr_list *alist; + struct afs_addr_list *alist = vc->alist; struct afs_vlserver *vlserver; + unsigned long set, failed; unsigned int rtt; s32 abort_code = vc->call_abort_code; int error = vc->call_error, i; vc->nr_iterations++; - _enter("%lx[%d],%lx[%d],%d,%d", - vc->untried_servers, vc->server_index, - vc->ac.tried, vc->ac.index, + _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d", + vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers, + vc->addr_index, vc->addr_tried, error, abort_code); if (vc->flags & AFS_VL_CURSOR_STOP) { @@ -165,7 +169,13 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) restart_from_beginning: _debug("restart"); - afs_end_cursor(&vc->ac); + if (vc->call_responded && + vc->addr_index != vc->alist->preferred && + test_bit(alist->preferred, &vc->addr_tried)) + WRITE_ONCE(alist->preferred, vc->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart); + alist = vc->alist = NULL; + afs_put_vlserverlist(vc->cell->net, vc->server_list); vc->server_list = NULL; if (vc->flags & AFS_VL_CURSOR_RETRIED) @@ -173,6 +183,7 @@ restart_from_beginning: vc->flags |= AFS_VL_CURSOR_RETRIED; start: _debug("start"); + ASSERTCMP(alist, ==, NULL); if (!afs_start_vl_iteration(vc)) goto failed; @@ -185,6 +196,7 @@ start: pick_server: _debug("pick [%lx]", vc->untried_servers); + ASSERTCMP(alist, ==, NULL); error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers); if (error < 0) { @@ -222,7 +234,6 @@ selected_server: * check it, find its address list and probe its capabilities before we * use it. */ - ASSERTCMP(vc->ac.alist, ==, NULL); vlserver = vc->server_list->servers[vc->server_index].server; vc->server = vlserver; @@ -231,30 +242,48 @@ selected_server: read_lock(&vlserver->lock); alist = rcu_dereference_protected(vlserver->addresses, lockdep_is_held(&vlserver->lock)); - afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set); + vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set); read_unlock(&vlserver->lock); - memset(&vc->ac, 0, sizeof(vc->ac)); - vc->ac.alist = alist; - vc->ac.index = -1; + vc->addr_tried = 0; + vc->addr_index = -1; iterate_address: - ASSERT(vc->ac.alist); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - if (!afs_iterate_addresses(&vc->ac)) + set = READ_ONCE(alist->responded); + failed = READ_ONCE(alist->probe_failed); + vc->addr_index = READ_ONCE(alist->preferred); + + _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index); + + set &= ~(failed | vc->addr_tried); + + if (!set) goto next_server; - _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + if (!test_bit(vc->addr_index, &set)) + vc->addr_index = __ffs(set); + + set_bit(vc->addr_index, &vc->addr_tried); + vc->alist = alist; + + _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs); vc->call_responded = false; - _leave(" = t %pISpc", rxrpc_kernel_remote_addr(vc->ac.alist->addrs[vc->ac.index].peer)); + _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer)); return true; next_server: _debug("next"); - afs_end_cursor(&vc->ac); + ASSERT(alist); + if (vc->call_responded && + vc->addr_index != alist->preferred && + test_bit(alist->preferred, &vc->addr_tried)) + WRITE_ONCE(alist->preferred, vc->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next); + alist = vc->alist = NULL; goto pick_server; no_more_servers: @@ -274,8 +303,15 @@ no_more_servers: } failed: + if (alist) { + if (vc->call_responded && + vc->addr_index != alist->preferred && + test_bit(alist->preferred, &vc->addr_tried)) + WRITE_ONCE(alist->preferred, vc->addr_index); + afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail); + alist = vc->alist = NULL; + } vc->flags |= AFS_VL_CURSOR_STOP; - afs_end_cursor(&vc->ac); _leave(" = f [failed %d]", vc->cumul_error.error); return false; } @@ -299,8 +335,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) pr_notice("DNS: src=%u st=%u lc=%x\n", cell->dns_source, cell->dns_status, cell->dns_lookup_count); pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", - vc->untried_servers, vc->server_index, vc->nr_iterations, vc->flags, - vc->cumul_error.error); + vc->untried_servers, vc->server_index, vc->nr_iterations, + vc->flags, vc->cumul_error.error); pr_notice("VC: call er=%d ac=%d r=%u\n", vc->call_error, vc->call_abort_code, vc->call_responded); @@ -320,14 +356,13 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) a->preferred); pr_notice("VC: - R=%lx F=%lx\n", a->responded, a->probe_failed); - if (a == vc->ac.alist) + if (a == vc->alist) pr_notice("VC: - current\n"); } } } - pr_notice("AC: t=%lx ax=%u ni=%u\n", - vc->ac.tried, vc->ac.index, vc->ac.nr_iterations); + pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index); rcu_read_unlock(); } @@ -338,6 +373,8 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc) { struct afs_net *net = vc->cell->net; + _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations); + switch (vc->cumul_error.error) { case -EDESTADDRREQ: case -EADDRNOTAVAIL: @@ -347,7 +384,14 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc) break; } - afs_end_cursor(&vc->ac); + if (vc->alist) { + if (vc->call_responded && + vc->addr_index != vc->alist->preferred && + test_bit(vc->alist->preferred, &vc->addr_tried)) + WRITE_ONCE(vc->alist->preferred, vc->addr_index); + afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end); + vc->alist = NULL; + } afs_put_vlserverlist(net, vc->server_list); return vc->cumul_error.error; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 4bf98a38c3a1..39a0b7614d05 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -149,7 +149,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, call->key = vc->key; call->ret_vldb = entry; call->max_lifespan = AFS_VL_MAX_LIFESPAN; - call->peer = rxrpc_kernel_get_peer(vc->ac.alist->addrs[vc->ac.index].peer); + call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer); call->service_id = vc->server->service_id; /* Marshall the parameters */ @@ -161,8 +161,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, memset((void *)bp + volnamesz, 0, padsz); trace_afs_make_vl_call(call); - afs_make_call(&vc->ac, call, GFP_KERNEL); - afs_wait_for_call_to_complete(call, &vc->ac); + afs_make_call(call, GFP_KERNEL); + afs_wait_for_call_to_complete(call); vc->call_abort_code = call->abort_code; vc->call_error = call->error; vc->call_responded = call->responded; @@ -290,7 +290,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, call->key = vc->key; call->ret_alist = NULL; call->max_lifespan = AFS_VL_MAX_LIFESPAN; - call->peer = rxrpc_kernel_get_peer(vc->ac.alist->addrs[vc->ac.index].peer); + call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer); call->service_id = vc->server->service_id; /* Marshall the parameters */ @@ -310,8 +310,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); - afs_make_call(&vc->ac, call, GFP_KERNEL); - afs_wait_for_call_to_complete(call, &vc->ac); + afs_make_call(call, GFP_KERNEL); + afs_wait_for_call_to_complete(call); vc->call_abort_code = call->abort_code; vc->call_error = call->error; vc->call_responded = call->responded; @@ -371,6 +371,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) static void afs_destroy_vl_get_capabilities(struct afs_call *call) { + afs_put_addrlist(call->probe_alist, afs_alist_trace_put_vlgetcaps); afs_put_vlserver(call->net, call->vlserver); afs_flat_call_destructor(call); } @@ -394,7 +395,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { * other end supports. */ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, - struct afs_addr_cursor *ac, + struct afs_addr_list *alist, + unsigned int addr_index, struct key *key, struct afs_vlserver *server, unsigned int server_index) @@ -411,7 +413,9 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, call->key = key; call->vlserver = afs_get_vlserver(server); call->server_index = server_index; - call->peer = rxrpc_kernel_get_peer(ac->alist->addrs[ac->index].peer); + call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer); + call->probe_alist = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps); + call->probe_index = addr_index; call->service_id = server->service_id; call->upgrade = true; call->async = true; @@ -423,7 +427,7 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, /* Can't take a ref on server */ trace_afs_make_vl_call(call); - afs_make_call(ac, call, GFP_KERNEL); + afs_make_call(call, GFP_KERNEL); return call; } @@ -658,7 +662,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, call->key = vc->key; call->ret_alist = NULL; call->max_lifespan = AFS_VL_MAX_LIFESPAN; - call->peer = rxrpc_kernel_get_peer(vc->ac.alist->addrs[vc->ac.index].peer); + call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer); call->service_id = vc->server->service_id; /* Marshall the parameters */ @@ -668,8 +672,8 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ trace_afs_make_vl_call(call); - afs_make_call(&vc->ac, call, GFP_KERNEL); - afs_wait_for_call_to_complete(call, &vc->ac); + afs_make_call(call, GFP_KERNEL); + afs_wait_for_call_to_complete(call); vc->call_abort_code = call->abort_code; vc->call_error = call->error; vc->call_responded = call->responded; @@ -777,7 +781,7 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc) call->key = vc->key; call->ret_str = NULL; call->max_lifespan = AFS_VL_MAX_LIFESPAN; - call->peer = rxrpc_kernel_get_peer(vc->ac.alist->addrs[vc->ac.index].peer); + call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer); call->service_id = vc->server->service_id; /* marshall the parameters */ @@ -786,8 +790,8 @@ char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc) /* Can't take a ref on server */ trace_afs_make_vl_call(call); - afs_make_call(&vc->ac, call, GFP_KERNEL); - afs_wait_for_call_to_complete(call, &vc->ac); + afs_make_call(call, GFP_KERNEL); + afs_wait_for_call_to_complete(call); vc->call_abort_code = call->abort_code; vc->call_error = call->error; vc->call_responded = call->responded; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index ed91666ca4cc..0f68d67f52c8 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -204,21 +204,31 @@ enum yfs_cm_operation { #define afs_alist_traces \ EM(afs_alist_trace_alloc, "ALLOC ") \ + EM(afs_alist_trace_get_getcaps, "GET getcap") \ EM(afs_alist_trace_get_fsrotate_set, "GET fs-rot") \ - EM(afs_alist_trace_get_make_call, "GET mkcall") \ EM(afs_alist_trace_get_probe, "GET probe ") \ + EM(afs_alist_trace_get_vlgetcaps, "GET vgtcap") \ + EM(afs_alist_trace_get_vlprobe, "GET vprobe") \ EM(afs_alist_trace_get_vlrotate_set, "GET vl-rot") \ - EM(afs_alist_trace_put_call, "PUT call ") \ - EM(afs_alist_trace_put_end_cursor, "PUT endcur") \ EM(afs_alist_trace_put_getaddru, "PUT GtAdrU") \ + EM(afs_alist_trace_put_getcaps, "PUT getcap") \ + EM(afs_alist_trace_put_next_server, "PUT nx-srv") \ + EM(afs_alist_trace_put_op_failed, "PUT op-fai") \ + EM(afs_alist_trace_put_operation, "PUT op ") \ EM(afs_alist_trace_put_parse_empty, "PUT p-empt") \ EM(afs_alist_trace_put_parse_error, "PUT p-err ") \ EM(afs_alist_trace_put_probe, "PUT probe ") \ - EM(afs_alist_trace_put_retry_server, "PUT retry ") \ + EM(afs_alist_trace_put_restart_rotate, "PUT rstrot") \ EM(afs_alist_trace_put_server, "PUT server") \ EM(afs_alist_trace_put_server_dup, "PUT sv-dup") \ EM(afs_alist_trace_put_server_oom, "PUT sv-oom") \ EM(afs_alist_trace_put_server_update, "PUT sv-upd") \ + EM(afs_alist_trace_put_vlgetcaps, "PUT vgtcap") \ + EM(afs_alist_trace_put_vlprobe, "PUT vprobe") \ + EM(afs_alist_trace_put_vlrotate_end, "PUT vr-end") \ + EM(afs_alist_trace_put_vlrotate_fail, "PUT vr-fai") \ + EM(afs_alist_trace_put_vlrotate_next, "PUT vr-nxt") \ + EM(afs_alist_trace_put_vlrotate_restart,"PUT vr-rst") \ EM(afs_alist_trace_put_vlserver, "PUT vlsrvr") \ EM(afs_alist_trace_put_vlserver_old, "PUT vs-old") \ E_(afs_alist_trace_free, "FREE ") -- cgit v1.2.3 From 92f091cdddace38e57ad570663b058a38b4d8bed Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 30 Oct 2023 11:43:24 +0000 Subject: afs: Dispatch fileserver probes in priority order When probing all the addresses for a fileserver, dispatch them in order of descending priority to try and get back highest priority one first. Also add a tracepoint to show the transmission and completion of the probes. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/fs_probe.c | 25 +++++++++++++++++++++++-- include/trace/events/afs.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 8008d3ecabab..c5702698b18b 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -102,7 +102,7 @@ void afs_fileserver_probe_result(struct afs_call *call) struct afs_address *addr = &alist->addrs[call->probe_index]; struct afs_server *server = call->server; unsigned int index = call->probe_index; - unsigned int rtt_us = 0, cap0; + unsigned int rtt_us = -1, cap0; int ret = call->error; _enter("%pU,%u", &server->uuid, index); @@ -182,6 +182,7 @@ responded: out: spin_unlock(&server->probe_lock); + trace_afs_fs_probe(server, false, alist, index, call->error, call->abort_code, rtt_us); _debug("probe %pU [%u] %pISpc rtt=%d ret=%d", &server->uuid, index, rxrpc_kernel_remote_addr(alist->addrs[index].peer), rtt_us, ret); @@ -207,6 +208,8 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, afs_get_addrlist(alist, afs_alist_trace_get_probe); read_unlock(&server->fs_lock); + afs_get_address_preferences(net, alist); + server->probed_at = jiffies; atomic_set(&server->probe_outstanding, all ? alist->nr_addrs : 1); memset(&server->probe, 0, sizeof(server->probe)); @@ -217,10 +220,28 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, all = true; if (all) { - for (index = 0; index < alist->nr_addrs; index++) + unsigned long unprobed = (1UL << alist->nr_addrs) - 1; + unsigned int i; + int best_prio; + + while (unprobed) { + best_prio = -1; + index = 0; + for (i = 0; i < alist->nr_addrs; i++) { + if (test_bit(i, &unprobed) && + alist->addrs[i].prio > best_prio) { + index = i; + best_prio = alist->addrs[i].prio; + } + } + __clear_bit(index, &unprobed); + + trace_afs_fs_probe(server, true, alist, index, 0, 0, 0); if (!afs_fs_get_capabilities(net, server, alist, index, key)) afs_fs_probe_not_done(net, server, alist, index); + } } else { + trace_afs_fs_probe(server, true, alist, index, 0, 0, 0); if (!afs_fs_get_capabilities(net, server, alist, index, key)) afs_fs_probe_not_done(net, server, alist, index); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 0f68d67f52c8..81eb87fbcfa7 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1387,6 +1387,39 @@ TRACE_EVENT(afs_alist, __entry->ref) ); +TRACE_EVENT(afs_fs_probe, + TP_PROTO(struct afs_server *server, bool tx, struct afs_addr_list *alist, + unsigned int addr_index, int error, s32 abort_code, unsigned int rtt_us), + + TP_ARGS(server, tx, alist, addr_index, error, abort_code, rtt_us), + + TP_STRUCT__entry( + __field(unsigned int, server) + __field(bool, tx) + __field(u16, addr_index) + __field(short, error) + __field(s32, abort_code) + __field(unsigned int, rtt_us) + __field_struct(struct sockaddr_rxrpc, srx) + ), + + TP_fast_assign( + __entry->server = server->debug_id; + __entry->tx = tx; + __entry->addr_index = addr_index; + __entry->error = error; + __entry->abort_code = abort_code; + __entry->rtt_us = rtt_us; + memcpy(&__entry->srx, rxrpc_kernel_remote_srx(alist->addrs[addr_index].peer), + sizeof(__entry->srx)); + ), + + TP_printk("s=%08x %s ax=%u e=%d ac=%d rtt=%d %pISpc", + __entry->server, __entry->tx ? "tx" : "rx", __entry->addr_index, + __entry->error, __entry->abort_code, __entry->rtt_us, + &__entry->srx.transport) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From e6a7d7f71b17e0a44e2155bdad47eae7b5368503 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 30 Oct 2023 11:53:16 +0000 Subject: afs: Dispatch vlserver probes in priority order When probing all the addresses for a volume location server, dispatch them in order of descending priority to try and get back highest priority one first. Also add a tracepoint to show the transmission and completion of the probes. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/internal.h | 1 + fs/afs/vl_list.c | 2 ++ fs/afs/vl_probe.c | 20 ++++++++++++++++++-- include/trace/events/afs.h | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9a1e151e77e7..88db04220773 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -447,6 +447,7 @@ struct afs_vlserver { rwlock_t lock; /* Lock on addresses */ refcount_t ref; unsigned int rtt; /* Server's current RTT in uS */ + unsigned int debug_id; /* Probe state */ wait_queue_head_t probe_wq; diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index 5c4cd71caccf..9b1c20daac53 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -13,6 +13,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, unsigned short port) { struct afs_vlserver *vlserver; + static atomic_t debug_ids; vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), GFP_KERNEL); @@ -21,6 +22,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, rwlock_init(&vlserver->lock); init_waitqueue_head(&vlserver->probe_wq); spin_lock_init(&vlserver->probe_lock); + vlserver->debug_id = atomic_inc_return(&debug_ids); vlserver->rtt = UINT_MAX; vlserver->name_len = name_len; vlserver->service_id = VL_SERVICE; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index f868ae5d40e5..b128dc3d8af7 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -131,6 +131,7 @@ responded: out: spin_unlock(&server->probe_lock); + trace_afs_vl_probe(server, false, alist, index, call->error, call->abort_code, rtt_us); _debug("probe [%u][%u] %pISpc rtt=%d ret=%d", server_index, index, rxrpc_kernel_remote_addr(addr->peer), rtt_us, ret); @@ -150,8 +151,10 @@ static bool afs_do_probe_vlserver(struct afs_net *net, { struct afs_addr_list *alist; struct afs_call *call; - unsigned int index; + unsigned long unprobed; + unsigned int index, i; bool in_progress = false; + int best_prio; _enter("%s", server->name); @@ -165,7 +168,20 @@ static bool afs_do_probe_vlserver(struct afs_net *net, memset(&server->probe, 0, sizeof(server->probe)); server->probe.rtt = UINT_MAX; - for (index = 0; index < alist->nr_addrs; index++) { + unprobed = (1UL << alist->nr_addrs) - 1; + while (unprobed) { + best_prio = -1; + index = 0; + for (i = 0; i < alist->nr_addrs; i++) { + if (test_bit(i, &unprobed) && + alist->addrs[i].prio > best_prio) { + index = i; + best_prio = alist->addrs[i].prio; + } + } + __clear_bit(index, &unprobed); + + trace_afs_vl_probe(server, true, alist, index, 0, 0, 0); call = afs_vl_get_capabilities(net, alist, index, key, server, server_index); if (!IS_ERR(call)) { diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 81eb87fbcfa7..f1815b3dafb0 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1420,6 +1420,40 @@ TRACE_EVENT(afs_fs_probe, &__entry->srx.transport) ); +TRACE_EVENT(afs_vl_probe, + TP_PROTO(struct afs_vlserver *server, bool tx, struct afs_addr_list *alist, + unsigned int addr_index, int error, s32 abort_code, unsigned int rtt_us), + + TP_ARGS(server, tx, alist, addr_index, error, abort_code, rtt_us), + + TP_STRUCT__entry( + __field(unsigned int, server) + __field(bool, tx) + __field(unsigned short, flags) + __field(u16, addr_index) + __field(short, error) + __field(s32, abort_code) + __field(unsigned int, rtt_us) + __field_struct(struct sockaddr_rxrpc, srx) + ), + + TP_fast_assign( + __entry->server = server->debug_id; + __entry->tx = tx; + __entry->addr_index = addr_index; + __entry->error = error; + __entry->abort_code = abort_code; + __entry->rtt_us = rtt_us; + memcpy(&__entry->srx, rxrpc_kernel_remote_srx(alist->addrs[addr_index].peer), + sizeof(__entry->srx)); + ), + + TP_printk("vl=%08x %s ax=%u e=%d ac=%d rtt=%d %pISpc", + __entry->server, __entry->tx ? "tx" : "rx", __entry->addr_index, + __entry->error, __entry->abort_code, __entry->rtt_us, + &__entry->srx.transport) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From f49b594df3ebca53c91f4d6448680463f10aa479 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Oct 2023 16:30:37 +0000 Subject: afs: Keep a record of the current fileserver endpoint state Keep a record of the current fileserver endpoint state, including the probe state, and replace it when a new probe is started rather than just squelching the old state and overwriting it. Clearance of the old state can cause a race if there's another thread also currently trying to communicate with that server. It appears that this race might be the culprit for some occasions where kafs complains about invalid data in the RPC reply because the rotation algorithm fell all the way through without actually issuing an RPC call and the error return got filled in from the probe state (which has a zero error recorded). Whatever happens to be in the caller's reply buffer is then taken as the response. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/fs_operation.c | 19 ++-- fs/afs/fs_probe.c | 235 +++++++++++++++++++++++++++------------------ fs/afs/fsclient.c | 8 +- fs/afs/internal.h | 66 +++++++++---- fs/afs/proc.c | 21 ++-- fs/afs/rotate.c | 80 ++++++++------- fs/afs/server.c | 69 ++++++++----- fs/afs/vl_alias.c | 4 +- fs/afs/vl_probe.c | 2 +- fs/afs/vlclient.c | 4 +- include/trace/events/afs.h | 69 ++++++++++--- 11 files changed, 366 insertions(+), 211 deletions(-) (limited to 'include') diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index e760e11d5bcb..8c6d827f999d 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -228,6 +228,7 @@ void afs_wait_for_operation(struct afs_operation *op) */ int afs_put_operation(struct afs_operation *op) { + struct afs_endpoint_state *estate = op->estate; struct afs_addr_list *alist; int i, ret = afs_op_error(op); @@ -251,14 +252,16 @@ int afs_put_operation(struct afs_operation *op) kfree(op->more_files); } - alist = op->alist; - if (alist) { - if (op->call_responded && - op->addr_index != alist->preferred && - test_bit(alist->preferred, &op->addr_tried)) - WRITE_ONCE(alist->preferred, op->addr_index); - afs_put_addrlist(alist, afs_alist_trace_put_operation); - op->alist = NULL; + if (estate) { + alist = estate->addresses; + if (alist) { + if (op->call_responded && + op->addr_index != alist->preferred && + test_bit(alist->preferred, &op->addr_tried)) + WRITE_ONCE(alist->preferred, op->addr_index); + } + afs_put_endpoint_state(estate, afs_estate_trace_put_operation); + op->estate = NULL; } afs_put_serverlist(op->net, op->server_list); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c5702698b18b..a669aee033c5 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -15,6 +15,42 @@ static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; +struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate, + enum afs_estate_trace where) +{ + if (estate) { + int r; + + __refcount_inc(&estate->ref, &r); + trace_afs_estate(estate->server_id, estate->probe_seq, r, where); + } + return estate; +} + +static void afs_endpoint_state_rcu(struct rcu_head *rcu) +{ + struct afs_endpoint_state *estate = container_of(rcu, struct afs_endpoint_state, rcu); + + trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref), + afs_estate_trace_free); + afs_put_addrlist(estate->addresses, afs_alist_trace_put_estate); + kfree(estate); +} + +void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where) +{ + if (estate) { + unsigned int server_id = estate->server_id, probe_seq = estate->probe_seq; + bool dead; + int r; + + dead = __refcount_dec_and_test(&estate->ref, &r); + trace_afs_estate(server_id, probe_seq, r, where); + if (dead) + call_rcu(&estate->rcu, afs_endpoint_state_rcu); + } +} + /* * Start the probe polling timer. We have to supply it with an inc on the * outstanding server count. @@ -38,9 +74,10 @@ static void afs_schedule_fs_probe(struct afs_net *net, /* * Handle the completion of a set of probes. */ -static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) +static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server, + struct afs_endpoint_state *estate) { - bool responded = server->probe.responded; + bool responded = estate->responded; write_seqlock(&net->fs_lock); if (responded) { @@ -50,6 +87,7 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); list_add_tail(&server->probe_link, &net->fs_probe_fast); } + write_sequnlock(&net->fs_lock); afs_schedule_fs_probe(net, server, !responded); @@ -58,12 +96,13 @@ static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server /* * Handle the completion of a probe. */ -static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) +static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server, + struct afs_endpoint_state *estate) { _enter(""); - if (atomic_dec_and_test(&server->probe_outstanding)) - afs_finished_fs_probe(net, server); + if (atomic_dec_and_test(&estate->nr_probing)) + afs_finished_fs_probe(net, server, estate); wake_up_all(&server->probe_wq); } @@ -74,7 +113,7 @@ static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server */ static void afs_fs_probe_not_done(struct afs_net *net, struct afs_server *server, - struct afs_addr_list *alist, + struct afs_endpoint_state *estate, int index) { _enter(""); @@ -82,14 +121,14 @@ static void afs_fs_probe_not_done(struct afs_net *net, trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); spin_lock(&server->probe_lock); - server->probe.local_failure = true; - if (server->probe.error == 0) - server->probe.error = -ENOMEM; + estate->local_failure = true; + if (estate->error == 0) + estate->error = -ENOMEM; - set_bit(index, &alist->probe_failed); + set_bit(index, &estate->failed_set); spin_unlock(&server->probe_lock); - return afs_done_one_fs_probe(net, server); + return afs_done_one_fs_probe(net, server, estate); } /* @@ -98,7 +137,8 @@ static void afs_fs_probe_not_done(struct afs_net *net, */ void afs_fileserver_probe_result(struct afs_call *call) { - struct afs_addr_list *alist = call->probe_alist; + struct afs_endpoint_state *estate = call->probe; + struct afs_addr_list *alist = estate->addresses; struct afs_address *addr = &alist->addrs[call->probe_index]; struct afs_server *server = call->server; unsigned int index = call->probe_index; @@ -113,18 +153,18 @@ void afs_fileserver_probe_result(struct afs_call *call) switch (ret) { case 0: - server->probe.error = 0; + estate->error = 0; goto responded; case -ECONNABORTED: - if (!server->probe.responded) { - server->probe.abort_code = call->abort_code; - server->probe.error = ret; + if (!estate->responded) { + estate->abort_code = call->abort_code; + estate->error = ret; } goto responded; case -ENOMEM: case -ENONET: - clear_bit(index, &alist->responded); - server->probe.local_failure = true; + clear_bit(index, &estate->responsive_set); + estate->local_failure = true; trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); goto out; case -ECONNRESET: /* Responded, but call expired. */ @@ -137,28 +177,28 @@ void afs_fileserver_probe_result(struct afs_call *call) case -ETIMEDOUT: case -ETIME: default: - clear_bit(index, &alist->responded); - set_bit(index, &alist->probe_failed); - if (!server->probe.responded && - (server->probe.error == 0 || - server->probe.error == -ETIMEDOUT || - server->probe.error == -ETIME)) - server->probe.error = ret; + clear_bit(index, &estate->responsive_set); + set_bit(index, &estate->failed_set); + if (!estate->responded && + (estate->error == 0 || + estate->error == -ETIMEDOUT || + estate->error == -ETIME)) + estate->error = ret; trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); goto out; } responded: - clear_bit(index, &alist->probe_failed); + clear_bit(index, &estate->failed_set); if (call->service_id == YFS_FS_SERVICE) { - server->probe.is_yfs = true; + estate->is_yfs = true; set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); server->service_id = call->service_id; } else { - server->probe.not_yfs = true; - if (!server->probe.is_yfs) { - clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + estate->not_yfs = true; + if (!estate->is_yfs) { + estate->is_yfs = false; server->service_id = call->service_id; } cap0 = ntohl(call->tmp); @@ -169,84 +209,90 @@ responded: } rtt_us = rxrpc_kernel_get_srtt(addr->peer); - if (rtt_us < server->probe.rtt) { - server->probe.rtt = rtt_us; + if (rtt_us < estate->rtt) { + estate->rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; } smp_wmb(); /* Set rtt before responded. */ - server->probe.responded = true; - set_bit(index, &alist->responded); + estate->responded = true; + set_bit(index, &estate->responsive_set); set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); out: spin_unlock(&server->probe_lock); - trace_afs_fs_probe(server, false, alist, index, call->error, call->abort_code, rtt_us); - _debug("probe %pU [%u] %pISpc rtt=%d ret=%d", - &server->uuid, index, rxrpc_kernel_remote_addr(alist->addrs[index].peer), + trace_afs_fs_probe(server, false, estate, index, call->error, call->abort_code, rtt_us); + _debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d", + estate->probe_seq, &server->uuid, index, + rxrpc_kernel_remote_addr(alist->addrs[index].peer), rtt_us, ret); - return afs_done_one_fs_probe(call->net, server); + return afs_done_one_fs_probe(call->net, server, estate); } /* - * Probe one or all of a fileserver's addresses to find out the best route and - * to query its capabilities. + * Probe all of a fileserver's addresses to find out the best route and to + * query its capabilities. */ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, - struct key *key, bool all) + struct afs_addr_list *new_alist, struct key *key) { + struct afs_endpoint_state *estate, *old; struct afs_addr_list *alist; - unsigned int index; + unsigned long unprobed; _enter("%pU", &server->uuid); - read_lock(&server->fs_lock); - alist = rcu_dereference_protected(server->addresses, - lockdep_is_held(&server->fs_lock)); - afs_get_addrlist(alist, afs_alist_trace_get_probe); - read_unlock(&server->fs_lock); + estate = kzalloc(sizeof(*estate), GFP_KERNEL); + if (!estate) + return; + + refcount_set(&estate->ref, 1); + estate->server_id = server->debug_id; + estate->rtt = UINT_MAX; + + write_lock(&server->fs_lock); + + old = rcu_dereference_protected(server->endpoint_state, + lockdep_is_held(&server->fs_lock)); + estate->responsive_set = old->responsive_set; + estate->addresses = afs_get_addrlist(new_alist ?: old->addresses, + afs_alist_trace_get_estate); + alist = estate->addresses; + estate->probe_seq = ++server->probe_counter; + atomic_set(&estate->nr_probing, alist->nr_addrs); + + rcu_assign_pointer(server->endpoint_state, estate); + old->superseded = true; + write_unlock(&server->fs_lock); + + trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref), + afs_estate_trace_alloc_probe); afs_get_address_preferences(net, alist); server->probed_at = jiffies; - atomic_set(&server->probe_outstanding, all ? alist->nr_addrs : 1); - memset(&server->probe, 0, sizeof(server->probe)); - server->probe.rtt = UINT_MAX; - - index = alist->preferred; - if (index < 0 || index >= alist->nr_addrs) - all = true; - - if (all) { - unsigned long unprobed = (1UL << alist->nr_addrs) - 1; - unsigned int i; - int best_prio; - - while (unprobed) { - best_prio = -1; - index = 0; - for (i = 0; i < alist->nr_addrs; i++) { - if (test_bit(i, &unprobed) && - alist->addrs[i].prio > best_prio) { - index = i; - best_prio = alist->addrs[i].prio; - } + unprobed = (1UL << alist->nr_addrs) - 1; + while (unprobed) { + unsigned int index = 0, i; + int best_prio = -1; + + for (i = 0; i < alist->nr_addrs; i++) { + if (test_bit(i, &unprobed) && + alist->addrs[i].prio > best_prio) { + index = i; + best_prio = alist->addrs[i].prio; } - __clear_bit(index, &unprobed); - - trace_afs_fs_probe(server, true, alist, index, 0, 0, 0); - if (!afs_fs_get_capabilities(net, server, alist, index, key)) - afs_fs_probe_not_done(net, server, alist, index); } - } else { - trace_afs_fs_probe(server, true, alist, index, 0, 0, 0); - if (!afs_fs_get_capabilities(net, server, alist, index, key)) - afs_fs_probe_not_done(net, server, alist, index); + __clear_bit(index, &unprobed); + + trace_afs_fs_probe(server, true, estate, index, 0, 0, 0); + if (!afs_fs_get_capabilities(net, server, estate, index, key)) + afs_fs_probe_not_done(net, server, estate, index); } - afs_put_addrlist(alist, afs_alist_trace_put_probe); + afs_put_endpoint_state(old, afs_estate_trace_put_probe); } /* @@ -254,6 +300,7 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, */ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) { + struct afs_endpoint_state *estate; struct wait_queue_entry *waits; struct afs_server *server; unsigned int rtt = UINT_MAX, rtt_s; @@ -263,15 +310,18 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) _enter("%u,%lx", slist->nr_servers, untried); /* Only wait for servers that have a probe outstanding. */ + rcu_read_lock(); for (i = 0; i < slist->nr_servers; i++) { if (test_bit(i, &untried)) { server = slist->servers[i].server; - if (!atomic_read(&server->probe_outstanding)) + estate = rcu_dereference(server->endpoint_state); + if (!atomic_read(&estate->nr_probing)) __clear_bit(i, &untried); - if (server->probe.responded) + if (estate->responded) have_responders = true; } } + rcu_read_unlock(); if (have_responders || !untried) return 0; @@ -294,9 +344,9 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) for (i = 0; i < slist->nr_servers; i++) { if (test_bit(i, &untried)) { server = slist->servers[i].server; - if (server->probe.responded) + if (estate->responded) goto stop; - if (atomic_read(&server->probe_outstanding)) + if (atomic_read(&estate->nr_probing)) still_probing = true; } } @@ -348,7 +398,7 @@ void afs_fs_probe_timer(struct timer_list *timer) /* * Dispatch a probe to a server. */ -static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) +static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server) __releases(&net->fs_lock) { struct key *key = NULL; @@ -361,7 +411,7 @@ static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server afs_get_server(server, afs_server_trace_get_probe); write_sequnlock(&net->fs_lock); - afs_fs_probe_fileserver(net, server, key, all); + afs_fs_probe_fileserver(net, server, NULL, key); afs_put_server(net, server, afs_server_trace_put_probe); } @@ -373,7 +423,7 @@ void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) { write_seqlock(&net->fs_lock); if (!list_empty(&server->probe_link)) - return afs_dispatch_fs_probe(net, server, true); + return afs_dispatch_fs_probe(net, server); write_sequnlock(&net->fs_lock); } @@ -433,7 +483,7 @@ again: _debug("probe %pU", &server->uuid); if (server && (first_pass || !need_resched())) { - afs_dispatch_fs_probe(net, server, server == fast); + afs_dispatch_fs_probe(net, server); first_pass = false; goto again; } @@ -457,12 +507,13 @@ again: /* * Wait for a probe on a particular fileserver to complete for 2s. */ -int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) +int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate, + bool is_intr) { struct wait_queue_entry wait; unsigned long timo = 2 * HZ; - if (atomic_read(&server->probe_outstanding) == 0) + if (atomic_read(&estate->nr_probing) == 0) goto dont_wait; init_wait_entry(&wait, 0); @@ -470,8 +521,8 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) prepare_to_wait_event(&server->probe_wq, &wait, is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (timo == 0 || - server->probe.responded || - atomic_read(&server->probe_outstanding) == 0 || + estate->responded || + atomic_read(&estate->nr_probing) == 0 || (is_intr && signal_pending(current))) break; timo = schedule_timeout(timo); @@ -480,7 +531,7 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) finish_wait(&server->probe_wq, &wait); dont_wait: - if (server->probe.responded) + if (estate->responded) return 0; if (is_intr && signal_pending(current)) return -ERESTARTSYS; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4f98b43b0dde..f1f879ba9cf7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1697,7 +1697,7 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) static void afs_fs_get_capabilities_destructor(struct afs_call *call) { - afs_put_addrlist(call->probe_alist, afs_alist_trace_put_getcaps); + afs_put_endpoint_state(call->probe, afs_estate_trace_put_getcaps); afs_flat_call_destructor(call); } @@ -1719,7 +1719,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { * ->done() - otherwise we return false to indicate we didn't even try. */ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, - struct afs_addr_list *alist, unsigned int addr_index, + struct afs_endpoint_state *estate, unsigned int addr_index, struct key *key) { struct afs_call *call; @@ -1733,8 +1733,8 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, call->key = key; call->server = afs_use_server(server, afs_server_trace_get_caps); - call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer); - call->probe_alist = afs_get_addrlist(alist, afs_alist_trace_get_getcaps); + call->peer = rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer); + call->probe = afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps); call->probe_index = addr_index; call->service_id = server->service_id; call->upgrade = true; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 88db04220773..4d42f84a8da4 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -33,6 +33,7 @@ struct pagevec; struct afs_call; struct afs_vnode; +struct afs_server_probe; /* * Partial file-locking emulation mode. (The problem being that AFS3 only @@ -146,14 +147,13 @@ struct afs_call { }; void *buffer; /* reply receive buffer */ union { - struct { - struct afs_addr_list *probe_alist; - unsigned char probe_index; /* Address in ->probe_alist */ - }; + struct afs_endpoint_state *probe; + struct afs_addr_list *vl_probe; struct afs_addr_list *ret_alist; struct afs_vldb_entry *ret_vldb; char *ret_str; }; + unsigned char probe_index; /* Address in ->probe_alist */ struct afs_operation *op; unsigned int server_index; refcount_t ref; @@ -520,6 +520,32 @@ struct afs_vldb_entry { u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; +/* + * Fileserver endpoint state. The records the addresses of a fileserver's + * endpoints and the state and result of a round of probing on them. This + * allows the rotation algorithm to access those results without them being + * erased by a subsequent round of probing. + */ +struct afs_endpoint_state { + struct rcu_head rcu; + struct afs_addr_list *addresses; /* The addresses being probed */ + unsigned long responsive_set; /* Bitset of responsive endpoints */ + unsigned long failed_set; /* Bitset of endpoints we failed to probe */ + refcount_t ref; + unsigned int server_id; /* Debug ID of server */ + unsigned int probe_seq; /* Probe sequence (from server::probe_counter) */ + + atomic_t nr_probing; /* Number of outstanding probes */ + unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */ + s32 abort_code; + short error; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + bool superseded:1; /* Set if has been superseded */ +}; + /* * Record of fileserver with which we're actively communicating. */ @@ -530,7 +556,6 @@ struct afs_server { struct afs_uuid _uuid; }; - struct afs_addr_list __rcu *addresses; struct afs_cell *cell; /* Cell to which belongs (pins ref) */ struct rb_node uuid_rb; /* Link in net->fs_servers */ struct afs_server __rcu *uuid_next; /* Next server with same UUID */ @@ -568,19 +593,11 @@ struct afs_server { unsigned cb_s_break; /* Break-everything counter. */ /* Probe state */ + struct afs_endpoint_state __rcu *endpoint_state; /* Latest endpoint/probe state */ unsigned long probed_at; /* Time last probe was dispatched (jiffies) */ wait_queue_head_t probe_wq; - atomic_t probe_outstanding; + unsigned int probe_counter; /* Number of probes issued */ spinlock_t probe_lock; - struct { - unsigned int rtt; /* Best RTT in uS (or UINT_MAX) */ - u32 abort_code; - short error; - bool responded:1; - bool is_yfs:1; - bool not_yfs:1; - bool local_failure:1; - } probe; }; /* @@ -883,7 +900,7 @@ struct afs_operation { /* Fileserver iteration state */ struct afs_server_list *server_list; /* Current server list (pins ref) */ struct afs_server *server; /* Server we're using (ref pinned by server_list) */ - struct afs_addr_list *alist; /* Current address list (pins ref) */ + struct afs_endpoint_state *estate; /* Current endpoint state (pins ref) */ struct afs_call *call; unsigned long untried_servers; /* Bitmask of untried servers */ unsigned long addr_tried; /* Tried addresses */ @@ -1153,7 +1170,7 @@ extern void afs_fs_release_lock(struct afs_operation *); int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server, struct afs_address *addr, struct key *key); bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, - struct afs_addr_list *alist, unsigned int addr_index, + struct afs_endpoint_state *estate, unsigned int addr_index, struct key *key); extern void afs_fs_inline_bulk_status(struct afs_operation *); @@ -1190,12 +1207,17 @@ static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n, /* * fs_probe.c */ +struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate, + enum afs_estate_trace where); +void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where); extern void afs_fileserver_probe_result(struct afs_call *); -extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool); +void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, + struct afs_addr_list *new_addrs, struct key *key); extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); extern void afs_fs_probe_dispatcher(struct work_struct *); -extern int afs_wait_for_one_fs_probe(struct afs_server *, bool); +int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate, + bool is_intr); extern void afs_fs_probe_cleanup(struct afs_net *); /* @@ -1348,12 +1370,14 @@ extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause); static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call, gfp_t gfp) { + struct afs_addr_list *alist = op->estate->addresses; + op->call = call; op->type = call->type; call->op = op; call->key = op->key; call->intr = !(op->flags & AFS_OPERATION_UNINTR); - call->peer = rxrpc_kernel_get_peer(op->alist->addrs[op->addr_index].peer); + call->peer = rxrpc_kernel_get_peer(alist->addrs[op->addr_index].peer); call->service_id = op->server->service_id; afs_make_call(call, gfp); } @@ -1476,7 +1500,7 @@ extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); extern void afs_fs_probe_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); -extern bool afs_check_server_record(struct afs_operation *, struct afs_server *); +bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key); static inline void afs_inc_servers_outstanding(struct afs_net *net) { diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 944eb51e75a1..a138022d8e0d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -424,8 +424,9 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = { */ static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct afs_server *server; + struct afs_endpoint_state *estate; struct afs_addr_list *alist; + struct afs_server *server; unsigned long failed; int i; @@ -435,7 +436,8 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) } server = list_entry(v, struct afs_server, proc_link); - alist = rcu_dereference(server->addresses); + estate = rcu_dereference(server->endpoint_state); + alist = estate->addresses; seq_printf(m, "%pU %3d %3d %s\n", &server->uuid, refcount_read(&server->ref), @@ -443,13 +445,14 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) server->cell->name); seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n", server->flags, server->rtt, server->cb_s_break); - seq_printf(m, " - probe: last=%d out=%d\n", - (int)(jiffies - server->probed_at) / HZ, - atomic_read(&server->probe_outstanding)); - failed = alist->probe_failed; - seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx ap=%u\n", - alist->version, alist->responded, alist->probe_failed, - alist->addr_pref_version); + seq_printf(m, " - probe: last=%d\n", + (int)(jiffies - server->probed_at) / HZ); + failed = estate->failed_set; + seq_printf(m, " - ESTATE pq=%x np=%u rsp=%lx f=%lx\n", + estate->probe_seq, atomic_read(&estate->nr_probing), + estate->responsive_set, estate->failed_set); + seq_printf(m, " - ALIST v=%u ap=%u\n", + alist->version, alist->addr_pref_version); for (i = 0; i < alist->nr_addrs; i++) { const struct afs_address *addr = &alist->addrs[i]; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 5423ac80f4e0..e8635f60b97d 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -109,10 +109,11 @@ static bool afs_sleep_and_retry(struct afs_operation *op) */ bool afs_select_fileserver(struct afs_operation *op) { - struct afs_addr_list *alist = op->alist; + struct afs_endpoint_state *estate = op->estate; + struct afs_addr_list *alist; struct afs_server *server; struct afs_vnode *vnode = op->file[0].vnode; - unsigned long set; + unsigned long set, failed; unsigned int rtt; s32 abort_code = op->call_abort_code; int error = op->call_error, addr_index, i; @@ -133,7 +134,7 @@ bool afs_select_fileserver(struct afs_operation *op) if (op->nr_iterations == 0) goto start; - WRITE_ONCE(alist->addrs[op->addr_index].last_error, error); + WRITE_ONCE(estate->addresses->addrs[op->addr_index].last_error, error); /* Evaluate the result of the previous operation, if there was one. */ switch (op->call_error) { @@ -401,14 +402,14 @@ bool afs_select_fileserver(struct afs_operation *op) restart_from_beginning: _debug("restart"); - afs_put_addrlist(alist, afs_alist_trace_put_restart_rotate); - alist = op->alist = NULL; + afs_put_endpoint_state(estate, afs_estate_trace_put_restart_rotate); + estate = op->estate = NULL; op->server = NULL; afs_put_serverlist(op->net, op->server_list); op->server_list = NULL; start: _debug("start"); - ASSERTCMP(alist, ==, NULL); + ASSERTCMP(estate, ==, NULL); /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ @@ -425,7 +426,7 @@ start: pick_server: _debug("pick [%lx]", op->untried_servers); - ASSERTCMP(alist, ==, NULL); + ASSERTCMP(estate, ==, NULL); error = afs_wait_for_fs_probes(op->server_list, op->untried_servers); if (error < 0) { @@ -452,9 +453,9 @@ pick_server: if (!test_bit(i, &op->untried_servers) || !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) continue; - if (s->probe.rtt <= rtt) { + if (s->rtt <= rtt) { op->server_index = i; - rtt = s->probe.rtt; + rtt = s->rtt; } } @@ -469,10 +470,10 @@ selected_server: * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. */ - ASSERTCMP(alist, ==, NULL); + ASSERTCMP(estate, ==, NULL); server = op->server_list->servers[op->server_index].server; - if (!afs_check_server_record(op, server)) + if (!afs_check_server_record(op, server, op->key)) goto failed; _debug("USING SERVER: %pU", &server->uuid); @@ -488,9 +489,9 @@ selected_server: } read_lock(&server->fs_lock); - alist = rcu_dereference_protected(server->addresses, - lockdep_is_held(&server->fs_lock)); - op->alist = afs_get_addrlist(alist, afs_alist_trace_get_fsrotate_set); + estate = rcu_dereference_protected(server->endpoint_state, + lockdep_is_held(&server->fs_lock)); + op->estate = afs_get_endpoint_state(estate, afs_estate_trace_get_fsrotate_set); read_unlock(&server->fs_lock); retry_server: @@ -501,18 +502,20 @@ iterate_address: /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - set = READ_ONCE(alist->responded); - set &= ~(READ_ONCE(alist->probe_failed) | op->addr_tried); + set = READ_ONCE(estate->responsive_set); + failed = READ_ONCE(estate->failed_set); + _debug("iterate ES=%x rs=%lx fs=%lx", estate->probe_seq, set, failed); + set &= ~(failed | op->addr_tried); if (!set) goto out_of_addresses; + alist = estate->addresses; addr_index = READ_ONCE(alist->preferred); if (!test_bit(addr_index, &set)) addr_index = __ffs(set); op->addr_index = addr_index; set_bit(addr_index, &op->addr_tried); - op->alist = alist; op->call_responded = false; _debug("address [%u] %u/%u %pISp", @@ -527,8 +530,8 @@ out_of_addresses: */ afs_probe_fileserver(op->net, op->server); if (op->flags & AFS_OPERATION_RETRY_SERVER) { - error = afs_wait_for_one_fs_probe( - op->server, !(op->flags & AFS_OPERATION_UNINTR)); + error = afs_wait_for_one_fs_probe(op->server, estate, + !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { case 0: op->flags &= ~AFS_OPERATION_RETRY_SERVER; @@ -544,13 +547,14 @@ out_of_addresses: next_server: _debug("next"); - ASSERT(alist); + ASSERT(estate); + alist = estate->addresses; if (op->call_responded && op->addr_index != READ_ONCE(alist->preferred) && test_bit(alist->preferred, &op->addr_tried)) WRITE_ONCE(alist->preferred, op->addr_index); - afs_put_addrlist(alist, afs_alist_trace_put_next_server); - alist = op->alist = NULL; + afs_put_endpoint_state(estate, afs_estate_trace_put_next_server); + estate = op->estate = NULL; goto pick_server; no_more_servers: @@ -560,23 +564,28 @@ no_more_servers: if (op->flags & AFS_OPERATION_VBUSY) goto restart_from_beginning; + rcu_read_lock(); for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_endpoint_state *estate; struct afs_server *s = op->server_list->servers[i].server; - error = READ_ONCE(s->probe.error); + estate = rcu_dereference(s->endpoint_state); + error = READ_ONCE(estate->error); if (error < 0) - afs_op_accumulate_error(op, error, s->probe.abort_code); + afs_op_accumulate_error(op, error, estate->abort_code); } + rcu_read_unlock(); failed: op->flags |= AFS_OPERATION_STOP; - if (alist) { + if (estate) { + alist = estate->addresses; if (op->call_responded && op->addr_index != READ_ONCE(alist->preferred) && test_bit(alist->preferred, &op->addr_tried)) WRITE_ONCE(alist->preferred, op->addr_index); - afs_put_addrlist(alist, afs_alist_trace_put_op_failed); - op->alist = NULL; + afs_put_endpoint_state(estate, afs_estate_trace_put_op_failed); + op->estate = NULL; } _leave(" = f [failed %d]", afs_op_error(op)); return false; @@ -607,27 +616,30 @@ void afs_dump_edestaddrreq(const struct afs_operation *op) if (op->server_list) { const struct afs_server_list *sl = op->server_list; + pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", sl->nr_servers, sl->preferred, sl->vnovol_mask); for (i = 0; i < sl->nr_servers; i++) { const struct afs_server *s = sl->servers[i].server; + const struct afs_endpoint_state *e = + rcu_dereference(s->endpoint_state); + const struct afs_addr_list *a = e->addresses; + pr_notice("FC: server fl=%lx av=%u %pU\n", s->flags, s->addr_version, &s->uuid); - if (s->addresses) { - const struct afs_addr_list *a = - rcu_dereference(s->addresses); + pr_notice("FC: - pq=%x R=%lx F=%lx\n", + e->probe_seq, e->responsive_set, e->failed_set); + if (a) { pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", a->version, a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); - pr_notice("FC: - R=%lx F=%lx\n", - a->responded, a->probe_failed); - if (a == op->alist) + if (a == e->addresses) pr_notice("FC: - current\n"); } } } - pr_notice("AC: t=%lx ax=%u\n", op->addr_tried, op->addr_index); + pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index); rcu_read_unlock(); } diff --git a/fs/afs/server.c b/fs/afs/server.c index 62d453365689..281625c71aff 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -23,6 +23,7 @@ static void __afs_put_server(struct afs_net *, struct afs_server *); */ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer *peer) { + const struct afs_endpoint_state *estate; const struct afs_addr_list *alist; struct afs_server *server = NULL; unsigned int i; @@ -38,7 +39,8 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer read_seqbegin_or_lock(&net->fs_addr_lock, &seq); hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { - alist = rcu_dereference(server->addresses); + estate = rcu_dereference(server->endpoint_state); + alist = estate->addresses; for (i = 0; i < alist->nr_addrs; i++) if (alist->addrs[i].peer == peer) goto found; @@ -111,6 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu static struct afs_server *afs_install_server(struct afs_cell *cell, struct afs_server *candidate) { + const struct afs_endpoint_state *estate; const struct afs_addr_list *alist; struct afs_server *server, *next; struct afs_net *net = cell->net; @@ -162,8 +165,9 @@ static struct afs_server *afs_install_server(struct afs_cell *cell, added_dup: write_seqlock(&net->fs_addr_lock); - alist = rcu_dereference_protected(server->addresses, - lockdep_is_held(&net->fs_addr_lock.lock)); + estate = rcu_dereference_protected(server->endpoint_state, + lockdep_is_held(&net->fs_addr_lock.lock)); + alist = estate->addresses; /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install * it in the IPv4 and/or IPv6 reverse-map lists. @@ -193,6 +197,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid, struct afs_addr_list *alist) { + struct afs_endpoint_state *estate; struct afs_server *server; struct afs_net *net = cell->net; @@ -202,10 +207,13 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, if (!server) goto enomem; + estate = kzalloc(sizeof(struct afs_endpoint_state), GFP_KERNEL); + if (!estate) + goto enomem_server; + refcount_set(&server->ref, 1); atomic_set(&server->active, 1); server->debug_id = atomic_inc_return(&afs_server_debug_id); - RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; rwlock_init(&server->fs_lock); @@ -217,11 +225,23 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server->rtt = UINT_MAX; server->service_id = FS_SERVICE; + server->probe_counter = 1; + server->probed_at = jiffies - LONG_MAX / 2; + refcount_set(&estate->ref, 1); + estate->addresses = alist; + estate->server_id = server->debug_id; + estate->probe_seq = 1; + rcu_assign_pointer(server->endpoint_state, estate); + afs_inc_servers_outstanding(net); trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc); + trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref), + afs_estate_trace_alloc_server); _leave(" = %p", server); return server; +enomem_server: + kfree(server); enomem: _leave(" = NULL [nomem]"); return NULL; @@ -289,7 +309,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, * on the fileserver. This will make sure the repeat-probing * service is started. */ - afs_fs_probe_fileserver(cell->net, server, key, true); + afs_fs_probe_fileserver(cell->net, server, alist, key); } return server; @@ -422,8 +442,8 @@ static void afs_server_rcu(struct rcu_head *rcu) trace_afs_server(server->debug_id, refcount_read(&server->ref), atomic_read(&server->active), afs_server_trace_free); - afs_put_addrlist(rcu_access_pointer(server->addresses), - afs_alist_trace_put_server); + afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state), + afs_estate_trace_put_server); kfree(server); } @@ -435,7 +455,8 @@ static void __afs_put_server(struct afs_net *net, struct afs_server *server) static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server) { - struct afs_addr_list *alist = rcu_access_pointer(server->addresses); + struct afs_endpoint_state *estate = rcu_access_pointer(server->endpoint_state); + struct afs_addr_list *alist = estate->addresses; afs_fs_give_up_all_callbacks(net, server, &alist->addrs[alist->preferred], NULL); } @@ -607,9 +628,12 @@ void afs_purge_servers(struct afs_net *net) * Get an update for a server's address list. */ static noinline bool afs_update_server_record(struct afs_operation *op, - struct afs_server *server) + struct afs_server *server, + struct key *key) { - struct afs_addr_list *alist, *discard; + struct afs_endpoint_state *estate; + struct afs_addr_list *alist; + bool has_addrs; _enter(""); @@ -619,10 +643,15 @@ static noinline bool afs_update_server_record(struct afs_operation *op, alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid); if (IS_ERR(alist)) { + rcu_read_lock(); + estate = rcu_dereference(server->endpoint_state); + has_addrs = estate->addresses; + rcu_read_unlock(); + if ((PTR_ERR(alist) == -ERESTARTSYS || PTR_ERR(alist) == -EINTR) && (op->flags & AFS_OPERATION_UNINTR) && - server->addresses) { + has_addrs) { _leave(" = t [intr]"); return true; } @@ -631,17 +660,10 @@ static noinline bool afs_update_server_record(struct afs_operation *op, return false; } - discard = alist; - if (server->addr_version != alist->version) { - write_lock(&server->fs_lock); - discard = rcu_dereference_protected(server->addresses, - lockdep_is_held(&server->fs_lock)); - rcu_assign_pointer(server->addresses, alist); - server->addr_version = alist->version; - write_unlock(&server->fs_lock); - } + if (server->addr_version != alist->version) + afs_fs_probe_fileserver(op->net, server, alist, key); - afs_put_addrlist(discard, afs_alist_trace_put_server_update); + afs_put_addrlist(alist, afs_alist_trace_put_server_update); _leave(" = t"); return true; } @@ -649,7 +671,8 @@ static noinline bool afs_update_server_record(struct afs_operation *op, /* * See if a server's address list needs updating. */ -bool afs_check_server_record(struct afs_operation *op, struct afs_server *server) +bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, + struct key *key) { bool success; int ret, retries = 0; @@ -669,7 +692,7 @@ retry: update: if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); - success = afs_update_server_record(op, server); + success = afs_update_server_record(op, server, key); clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); _leave(" = %d", success); diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 89cadd9a69e1..43788d0c18e8 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -41,8 +41,8 @@ static int afs_compare_fs_alists(const struct afs_server *server_a, const struct afs_addr_list *la, *lb; int a = 0, b = 0, addr_matches = 0; - la = rcu_dereference(server_a->addresses); - lb = rcu_dereference(server_b->addresses); + la = rcu_dereference(server_a->endpoint_state)->addresses; + lb = rcu_dereference(server_b->endpoint_state)->addresses; while (a < la->nr_addrs && b < lb->nr_addrs) { unsigned long pa = (unsigned long)la->addrs[a].peer; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index b128dc3d8af7..3d2e0c925460 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -46,7 +46,7 @@ static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up) */ void afs_vlserver_probe_result(struct afs_call *call) { - struct afs_addr_list *alist = call->probe_alist; + struct afs_addr_list *alist = call->vl_probe; struct afs_vlserver *server = call->vlserver; struct afs_address *addr = &alist->addrs[call->probe_index]; unsigned int server_index = call->server_index; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 39a0b7614d05..cef02a265edc 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -371,7 +371,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) static void afs_destroy_vl_get_capabilities(struct afs_call *call) { - afs_put_addrlist(call->probe_alist, afs_alist_trace_put_vlgetcaps); + afs_put_addrlist(call->vl_probe, afs_alist_trace_put_vlgetcaps); afs_put_vlserver(call->net, call->vlserver); afs_flat_call_destructor(call); } @@ -414,7 +414,7 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net, call->vlserver = afs_get_vlserver(server); call->server_index = server_index; call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer); - call->probe_alist = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps); + call->vl_probe = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps); call->probe_index = addr_index; call->service_id = server->service_id; call->upgrade = true; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f1815b3dafb0..cf2fa4fddd5b 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -204,22 +204,14 @@ enum yfs_cm_operation { #define afs_alist_traces \ EM(afs_alist_trace_alloc, "ALLOC ") \ - EM(afs_alist_trace_get_getcaps, "GET getcap") \ - EM(afs_alist_trace_get_fsrotate_set, "GET fs-rot") \ - EM(afs_alist_trace_get_probe, "GET probe ") \ + EM(afs_alist_trace_get_estate, "GET estate") \ EM(afs_alist_trace_get_vlgetcaps, "GET vgtcap") \ EM(afs_alist_trace_get_vlprobe, "GET vprobe") \ EM(afs_alist_trace_get_vlrotate_set, "GET vl-rot") \ + EM(afs_alist_trace_put_estate, "PUT estate") \ EM(afs_alist_trace_put_getaddru, "PUT GtAdrU") \ - EM(afs_alist_trace_put_getcaps, "PUT getcap") \ - EM(afs_alist_trace_put_next_server, "PUT nx-srv") \ - EM(afs_alist_trace_put_op_failed, "PUT op-fai") \ - EM(afs_alist_trace_put_operation, "PUT op ") \ EM(afs_alist_trace_put_parse_empty, "PUT p-empt") \ EM(afs_alist_trace_put_parse_error, "PUT p-err ") \ - EM(afs_alist_trace_put_probe, "PUT probe ") \ - EM(afs_alist_trace_put_restart_rotate, "PUT rstrot") \ - EM(afs_alist_trace_put_server, "PUT server") \ EM(afs_alist_trace_put_server_dup, "PUT sv-dup") \ EM(afs_alist_trace_put_server_oom, "PUT sv-oom") \ EM(afs_alist_trace_put_server_update, "PUT sv-upd") \ @@ -233,6 +225,20 @@ enum yfs_cm_operation { EM(afs_alist_trace_put_vlserver_old, "PUT vs-old") \ E_(afs_alist_trace_free, "FREE ") +#define afs_estate_traces \ + EM(afs_estate_trace_alloc_probe, "ALLOC prob") \ + EM(afs_estate_trace_alloc_server, "ALLOC srvr") \ + EM(afs_estate_trace_get_fsrotate_set, "GET fs-rot") \ + EM(afs_estate_trace_get_getcaps, "GET getcap") \ + EM(afs_estate_trace_put_getcaps, "PUT getcap") \ + EM(afs_estate_trace_put_next_server, "PUT nx-srv") \ + EM(afs_estate_trace_put_op_failed, "PUT op-fai") \ + EM(afs_estate_trace_put_operation, "PUT op ") \ + EM(afs_estate_trace_put_probe, "PUT probe ") \ + EM(afs_estate_trace_put_restart_rotate, "PUT rstrot") \ + EM(afs_estate_trace_put_server, "PUT server") \ + E_(afs_estate_trace_free, "FREE ") + #define afs_fs_operations \ EM(afs_FS_FetchData, "FS.FetchData") \ EM(afs_FS_FetchStatus, "FS.FetchStatus") \ @@ -458,6 +464,7 @@ enum afs_cell_trace { afs_cell_traces } __mode(byte); enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte); enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte); enum afs_eproto_cause { afs_eproto_causes } __mode(byte); +enum afs_estate_trace { afs_estate_traces } __mode(byte); enum afs_file_error { afs_file_errors } __mode(byte); enum afs_flock_event { afs_flock_events } __mode(byte); enum afs_flock_operation { afs_flock_operations } __mode(byte); @@ -486,6 +493,7 @@ yfs_cm_operations; afs_edit_dir_ops; afs_edit_dir_reasons; afs_eproto_causes; +afs_estate_traces; afs_io_errors; afs_file_errors; afs_flock_types; @@ -1387,14 +1395,43 @@ TRACE_EVENT(afs_alist, __entry->ref) ); +TRACE_EVENT(afs_estate, + TP_PROTO(unsigned int server_debug_id, unsigned int estate_debug_id, + int ref, enum afs_estate_trace reason), + + TP_ARGS(server_debug_id, estate_debug_id, ref, reason), + + TP_STRUCT__entry( + __field(unsigned int, server) + __field(unsigned int, estate) + __field(int, ref) + __field(int, active) + __field(int, reason) + ), + + TP_fast_assign( + __entry->server = server_debug_id; + __entry->estate = estate_debug_id; + __entry->ref = ref; + __entry->reason = reason; + ), + + TP_printk("ES=%08x[%x] %s r=%d", + __entry->server, + __entry->estate, + __print_symbolic(__entry->reason, afs_estate_traces), + __entry->ref) + ); + TRACE_EVENT(afs_fs_probe, - TP_PROTO(struct afs_server *server, bool tx, struct afs_addr_list *alist, + TP_PROTO(struct afs_server *server, bool tx, struct afs_endpoint_state *estate, unsigned int addr_index, int error, s32 abort_code, unsigned int rtt_us), - TP_ARGS(server, tx, alist, addr_index, error, abort_code, rtt_us), + TP_ARGS(server, tx, estate, addr_index, error, abort_code, rtt_us), TP_STRUCT__entry( __field(unsigned int, server) + __field(unsigned int, estate) __field(bool, tx) __field(u16, addr_index) __field(short, error) @@ -1404,7 +1441,9 @@ TRACE_EVENT(afs_fs_probe, ), TP_fast_assign( + struct afs_addr_list *alist = estate->addresses; __entry->server = server->debug_id; + __entry->estate = estate->probe_seq; __entry->tx = tx; __entry->addr_index = addr_index; __entry->error = error; @@ -1414,9 +1453,9 @@ TRACE_EVENT(afs_fs_probe, sizeof(__entry->srx)); ), - TP_printk("s=%08x %s ax=%u e=%d ac=%d rtt=%d %pISpc", - __entry->server, __entry->tx ? "tx" : "rx", __entry->addr_index, - __entry->error, __entry->abort_code, __entry->rtt_us, + TP_printk("s=%08x %s pq=%x ax=%u e=%d ac=%d rtt=%d %pISpc", + __entry->server, __entry->tx ? "tx" : "rx", __entry->estate, + __entry->addr_index, __entry->error, __entry->abort_code, __entry->rtt_us, &__entry->srx.transport) ); -- cgit v1.2.3 From 32222f09782f1894fcfc37f6505ca676a6f4d1d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Nov 2023 17:59:33 +0000 Subject: afs: Apply server breaks to mmap'd files in the call processor Apply server breaks to mmap'd files that are being used from that server from the call processor work function rather than punting it off to a workqueue. The work item, afs_server_init_callback(), then bumps each individual inode off to its own work item introducing a potentially lengthy delay. This reduces that delay at the cost of extending the amount of time we delay replying to the CB.InitCallBack3 notification RPC from the server. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/callback.c | 33 +++++++++++++++++++-------------- fs/afs/cell.c | 2 +- fs/afs/internal.h | 4 +--- fs/afs/server.c | 2 -- fs/afs/server_list.c | 22 +++++++++++----------- include/trace/events/afs.h | 2 ++ 6 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 90f9b2a46ff4..f67e88076761 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -33,9 +33,8 @@ void afs_invalidate_mmap_work(struct work_struct *work) unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false); } -void afs_server_init_callback_work(struct work_struct *work) +static void afs_server_init_callback(struct afs_server *server) { - struct afs_server *server = container_of(work, struct afs_server, initcb_work); struct afs_vnode *vnode; struct afs_cell *cell = server->cell; @@ -57,15 +56,19 @@ void afs_server_init_callback_work(struct work_struct *work) */ void afs_init_callback_state(struct afs_server *server) { - rcu_read_lock(); + struct afs_cell *cell = server->cell; + + down_read(&cell->vs_lock); + do { server->cb_s_break++; atomic_inc(&server->cell->fs_s_break); if (!list_empty(&server->cell->fs_open_mmaps)) - queue_work(system_unbound_wq, &server->initcb_work); + afs_server_init_callback(server); } while ((server = rcu_dereference(server->uuid_next))); - rcu_read_unlock(); + + up_read(&cell->vs_lock); } /* @@ -112,7 +115,7 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, struct rb_node *p; int seq = 1; - do { + for (;;) { /* Unfortunately, rbtree walking doesn't give reliable results * under just the RCU read lock, so we have to check for * changes. @@ -133,7 +136,12 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, volume = NULL; } - } while (need_seqretry(&cell->volume_lock, seq)); + if (volume && afs_try_get_volume(volume, afs_volume_trace_get_callback)) + break; + if (!need_seqretry(&cell->volume_lock, seq)) + break; + seq |= 1; /* Want a lock next time */ + } done_seqretry(&cell->volume_lock, seq); return volume; @@ -188,12 +196,11 @@ static void afs_break_some_callbacks(struct afs_server *server, afs_volid_t vid = cbb->fid.vid; size_t i; + rcu_read_lock(); volume = afs_lookup_volume_rcu(server->cell, vid); - /* TODO: Find all matching volumes if we couldn't match the server and * break them anyway. */ - for (i = *_count; i > 0; cbb++, i--) { if (cbb->fid.vid == vid) { _debug("- Fid { vl=%08llx n=%llu u=%u }", @@ -207,6 +214,9 @@ static void afs_break_some_callbacks(struct afs_server *server, *residue++ = *cbb; } } + + rcu_read_unlock(); + afs_put_volume(volume, afs_volume_trace_put_callback); } /* @@ -219,11 +229,6 @@ void afs_break_callbacks(struct afs_server *server, size_t count, ASSERT(server != NULL); - rcu_read_lock(); - while (count > 0) afs_break_some_callbacks(server, callbacks, &count); - - rcu_read_unlock(); - return; } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 55ee194e31ff..e4b6a80763d7 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -161,7 +161,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, refcount_set(&cell->ref, 1); atomic_set(&cell->active, 0); INIT_WORK(&cell->manager, afs_manage_cell_work); - spin_lock_init(&cell->vs_lock); + init_rwsem(&cell->vs_lock); cell->volumes = RB_ROOT; INIT_HLIST_HEAD(&cell->proc_volumes); seqlock_init(&cell->volume_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 4a3d946b1d2a..5ae4ca999d65 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -414,7 +414,7 @@ struct afs_cell { unsigned int debug_id; /* The volumes belonging to this cell */ - spinlock_t vs_lock; /* Lock for server->volumes */ + struct rw_semaphore vs_lock; /* Lock for server->volumes */ struct rb_root volumes; /* Tree of volumes on this server */ struct hlist_head proc_volumes; /* procfs volume list */ seqlock_t volume_lock; /* For volumes */ @@ -566,7 +566,6 @@ struct afs_server { struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ struct list_head volumes; /* RCU list of afs_server_entry objects */ - struct work_struct initcb_work; /* Work for CB.InitCallBackState* */ struct afs_server *gc_next; /* Next server in manager's list */ time64_t unuse_time; /* Time at which last unused */ unsigned long flags; @@ -1041,7 +1040,6 @@ void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alis * callback.c */ extern void afs_invalidate_mmap_work(struct work_struct *); -extern void afs_server_init_callback_work(struct work_struct *work); extern void afs_init_callback_state(struct afs_server *); extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); diff --git a/fs/afs/server.c b/fs/afs/server.c index db2f66b11b40..e169121f603e 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -218,7 +218,6 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server->uuid = *uuid; rwlock_init(&server->fs_lock); INIT_LIST_HEAD(&server->volumes); - INIT_WORK(&server->initcb_work, afs_server_init_callback_work); init_waitqueue_head(&server->probe_wq); INIT_LIST_HEAD(&server->probe_link); spin_lock_init(&server->probe_lock); @@ -470,7 +469,6 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_give_up_callbacks(net, server); - flush_work(&server->initcb_work); afs_put_server(net, server, afs_server_trace_destroy); } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 4d6369477f54..cfd900eb09ed 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -136,7 +136,7 @@ void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_l struct list_head *p; unsigned int i; - spin_lock(&volume->cell->vs_lock); + down_write(&volume->cell->vs_lock); for (i = 0; i < slist->nr_servers; i++) { se = &slist->servers[i]; @@ -147,11 +147,11 @@ void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_l if (volume->vid <= pe->volume->vid) break; } - list_add_tail_rcu(&se->slink, p); + list_add_tail(&se->slink, p); } slist->attached = true; - spin_unlock(&volume->cell->vs_lock); + up_write(&volume->cell->vs_lock); } /* @@ -164,7 +164,7 @@ void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server { unsigned int n = 0, o = 0; - spin_lock(&volume->cell->vs_lock); + down_write(&volume->cell->vs_lock); while (n < new->nr_servers || o < old->nr_servers) { struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL; @@ -174,7 +174,7 @@ void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server int diff; if (pn && po && pn->server == po->server) { - list_replace_rcu(&po->slink, &pn->slink); + list_replace(&po->slink, &pn->slink); n++; o++; continue; @@ -192,15 +192,15 @@ void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server if (volume->vid <= s->volume->vid) break; } - list_add_tail_rcu(&pn->slink, p); + list_add_tail(&pn->slink, p); n++; } else { - list_del_rcu(&po->slink); + list_del(&po->slink); o++; } } - spin_unlock(&volume->cell->vs_lock); + up_write(&volume->cell->vs_lock); } /* @@ -213,11 +213,11 @@ void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server if (!slist->attached) return; - spin_lock(&volume->cell->vs_lock); + down_write(&volume->cell->vs_lock); for (i = 0; i < slist->nr_servers; i++) - list_del_rcu(&slist->servers[i].slink); + list_del(&slist->servers[i].slink); slist->attached = false; - spin_unlock(&volume->cell->vs_lock); + up_write(&volume->cell->vs_lock); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index cf2fa4fddd5b..63ab23876be8 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -151,9 +151,11 @@ enum yfs_cm_operation { EM(afs_volume_trace_alloc, "ALLOC ") \ EM(afs_volume_trace_free, "FREE ") \ EM(afs_volume_trace_get_alloc_sbi, "GET sbi-alloc ") \ + EM(afs_volume_trace_get_callback, "GET callback ") \ EM(afs_volume_trace_get_cell_insert, "GET cell-insrt") \ EM(afs_volume_trace_get_new_op, "GET op-new ") \ EM(afs_volume_trace_get_query_alias, "GET cell-alias") \ + EM(afs_volume_trace_put_callback, "PUT callback ") \ EM(afs_volume_trace_put_cell_dup, "PUT cell-dup ") \ EM(afs_volume_trace_put_cell_root, "PUT cell-root ") \ EM(afs_volume_trace_put_destroy_sbi, "PUT sbi-destry") \ -- cgit v1.2.3 From 16069e1349a0c5535e189f9dc5d937bfd7631a06 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 5 Nov 2023 16:11:07 +0000 Subject: afs: Parse the VolSync record in the reply of a number of RPC ops A number of fileserver RPC operations return a VolSync record as part of their reply that gives some information about the state of the volume being accessed, including: (1) A volume Creation timestamp. For an RW volume, this is the time at which the volume was created; if it changes, the RW volume was presumably restored from a backup and all cached data should be scrubbed as Data Version numbers could regress on the files in the volume. For an RO volume, this is the time it was last snapshotted from the RW volume. It is expected to advance each time this happens; if it regresses, cached data should be scrubbed. (2) A volume Update timestamp (Auristor only). For an RW volume, this is updated any time any change is made to a volume or its contents. If it regresses, all cached data must be scrubbed. For an RO volume, this is a copy of the RW volume's Update timestamp at the point of snapshotting. It can be used as a version number when checking to see if a callback on a RO volume was due to a snapshot. If it regresses, all cached data must be scrubbed. but this is currently not made use of by the in-kernel afs filesystem. Make the afs filesystem use this by: (1) Add an update time field to the afs_volsync struct and use a value of TIME64_MIN in both that and the creation time to indicate that they are unset. (2) Add creation and update time fields to the afs_volume struct and use this to track the two timestamps. (3) Add a volsync_lock mutex to the afs_volume struct to control modification access for when we detect a change in these values. (3) Add a 'pre-op volsync' struct to the afs_operation struct to record the state of the volume tracking before the op. (4) Add a new counter, cb_scrub, to the afs_volume struct to count events that require all data to be scrubbed. A copy is placed in the afs_vnode struct (inode) and if they no longer match, a scrub takes place. (5) When the result of an operation is being parsed, parse the VolSync data too, if it is provided. Note that the two timestamps are handled separately, since they don't work in quite the same way. - If the afs_volume tracking is unset, just set it and do nothing else. - If the result timestamps are the same as the ones in afs_volume, do nothing. - If the timestamps regress, increment cb_scrub if not already done so. - If the creation timestamp on a RW volume changes, increment cb_scrub if not already done so. - If the creation timestamp on a RO volume advances, update the server list and see if the current server has been excluded, if so reissue the op. Once over half of the replication sites have been updated, increment cb_ro_snapshot to indicate updates may be required and switch over to excluding unupdated replication sites. - If the creation timestamp on a Backup volume advances, just increment cb_ro_snapshot to trigger updates. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/afs.h | 3 +- fs/afs/callback.c | 7 +- fs/afs/fs_operation.c | 14 ++-- fs/afs/fsclient.c | 5 +- fs/afs/inode.c | 2 +- fs/afs/internal.h | 16 +++- fs/afs/rotate.c | 4 +- fs/afs/validation.c | 199 ++++++++++++++++++++++++++++++++++++++++++++- fs/afs/volume.c | 3 + fs/afs/yfsclient.c | 5 +- include/trace/events/afs.h | 30 ++++++- 11 files changed, 268 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 81815724db6c..b488072aee87 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -165,7 +165,8 @@ struct afs_status_cb { * AFS volume synchronisation information */ struct afs_volsync { - time64_t creation; /* volume creation time */ + time64_t creation; /* Volume creation time (or TIME64_MIN) */ + time64_t update; /* Volume update time (or TIME64_MIN) */ }; /* diff --git a/fs/afs/callback.c b/fs/afs/callback.c index f67e88076761..8ddc99c9c16b 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -81,7 +81,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; - vnode->cb_v_break = vnode->volume->cb_v_break; + vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); afs_clear_permits(vnode); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) @@ -159,12 +159,13 @@ static void afs_break_one_callback(struct afs_volume *volume, struct super_block *sb; struct afs_vnode *vnode; struct inode *inode; + unsigned int cb_v_break; if (fid->vnode == 0 && fid->unique == 0) { /* The callback break applies to an entire volume. */ write_lock(&volume->cb_v_break_lock); - volume->cb_v_break++; - trace_afs_cb_break(fid, volume->cb_v_break, + cb_v_break = atomic_inc_return(&volume->cb_v_break); + trace_afs_cb_break(fid, cb_v_break, afs_cb_break_for_volume_callback, false); write_unlock(&volume->cb_v_break_lock); return; diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 10137681aa7d..99d1e649e929 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -35,11 +35,13 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo key_get(key); } - op->key = key; - op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op); - op->net = volume->cell->net; - op->cb_v_break = volume->cb_v_break; - op->debug_id = atomic_inc_return(&afs_operation_debug_counter); + op->key = key; + op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op); + op->net = volume->cell->net; + op->cb_v_break = atomic_read(&volume->cb_v_break); + op->pre_volsync.creation = volume->creation_time; + op->pre_volsync.update = volume->update_time; + op->debug_id = atomic_inc_return(&afs_operation_debug_counter); op->nr_iterations = -1; afs_op_set_error(op, -EDESTADDRREQ); @@ -147,7 +149,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op) afs_prepare_vnode(op, &op->file[0], 0); afs_prepare_vnode(op, &op->file[1], 1); - op->cb_v_break = op->volume->cb_v_break; + op->cb_v_break = atomic_read(&op->volume->cb_v_break); _leave(" = true"); return true; } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index f1f879ba9cf7..80f7d9e796e3 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1870,7 +1870,10 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSVolSync(&bp, &op->volsync); + /* Unfortunately, prior to OpenAFS-1.6, volsync here is filled + * with rubbish. + */ + xdr_decode_AFSVolSync(&bp, NULL); call->unmarshall++; fallthrough; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 102e7c37d33c..df3d37577b5b 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -542,7 +542,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_break = as->volume->cb_v_break, + vnode->cb_v_break = atomic_read(&as->volume->cb_v_break), afs_set_netfs_context(vnode); op = afs_alloc_operation(key, as->volume); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3d90415c2527..4b730cbcf63e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -662,7 +662,15 @@ struct afs_volume { rwlock_t servers_lock; /* Lock for ->servers */ unsigned int servers_seq; /* Incremented each time ->servers changes */ - unsigned cb_v_break; /* Break-everything counter. */ + /* RO release tracking */ + struct mutex volsync_lock; /* Time/state evaluation lock */ + time64_t creation_time; /* Volume creation time (or TIME64_MIN) */ + time64_t update_time; /* Volume update time (or TIME64_MIN) */ + + /* Callback management */ + atomic_t cb_ro_snapshot; /* RO volume update-from-snapshot counter */ + atomic_t cb_v_break; /* Volume-break event counter. */ + atomic_t cb_scrub; /* Scrub-all-data event counter. */ rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ @@ -856,7 +864,8 @@ struct afs_operation { struct afs_volume *volume; /* Volume being accessed */ struct afs_vnode_param file[2]; struct afs_vnode_param *more_files; - struct afs_volsync volsync; + struct afs_volsync pre_volsync; /* Volsync before op */ + struct afs_volsync volsync; /* Volsync returned by op */ struct dentry *dentry; /* Dentry to be altered */ struct dentry *dentry_2; /* Second dentry to be altered */ struct timespec64 mtime; /* Modification time to record */ @@ -1063,7 +1072,7 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) static inline bool afs_cb_is_broken(unsigned int cb_break, const struct afs_vnode *vnode) { - return cb_break != (vnode->cb_break + vnode->volume->cb_v_break); + return cb_break != (vnode->cb_break + atomic_read(&vnode->volume->cb_v_break)); } /* @@ -1555,6 +1564,7 @@ extern void afs_fs_exit(void); /* * validation.c */ +int afs_update_volume_state(struct afs_operation *op); bool afs_check_validity(struct afs_vnode *vnode); bool afs_pagecache_valid(struct afs_vnode *vnode); int afs_validate(struct afs_vnode *vnode, struct key *key); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 3ab85a907a1d..5c50c9aa1f87 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -486,7 +486,7 @@ selected_server: vnode->cb_server = server; vnode->cb_s_break = server->cb_s_break; vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break); - vnode->cb_v_break = vnode->volume->cb_v_break; + vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } @@ -519,6 +519,8 @@ iterate_address: op->addr_index = addr_index; set_bit(addr_index, &op->addr_tried); + op->volsync.creation = TIME64_MIN; + op->volsync.update = TIME64_MIN; op->call_responded = false; _debug("address [%u] %u/%u %pISp", op->server_index, addr_index, alist->nr_addrs, diff --git a/fs/afs/validation.c b/fs/afs/validation.c index 18ba2c5e8ead..6aadd5e075e4 100644 --- a/fs/afs/validation.c +++ b/fs/afs/validation.c @@ -10,6 +10,201 @@ #include #include "internal.h" +/* + * See if the server we've just talked to is currently excluded. + */ +static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume) +{ + const struct afs_server_entry *se; + const struct afs_server_list *slist; + bool is_excluded = true; + int i; + + rcu_read_lock(); + + slist = rcu_dereference(volume->servers); + for (i = 0; i < slist->nr_servers; i++) { + se = &slist->servers[i]; + if (op->server == se->server) { + is_excluded = test_bit(AFS_SE_EXCLUDED, &se->flags); + break; + } + } + + rcu_read_unlock(); + return is_excluded; +} + +/* + * Update the volume's server list when the creation time changes and see if + * the server we've just talked to is currently excluded. + */ +static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume) +{ + int ret; + + if (__afs_is_server_excluded(op, volume)) + return 1; + + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + ret = afs_check_volume_status(op->volume, op); + if (ret < 0) + return ret; + + return __afs_is_server_excluded(op, volume); +} + +/* + * Handle a change to the volume creation time in the VolSync record. + */ +static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume) +{ + unsigned int snap; + time64_t cur = volume->creation_time; + time64_t old = op->pre_volsync.creation; + time64_t new = op->volsync.creation; + int ret; + + _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new); + + if (cur == TIME64_MIN) { + volume->creation_time = new; + return 0; + } + + if (new == cur) + return 0; + + /* Try to advance the creation timestamp from what we had before the + * operation to what we got back from the server. This should + * hopefully ensure that in a race between multiple operations only one + * of them will do this. + */ + if (cur != old) + return 0; + + /* If the creation time changes in an unexpected way, we need to scrub + * our caches. For a RW vol, this will only change if the volume is + * restored from a backup; for a RO/Backup vol, this will advance when + * the volume is updated to a new snapshot (eg. "vos release"). + */ + if (volume->type == AFSVL_RWVOL) + goto regressed; + if (volume->type == AFSVL_BACKVOL) { + if (new < old) + goto regressed; + goto advance; + } + + /* We have an RO volume, we need to query the VL server and look at the + * server flags to see if RW->RO replication is in progress. + */ + ret = afs_is_server_excluded(op, volume); + if (ret < 0) + return ret; + if (ret > 0) { + snap = atomic_read(&volume->cb_ro_snapshot); + trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_volume_excluded); + return ret; + } + +advance: + snap = atomic_inc_return(&volume->cb_ro_snapshot); + trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_for_vos_release); + volume->creation_time = new; + return 0; + +regressed: + atomic_inc(&volume->cb_scrub); + trace_afs_cb_v_break(volume->vid, 0, afs_cb_break_for_creation_regress); + volume->creation_time = new; + return 0; +} + +/* + * Handle a change to the volume update time in the VolSync record. + */ +static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume) +{ + enum afs_cb_break_reason reason = afs_cb_break_no_break; + time64_t cur = volume->update_time; + time64_t old = op->pre_volsync.update; + time64_t new = op->volsync.update; + + _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new); + + if (cur == TIME64_MIN) { + volume->update_time = new; + return; + } + + if (new == cur) + return; + + /* If the volume update time changes in an unexpected way, we need to + * scrub our caches. For a RW vol, this will advance on every + * modification op; for a RO/Backup vol, this will advance when the + * volume is updated to a new snapshot (eg. "vos release"). + */ + if (new < old) + reason = afs_cb_break_for_update_regress; + + /* Try to advance the update timestamp from what we had before the + * operation to what we got back from the server. This should + * hopefully ensure that in a race between multiple operations only one + * of them will do this. + */ + if (cur == old) { + if (reason == afs_cb_break_for_update_regress) { + atomic_inc(&volume->cb_scrub); + trace_afs_cb_v_break(volume->vid, 0, reason); + } + volume->update_time = new; + } +} + +static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume) +{ + int ret = 0; + + if (likely(op->volsync.creation == volume->creation_time && + op->volsync.update == volume->update_time)) + return 0; + + mutex_lock(&volume->volsync_lock); + if (op->volsync.creation != volume->creation_time) { + ret = afs_update_volume_creation_time(op, volume); + if (ret < 0) + goto out; + } + if (op->volsync.update != volume->update_time) + afs_update_volume_update_time(op, volume); +out: + mutex_unlock(&volume->volsync_lock); + return ret; +} + +/* + * Update the state of a volume. Returns 1 to redo the operation from the start. + */ +int afs_update_volume_state(struct afs_operation *op) +{ + struct afs_volume *volume = op->volume; + int ret; + + _enter("%llx", op->volume->vid); + + if (op->volsync.creation != TIME64_MIN || op->volsync.update != TIME64_MIN) { + ret = afs_update_volume_times(op, volume); + if (ret != 0) { + _leave(" = %d", ret); + return ret; + } + } + + return 0; +} + /* * mark the data attached to an inode as obsolete due to a write on the server * - might also want to ditch all the outstanding writes and dirty pages @@ -74,7 +269,7 @@ bool afs_check_validity(struct afs_vnode *vnode) cb_break = vnode->cb_break; if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_v_break != vnode->volume->cb_v_break) + if (vnode->cb_v_break != atomic_read(&vnode->volume->cb_v_break)) need_clear = afs_cb_break_for_v_break; else if (!afs_check_server_good(vnode)) need_clear = afs_cb_break_for_s_reinit; @@ -95,7 +290,7 @@ bool afs_check_validity(struct afs_vnode *vnode) write_seqlock(&vnode->cb_lock); if (need_clear == afs_cb_break_no_promise) - vnode->cb_v_break = vnode->volume->cb_v_break; + vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); else if (cb_break == vnode->cb_break) __afs_break_callback(vnode, need_clear); else diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 4982fce25057..41ab1d3ff3ea 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -90,11 +90,14 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, volume->type = params->type; volume->type_force = params->force; volume->name_len = vldb->name_len; + volume->creation_time = TIME64_MIN; + volume->update_time = TIME64_MIN; refcount_set(&volume->ref, 1); INIT_HLIST_NODE(&volume->proc_link); INIT_WORK(&volume->destructor, afs_destroy_volume); rwlock_init(&volume->servers_lock); + mutex_init(&volume->volsync_lock); rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 11571cca86c1..2d6943f05ea5 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -245,12 +245,15 @@ static void xdr_decode_YFSVolSync(const __be32 **_bp, struct afs_volsync *volsync) { struct yfs_xdr_YFSVolSync *x = (void *)*_bp; - u64 creation; + u64 creation, update; if (volsync) { creation = xdr_to_u64(x->vol_creation_date); do_div(creation, 10 * 1000 * 1000); volsync->creation = creation; + update = xdr_to_u64(x->vol_update_date); + do_div(update, 10 * 1000 * 1000); + volsync->update = update; } *_bp += xdr_size(x); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 63ab23876be8..bbe8dcab4b32 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -440,13 +440,17 @@ enum yfs_cm_operation { EM(afs_cb_break_no_break, "no-break") \ EM(afs_cb_break_no_promise, "no-promise") \ EM(afs_cb_break_for_callback, "break-cb") \ + EM(afs_cb_break_for_creation_regress, "creation-regress") \ EM(afs_cb_break_for_deleted, "break-del") \ EM(afs_cb_break_for_lapsed, "break-lapsed") \ EM(afs_cb_break_for_s_reinit, "s-reinit") \ EM(afs_cb_break_for_unlink, "break-unlink") \ + EM(afs_cb_break_for_update_regress, "update-regress") \ EM(afs_cb_break_for_v_break, "break-v") \ EM(afs_cb_break_for_volume_callback, "break-v-cb") \ - E_(afs_cb_break_for_zap, "break-zap") + EM(afs_cb_break_for_vos_release, "break-vos-release") \ + EM(afs_cb_break_for_zap, "break-zap") \ + E_(afs_cb_break_volume_excluded, "vol-excluded") /* * Generate enums for tracing information. @@ -1249,6 +1253,30 @@ TRACE_EVENT(afs_get_tree, __entry->cell, __entry->volume, __entry->vid) ); +TRACE_EVENT(afs_cb_v_break, + TP_PROTO(afs_volid_t vid, unsigned int cb_v_break, + enum afs_cb_break_reason reason), + + TP_ARGS(vid, cb_v_break, reason), + + TP_STRUCT__entry( + __field(afs_volid_t, vid) + __field(unsigned int, cb_v_break) + __field(enum afs_cb_break_reason, reason) + ), + + TP_fast_assign( + __entry->vid = vid; + __entry->cb_v_break = cb_v_break; + __entry->reason = reason; + ), + + TP_printk("%llx vb=%x %s", + __entry->vid, + __entry->cb_v_break, + __print_symbolic(__entry->reason, afs_cb_break_reasons)) + ); + TRACE_EVENT(afs_cb_break, TP_PROTO(struct afs_fid *fid, unsigned int cb_break, enum afs_cb_break_reason reason, bool skipped), -- cgit v1.2.3 From 453924de6212ac159f946b75c6b59918e2e30944 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Nov 2023 13:57:42 +0000 Subject: afs: Overhaul invalidation handling to better support RO volumes Overhaul the third party-induced invalidation handling, making use of the previously added volume-level event counters (cb_scrub and cb_ro_snapshot) that are now being parsed out of the VolSync record returned by the fileserver in many of its replies. This allows better handling of RO (and Backup) volumes. Since these are snapshot of a RW volume that are updated atomically simultantanously across all servers that host them, they only require a single callback promise for the entire volume. The currently upstream code assumes that RO volumes operate in the same manner as RW volumes, and that each file has its own individual callback - which means that it does a status fetch for *every* file in a RO volume, whether or not the volume got "released" (volume callback breaks can occur for other reasons too, such as the volumeserver taking ownership of a volume from a fileserver). To this end, make the following changes: (1) Change the meaning of the volume's cb_v_break counter so that it is now a hint that we need to issue a status fetch to work out the state of a volume. cb_v_break is incremented by volume break callbacks and by server initialisation callbacks. (2) Add a second counter, cb_v_check, to the afs_volume struct such that if this differs from cb_v_break, we need to do a check. When the check is complete, cb_v_check is advanced to what cb_v_break was at the start of the status fetch. (3) Move the list of mmap'd vnodes to the volume and trigger removal of PTEs that map to files on a volume break rather than on a server break. (4) When a server reinitialisation callback comes in, use the server-to-volume reverse mapping added in a preceding patch to iterate over all the volumes using that server and clear the volume callback promises for that server and the general volume promise as a whole to trigger reanalysis. (5) Replace the AFS_VNODE_CB_PROMISED flag with an AFS_NO_CB_PROMISE (TIME64_MIN) value in the cb_expires_at field, reducing the number of checks we need to make. (6) Change afs_check_validity() to quickly see if various event counters have been incremented or if the vnode or volume callback promise is due to expire/has expired without making any changes to the state. That is now left to afs_validate() as this may get more complicated in future as we may have to examine server records too. (7) Overhaul afs_validate() so that it does a single status fetch if we need to check the state of either the vnode or the volume - and do so under appropriate locking. The function does the following steps: (A) If the vnode/volume is no longer seen as valid, then we take the vnode validation lock and, if the volume promise has expired, the volume check lock also. The latter prevents redundant checks being made to find out if a new version of the volume got released. (B) If a previous RPC call found that the volsync changed unexpectedly or that a RO volume was updated, then we unmap all PTEs pointing to the file to stop mmap being used for access. (C) If the vnode is still seen to be of uncertain validity, then we perform an FS.FetchStatus RPC op to jointly update the volume status and the vnode status. This assessment is done as part of parsing the reply: If the RO volume creation timestamp advances, cb_ro_snapshot is incremented; if either the creation or update timestamps changes in an unexpected way, the cb_scrub counter is incremented If the Data Version returned doesn't match the copy we have locally, then we ask for the pagecache to be zapped. This takes care of handling RO update. (D) If cb_scrub differs between volume and vnode, the vnode's pagecache is zapped and the vnode's cb_scrub is updated unless the file is marked as having been deleted. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/callback.c | 122 +++++++++++------- fs/afs/cell.c | 2 - fs/afs/dir.c | 10 +- fs/afs/file.c | 13 +- fs/afs/fs_operation.c | 3 +- fs/afs/inode.c | 21 ++-- fs/afs/internal.h | 34 ++--- fs/afs/proc.c | 4 +- fs/afs/rotate.c | 24 +++- fs/afs/server_list.c | 2 + fs/afs/validation.c | 305 +++++++++++++++++++++++++++++---------------- fs/afs/volume.c | 3 + include/trace/events/afs.h | 4 - 13 files changed, 344 insertions(+), 203 deletions(-) (limited to 'include') diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 8ddc99c9c16b..99b2c8172021 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -33,21 +33,20 @@ void afs_invalidate_mmap_work(struct work_struct *work) unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false); } -static void afs_server_init_callback(struct afs_server *server) +static void afs_volume_init_callback(struct afs_volume *volume) { struct afs_vnode *vnode; - struct afs_cell *cell = server->cell; - down_read(&cell->fs_open_mmaps_lock); + down_read(&volume->open_mmaps_lock); - list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) { - if (vnode->cb_server == server) { - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) { + if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) { + atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); queue_work(system_unbound_wq, &vnode->cb_work); } } - up_read(&cell->fs_open_mmaps_lock); + up_read(&volume->open_mmaps_lock); } /* @@ -56,19 +55,20 @@ static void afs_server_init_callback(struct afs_server *server) */ void afs_init_callback_state(struct afs_server *server) { - struct afs_cell *cell = server->cell; + struct afs_server_entry *se; - down_read(&cell->vs_lock); + down_read(&server->cell->vs_lock); - do { - server->cb_s_break++; - atomic_inc(&server->cell->fs_s_break); - if (!list_empty(&server->cell->fs_open_mmaps)) - afs_server_init_callback(server); - - } while ((server = rcu_dereference(server->uuid_next))); + list_for_each_entry(se, &server->volumes, slink) { + se->cb_expires_at = AFS_NO_CB_PROMISE; + se->volume->cb_expires_at = AFS_NO_CB_PROMISE; + trace_afs_cb_v_break(se->volume->vid, atomic_read(&se->volume->cb_v_break), + afs_cb_break_for_s_reinit); + if (!list_empty(&se->volume->open_mmaps)) + afs_volume_init_callback(se->volume); + } - up_read(&cell->vs_lock); + up_read(&server->cell->vs_lock); } /* @@ -79,9 +79,9 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas _enter(""); clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) { vnode->cb_break++; - vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); + vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break); afs_clear_permits(vnode); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) @@ -147,29 +147,51 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, return volume; } +/* + * Allow the fileserver to break callbacks at the volume-level. This is + * typically done when, for example, a R/W volume is snapshotted to a R/O + * volume (the only way to change an R/O volume). It may also, however, happen + * when a volserver takes control of a volume (offlining it, moving it, etc.). + * + * Every file in that volume will need to be reevaluated. + */ +static void afs_break_volume_callback(struct afs_server *server, + struct afs_volume *volume) + __releases(RCU) +{ + struct afs_server_list *slist = rcu_dereference(volume->servers); + unsigned int i, cb_v_break; + + write_lock(&volume->cb_v_break_lock); + + for (i = 0; i < slist->nr_servers; i++) + if (slist->servers[i].server == server) + slist->servers[i].cb_expires_at = AFS_NO_CB_PROMISE; + volume->cb_expires_at = AFS_NO_CB_PROMISE; + + cb_v_break = atomic_inc_return_release(&volume->cb_v_break); + trace_afs_cb_v_break(volume->vid, cb_v_break, afs_cb_break_for_volume_callback); + + write_unlock(&volume->cb_v_break_lock); + rcu_read_unlock(); + + if (!list_empty(&volume->open_mmaps)) + afs_volume_init_callback(volume); +} + /* * allow the fileserver to explicitly break one callback * - happens when * - the backing file is changed * - a lock is released */ -static void afs_break_one_callback(struct afs_volume *volume, +static void afs_break_one_callback(struct afs_server *server, + struct afs_volume *volume, struct afs_fid *fid) { struct super_block *sb; struct afs_vnode *vnode; struct inode *inode; - unsigned int cb_v_break; - - if (fid->vnode == 0 && fid->unique == 0) { - /* The callback break applies to an entire volume. */ - write_lock(&volume->cb_v_break_lock); - cb_v_break = atomic_inc_return(&volume->cb_v_break); - trace_afs_cb_break(fid, cb_v_break, - afs_cb_break_for_volume_callback, false); - write_unlock(&volume->cb_v_break_lock); - return; - } /* See if we can find a matching inode - even an I_NEW inode needs to * be marked as it can have its callback broken before we finish @@ -199,24 +221,32 @@ static void afs_break_some_callbacks(struct afs_server *server, rcu_read_lock(); volume = afs_lookup_volume_rcu(server->cell, vid); - /* TODO: Find all matching volumes if we couldn't match the server and - * break them anyway. - */ - for (i = *_count; i > 0; cbb++, i--) { - if (cbb->fid.vid == vid) { - _debug("- Fid { vl=%08llx n=%llu u=%u }", - cbb->fid.vid, - cbb->fid.vnode, - cbb->fid.unique); - --*_count; - if (volume) - afs_break_one_callback(volume, &cbb->fid); - } else { - *residue++ = *cbb; + if (cbb->fid.vnode == 0 && cbb->fid.unique == 0) { + afs_break_volume_callback(server, volume); + *_count -= 1; + if (*_count) + memmove(cbb, cbb + 1, sizeof(*cbb) * *_count); + } else { + /* TODO: Find all matching volumes if we couldn't match the server and + * break them anyway. + */ + + for (i = *_count; i > 0; cbb++, i--) { + if (cbb->fid.vid == vid) { + _debug("- Fid { vl=%08llx n=%llu u=%u }", + cbb->fid.vid, + cbb->fid.vnode, + cbb->fid.unique); + --*_count; + if (volume) + afs_break_one_callback(server, volume, &cbb->fid); + } else { + *residue++ = *cbb; + } } + rcu_read_unlock(); } - rcu_read_unlock(); afs_put_volume(volume, afs_volume_trace_put_callback); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index e4b6a80763d7..caa09875f520 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -167,8 +167,6 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, seqlock_init(&cell->volume_lock); cell->fs_servers = RB_ROOT; seqlock_init(&cell->fs_lock); - INIT_LIST_HEAD(&cell->fs_open_mmaps); - init_rwsem(&cell->fs_open_mmaps_lock); rwlock_init(&cell->vl_servers_lock); cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e232f713ece1..c14533ef108f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1118,7 +1118,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) dir = AFS_FS_I(d_inode(parent)); /* validate the parent directory */ - afs_validate(dir, key); + ret = afs_validate(dir, key); + if (ret == -ERESTARTSYS) { + dput(parent); + key_put(key); + return ret; + } if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); @@ -1260,6 +1265,7 @@ void afs_check_for_remote_deletion(struct afs_operation *op) switch (afs_op_abort_code(op)) { case VNOVNODE: set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_nlink(&vnode->netfs.inode); afs_break_callback(vnode, afs_cb_break_for_deleted); } } @@ -1375,7 +1381,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) clear_nlink(&vnode->netfs.inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); } } diff --git a/fs/afs/file.c b/fs/afs/file.c index 8f9b42427569..30914e0d9cb2 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -514,13 +514,12 @@ static bool afs_release_folio(struct folio *folio, gfp_t gfp) static void afs_add_open_mmap(struct afs_vnode *vnode) { if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { - down_write(&vnode->volume->cell->fs_open_mmaps_lock); + down_write(&vnode->volume->open_mmaps_lock); if (list_empty(&vnode->cb_mmap_link)) - list_add_tail(&vnode->cb_mmap_link, - &vnode->volume->cell->fs_open_mmaps); + list_add_tail(&vnode->cb_mmap_link, &vnode->volume->open_mmaps); - up_write(&vnode->volume->cell->fs_open_mmaps_lock); + up_write(&vnode->volume->open_mmaps_lock); } } @@ -529,12 +528,12 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode) if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) return; - down_write(&vnode->volume->cell->fs_open_mmaps_lock); + down_write(&vnode->volume->open_mmaps_lock); if (atomic_read(&vnode->cb_nr_mmap) == 0) list_del_init(&vnode->cb_mmap_link); - up_write(&vnode->volume->cell->fs_open_mmaps_lock); + up_write(&vnode->volume->open_mmaps_lock); flush_work(&vnode->cb_work); } @@ -570,7 +569,7 @@ static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pg { struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file)); - if (afs_pagecache_valid(vnode)) + if (afs_check_validity(vnode)) return filemap_map_pages(vmf, start_pgoff, end_pgoff); return 0; } diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 99d1e649e929..cecc44af6a5f 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -42,7 +42,7 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo op->pre_volsync.creation = volume->creation_time; op->pre_volsync.update = volume->update_time; op->debug_id = atomic_inc_return(&afs_operation_debug_counter); - op->nr_iterations = -1; + op->nr_iterations = -1; afs_op_set_error(op, -EDESTADDRREQ); _leave(" = [op=%08x]", op->debug_id); @@ -184,7 +184,6 @@ void afs_wait_for_operation(struct afs_operation *op) op->call_responded = false; op->call_error = 0; op->call_abort_code = 0; - op->cb_s_break = op->server->cb_s_break; if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && op->ops->issue_yfs_rpc) op->ops->issue_yfs_rpc(op); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index df3d37577b5b..4f04f6f33f46 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -85,8 +85,7 @@ static int afs_inode_init_from_status(struct afs_operation *op, write_seqlock(&vnode->cb_lock); - vnode->cb_v_break = op->cb_v_break; - vnode->cb_s_break = op->cb_s_break; + vnode->cb_v_check = op->cb_v_break; vnode->status = *status; t = status->mtime_client; @@ -146,11 +145,10 @@ static int afs_inode_init_from_status(struct afs_operation *op, if (!vp->scb.have_cb) { /* it's a symlink we just created (the fileserver * didn't give us a callback) */ - vnode->cb_expires_at = ktime_get_real_seconds(); + atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); } else { - vnode->cb_expires_at = vp->scb.callback.expires_at; vnode->cb_server = op->server; - set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at); } write_sequnlock(&vnode->cb_lock); @@ -214,7 +212,8 @@ static void afs_apply_status(struct afs_operation *op, vnode->status = *status; if (vp->dv_before + vp->dv_delta != status->data_version) { - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) && + atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n", vnode->fid.vid, vnode->fid.vnode, (unsigned long long)vp->dv_before + vp->dv_delta, @@ -268,9 +267,9 @@ static void afs_apply_callback(struct afs_operation *op, struct afs_vnode *vnode = vp->vnode; if (!afs_cb_is_broken(vp->cb_break_before, vnode)) { - vnode->cb_expires_at = cb->expires_at; - vnode->cb_server = op->server; - set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + if (op->volume->type == AFSVL_RWVOL) + vnode->cb_server = op->server; + atomic64_set(&vnode->cb_expires_at, cb->expires_at); } } @@ -542,7 +541,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_break = atomic_read(&as->volume->cb_v_break), + vnode->cb_v_check = atomic_read(&as->volume->cb_v_break), afs_set_netfs_context(vnode); op = afs_alloc_operation(key, as->volume); @@ -587,7 +586,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path, if (vnode->volume && !(query_flags & AT_STATX_DONT_SYNC) && - !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) return PTR_ERR(key); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 4b730cbcf63e..6d0cd886b548 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -422,9 +422,6 @@ struct afs_cell { /* Active fileserver interaction state. */ struct rb_root fs_servers; /* afs_server (by server UUID) */ seqlock_t fs_lock; /* For fs_servers */ - struct rw_semaphore fs_open_mmaps_lock; - struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */ - atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */ /* VL server list. */ rwlock_t vl_servers_lock; /* Lock on vl_servers */ @@ -591,9 +588,6 @@ struct afs_server { /* file service access */ rwlock_t fs_lock; /* access lock */ - /* callback promise management */ - unsigned cb_s_break; /* Break-everything counter. */ - /* Probe state */ struct afs_endpoint_state __rcu *endpoint_state; /* Latest endpoint/probe state */ unsigned long probed_at; /* Time last probe was dispatched (jiffies) */ @@ -615,6 +609,7 @@ struct afs_server_entry { struct afs_server *server; struct afs_volume *volume; struct list_head slink; /* Link in server->volumes */ + time64_t cb_expires_at; /* Time at which volume-level callback expires */ unsigned long flags; #define AFS_SE_EXCLUDED 0 /* Set if server is to be excluded in rotation */ }; @@ -668,10 +663,15 @@ struct afs_volume { time64_t update_time; /* Volume update time (or TIME64_MIN) */ /* Callback management */ + struct mutex cb_check_lock; /* Lock to control race to check after v_break */ + time64_t cb_expires_at; /* Earliest volume callback expiry time */ atomic_t cb_ro_snapshot; /* RO volume update-from-snapshot counter */ atomic_t cb_v_break; /* Volume-break event counter. */ + atomic_t cb_v_check; /* Volume-break has-been-checked counter. */ atomic_t cb_scrub; /* Scrub-all-data event counter. */ rwlock_t cb_v_break_lock; + struct rw_semaphore open_mmaps_lock; + struct list_head open_mmaps; /* List of vnodes that are mmapped */ afs_voltype_t type; /* type of volume */ char type_force; /* force volume type (suppress R/O -> R/W) */ @@ -710,7 +710,6 @@ struct afs_vnode { spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; -#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ #define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ @@ -736,13 +735,14 @@ struct afs_vnode { struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */ void *cb_server; /* Server with callback/filelock */ atomic_t cb_nr_mmap; /* Number of mmaps */ - unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */ - unsigned int cb_s_break; /* Mass break counter on ->server */ - unsigned int cb_v_break; /* Mass break counter on ->volume */ + unsigned int cb_ro_snapshot; /* RO volume release counter on ->volume */ + unsigned int cb_scrub; /* Scrub counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ + unsigned int cb_v_check; /* Break check counter on ->volume */ seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */ - time64_t cb_expires_at; /* time at which callback expires */ + atomic64_t cb_expires_at; /* time at which callback expires */ +#define AFS_NO_CB_PROMISE TIME64_MIN }; static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) @@ -839,7 +839,7 @@ struct afs_vnode_param { struct afs_fid fid; /* Fid to access */ struct afs_status_cb scb; /* Returned status and callback promise */ afs_dataversion_t dv_before; /* Data version before the call */ - unsigned int cb_break_before; /* cb_break + cb_s_break before the call */ + unsigned int cb_break_before; /* cb_break before the call */ u8 dv_delta; /* Expected change in data version */ bool put_vnode:1; /* T if we have a ref on the vnode */ bool need_io_lock:1; /* T if we need the I/O lock on this */ @@ -875,7 +875,6 @@ struct afs_operation { unsigned int debug_id; unsigned int cb_v_break; /* Volume break counter before op */ - unsigned int cb_s_break; /* Server break counter before op */ union { struct { @@ -1066,13 +1065,15 @@ extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) { - return vnode->cb_break + vnode->cb_v_break; + return vnode->cb_break + vnode->cb_ro_snapshot + vnode->cb_scrub; } static inline bool afs_cb_is_broken(unsigned int cb_break, const struct afs_vnode *vnode) { - return cb_break != (vnode->cb_break + atomic_read(&vnode->volume->cb_v_break)); + return cb_break != (vnode->cb_break + + atomic_read(&vnode->volume->cb_ro_snapshot) + + atomic_read(&vnode->volume->cb_scrub)); } /* @@ -1564,9 +1565,8 @@ extern void afs_fs_exit(void); /* * validation.c */ +bool afs_check_validity(const struct afs_vnode *vnode); int afs_update_volume_state(struct afs_operation *op); -bool afs_check_validity(struct afs_vnode *vnode); -bool afs_pagecache_valid(struct afs_vnode *vnode); int afs_validate(struct afs_vnode *vnode, struct key *key); /* diff --git a/fs/afs/proc.c b/fs/afs/proc.c index a138022d8e0d..3bd02571f30d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -443,8 +443,8 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) refcount_read(&server->ref), atomic_read(&server->active), server->cell->name); - seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n", - server->flags, server->rtt, server->cb_s_break); + seq_printf(m, " - info: fl=%lx rtt=%u\n", + server->flags, server->rtt); seq_printf(m, " - probe: last=%d\n", (int)(jiffies - server->probed_at) / HZ); failed = estate->failed_set; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 5c50c9aa1f87..a8554b4d91b8 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -59,7 +59,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op, write_seqlock(&vnode->cb_lock); ASSERTCMP(cb_server, ==, vnode->cb_server); vnode->cb_server = NULL; - if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) vnode->cb_break++; write_sequnlock(&vnode->cb_lock); } @@ -140,6 +140,22 @@ bool afs_select_fileserver(struct afs_operation *op) switch (op->call_error) { case 0: op->cumul_error.responded = true; + + /* We succeeded, but we may need to redo the op from another + * server if we're looking at a set of RO volumes where some of + * the servers have not yet been brought up to date lest we + * regress the data. We only switch to the new version once + * >=50% of the servers are updated. + */ + error = afs_update_volume_state(op); + if (error != 0) { + if (error == 1) { + afs_sleep_and_retry(op); + goto restart_from_beginning; + } + afs_op_set_error(op, error); + goto failed; + } fallthrough; default: /* Success or local failure. Stop. */ @@ -484,10 +500,8 @@ selected_server: op->server = server; if (vnode->cb_server != server) { vnode->cb_server = server; - vnode->cb_s_break = server->cb_s_break; - vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break); - vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break); + atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); } read_lock(&server->fs_lock); diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index fb0f4afcb304..ac4a7afff45e 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -110,6 +110,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, slist->servers[j].server = server; slist->servers[j].volume = volume; slist->servers[j].flags = se_flags; + slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE; slist->nr_servers++; } @@ -210,6 +211,7 @@ void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server int diff; if (pn && po && pn->server == po->server) { + pn->cb_expires_at = po->cb_expires_at; list_replace(&po->slink, &pn->slink); n++; o++; diff --git a/fs/afs/validation.c b/fs/afs/validation.c index 6aadd5e075e4..46b37f2cce7d 100644 --- a/fs/afs/validation.c +++ b/fs/afs/validation.c @@ -10,6 +10,131 @@ #include #include "internal.h" +/* + * Data validation is managed through a number of mechanisms from the server: + * + * (1) On first contact with a server (such as if it has just been rebooted), + * the server sends us a CB.InitCallBackState* request. + * + * (2) On a RW volume, in response to certain vnode (inode)-accessing RPC + * calls, the server maintains a time-limited per-vnode promise that it + * will send us a CB.CallBack request if a third party alters the vnodes + * accessed. + * + * Note that a vnode-level callbacks may also be sent for other reasons, + * such as filelock release. + * + * (3) On a RO (or Backup) volume, in response to certain vnode-accessing RPC + * calls, each server maintains a time-limited per-volume promise that it + * will send us a CB.CallBack request if the RO volume is updated to a + * snapshot of the RW volume ("vos release"). This is an atomic event + * that cuts over all instances of the RO volume across multiple servers + * simultaneously. + * + * Note that a volume-level callbacks may also be sent for other reasons, + * such as the volumeserver taking over control of the volume from the + * fileserver. + * + * Note also that each server maintains an independent time limit on an + * independent callback. + * + * (4) Certain RPC calls include a volume information record "VolSync" in + * their reply. This contains a creation date for the volume that should + * remain unchanged for a RW volume (but will be changed if the volume is + * restored from backup) or will be bumped to the time of snapshotting + * when a RO volume is released. + * + * In order to track this events, the following are provided: + * + * ->cb_v_break. A counter of events that might mean that the contents of + * a volume have been altered since we last checked a vnode. + * + * ->cb_v_check. A counter of the number of events that we've sent a + * query to the server for. Everything's up to date if this equals + * cb_v_break. + * + * ->cb_scrub. A counter of the number of regression events for which we + * have to completely wipe the cache. + * + * ->cb_ro_snapshot. A counter of the number of times that we've + * recognised that a RO volume has been updated. + * + * ->cb_break. A counter of events that might mean that the contents of a + * vnode have been altered. + * + * ->cb_expires_at. The time at which the callback promise expires or + * AFS_NO_CB_PROMISE if we have no promise. + * + * The way we manage things is: + * + * (1) When a volume-level CB.CallBack occurs, we increment ->cb_v_break on + * the volume and reset ->cb_expires_at (ie. set AFS_NO_CB_PROMISE) on the + * volume and volume's server record. + * + * (2) When a CB.InitCallBackState occurs, we treat this as a volume-level + * callback break on all the volumes that have been using that volume + * (ie. increment ->cb_v_break and reset ->cb_expires_at). + * + * (3) When a vnode-level CB.CallBack occurs, we increment ->cb_break on the + * vnode and reset its ->cb_expires_at. If the vnode is mmapped, we also + * dispatch a work item to unmap all PTEs to the vnode's pagecache to + * force reentry to the filesystem for revalidation. + * + * (4) When entering the filesystem, we call afs_validate() to check the + * validity of a vnode. This first checks to see if ->cb_v_check and + * ->cb_v_break match, and if they don't, we lock volume->cb_check_lock + * exclusively and perform an FS.FetchStatus on the vnode. + * + * After checking the volume, we check the vnode. If there's a mismatch + * between the volume counters and the vnode's mirrors of those counters, + * we lock vnode->validate_lock and issue an FS.FetchStatus on the vnode. + * + * (5) When the reply from FS.FetchStatus arrives, the VolSync record is + * parsed: + * + * (A) If the Creation timestamp has changed on a RW volume or regressed + * on a RO volume, we try to increment ->cb_scrub; if it advances on a + * RO volume, we assume "vos release" happened and try to increment + * ->cb_ro_snapshot. + * + * (B) If the Update timestamp has regressed, we try to increment + * ->cb_scrub. + * + * Note that in both of these cases, we only do the increment if we can + * cmpxchg the value of the timestamp from the value we noted before the + * op. This tries to prevent parallel ops from fighting one another. + * + * volume->cb_v_check is then set to ->cb_v_break. + * + * (6) The AFSCallBack record included in the FS.FetchStatus reply is also + * parsed and used to set the promise in ->cb_expires_at for the vnode, + * the volume and the volume's server record. + * + * (7) If ->cb_scrub is seen to have advanced, we invalidate the pagecache for + * the vnode. + */ + +/* + * Check the validity of a vnode/inode and its parent volume. + */ +bool afs_check_validity(const struct afs_vnode *vnode) +{ + const struct afs_volume *volume = vnode->volume; + time64_t deadline = ktime_get_real_seconds() + 10; + + if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) || + atomic64_read(&vnode->cb_expires_at) <= deadline || + volume->cb_expires_at <= deadline || + vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) || + vnode->cb_scrub != atomic_read(&volume->cb_scrub) || + test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + _debug("inval"); + return false; + } + + return true; +} + /* * See if the server we've just talked to is currently excluded. */ @@ -185,11 +310,17 @@ out: } /* - * Update the state of a volume. Returns 1 to redo the operation from the start. + * Update the state of a volume, including recording the expiration time of the + * callback promise. Returns 1 to redo the operation from the start. */ int afs_update_volume_state(struct afs_operation *op) { + struct afs_server_list *slist = op->server_list; + struct afs_server_entry *se = &slist->servers[op->server_index]; + struct afs_callback *cb = &op->file[0].scb.callback; struct afs_volume *volume = op->volume; + unsigned int cb_v_break = atomic_read(&volume->cb_v_break); + unsigned int cb_v_check = atomic_read(&volume->cb_v_check); int ret; _enter("%llx", op->volume->vid); @@ -202,6 +333,18 @@ int afs_update_volume_state(struct afs_operation *op) } } + if (op->cb_v_break == cb_v_break && + (op->file[0].scb.have_cb || op->file[1].scb.have_cb)) { + time64_t expires_at = cb->expires_at; + + if (!op->file[0].scb.have_cb) + expires_at = op->file[1].scb.callback.expires_at; + + se->cb_expires_at = expires_at; + volume->cb_expires_at = expires_at; + } + if (cb_v_check < op->cb_v_break) + atomic_cmpxchg(&volume->cb_v_check, cb_v_check, op->cb_v_break); return 0; } @@ -224,99 +367,6 @@ static void afs_zap_data(struct afs_vnode *vnode) invalidate_inode_pages2(vnode->netfs.inode.i_mapping); } -/* - * Check to see if we have a server currently serving this volume and that it - * hasn't been reinitialised or dropped from the list. - */ -static bool afs_check_server_good(struct afs_vnode *vnode) -{ - struct afs_server_list *slist; - struct afs_server *server; - bool good; - int i; - - if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break)) - return true; - - rcu_read_lock(); - - slist = rcu_dereference(vnode->volume->servers); - for (i = 0; i < slist->nr_servers; i++) { - server = slist->servers[i].server; - if (server == vnode->cb_server) { - good = (vnode->cb_s_break == server->cb_s_break); - rcu_read_unlock(); - return good; - } - } - - rcu_read_unlock(); - return false; -} - -/* - * Check the validity of a vnode/inode. - */ -bool afs_check_validity(struct afs_vnode *vnode) -{ - enum afs_cb_break_reason need_clear = afs_cb_break_no_break; - time64_t now = ktime_get_real_seconds(); - unsigned int cb_break; - int seq; - - do { - seq = read_seqbegin(&vnode->cb_lock); - cb_break = vnode->cb_break; - - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_v_break != atomic_read(&vnode->volume->cb_v_break)) - need_clear = afs_cb_break_for_v_break; - else if (!afs_check_server_good(vnode)) - need_clear = afs_cb_break_for_s_reinit; - else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) - need_clear = afs_cb_break_for_zap; - else if (vnode->cb_expires_at - 10 <= now) - need_clear = afs_cb_break_for_lapsed; - } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - ; - } else { - need_clear = afs_cb_break_no_promise; - } - - } while (read_seqretry(&vnode->cb_lock, seq)); - - if (need_clear == afs_cb_break_no_break) - return true; - - write_seqlock(&vnode->cb_lock); - if (need_clear == afs_cb_break_no_promise) - vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); - else if (cb_break == vnode->cb_break) - __afs_break_callback(vnode, need_clear); - else - trace_afs_cb_miss(&vnode->fid, need_clear); - write_sequnlock(&vnode->cb_lock); - return false; -} - -/* - * Returns true if the pagecache is still valid. Does not sleep. - */ -bool afs_pagecache_valid(struct afs_vnode *vnode) -{ - if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) { - if (vnode->netfs.inode.i_nlink) - clear_nlink(&vnode->netfs.inode); - return true; - } - - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && - afs_check_validity(vnode)) - return true; - - return false; -} - /* * validate a vnode/inode * - there are several things we need to check @@ -328,23 +378,48 @@ bool afs_pagecache_valid(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { + struct afs_volume *volume = vnode->volume; + unsigned int cb_ro_snapshot, cb_scrub; + time64_t deadline = ktime_get_real_seconds() + 10; + bool zap = false, locked_vol = false; int ret; _enter("{v={%llx:%llu} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - if (afs_pagecache_valid(vnode)) - goto valid; + if (afs_check_validity(vnode)) + return 0; - down_write(&vnode->validate_lock); + ret = down_write_killable(&vnode->validate_lock); + if (ret < 0) + goto error; + + /* Validate a volume after the v_break has changed or the volume + * callback expired. We only want to do this once per volume per + * v_break change. The actual work will be done when parsing the + * status fetch reply. + */ + if (volume->cb_expires_at <= deadline || + atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break)) { + ret = mutex_lock_interruptible(&volume->cb_check_lock); + if (ret < 0) + goto error_unlock; + locked_vol = true; + } - /* if the promise has expired, we need to check the server again to get - * a new promise - note that if the (parent) directory's metadata was - * changed then the security may be different and we may no longer have - * access */ - if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - _debug("not promised"); + cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot); + cb_scrub = atomic_read(&volume->cb_scrub); + if (vnode->cb_ro_snapshot != cb_ro_snapshot || + vnode->cb_scrub != cb_scrub) + unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false); + + if (vnode->cb_ro_snapshot != cb_ro_snapshot || + vnode->cb_scrub != cb_scrub || + volume->cb_expires_at <= deadline || + atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) || + atomic64_read(&vnode->cb_expires_at) <= deadline + ) { ret = afs_fetch_status(vnode, key, false, NULL); if (ret < 0) { if (ret == -ENOENT) { @@ -353,9 +428,26 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } goto error_unlock; } + _debug("new promise [fl=%lx]", vnode->flags); } + /* We can drop the volume lock now as. */ + if (locked_vol) { + mutex_unlock(&volume->cb_check_lock); + locked_vol = false; + } + + cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot); + cb_scrub = atomic_read(&volume->cb_scrub); + _debug("vnode inval %x==%x %x==%x", + vnode->cb_ro_snapshot, cb_ro_snapshot, + vnode->cb_scrub, cb_scrub); + if (vnode->cb_scrub != cb_scrub) + zap = true; + vnode->cb_ro_snapshot = cb_ro_snapshot; + vnode->cb_scrub = cb_scrub; + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { _debug("file already deleted"); ret = -ESTALE; @@ -364,15 +456,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) /* if the vnode's data version number changed then its contents are * different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); + if (zap) afs_zap_data(vnode); up_write(&vnode->validate_lock); -valid: _leave(" = 0"); return 0; error_unlock: + if (locked_vol) + mutex_unlock(&volume->cb_check_lock); up_write(&vnode->validate_lock); +error: _leave(" = %d", ret); return ret; } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 41ab1d3ff3ea..cc207dca1b21 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -98,7 +98,10 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, INIT_WORK(&volume->destructor, afs_destroy_volume); rwlock_init(&volume->servers_lock); mutex_init(&volume->volsync_lock); + mutex_init(&volume->cb_check_lock); rwlock_init(&volume->cb_v_break_lock); + INIT_LIST_HEAD(&volume->open_mmaps); + init_rwsem(&volume->open_mmaps_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); for (i = 0; i < AFS_MAXTYPES; i++) diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index bbe8dcab4b32..2df7d0fd3f21 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -438,18 +438,14 @@ enum yfs_cm_operation { #define afs_cb_break_reasons \ EM(afs_cb_break_no_break, "no-break") \ - EM(afs_cb_break_no_promise, "no-promise") \ EM(afs_cb_break_for_callback, "break-cb") \ EM(afs_cb_break_for_creation_regress, "creation-regress") \ EM(afs_cb_break_for_deleted, "break-del") \ - EM(afs_cb_break_for_lapsed, "break-lapsed") \ EM(afs_cb_break_for_s_reinit, "s-reinit") \ EM(afs_cb_break_for_unlink, "break-unlink") \ EM(afs_cb_break_for_update_regress, "update-regress") \ - EM(afs_cb_break_for_v_break, "break-v") \ EM(afs_cb_break_for_volume_callback, "break-v-cb") \ EM(afs_cb_break_for_vos_release, "break-vos-release") \ - EM(afs_cb_break_for_zap, "break-zap") \ E_(afs_cb_break_volume_excluded, "vol-excluded") /* -- cgit v1.2.3 From 495f2ae9e3552c30f7b83be3d142a932885d506e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Oct 2023 09:24:01 +0100 Subject: afs: Fix fileserver rotation Fix the fileserver rotation so that it doesn't use RTT as the basis for deciding which server and address to use as this doesn't necessarily give a good indication of the best path. Instead, use the configurable preference list in conjunction with whatever probes have succeeded at the time of looking. To this end, make the following changes: (1) Keep an array of "server states" to track what addresses we've tried on each server and move the waitqueue entries there that we'll need for probing. (2) Each afs_server_state struct is made to pin the corresponding server's endpoint state rather than the afs_operation struct carrying a pin on the server we're currently looking at. (3) Drop the server list preference; we now always rescan the server list. (4) afs_wait_for_probes() now uses the server state list to guide it in what it waits for (and to provide the waitqueue entries) and returns an indication of whether we'd got a response, run out of responsive addresses or the endpoint state had been superseded and we need to restart the iteration. (5) Call afs_get_address_preferences*() occasionally to refresh the preference values. (6) When picking a server, scan the addresses of the servers for which we have as-yet untested communications, looking for the highest priority one and use that instead of trying all the addresses for a particular server in ascending-RTT order. (7) When a Busy or Offline state is seen across all available servers, do a short sleep. (8) If we detect that we accessed a future RO volume version whilst it is undergoing replication, reissue the op against the older version until at least half of the servers are replicated. (9) Whilst RO replication is ongoing, increase the frequency of Volume Location server checks for that volume to every ten minutes instead of hourly. Also add a tracepoint to track progress through the rotation algorithm. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/fs_operation.c | 8 +- fs/afs/fs_probe.c | 103 ++++++++++---------------- fs/afs/internal.h | 23 +++++- fs/afs/rotate.c | 178 +++++++++++++++++++++++++++++++++------------ fs/afs/server_list.c | 14 +--- fs/afs/volume.c | 6 +- include/trace/events/afs.h | 81 ++++++++++++++++++--- 7 files changed, 267 insertions(+), 146 deletions(-) (limited to 'include') diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index cecc44af6a5f..3546b087e791 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -229,7 +229,6 @@ void afs_wait_for_operation(struct afs_operation *op) */ int afs_put_operation(struct afs_operation *op) { - struct afs_endpoint_state *estate = op->estate; struct afs_addr_list *alist; int i, ret = afs_op_error(op); @@ -253,18 +252,17 @@ int afs_put_operation(struct afs_operation *op) kfree(op->more_files); } - if (estate) { - alist = estate->addresses; + if (op->estate) { + alist = op->estate->addresses; if (alist) { if (op->call_responded && op->addr_index != alist->preferred && test_bit(alist->preferred, &op->addr_tried)) WRITE_ONCE(alist->preferred, op->addr_index); } - afs_put_endpoint_state(estate, afs_estate_trace_put_operation); - op->estate = NULL; } + afs_clear_server_states(op); afs_put_serverlist(op->net, op->server_list); afs_put_volume(op->volume, afs_volume_trace_put_put_op); key_put(op->key); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c00d38b98a67..580de4adaaf6 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -296,58 +296,48 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, } /* - * Wait for the first as-yet untried fileserver to respond. + * Wait for the first as-yet untried fileserver to respond, for the probe state + * to be superseded or for all probes to finish. */ -int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) +int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr) { struct afs_endpoint_state *estate; - struct wait_queue_entry *waits; - struct afs_server *server; - unsigned int rtt = UINT_MAX, rtt_s; - bool have_responders = false; - int pref = -1, i; + struct afs_server_list *slist = op->server_list; + bool still_probing = true; + int ret = 0, i; - _enter("%u,%lx", slist->nr_servers, untried); + _enter("%u", slist->nr_servers); - /* Only wait for servers that have a probe outstanding. */ - rcu_read_lock(); for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - estate = rcu_dereference(server->endpoint_state); - if (!atomic_read(&estate->nr_probing)) - __clear_bit(i, &untried); - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) - have_responders = true; - } + estate = states[i].endpoint_state; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) + return 2; + if (atomic_read(&estate->nr_probing)) + still_probing = true; + if (estate->responsive_set & states[i].untried_addrs) + return 1; } - rcu_read_unlock(); - if (have_responders || !untried) + if (!still_probing) return 0; - waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); - if (!waits) - return -ENOMEM; - - for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - init_waitqueue_entry(&waits[i], current); - add_wait_queue(&server->probe_wq, &waits[i]); - } - } + for (i = 0; i < slist->nr_servers; i++) + add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter); for (;;) { - bool still_probing = false; + still_probing = false; - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) - goto stop; - if (atomic_read(&estate->nr_probing)) - still_probing = true; + estate = states[i].endpoint_state; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) { + ret = 2; + goto stop; + } + if (atomic_read(&estate->nr_probing)) + still_probing = true; + if (estate->responsive_set & states[i].untried_addrs) { + ret = 1; + goto stop; } } @@ -359,28 +349,12 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) stop: set_current_state(TASK_RUNNING); - for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - rtt_s = READ_ONCE(server->rtt); - if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && - rtt_s < rtt) { - pref = i; - rtt = rtt_s; - } - - remove_wait_queue(&server->probe_wq, &waits[i]); - } - } - - kfree(waits); - - if (pref == -1 && signal_pending(current)) - return -ERESTARTSYS; + for (i = 0; i < slist->nr_servers; i++) + remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter); - if (pref >= 0) - slist->preferred = pref; - return 0; + if (!ret && signal_pending(current)) + ret = -ERESTARTSYS; + return ret; } /* @@ -508,7 +482,7 @@ again: * Wait for a probe on a particular fileserver to complete for 2s. */ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate, - bool is_intr) + unsigned long exclude, bool is_intr) { struct wait_queue_entry wait; unsigned long timo = 2 * HZ; @@ -521,7 +495,8 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta prepare_to_wait_event(&server->probe_wq, &wait, is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (timo == 0 || - test_bit(AFS_ESTATE_RESPONDED, &estate->flags) || + test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) || + (estate->responsive_set & ~exclude) || atomic_read(&estate->nr_probing) == 0 || (is_intr && signal_pending(current))) break; @@ -531,7 +506,9 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) + if (estate->responsive_set & ~exclude) + return 1; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; if (is_intr && signal_pending(current)) return -ERESTARTSYS; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6d0cd886b548..e3e373c1fecf 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -620,7 +620,6 @@ struct afs_server_list { bool attached; /* T if attached to servers */ enum afs_ro_replicating ro_replicating; /* RW->RO update (probably) in progress */ unsigned char nr_servers; - unsigned char preferred; /* Preferred server */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ rwlock_t lock; @@ -821,6 +820,20 @@ struct afs_vl_cursor { bool call_responded; /* T if the current address responded */ }; +/* + * Fileserver state tracking for an operation. An array of these is kept, + * indexed by server index. + */ +struct afs_server_state { + /* Tracking of fileserver probe state. Other operations may interfere + * by probing a fileserver when accessing other volumes. + */ + unsigned int probe_seq; + unsigned long untried_addrs; /* Addresses we haven't tried yet */ + struct wait_queue_entry probe_waiter; + struct afs_endpoint_state *endpoint_state; /* Endpoint state being monitored */ +}; + /* * Fileserver operation methods. */ @@ -921,7 +934,8 @@ struct afs_operation { /* Fileserver iteration state */ struct afs_server_list *server_list; /* Current server list (pins ref) */ struct afs_server *server; /* Server we're using (ref pinned by server_list) */ - struct afs_endpoint_state *estate; /* Current endpoint state (pins ref) */ + struct afs_endpoint_state *estate; /* Current endpoint state (doesn't pin ref) */ + struct afs_server_state *server_states; /* States of the servers involved */ struct afs_call *call; unsigned long untried_servers; /* Bitmask of untried servers */ unsigned long addr_tried; /* Tried addresses */ @@ -1235,11 +1249,11 @@ void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_t extern void afs_fileserver_probe_result(struct afs_call *); void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, struct afs_addr_list *new_addrs, struct key *key); -extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); +int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr); extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); extern void afs_fs_probe_dispatcher(struct work_struct *); int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate, - bool is_intr); + unsigned long exclude, bool is_intr); extern void afs_fs_probe_cleanup(struct afs_net *); /* @@ -1363,6 +1377,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} /* * rotate.c */ +void afs_clear_server_states(struct afs_operation *op); extern bool afs_select_fileserver(struct afs_operation *); extern void afs_dump_edestaddrreq(const struct afs_operation *); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index a8554b4d91b8..ef7fe70777be 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -15,6 +15,18 @@ #include "afs_fs.h" #include "protocol_uae.h" +void afs_clear_server_states(struct afs_operation *op) +{ + unsigned int i; + + if (op->server_states) { + for (i = 0; i < op->server_list->nr_servers; i++) + afs_put_endpoint_state(op->server_states[i].endpoint_state, + afs_estate_trace_put_server_state); + kfree(op->server_states); + } +} + /* * Begin iteration through a server list, starting with the vnode's last used * server if possible, or the last recorded good server if not. @@ -26,14 +38,41 @@ static bool afs_start_fs_iteration(struct afs_operation *op, void *cb_server; int i; + trace_afs_rotate(op, afs_rotate_trace_start, 0); + read_lock(&op->volume->servers_lock); op->server_list = afs_get_serverlist( rcu_dereference_protected(op->volume->servers, lockdep_is_held(&op->volume->servers_lock))); read_unlock(&op->volume->servers_lock); + op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]), + GFP_KERNEL); + if (!op->server_states) { + afs_op_nomem(op); + trace_afs_rotate(op, afs_rotate_trace_nomem, 0); + return false; + } + + rcu_read_lock(); + for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_endpoint_state *estate; + struct afs_server_state *s = &op->server_states[i]; + + server = op->server_list->servers[i].server; + estate = rcu_dereference(server->endpoint_state); + s->endpoint_state = afs_get_endpoint_state(estate, + afs_estate_trace_get_server_state); + s->probe_seq = estate->probe_seq; + s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1; + init_waitqueue_entry(&s->probe_waiter, current); + afs_get_address_preferences(op->net, estate->addresses); + } + rcu_read_unlock(); + + op->untried_servers = (1UL << op->server_list->nr_servers) - 1; - op->server_index = READ_ONCE(op->server_list->preferred); + op->server_index = -1; cb_server = vnode->cb_server; if (cb_server) { @@ -52,6 +91,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op, */ if (op->flags & AFS_OPERATION_CUR_ONLY) { afs_op_set_error(op, -ESTALE); + trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0); return false; } @@ -90,6 +130,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) */ static bool afs_sleep_and_retry(struct afs_operation *op) { + trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0); if (!(op->flags & AFS_OPERATION_UNINTR)) { msleep_interruptible(1000); if (signal_pending(current)) { @@ -109,14 +150,13 @@ static bool afs_sleep_and_retry(struct afs_operation *op) */ bool afs_select_fileserver(struct afs_operation *op) { - struct afs_endpoint_state *estate = op->estate; struct afs_addr_list *alist; struct afs_server *server; struct afs_vnode *vnode = op->file[0].vnode; unsigned long set, failed; - unsigned int rtt; s32 abort_code = op->call_abort_code; - int error = op->call_error, addr_index, i; + int best_prio = 0; + int error = op->call_error, addr_index, i, j; op->nr_iterations++; @@ -127,6 +167,7 @@ bool afs_select_fileserver(struct afs_operation *op) error, abort_code); if (op->flags & AFS_OPERATION_STOP) { + trace_afs_rotate(op, afs_rotate_trace_stopped, 0); _leave(" = f [stopped]"); return false; } @@ -134,7 +175,8 @@ bool afs_select_fileserver(struct afs_operation *op) if (op->nr_iterations == 0) goto start; - WRITE_ONCE(estate->addresses->addrs[op->addr_index].last_error, error); + WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error); + trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error); /* Evaluate the result of the previous operation, if there was one. */ switch (op->call_error) { @@ -161,6 +203,7 @@ bool afs_select_fileserver(struct afs_operation *op) /* Success or local failure. Stop. */ afs_op_set_error(op, error); op->flags |= AFS_OPERATION_STOP; + trace_afs_rotate(op, afs_rotate_trace_stop, error); _leave(" = f [okay/local %d]", error); return false; @@ -173,6 +216,7 @@ bool afs_select_fileserver(struct afs_operation *op) * errors instead. IBM AFS and OpenAFS fileservers, however, do leak * these abort codes. */ + trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code); op->cumul_error.responded = true; switch (abort_code) { case VNOVOL: @@ -278,10 +322,6 @@ bool afs_select_fileserver(struct afs_operation *op) afs_op_set_error(op, -EADV); goto failed; } - if (op->flags & AFS_OPERATION_CUR_ONLY) { - afs_op_set_error(op, -ESTALE); - goto failed; - } goto busy; case VRESTARTING: /* The fileserver is either shutting down or starting up. */ @@ -417,19 +457,22 @@ bool afs_select_fileserver(struct afs_operation *op) } restart_from_beginning: + trace_afs_rotate(op, afs_rotate_trace_restart, 0); _debug("restart"); - afs_put_endpoint_state(estate, afs_estate_trace_put_restart_rotate); - estate = op->estate = NULL; + op->estate = NULL; op->server = NULL; + afs_clear_server_states(op); + op->server_states = NULL; afs_put_serverlist(op->net, op->server_list); op->server_list = NULL; start: _debug("start"); - ASSERTCMP(estate, ==, NULL); + ASSERTCMP(op->estate, ==, NULL); /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ error = afs_check_volume_status(op->volume, op); + trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error); if (error < 0) { afs_op_set_error(op, error); goto failed; @@ -442,16 +485,29 @@ start: pick_server: _debug("pick [%lx]", op->untried_servers); - ASSERTCMP(estate, ==, NULL); + ASSERTCMP(op->estate, ==, NULL); - error = afs_wait_for_fs_probes(op->server_list, op->untried_servers); - if (error < 0) { + error = afs_wait_for_fs_probes(op, op->server_states, + !(op->flags & AFS_OPERATION_UNINTR)); + switch (error) { + case 0: /* No untried responsive servers and no outstanding probes */ + trace_afs_rotate(op, afs_rotate_trace_probe_none, 0); + goto no_more_servers; + case 1: /* Got a response */ + trace_afs_rotate(op, afs_rotate_trace_probe_response, 0); + break; + case 2: /* Probe data superseded */ + trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0); + goto restart_from_beginning; + default: + trace_afs_rotate(op, afs_rotate_trace_probe_error, error); afs_op_set_error(op, error); goto failed; } - /* Pick the untried server with the lowest RTT. If we have outstanding - * callbacks, we stick with the server we're already using if we can. + /* Pick the untried server with the highest priority untried endpoint. + * If we have outstanding callbacks, we stick with the server we're + * already using if we can. */ if (op->server) { _debug("server %u", op->server_index); @@ -461,34 +517,47 @@ pick_server: _debug("no server"); } + rcu_read_lock(); op->server_index = -1; - rtt = UINT_MAX; + best_prio = -1; for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_endpoint_state *es; struct afs_server_entry *se = &op->server_list->servers[i]; + struct afs_addr_list *sal; struct afs_server *s = se->server; if (!test_bit(i, &op->untried_servers) || test_bit(AFS_SE_EXCLUDED, &se->flags) || !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) continue; - if (s->rtt <= rtt) { - op->server_index = i; - rtt = s->rtt; + es = op->server_states->endpoint_state; + sal = es->addresses; + + afs_get_address_preferences_rcu(op->net, sal); + for (j = 0; j < sal->nr_addrs; j++) { + if (!sal->addrs[j].peer) + continue; + if (sal->addrs[j].prio > best_prio) { + op->server_index = i; + best_prio = sal->addrs[j].prio; + } } } + rcu_read_unlock(); if (op->server_index == -1) goto no_more_servers; selected_server: - _debug("use %d", op->server_index); + trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio); + _debug("use %d prio %u", op->server_index, best_prio); __clear_bit(op->server_index, &op->untried_servers); /* We're starting on a different fileserver from the list. We need to * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. */ - ASSERTCMP(estate, ==, NULL); + ASSERTCMP(op->estate, ==, NULL); server = op->server_list->servers[op->server_index].server; if (!afs_check_server_record(op, server, op->key)) @@ -504,12 +573,6 @@ selected_server: atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); } - read_lock(&server->fs_lock); - estate = rcu_dereference_protected(server->endpoint_state, - lockdep_is_held(&server->fs_lock)); - op->estate = afs_get_endpoint_state(estate, afs_estate_trace_get_fsrotate_set); - read_unlock(&server->fs_lock); - retry_server: op->addr_tried = 0; op->addr_index = -1; @@ -518,14 +581,23 @@ iterate_address: /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - set = READ_ONCE(estate->responsive_set); - failed = READ_ONCE(estate->failed_set); - _debug("iterate ES=%x rs=%lx fs=%lx", estate->probe_seq, set, failed); + op->estate = op->server_states[op->server_index].endpoint_state; + set = READ_ONCE(op->estate->responsive_set); + failed = READ_ONCE(op->estate->failed_set); + _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed); set &= ~(failed | op->addr_tried); + trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set); if (!set) - goto out_of_addresses; + goto wait_for_more_probe_results; + + alist = op->estate->addresses; + for (i = 0; i < alist->nr_addrs; i++) { + if (alist->addrs[i].prio > best_prio) { + addr_index = i; + best_prio = alist->addrs[i].prio; + } + } - alist = estate->addresses; addr_index = READ_ONCE(alist->preferred); if (!test_bit(addr_index, &set)) addr_index = __ffs(set); @@ -542,17 +614,24 @@ iterate_address: _leave(" = t"); return true; -out_of_addresses: +wait_for_more_probe_results: + error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, + !(op->flags & AFS_OPERATION_UNINTR)); + if (!error) + goto iterate_address; + /* We've now had a failure to respond on all of a server's addresses - * immediately probe them again and consider retrying the server. */ + trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0); afs_probe_fileserver(op->net, op->server); if (op->flags & AFS_OPERATION_RETRY_SERVER) { - error = afs_wait_for_one_fs_probe(op->server, estate, + error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, !(op->flags & AFS_OPERATION_UNINTR)); switch (error) { case 0: op->flags &= ~AFS_OPERATION_RETRY_SERVER; + trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); goto retry_server; case -ERESTARTSYS: afs_op_set_error(op, error); @@ -564,30 +643,33 @@ out_of_addresses: } next_server: + trace_afs_rotate(op, afs_rotate_trace_next_server, 0); _debug("next"); - ASSERT(estate); - alist = estate->addresses; + ASSERT(op->estate); + alist = op->estate->addresses; if (op->call_responded && op->addr_index != READ_ONCE(alist->preferred) && test_bit(alist->preferred, &op->addr_tried)) WRITE_ONCE(alist->preferred, op->addr_index); - afs_put_endpoint_state(estate, afs_estate_trace_put_next_server); - estate = op->estate = NULL; + op->estate = NULL; goto pick_server; no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ - if (op->flags & AFS_OPERATION_VBUSY) + trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0); + if (op->flags & AFS_OPERATION_VBUSY) { + afs_sleep_and_retry(op); + op->flags &= ~AFS_OPERATION_VBUSY; goto restart_from_beginning; + } rcu_read_lock(); for (i = 0; i < op->server_list->nr_servers; i++) { struct afs_endpoint_state *estate; - struct afs_server *s = op->server_list->servers[i].server; - estate = rcu_dereference(s->endpoint_state); + estate = op->server_states->endpoint_state; error = READ_ONCE(estate->error); if (error < 0) afs_op_accumulate_error(op, error, estate->abort_code); @@ -595,14 +677,14 @@ no_more_servers: rcu_read_unlock(); failed: + trace_afs_rotate(op, afs_rotate_trace_failed, 0); op->flags |= AFS_OPERATION_STOP; - if (estate) { - alist = estate->addresses; + if (op->estate) { + alist = op->estate->addresses; if (op->call_responded && op->addr_index != READ_ONCE(alist->preferred) && test_bit(alist->preferred, &op->addr_tried)) WRITE_ONCE(alist->preferred, op->addr_index); - afs_put_endpoint_state(estate, afs_estate_trace_put_op_failed); op->estate = NULL; } _leave(" = f [failed %d]", afs_op_error(op)); @@ -635,8 +717,8 @@ void afs_dump_edestaddrreq(const struct afs_operation *op) if (op->server_list) { const struct afs_server_list *sl = op->server_list; - pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", - sl->nr_servers, sl->preferred, sl->vnovol_mask); + pr_notice("FC: SL nr=%u vnov=%hx\n", + sl->nr_servers, sl->vnovol_mask); for (i = 0; i < sl->nr_servers; i++) { const struct afs_server *s = sl->servers[i].server; const struct afs_endpoint_state *e = diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index ac4a7afff45e..7e7e567a7f8a 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -134,8 +134,7 @@ bool afs_annotate_server_list(struct afs_server_list *new, struct afs_server_list *old) { unsigned long mask = 1UL << AFS_SE_EXCLUDED; - struct afs_server *cur; - int i, j; + int i; if (old->nr_servers != new->nr_servers || old->ro_replicating != new->ro_replicating) @@ -148,18 +147,7 @@ bool afs_annotate_server_list(struct afs_server_list *new, goto changed; } return false; - changed: - /* Maintain the same preferred server as before if possible. */ - cur = old->servers[old->preferred].server; - for (j = 0; j < new->nr_servers; j++) { - if (new->servers[j].server == cur) { - if (!test_bit(AFS_SE_EXCLUDED, &new->servers[j].flags)) - new->preferred = j; - break; - } - } - return true; } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index cc207dca1b21..020ecd45e476 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -397,7 +397,11 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) discard = old; } - volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + /* Check more often if replication is ongoing. */ + if (new->ro_replicating) + volume->update_at = ktime_get_real_seconds() + 10 * 60; + else + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; write_unlock(&volume->servers_lock); if (discard == old) diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 2df7d0fd3f21..b2e0847eca47 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -230,15 +230,12 @@ enum yfs_cm_operation { #define afs_estate_traces \ EM(afs_estate_trace_alloc_probe, "ALLOC prob") \ EM(afs_estate_trace_alloc_server, "ALLOC srvr") \ - EM(afs_estate_trace_get_fsrotate_set, "GET fs-rot") \ + EM(afs_estate_trace_get_server_state, "GET srv-st") \ EM(afs_estate_trace_get_getcaps, "GET getcap") \ EM(afs_estate_trace_put_getcaps, "PUT getcap") \ - EM(afs_estate_trace_put_next_server, "PUT nx-srv") \ - EM(afs_estate_trace_put_op_failed, "PUT op-fai") \ - EM(afs_estate_trace_put_operation, "PUT op ") \ EM(afs_estate_trace_put_probe, "PUT probe ") \ - EM(afs_estate_trace_put_restart_rotate, "PUT rstrot") \ EM(afs_estate_trace_put_server, "PUT server") \ + EM(afs_estate_trace_put_server_state, "PUT srv-st") \ E_(afs_estate_trace_free, "FREE ") #define afs_fs_operations \ @@ -448,6 +445,29 @@ enum yfs_cm_operation { EM(afs_cb_break_for_vos_release, "break-vos-release") \ E_(afs_cb_break_volume_excluded, "vol-excluded") +#define afs_rotate_traces \ + EM(afs_rotate_trace_aborted, "Abortd") \ + EM(afs_rotate_trace_busy_sleep, "BsySlp") \ + EM(afs_rotate_trace_check_vol_status, "VolStt") \ + EM(afs_rotate_trace_failed, "Failed") \ + EM(afs_rotate_trace_iter, "Iter ") \ + EM(afs_rotate_trace_iterate_addr, "ItAddr") \ + EM(afs_rotate_trace_next_server, "NextSv") \ + EM(afs_rotate_trace_no_more_servers, "NoMore") \ + EM(afs_rotate_trace_nomem, "Nomem ") \ + EM(afs_rotate_trace_probe_error, "PrbErr") \ + EM(afs_rotate_trace_probe_fileserver, "PrbFsv") \ + EM(afs_rotate_trace_probe_none, "PrbNon") \ + EM(afs_rotate_trace_probe_response, "PrbRsp") \ + EM(afs_rotate_trace_probe_superseded, "PrbSup") \ + EM(afs_rotate_trace_restart, "Rstart") \ + EM(afs_rotate_trace_retry_server, "RtrySv") \ + EM(afs_rotate_trace_selected_server, "SlctSv") \ + EM(afs_rotate_trace_stale_lock, "StlLck") \ + EM(afs_rotate_trace_start, "Start ") \ + EM(afs_rotate_trace_stop, "Stop ") \ + E_(afs_rotate_trace_stopped, "Stoppd") + /* * Generate enums for tracing information. */ @@ -471,6 +491,7 @@ enum afs_file_error { afs_file_errors } __mode(byte); enum afs_flock_event { afs_flock_events } __mode(byte); enum afs_flock_operation { afs_flock_operations } __mode(byte); enum afs_io_error { afs_io_errors } __mode(byte); +enum afs_rotate_trace { afs_rotate_traces } __mode(byte); enum afs_server_trace { afs_server_traces } __mode(byte); enum afs_volume_trace { afs_volume_traces } __mode(byte); @@ -486,21 +507,22 @@ enum afs_volume_trace { afs_volume_traces } __mode(byte); afs_alist_traces; afs_call_traces; -afs_server_traces; +afs_cb_break_reasons; afs_cell_traces; -afs_fs_operations; -afs_vl_operations; afs_cm_operations; -yfs_cm_operations; afs_edit_dir_ops; afs_edit_dir_reasons; afs_eproto_causes; afs_estate_traces; -afs_io_errors; afs_file_errors; -afs_flock_types; afs_flock_operations; -afs_cb_break_reasons; +afs_flock_types; +afs_fs_operations; +afs_io_errors; +afs_rotate_traces; +afs_server_traces; +afs_vl_operations; +yfs_cm_operations; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -1519,6 +1541,41 @@ TRACE_EVENT(afs_vl_probe, &__entry->srx.transport) ); +TRACE_EVENT(afs_rotate, + TP_PROTO(struct afs_operation *op, enum afs_rotate_trace reason, unsigned int extra), + + TP_ARGS(op, reason, extra), + + TP_STRUCT__entry( + __field(unsigned int, op) + __field(unsigned int, flags) + __field(unsigned int, extra) + __field(unsigned short, iteration) + __field(short, server_index) + __field(short, addr_index) + __field(enum afs_rotate_trace, reason) + ), + + TP_fast_assign( + __entry->op = op->debug_id; + __entry->flags = op->flags; + __entry->iteration = op->nr_iterations; + __entry->server_index = op->server_index; + __entry->addr_index = op->addr_index; + __entry->reason = reason; + __entry->extra = extra; + ), + + TP_printk("OP=%08x it=%02x %s fl=%x sx=%d ax=%d ext=%d", + __entry->op, + __entry->iteration, + __print_symbolic(__entry->reason, afs_rotate_traces), + __entry->flags, + __entry->server_index, + __entry->addr_index, + __entry->extra) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From abcbd3bfbbfe97a8912d0c929d4aa18f50d9bc52 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Nov 2023 09:20:28 +0000 Subject: afs: trace: Log afs_make_call(), including server address Add a tracepoint to log calls to afs_make_call(), including the destination server address. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/fsclient.c | 22 ++++++++++++++++++++++ fs/afs/internal.h | 1 + fs/afs/rxrpc.c | 2 ++ fs/afs/yfsclient.c | 20 ++++++++++++++++++++ include/trace/events/afs.h | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+) (limited to 'include') diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 80f7d9e796e3..79cd30775b7a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -290,6 +290,7 @@ void afs_fs_fetch_status(struct afs_operation *op) bp[2] = htonl(vp->fid.vnode); bp[3] = htonl(vp->fid.unique); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -442,6 +443,7 @@ static void afs_fs_fetch_data64(struct afs_operation *op) bp[6] = 0; bp[7] = htonl(lower_32_bits(req->len)); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -476,6 +478,7 @@ void afs_fs_fetch_data(struct afs_operation *op) bp[4] = htonl(lower_32_bits(req->pos)); bp[5] = htonl(lower_32_bits(req->len)); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -559,6 +562,7 @@ void afs_fs_create_file(struct afs_operation *op) *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -612,6 +616,7 @@ void afs_fs_make_dir(struct afs_operation *op) *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -685,6 +690,7 @@ void afs_fs_remove_file(struct afs_operation *op) bp = (void *) bp + padsz; } + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -732,6 +738,7 @@ void afs_fs_remove_dir(struct afs_operation *op) bp = (void *) bp + padsz; } + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -812,6 +819,7 @@ void afs_fs_link(struct afs_operation *op) *bp++ = htonl(vp->fid.vnode); *bp++ = htonl(vp->fid.unique); + call->fid = vp->fid; trace_afs_make_fs_call1(call, &vp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -907,6 +915,7 @@ void afs_fs_symlink(struct afs_operation *op) *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -1003,6 +1012,7 @@ void afs_fs_rename(struct afs_operation *op) bp = (void *) bp + n_padsz; } + call->fid = orig_dvp->fid; trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); afs_make_op_call(op, call, GFP_NOFS); } @@ -1090,6 +1100,7 @@ static void afs_fs_store_data64(struct afs_operation *op) *bp++ = htonl(upper_32_bits(op->store.i_size)); *bp++ = htonl(lower_32_bits(op->store.i_size)); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1140,6 +1151,7 @@ void afs_fs_store_data(struct afs_operation *op) *bp++ = htonl(lower_32_bits(op->store.size)); *bp++ = htonl(lower_32_bits(op->store.i_size)); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1206,6 +1218,7 @@ static void afs_fs_setattr_size64(struct afs_operation *op) *bp++ = htonl(upper_32_bits(attr->ia_size)); /* new file length */ *bp++ = htonl(lower_32_bits(attr->ia_size)); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1247,6 +1260,7 @@ static void afs_fs_setattr_size(struct afs_operation *op) *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1283,6 +1297,7 @@ void afs_fs_setattr(struct afs_operation *op) xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1446,6 +1461,7 @@ void afs_fs_get_volume_status(struct afs_operation *op) bp[0] = htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vp->fid.vid); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1528,6 +1544,7 @@ void afs_fs_set_lock(struct afs_operation *op) *bp++ = htonl(vp->fid.unique); *bp++ = htonl(op->lock.type); + call->fid = vp->fid; trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); afs_make_op_call(op, call, GFP_NOFS); } @@ -1554,6 +1571,7 @@ void afs_fs_extend_lock(struct afs_operation *op) *bp++ = htonl(vp->fid.vnode); *bp++ = htonl(vp->fid.unique); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1580,6 +1598,7 @@ void afs_fs_release_lock(struct afs_operation *op) *bp++ = htonl(vp->fid.vnode); *bp++ = htonl(vp->fid.unique); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1948,6 +1967,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op) *bp++ = htonl(op->more_files[i].fid.unique); } + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -2053,6 +2073,7 @@ void afs_fs_fetch_acl(struct afs_operation *op) bp[2] = htonl(vp->fid.vnode); bp[3] = htonl(vp->fid.unique); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_KERNEL); } @@ -2098,6 +2119,7 @@ void afs_fs_store_acl(struct afs_operation *op) if (acl->size != size) memset((void *)&bp[5] + acl->size, 0, size - acl->size); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_KERNEL); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a6a4fc417dba..e33ace259cc6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -153,6 +153,7 @@ struct afs_call { struct afs_vldb_entry *ret_vldb; char *ret_str; }; + struct afs_fid fid; /* Primary vnode ID (or all zeroes) */ unsigned char probe_index; /* Address in ->probe_alist */ struct afs_operation *op; unsigned int server_index; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 81013bc8bbfd..c453428f3c8b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -313,6 +313,8 @@ void afs_make_call(struct afs_call *call, gfp_t gfp) call, call->type->name, key_serial(call->key), atomic_read(&call->net->nr_outstanding_calls)); + trace_afs_make_call(call); + /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data * after the initial fixed part. diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 2d6943f05ea5..f521e66d3bf6 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -493,6 +493,7 @@ void yfs_fs_fetch_data(struct afs_operation *op) bp = xdr_encode_u64(bp, req->len); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -575,6 +576,7 @@ void yfs_fs_create_file(struct afs_operation *op) bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -623,6 +625,7 @@ void yfs_fs_make_dir(struct afs_operation *op) bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime); yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -707,6 +710,7 @@ void yfs_fs_remove_file2(struct afs_operation *op) bp = xdr_encode_name(bp, name); yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -776,6 +780,7 @@ void yfs_fs_remove_file(struct afs_operation *op) bp = xdr_encode_name(bp, name); yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -817,6 +822,7 @@ void yfs_fs_remove_dir(struct afs_operation *op) bp = xdr_encode_name(bp, name); yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -890,6 +896,7 @@ void yfs_fs_link(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call1(call, &vp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -971,6 +978,7 @@ void yfs_fs_symlink(struct afs_operation *op) bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime); yfs_check_req(call, bp); + call->fid = dvp->fid; trace_afs_make_fs_call1(call, &dvp->fid, name); afs_make_op_call(op, call, GFP_NOFS); } @@ -1050,6 +1058,7 @@ void yfs_fs_rename(struct afs_operation *op) bp = xdr_encode_name(bp, new_name); yfs_check_req(call, bp); + call->fid = orig_dvp->fid; trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); afs_make_op_call(op, call, GFP_NOFS); } @@ -1105,6 +1114,7 @@ void yfs_fs_store_data(struct afs_operation *op) bp = xdr_encode_u64(bp, op->store.i_size); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1161,6 +1171,7 @@ static void yfs_fs_setattr_size(struct afs_operation *op) bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1199,6 +1210,7 @@ void yfs_fs_setattr(struct afs_operation *op) bp = xdr_encode_YFS_StoreStatus(bp, attr); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1369,6 +1381,7 @@ void yfs_fs_get_volume_status(struct afs_operation *op) bp = xdr_encode_u64(bp, vp->fid.vid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1433,6 +1446,7 @@ void yfs_fs_set_lock(struct afs_operation *op) bp = xdr_encode_u32(bp, op->lock.type); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); afs_make_op_call(op, call, GFP_NOFS); } @@ -1463,6 +1477,7 @@ void yfs_fs_extend_lock(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1493,6 +1508,7 @@ void yfs_fs_release_lock(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1559,6 +1575,7 @@ void yfs_fs_fetch_status(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1739,6 +1756,7 @@ void yfs_fs_inline_bulk_status(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_NOFS); } @@ -1901,6 +1919,7 @@ void yfs_fs_fetch_opaque_acl(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_KERNEL); } @@ -1951,6 +1970,7 @@ void yfs_fs_store_opaque_acl2(struct afs_operation *op) bp += size / sizeof(__be32); yfs_check_req(call, bp); + call->fid = vp->fid; trace_afs_make_fs_call(call, &vp->fid); afs_make_op_call(op, call, GFP_KERNEL); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index b2e0847eca47..5194b7e6dc8d 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1576,6 +1576,42 @@ TRACE_EVENT(afs_rotate, __entry->extra) ); +TRACE_EVENT(afs_make_call, + TP_PROTO(struct afs_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(bool, is_vl) + __field(enum afs_fs_operation, op) + __field_struct(struct afs_fid, fid) + __field_struct(struct sockaddr_rxrpc, srx) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->op = call->operation_ID; + __entry->fid = call->fid; + memcpy(&__entry->srx, rxrpc_kernel_remote_srx(call->peer), + sizeof(__entry->srx)); + __entry->srx.srx_service = call->service_id; + __entry->is_vl = (__entry->srx.srx_service == VL_SERVICE || + __entry->srx.srx_service == YFS_VL_SERVICE); + ), + + TP_printk("c=%08x %pISpc+%u %s %llx:%llx:%x", + __entry->call, + &__entry->srx.transport, + __entry->srx.srx_service, + __entry->is_vl ? + __print_symbolic(__entry->op, afs_vl_operations) : + __print_symbolic(__entry->op, afs_fs_operations), + __entry->fid.vid, + __entry->fid.vnode, + __entry->fid.unique) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ -- cgit v1.2.3