From 1ebcca4716cdd93a4e26a53fde9e5dff078317d8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Sep 2002 02:42:58 -0700 Subject: [NAPI]: Do not check netif_running() in netif_rx_schedule_prep. --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6ed74303fbda..a8b79f10637a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -711,8 +711,7 @@ enum { static inline int netif_rx_schedule_prep(struct net_device *dev) { - return netif_running(dev) && - !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); } /* Add interface to tail of rx poll list. This assumes that _prep has -- cgit v1.2.3 From 269f04b762fa11f693d3c3167afcdd4ca505fca6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Sep 2002 07:48:48 -0300 Subject: [LLC] use just one struct sock per connection With this PF_LLC is tightly integrated with the core and that is a good thing 8) . kill llc_ui_opt, the only non-duplicated bit is struct sockaddr_llc and this now lives in llc_opt . remove debug code from llc_sk_alloc/free (previously llc_sock_alloc/free) . the skbs allocated for event processing don't need to have any payload at all, just the skb->cb is enough, so remove the bogus 1 from alloc_skb calls . llc_conn_disc put on death row . llc_process_tmr_ev callers have to hold the socket lock . the request functions in llc_if.c doesn't hold the socket lock anymore its up to its callers on the socket layer (llc_sock.c) . llc_sk_alloc now receives a priority for sk_alloc call and is the only way to alloc a new sock (from llc_mac and llc_sock, bottom and top) . added the traditional struct sock REFCNT_DEBUG support for llc . 
llc_sock was simplified and is on the zen route to cleanliness, wait for the next patches, it'll shrink a lot when I zap all the crap (as in not needed) list handling, using the existing list maintained in struct llc_sap for that, probably splitting it in two, one for listening sockets and other for (being) established ones. Ah, and the sap->ind and sap->req and friends will die. --- include/linux/llc.h | 11 - include/net/llc_conn.h | 64 +---- include/net/llc_if.h | 2 - net/llc/llc_actn.c | 2 +- net/llc/llc_c_ac.c | 42 ++-- net/llc/llc_conn.c | 34 +-- net/llc/llc_if.c | 56 +---- net/llc/llc_mac.c | 19 +- net/llc/llc_main.c | 59 +++-- net/llc/llc_sock.c | 619 ++++++++++++++++++++++++------------------------- 10 files changed, 409 insertions(+), 499 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llc.h b/include/linux/llc.h index 824a149e9e6b..77ac5d9df544 100644 --- a/include/linux/llc.h +++ b/include/linux/llc.h @@ -78,17 +78,6 @@ enum llc_sockopts { #define LLC_SAP_DYN_STOP 0xDE #define LLC_SAP_DYN_TRIES 4 -struct sock; - -struct llc_ui_opt { - u16 link; /* network layer link number */ - struct llc_sap *sap; /* pointer to parent SAP */ - struct sock *core_sk; - struct net_device *dev; /* device to send to remote */ - struct sockaddr_llc addr; /* address sock is bound to */ -}; - -#define llc_ui_sk(__sk) ((struct llc_ui_opt *)(__sk)->protinfo) #define llc_ui_skb_cb(__skb) ((struct sockaddr_llc *)&((__skb)->cb[0])) #ifdef CONFIG_LLC_UI diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea7725fa705f..3b2d68e45075 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -2,7 +2,7 @@ #define LLC_CONN_H /* * Copyright (c) 1997 by Procom Technology, Inc. - * 2001 by Arnaldo Carvalho de Melo + * 2001, 2002 by Arnaldo Carvalho de Melo * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. 
@@ -13,8 +13,7 @@ */ #include #include - -#define DEBUG_LLC_CONN_ALLOC +#include struct llc_timer { struct timer_list timer; @@ -25,7 +24,7 @@ struct llc_timer { struct llc_opt { struct list_head node; /* entry in sap->sk_list.list */ struct sock *sk; /* sock that has this llc_opt */ - void *handler; /* for upper layers usage */ + struct sockaddr_llc addr; /* address sock is bound to */ u8 state; /* state of connection */ struct llc_sap *sap; /* pointer to parent SAP */ struct llc_addr laddr; /* lsap/mac pair */ @@ -80,60 +79,11 @@ struct llc_opt { struct llc_conn_state_ev; -extern struct sock *__llc_sock_alloc(int family); -extern void __llc_sock_free(struct sock *sk, u8 free); - -#ifdef DEBUG_LLC_CONN_ALLOC -#define dump_stack() printk(KERN_INFO "call trace: %p, %p, %p\n", \ - __builtin_return_address(0), \ - __builtin_return_address(1), \ - __builtin_return_address(2)); -#define llc_sock_alloc(family) ({ \ - struct sock *__sk = __llc_sock_alloc(family); \ - if (__sk) { \ - llc_sk(__sk)->f_alloc = __FUNCTION__; \ - llc_sk(__sk)->l_alloc = __LINE__; \ - } \ - __sk;}) -#define __llc_sock_assert(__sk) \ - if (llc_sk(__sk)->f_free) { \ - printk(KERN_ERR \ - "%p conn (alloc'd @ %s(%d)) " \ - "already freed @ %s(%d) " \ - "being used again @ %s(%d)\n", \ - llc_sk(__sk), \ - llc_sk(__sk)->f_alloc, llc_sk(__sk)->l_alloc, \ - llc_sk(__sk)->f_free, llc_sk(__sk)->l_free, \ - __FUNCTION__, __LINE__); \ - dump_stack(); -#define llc_sock_free(__sk) \ -{ \ - __llc_sock_assert(__sk) \ - } else { \ - __llc_sock_free(__sk, 0); \ - llc_sk(__sk)->f_free = __FUNCTION__; \ - llc_sk(__sk)->l_free = __LINE__; \ - } \ -} -#define llc_sock_assert(__sk) \ -{ \ - __llc_sock_assert(__sk); \ - return; } \ -} -#define llc_sock_assert_ret(__sk, __ret) \ -{ \ - __llc_sock_assert(__sk); \ - return __ret; } \ -} -#else /* DEBUG_LLC_CONN_ALLOC */ -#define llc_sock_alloc(family) __llc_sock_alloc(family) -#define llc_sock_free(__sk) __llc_sock_free(__sk, 1) -#define llc_sock_assert(__sk) -#define 
llc_sock_assert_ret(__sk) -#endif /* DEBUG_LLC_CONN_ALLOC */ +extern struct sock *llc_sk_alloc(int family, int priority); +extern void llc_sk_free(struct sock *sk); -extern void llc_sock_reset(struct sock *sk); -extern int llc_sock_init(struct sock *sk); +extern void llc_sk_reset(struct sock *sk); +extern int llc_sk_init(struct sock *sk); /* Access to a connection */ extern int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/llc_if.h b/include/net/llc_if.h index 975485fc42c0..e294c1d976b1 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -73,8 +73,6 @@ struct llc_prim_conn { u8 pri; /* service_class */ struct net_device *dev; struct sock *sk; /* returned from REQUEST */ - void *handler; /* upper layer use, - stored in llc_opt->handler */ u16 link; struct sk_buff *skb; /* received SABME */ }; diff --git a/net/llc/llc_actn.c b/net/llc/llc_actn.c index cb8704409d8e..dadb22054749 100644 --- a/net/llc/llc_actn.c +++ b/net/llc/llc_actn.c @@ -134,7 +134,7 @@ int llc_station_ac_report_status(struct llc_station *station, static void llc_station_ack_tmr_callback(unsigned long timeout_data) { struct llc_station *station = (struct llc_station *)timeout_data; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); station->ack_tmr_running = 0; if (skb) { diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index bc296905424d..ecb6d6f1658d 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1454,8 +1454,9 @@ int llc_conn_ac_set_f_flag_p(struct sock *sk, struct sk_buff *skb) void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) { struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); + bh_lock_sock(sk); llc_sk(sk)->pf_cycle_timer.running = 0; if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -1464,13 +1465,15 @@ void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) 
ev->data.tmr.timer_specific = NULL; llc_process_tmr_ev(sk, skb); } + bh_unlock_sock(sk); } static void llc_conn_busy_tmr_cb(unsigned long timeout_data) { struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); + bh_lock_sock(sk); llc_sk(sk)->busy_state_timer.running = 0; if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -1479,13 +1482,15 @@ static void llc_conn_busy_tmr_cb(unsigned long timeout_data) ev->data.tmr.timer_specific = NULL; llc_process_tmr_ev(sk, skb); } + bh_unlock_sock(sk); } void llc_conn_ack_tmr_cb(unsigned long timeout_data) { struct sock* sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); + bh_lock_sock(sk); llc_sk(sk)->ack_timer.running = 0; if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -1494,13 +1499,15 @@ void llc_conn_ack_tmr_cb(unsigned long timeout_data) ev->data.tmr.timer_specific = NULL; llc_process_tmr_ev(sk, skb); } + bh_unlock_sock(sk); } static void llc_conn_rej_tmr_cb(unsigned long timeout_data) { struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); + bh_lock_sock(sk); llc_sk(sk)->rej_sent_timer.running = 0; if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -1509,6 +1516,7 @@ static void llc_conn_rej_tmr_cb(unsigned long timeout_data) ev->data.tmr.timer_specific = NULL; llc_process_tmr_ev(sk, skb); } + bh_unlock_sock(sk); } int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) @@ -1536,14 +1544,11 @@ int llc_conn_ac_upd_vs(struct sock *sk, struct sk_buff *skb) * llc_conn_disc - removes connection from SAP list and frees it * @sk: closed connection * @skb: occurred event - * - * Returns 2, to indicate the state machine that the connection was freed. 
*/ int llc_conn_disc(struct sock *sk, struct sk_buff *skb) { - llc_sap_unassign_sock(llc_sk(sk)->sap, sk); - llc_sock_free(sk); - return 2; + /* FIXME: this thing seems to want to die */ + return 0; } /** @@ -1555,7 +1560,7 @@ int llc_conn_disc(struct sock *sk, struct sk_buff *skb) */ int llc_conn_reset(struct sock *sk, struct sk_buff *skb) { - llc_sock_reset(sk); + llc_sk_reset(sk); return 0; } @@ -1589,19 +1594,16 @@ u8 llc_circular_between(u8 a, u8 b, u8 c) */ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) { - bh_lock_sock(sk); if (llc_sk(sk)->state == LLC_CONN_OUT_OF_SVC) { printk(KERN_WARNING "%s: timer called on closed connection\n", __FUNCTION__); llc_conn_free_ev(skb); - goto out; - } - if (!sk->lock.users) - llc_conn_state_process(sk, skb); - else { - llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + } else { + if (!sk->lock.users) + llc_conn_state_process(sk, skb); + else { + llc_set_backlog_type(skb, LLC_EVENT); + sk_add_backlog(sk, skb); + } } -out: - bh_unlock_sock(sk); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ef6093acb1c3..46e54e9621ba 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -43,7 +43,7 @@ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; * @sk: connection * @skb: occurred event * - * Sends an event to connection state machine. after processing event + * Sends an event to connection state machine. After processing event * (executing it's actions and changing state), upper layer will be * indicated or confirmed, if needed. Returns 0 for success, 1 for * failure. The socket lock has to be held before calling this function. 
@@ -65,15 +65,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) llc_conn_free_ev(skb); else if (ind_prim && cfm_prim) skb_get(skb); -#ifdef THIS_BREAKS_DISCONNECT_NOTIFICATION_BADLY - /* check if the connection was freed by the state machine by - * means of llc_conn_disc */ - if (rc == 2) { - printk(KERN_INFO "%s: rc == 2\n", __FUNCTION__); - rc = -ECONNABORTED; - goto out; - } -#endif /* THIS_BREAKS_DISCONNECT_NOTIFICATION_BADLY */ if (!flag) /* indicate or confirm not required */ goto out; rc = 0; @@ -83,10 +74,13 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * sock crap */ if (flag == LLC_DATA_PRIM + 1) { - struct sock *upper = llc_sk(skb->sk)->handler; - - skb->sk = upper; - if (sock_queue_rcv_skb(upper, skb)) { + if (sock_queue_rcv_skb(skb->sk, skb)) { + /* + * FIXME: have to sync the LLC state + * machine wrt mem usage with + * sk->{r,w}mem_alloc, will do + * this soon 8) + */ printk(KERN_ERR "%s: sock_queue_rcv_skb failed!\n", __FUNCTION__); @@ -105,10 +99,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) } if (!llc_data_accept_state(llc->state)) { /* In this state, we can send I pdu */ - struct sock* upper = llc_sk(skb->sk)->handler; - - if (upper) - wake_up(upper->sleep); + if (skb->sk) + skb->sk->write_space(skb->sk); } else rc = llc->failed_data_req = 1; kfree_skb(skb); @@ -118,7 +110,6 @@ out: void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) { - llc_sock_assert(sk); /* queue PDU to send to MAC layer */ skb_queue_tail(&sk->write_queue, skb); llc_conn_send_pdus(sk); @@ -380,11 +371,10 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, * llc_exec_conn_trans_actions - executes related actions * @sk: connection * @trans: transition that it's actions must be performed - * @skb: happened event + * @skb: event * * Executes actions that is related to happened event. 
Returns 0 for - * success, 1 to indicate failure of at least one action or 2 if the - * connection was freed (llc_conn_disc was called) + * success, 1 to indicate failure of at least one action. */ static int llc_exec_conn_trans_actions(struct sock *sk, struct llc_conn_state_trans *trans, diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index cab3de5fb797..8af1ec3a1417 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -249,7 +250,6 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) int rc = -ECONNABORTED; struct llc_opt *llc = llc_sk(sk); - lock_sock(sk); if (llc->state == LLC_CONN_STATE_ADM) goto out; rc = -EBUSY; @@ -269,7 +269,6 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) skb->dev = llc->dev; rc = llc_conn_state_process(sk, skb); out: - release_sock(sk); return rc; } @@ -299,7 +298,6 @@ static void llc_confirm_impossible(struct llc_prim_if_block *prim) static int llc_conn_req_handler(struct llc_prim_if_block *prim) { int rc = -EBUSY; - struct llc_opt *llc; struct llc_sap *sap = prim->sap; struct sk_buff *skb; struct net_device *ddev = mac_dev_peer(prim->data->conn.dev, @@ -319,37 +317,16 @@ static int llc_conn_req_handler(struct llc_prim_if_block *prim) daddr.lsap = prim->data->conn.daddr.lsap; sk = llc_lookup_established(sap, &daddr, &laddr); if (sk) { - llc_confirm_impossible(prim); - goto out_put; - } - rc = -ENOMEM; - if (prim->data->conn.sk) { - sk = prim->data->conn.sk; - if (llc_sock_init(sk)) - goto out; - } else { - /* - * FIXME: this one will die as soon as core and - * llc_sock starts sharing a struct sock. 
- */ - sk = llc_sock_alloc(PF_LLC); - if (!sk) { + if (sk->state == TCP_ESTABLISHED) { llc_confirm_impossible(prim); - goto out; - } - prim->data->conn.sk = sk; + goto out_put; + } else + sock_put(sk); } + rc = -ENOMEM; + sk = prim->data->conn.sk; sock_hold(sk); - lock_sock(sk); - /* assign new connection to it's SAP */ - llc_sap_assign_sock(sap, sk); - llc = llc_sk(sk); - memcpy(&llc->daddr, &daddr, sizeof(llc->daddr)); - memcpy(&llc->laddr, &laddr, sizeof(llc->laddr)); - llc->dev = ddev; - llc->link = prim->data->conn.link; - llc->handler = prim->data->conn.handler; - skb = alloc_skb(1, GFP_ATOMIC); + skb = alloc_skb(0, GFP_ATOMIC); if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -359,15 +336,10 @@ static int llc_conn_req_handler(struct llc_prim_if_block *prim) ev->data.prim.data = prim; rc = llc_conn_state_process(sk, skb); } - if (rc) { - llc_sap_unassign_sock(sap, sk); - llc_sock_free(sk); + if (rc) llc_confirm_impossible(prim); - } - release_sock(sk); out_put: sock_put(sk); -out: return rc; } @@ -388,7 +360,6 @@ static int llc_disc_req_handler(struct llc_prim_if_block *prim) struct sock* sk = prim->data->disc.sk; sock_hold(sk); - lock_sock(sk); if (llc_sk(sk)->state == LLC_CONN_STATE_ADM || llc_sk(sk)->state == LLC_CONN_OUT_OF_SVC) goto out; @@ -396,7 +367,7 @@ static int llc_disc_req_handler(struct llc_prim_if_block *prim) * Postpone unassigning the connection from its SAP and returning the * connection until all ACTIONs have been completely executed */ - skb = alloc_skb(1, GFP_ATOMIC); + skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; ev = llc_conn_ev(skb); @@ -406,7 +377,6 @@ static int llc_disc_req_handler(struct llc_prim_if_block *prim) ev->data.prim.data = prim; rc = llc_conn_state_process(sk, skb); out: - release_sock(sk); sock_put(sk); return rc; } @@ -426,8 +396,7 @@ static int llc_rst_req_handler(struct llc_prim_if_block *prim) int rc = 1; struct sock *sk = prim->data->res.sk; - lock_sock(sk); - skb = alloc_skb(1, GFP_ATOMIC); + 
skb = alloc_skb(0, GFP_ATOMIC); if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -437,7 +406,6 @@ static int llc_rst_req_handler(struct llc_prim_if_block *prim) ev->data.prim.data = prim; rc = llc_conn_state_process(sk, skb); } - release_sock(sk); return rc; } @@ -498,7 +466,7 @@ static int llc_rst_rsp_handler(struct llc_prim_if_block *prim) * package as event and send it to connection event handler */ struct sock *sk = prim->data->res.sk; - struct sk_buff *skb = alloc_skb(1, GFP_ATOMIC); + struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); diff --git a/net/llc/llc_mac.c b/net/llc/llc_mac.c index 0020b3801d6a..b8b031510043 100644 --- a/net/llc/llc_mac.c +++ b/net/llc/llc_mac.c @@ -27,7 +27,7 @@ #include #include -#if 1 +#if 0 #define dprintk(args...) printk(KERN_DEBUG args) #else #define dprintk(args...) @@ -123,23 +123,30 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, sk = llc_lookup_established(sap, &saddr, &daddr); if (!sk) { + struct llc_opt *llc; + + dprintk("%s: llc_lookup_established failed\n", __FUNCTION__); /* * FIXME: here we'll pass the sk->family of the * listening socket, if found, when * llc_lookup_listener is added in the next patches. 
*/ - sk = llc_sock_alloc(PF_LLC); + sk = llc_sk_alloc(PF_LLC, GFP_ATOMIC); if (!sk) goto drop; - memcpy(&llc_sk(sk)->daddr, &saddr, sizeof(saddr)); + llc = llc_sk(sk); + memcpy(&llc->laddr, &daddr, sizeof(llc->laddr)); + memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); llc_sap_assign_sock(sap, sk); sock_hold(sk); } skb->sk = sk; bh_lock_sock(sk); - if (!sk->lock.users) - rc = llc_conn_rcv(sk, skb); - else { + if (!sk->lock.users) { + /* rc = */ llc_conn_rcv(sk, skb); + rc = 0; + } else { + dprintk("%s: adding to backlog...\n", __FUNCTION__); llc_set_backlog_type(skb, LLC_PACKET); sk_add_backlog(sk, skb); rc = 0; diff --git a/net/llc/llc_main.c b/net/llc/llc_main.c index fe24722bd45a..a1768084605b 100644 --- a/net/llc/llc_main.c +++ b/net/llc/llc_main.c @@ -52,6 +52,11 @@ static int llc_rtn_all_conns(struct llc_sap *sap); static struct llc_station llc_main_station; /* only one of its kind */ +#undef LLC_REFCNT_DEBUG +#ifdef LLC_REFCNT_DEBUG +static atomic_t llc_sock_nr; +#endif + /** * llc_sap_alloc - allocates and initializes sap. * @@ -165,10 +170,12 @@ static int llc_backlog_rcv(struct sock *sk, struct sk_buff *skb) } /** - * llc_sock_init - Initialize a socket with default llc values. + * llc_sk_init - Initializes a socket with default llc values. * @sk: socket to intiailize. + * + * Initializes a socket with default llc values. */ -int llc_sock_init(struct sock* sk) +int llc_sk_init(struct sock* sk) { struct llc_opt *llc = kmalloc(sizeof(*llc), GFP_ATOMIC); int rc = -ENOMEM; @@ -198,61 +205,83 @@ out: } /** - * __llc_sock_alloc - Allocates LLC sock + * llc_sk_alloc - Allocates LLC sock * @family: upper layer protocol family + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *__llc_sock_alloc(int family) +struct sock *llc_sk_alloc(int family, int priority) { - struct sock *sk = sk_alloc(family, GFP_ATOMIC, 1, NULL); + struct sock *sk = sk_alloc(family, priority, 1, NULL); + MOD_INC_USE_COUNT; if (!sk) - goto out; - if (llc_sock_init(sk)) + goto decmod; + if (llc_sk_init(sk)) goto outsk; sock_init_data(NULL, sk); +#ifdef LLC_REFCNT_DEBUG + atomic_inc(&llc_sock_nr); + printk(KERN_DEBUG "LLC socket %p created in %s, now we have %d alive\n", sk, + __FUNCTION__, atomic_read(&llc_sock_nr)); +#endif out: return sk; outsk: sk_free(sk); sk = NULL; +decmod: + MOD_DEC_USE_COUNT; goto out; } /** - * __llc_sock_free - Frees a LLC socket + * llc_sk_free - Frees a LLC socket * @sk - socket to free * * Frees a LLC socket */ -void __llc_sock_free(struct sock *sk, u8 free) +void llc_sk_free(struct sock *sk) { struct llc_opt *llc = llc_sk(sk); llc->state = LLC_CONN_OUT_OF_SVC; - /* stop all (possibly) running timers */ + /* Stop all (possibly) running timers */ llc_conn_ac_stop_all_timers(sk, NULL); #ifdef DEBUG_LLC_CONN_ALLOC printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __FUNCTION__, skb_queue_len(&llc->pdu_unack_q), skb_queue_len(&sk->write_queue)); #endif + skb_queue_purge(&sk->receive_queue); skb_queue_purge(&sk->write_queue); skb_queue_purge(&llc->pdu_unack_q); - if (free) - sock_put(sk); +#ifdef LLC_REFCNT_DEBUG + if (atomic_read(&sk->refcnt) != 1) { + printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n", + sk, __FUNCTION__, atomic_read(&sk->refcnt)); + printk(KERN_DEBUG "%d LLC sockets are still alive\n", + atomic_read(&llc_sock_nr)); + } else { + atomic_dec(&llc_sock_nr); + printk(KERN_DEBUG "LLC socket %p released in %s, %d are still alive\n", sk, + __FUNCTION__, atomic_read(&llc_sock_nr)); + } +#endif + sock_put(sk); + MOD_DEC_USE_COUNT; } /** - * llc_sock_reset - resets a connection + * llc_sk_reset - resets a connection * @sk: LLC 
socket to reset * * Resets a connection to the out of service state. Stops its timers * and frees any frames in the queues of the connection. */ -void llc_sock_reset(struct sock *sk) +void llc_sk_reset(struct sock *sk) { struct llc_opt *llc = llc_sk(sk); @@ -585,7 +614,7 @@ static int __init llc_init(void) skb_queue_head_init(&llc_main_station.mac_pdu_q); skb_queue_head_init(&llc_main_station.ev_q.list); spin_lock_init(&llc_main_station.ev_q.lock); - skb = alloc_skb(1, GFP_ATOMIC); + skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto err; llc_build_offset_table(); diff --git a/net/llc/llc_sock.c b/net/llc/llc_sock.c index 0b2c3a9966d4..5753d316feaf 100644 --- a/net/llc/llc_sock.c +++ b/net/llc/llc_sock.c @@ -11,6 +11,7 @@ * connections. * * Copyright (c) 2001 by Jay Schulist + * 2002 by Arnaldo Carvalho de Melo * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. @@ -57,6 +58,13 @@ static int llc_ui_confirm(struct llc_prim_if_block *prim); static int llc_ui_wait_for_conn(struct sock *sk, int timeout); static int llc_ui_wait_for_disc(struct sock *sk, int timeout); static int llc_ui_wait_for_data(struct sock *sk, int timeout); +static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout); + +#if 0 +#define dprintk(args...) printk(KERN_DEBUG args) +#else +#define dprintk(args...) 
+#endif /** * llc_ui_next_link_no - return the next unused link number for a sap @@ -130,7 +138,7 @@ static int llc_ui_send_conn(struct sock *sk, struct llc_sap *sap, struct sockaddr_llc *addr, struct net_device *dev, int link) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); union llc_u_prim_data prim_data; struct llc_prim_if_block prim; @@ -139,10 +147,9 @@ static int llc_ui_send_conn(struct sock *sk, struct llc_sap *sap, prim.prim = LLC_CONN_PRIM; prim_data.conn.dev = dev; prim_data.conn.link = link; - prim_data.conn.sk = NULL; - prim_data.conn.handler = sk; + prim_data.conn.sk = sk; prim_data.conn.pri = 0; - prim_data.conn.saddr.lsap = llc_ui->addr.sllc_ssap; + prim_data.conn.saddr.lsap = llc->addr.sllc_ssap; prim_data.conn.daddr.lsap = addr->sllc_dsap; memcpy(prim_data.conn.saddr.mac, dev->dev_addr, IFHWADDRLEN); memcpy(prim_data.conn.daddr.mac, addr->sllc_dmac, IFHWADDRLEN); @@ -159,55 +166,50 @@ static int llc_ui_send_conn(struct sock *sk, struct llc_sap *sap, */ static int llc_ui_send_disc(struct sock *sk) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); union llc_u_prim_data prim_data; struct llc_prim_if_block prim; int rc = 0; - if (sk->type != SOCK_STREAM || sk->state != TCP_ESTABLISHED) + if (sk->type != SOCK_STREAM || sk->state != TCP_ESTABLISHED) { + rc = 1; goto out; + } sk->state = TCP_CLOSING; prim.data = &prim_data; - prim.sap = llc_ui->sap; + prim.sap = llc->sap; prim.prim = LLC_DISC_PRIM; - prim_data.disc.sk = llc_ui->core_sk; - prim_data.disc.link = llc_ui->link; - rc = llc_ui->sap->req(&prim); + prim_data.disc.sk = sk; + prim_data.disc.link = llc->link; + rc = llc->sap->req(&prim); out: return rc; } /** * llc_ui_send_data - send data via reliable llc2 connection - * @sap: Sap the socket is bound to. * @sk: Connection the socket is using. * @skb: Data the user wishes to send. * @addr: Source and destination fields provided by the user. 
+ * @noblock: can we block waiting for data? * * Send data via reliable llc2 connection. - * Returns 0 upon success, non-zero if action did not succeed. + * Returns 0 upon success, non-zero if action did not succeed. */ -static int llc_ui_send_data(struct llc_sap *sap, struct sock* sk, - struct sk_buff *skb, struct sockaddr_llc *addr) +static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, + struct sockaddr_llc *addr, int noblock) { - struct llc_ui_opt* llc_ui = llc_ui_sk(sk); - struct llc_opt* llc_core = llc_sk(llc_ui->core_sk); - int rc; + struct llc_opt* llc = llc_sk(sk); + int rc = 0; skb->protocol = llc_ui_protocol_type(addr->sllc_arphrd); - sock_hold(sk); -try: - rc = llc_build_and_send_pkt(llc_ui->core_sk, skb); - if (rc != -EBUSY) - goto out; - rc = wait_event_interruptible(sk->socket->wait, !llc_ui->core_sk || - !llc_core->failed_data_req); + if (llc_data_accept_state(llc->state) || llc->p_flag) { + int timeout = sock_sndtimeo(sk, noblock); + + rc = llc_ui_wait_for_busy_core(sk, timeout); + } if (!rc) - goto try; - if (!llc_ui->core_sk) - rc = -ENOTCONN; -out: - sock_put(sk); + rc = llc_build_and_send_pkt(sk, skb); return rc; } @@ -255,12 +257,12 @@ static __inline__ struct llc_sap *llc_ui_find_sap(u8 sap) read_lock_bh(&llc_ui_sockets_lock); for (sk = llc_ui_sockets; sk; sk = sk->next) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); - if (!llc_ui->sap) + if (!llc->sap) continue; - if (llc_ui->sap->laddr.lsap == sap) { - s = llc_ui->sap; + if (llc->sap->laddr.lsap == sap) { + s = llc->sap; break; } } @@ -274,13 +276,13 @@ static struct sock *__llc_ui_find_sk_by_exact(struct llc_addr *laddr, struct sock *sk; for (sk = llc_ui_sockets; sk; sk = sk->next) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); - if (llc_ui->addr.sllc_ssap == laddr->lsap && - llc_ui->addr.sllc_dsap == daddr->lsap && - llc_mac_null(llc_ui->addr.sllc_mmac) && - llc_mac_match(llc_ui->addr.sllc_smac, 
laddr->mac) && - llc_mac_match(llc_ui->addr.sllc_dmac, daddr->mac)) + if (llc->addr.sllc_ssap == laddr->lsap && + llc->addr.sllc_dsap == daddr->lsap && + llc_mac_null(llc->addr.sllc_mmac) && + llc_mac_match(llc->addr.sllc_smac, laddr->mac) && + llc_mac_match(llc->addr.sllc_dmac, daddr->mac)) break; } return sk; @@ -303,31 +305,31 @@ static struct sock *__llc_ui_find_sk_by_addr(struct llc_addr *laddr, struct sock *sk, *tmp_sk; for (sk = llc_ui_sockets; sk; sk = sk->next) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); - if (llc_ui->addr.sllc_ssap != laddr->lsap) + if (llc->addr.sllc_ssap != laddr->lsap) continue; - if (llc_mac_null(llc_ui->addr.sllc_smac)) { - if (!llc_mac_null(llc_ui->addr.sllc_mmac) && - !llc_mac_match(llc_ui->addr.sllc_mmac, - laddr->mac)) + if (llc_mac_null(llc->addr.sllc_smac)) { + if (!llc_mac_null(llc->addr.sllc_mmac) && + !llc_mac_match(llc->addr.sllc_mmac, + laddr->mac)) continue; break; } - if (dev && !llc_mac_null(llc_ui->addr.sllc_mmac) && - llc_mac_match(llc_ui->addr.sllc_mmac, laddr->mac) && - llc_mac_match(llc_ui->addr.sllc_smac, dev->dev_addr)) + if (dev && !llc_mac_null(llc->addr.sllc_mmac) && + llc_mac_match(llc->addr.sllc_mmac, laddr->mac) && + llc_mac_match(llc->addr.sllc_smac, dev->dev_addr)) break; if (dev->flags & IFF_LOOPBACK) break; - if (!llc_mac_match(llc_ui->addr.sllc_smac, laddr->mac)) + if (!llc_mac_match(llc->addr.sllc_smac, laddr->mac)) continue; tmp_sk = __llc_ui_find_sk_by_exact(laddr, daddr); if (tmp_sk) { sk = tmp_sk; break; } - if (llc_mac_null(llc_ui->addr.sllc_dmac)) + if (llc_mac_null(llc->addr.sllc_dmac)) break; } return sk; @@ -393,7 +395,8 @@ static __inline__ void llc_ui_remove_socket(struct sock *sk) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; - /* this only makes sense if the socket was inserted on the + /* + * This only makes sense if the socket was inserted on the * list, if sk->pprev is NULL it wasn't */ sock_put(sk); @@ -401,38 +404,13 @@ 
static __inline__ void llc_ui_remove_socket(struct sock *sk) write_unlock_bh(&llc_ui_sockets_lock); } -/** - * llc_ui_destroy_sk - destroy socket - * @data: Socket which is to be destroyed. - * - * Really destroy the socket. - */ -static void llc_ui_destroy_sk(struct sock *sk) -{ - skb_queue_purge(&sk->receive_queue); - skb_queue_purge(&sk->write_queue); - sock_put(sk); - MOD_DEC_USE_COUNT; -} - -/** - * llc_ui_destroy_timer - try to destroy socket again - * @data: Socket which is to be destroyed. - * - * Attempt to destroy a socket which was previously destroyed but - * was still in use at the time. - */ -static void llc_ui_destroy_timer(unsigned long data) +static void llc_ui_sk_init(struct socket *sock, struct sock *sk) { - struct sock *sk = (struct sock *)data; - - if (!atomic_read(&sk->wmem_alloc) && - !atomic_read(&sk->rmem_alloc) && sk->dead) - llc_ui_destroy_sk(sk); - else { - sk->timer.expires = jiffies + SOCK_DESTROY_TIME; - add_timer(&sk->timer); - } + sk->type = sock->type; + sk->sleep = &sock->wait; + sk->socket = sock; + sock->sk = sk; + sock->ops = &llc_ui_ops; } /** @@ -447,31 +425,17 @@ static void llc_ui_destroy_timer(unsigned long data) static int llc_ui_create(struct socket *sock, int protocol) { struct sock *sk; - struct llc_ui_opt *llc_ui; int rc = -ESOCKTNOSUPPORT; - MOD_INC_USE_COUNT; - if (sock->type != SOCK_DGRAM && sock->type != SOCK_STREAM) - goto decmod; - rc = -ENOMEM; - sk = sk_alloc(PF_LLC, GFP_KERNEL, 1, NULL); - if (!sk) - goto decmod; - llc_ui = kmalloc(sizeof(*llc_ui), GFP_KERNEL); - if (!llc_ui) - goto outsk; - memset(llc_ui, 0, sizeof(*llc_ui)); - rc = 0; - sock_init_data(sock, sk); - llc_ui_sk(sk) = llc_ui; - sock->ops = &llc_ui_ops; -out: + if (sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM) { + rc = -ENOMEM; + sk = llc_sk_alloc(PF_LLC, GFP_KERNEL); + if (sk) { + rc = 0; + llc_ui_sk_init(sock, sk); + } + } return rc; -outsk: - sk_free(sk); -decmod: - MOD_DEC_USE_COUNT; - goto out; } /** @@ -483,33 +447,25 @@ decmod: 
static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui; + struct llc_opt *llc; if (!sk) goto out; sock_hold(sk); lock_sock(sk); - llc_ui = llc_ui_sk(sk); - if (llc_ui->core_sk && !llc_ui_send_disc(sk)) + llc = llc_sk(sk); + dprintk("%s: closing local(%02X) remote(%02X)\n", __FUNCTION__, + llc->laddr.lsap, llc->daddr.lsap); + if (!llc_ui_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->rcvtimeo); - llc_ui_remove_socket(sk); + llc_sap_unassign_sock(llc->sap, sk); release_sock(sk); + llc_ui_remove_socket(sk); - if (llc_ui->sap && !llc_ui_find_sap(llc_ui->sap->laddr.lsap)) - llc_sap_close(llc_ui->sap); - sock_orphan(sk); - sock->sk = NULL; - if (!atomic_read(&sk->wmem_alloc) && - !atomic_read(&sk->rmem_alloc) && sk->dead) - llc_ui_destroy_sk(sk); - else { - init_timer(&sk->timer); - sk->timer.expires = jiffies + SOCK_DESTROY_TIME; - sk->timer.function = llc_ui_destroy_timer; - sk->timer.data = (unsigned long)sk; - add_timer(&sk->timer); - } + if (llc->sap && list_empty(&llc->sap->sk_list.list)) + llc_sap_close(llc->sap); sock_put(sk); + llc_sk_free(sk); out: return 0; } @@ -563,7 +519,7 @@ out: static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); struct llc_sap *sap; struct net_device *dev = NULL; int rc = -EINVAL; @@ -578,7 +534,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) rc = -ENETUNREACH; if (!dev) goto out; - llc_ui->dev = dev; + llc->dev = dev; } /* bind to a specific sap, optional. 
*/ if (!addr->sllc_ssap) { @@ -619,10 +575,15 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; } } - memcpy(&llc_ui->addr, addr, sizeof(*addr)); - llc_ui->sap = sap; + llc->laddr.lsap = addr->sllc_ssap; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); + llc->daddr.lsap = addr->sllc_dsap; + memcpy(llc->daddr.mac, addr->sllc_dmac, IFHWADDRLEN); + memcpy(&llc->addr, addr, sizeof(llc->addr)); rc = sk->zapped = 0; llc_ui_insert_socket(sk); + /* assign new connection to it's SAP */ + llc_sap_assign_sock(sap, sk); out: return rc; } @@ -651,6 +612,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sock *sk = sock->sk; int rc = -EINVAL; + dprintk("%s: binding %02X\n", __FUNCTION__, addr->sllc_ssap); if (!sk->zapped || addrlen != sizeof(*addr)) goto out; rc = -EAFNOSUPPORT; @@ -686,7 +648,7 @@ static int llc_ui_shutdown(struct socket *sock, int how) goto out; rc = llc_ui_send_disc(sk); if (!rc) - llc_ui_wait_for_disc(sk, sk->rcvtimeo); + rc = llc_ui_wait_for_disc(sk, sk->rcvtimeo); /* Wake up anyone sleeping in poll */ sk->state_change(sk); out: @@ -712,7 +674,7 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct net_device *dev; int rc = -EINVAL; @@ -730,14 +692,15 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, if (rc) goto out; } - if (!llc_ui->dev) { + if (!llc->dev) { rtnl_lock(); dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_smac); rtnl_unlock(); if (!dev) goto out; + llc->dev = dev; } else - dev = llc_ui->dev; + dev = llc->dev; if (sk->type != SOCK_STREAM) goto out; rc = -EALREADY; @@ -745,14 +708,17 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, goto out; sock->state = SS_CONNECTING; sk->state = 
TCP_SYN_SENT; - llc_ui->link = llc_ui_next_link_no(llc_ui->sap->laddr.lsap); - rc = llc_ui_send_conn(sk, llc_ui->sap, addr, dev, llc_ui->link); + llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); + rc = llc_ui_send_conn(sk, llc->sap, addr, dev, llc->link); if (rc) { + dprintk("%s: llc_ui_send_conn failed :-(\n", __FUNCTION__); sock->state = SS_UNCONNECTED; sk->state = TCP_CLOSE; goto out; } rc = llc_ui_wait_for_conn(sk, sk->rcvtimeo); + if (rc) + dprintk("%s: llc_ui_wait_for_conn failed=%d\n", __FUNCTION__, rc); out: release_sock(sk); return rc; @@ -802,6 +768,12 @@ static int llc_ui_wait_for_disc(struct sock *sk, int timeout) add_wait_queue_exclusive(sk->sleep, &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); + rc = -ERESTARTSYS; + if (signal_pending(current)) + break; + rc = -EAGAIN; + if (!timeout) + break; rc = 0; if (sk->state != TCP_CLOSE) { release_sock(sk); @@ -809,12 +781,6 @@ static int llc_ui_wait_for_disc(struct sock *sk, int timeout) lock_sock(sk); } else break; - rc = -ERESTARTSYS; - if (signal_pending(current)) - break; - rc = -EAGAIN; - if (!timeout) - break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk->sleep, &wait); @@ -823,23 +789,12 @@ static int llc_ui_wait_for_disc(struct sock *sk, int timeout) static int llc_ui_wait_for_conn(struct sock *sk, int timeout) { - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); DECLARE_WAITQUEUE(wait, current); int rc; add_wait_queue_exclusive(sk->sleep, &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); - rc = 0; - if (sk->state != TCP_ESTABLISHED) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else { - if (!llc_ui->core_sk) - rc = -EAGAIN; - break; - } rc = -EAGAIN; if (sk->state == TCP_CLOSE) break; @@ -849,6 +804,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, int timeout) rc = -EAGAIN; if (!timeout) break; + rc = 0; + if (sk->state != TCP_ESTABLISHED) { + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + } 
else + break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk->sleep, &wait); @@ -858,11 +820,19 @@ static int llc_ui_wait_for_conn(struct sock *sk, int timeout) static int llc_ui_wait_for_data(struct sock *sk, int timeout) { DECLARE_WAITQUEUE(wait, current); - int rc; + int rc = 0; add_wait_queue_exclusive(sk->sleep, &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); + if (sk->shutdown & RCV_SHUTDOWN) + break; + rc = -ERESTARTSYS; + if (signal_pending(current)) + break; + rc = -EAGAIN; + if (!timeout) + break; rc = 0; if (skb_queue_empty(&sk->receive_queue)) { release_sock(sk); @@ -870,12 +840,38 @@ static int llc_ui_wait_for_data(struct sock *sk, int timeout) lock_sock(sk); } else break; + } + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sleep, &wait); + return rc; +} + +static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct llc_opt *llc = llc_sk(sk); + int rc; + + add_wait_queue_exclusive(sk->sleep, &wait); + for (;;) { + dprintk("%s: looping...\n", __FUNCTION__); + __set_current_state(TASK_INTERRUPTIBLE); + rc = -ENOTCONN; + if (sk->shutdown & RCV_SHUTDOWN) + break; rc = -ERESTARTSYS; if (signal_pending(current)) break; rc = -EAGAIN; if (!timeout) break; + rc = 0; + if (llc_data_accept_state(llc->state) || llc->p_flag) { + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + } else + break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk->sleep, &wait); @@ -894,11 +890,11 @@ static int llc_ui_wait_for_data(struct sock *sk, int timeout) static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk, *newsk; - struct llc_ui_opt *llc_ui, *newllc_ui; - struct llc_opt *newllc_core; + struct llc_opt *llc, *newllc; struct sk_buff *skb; int rc = -EOPNOTSUPP; + dprintk("%s: accepting on %02X\n", __FUNCTION__, llc_sk(sk)->addr.sllc_ssap); lock_sock(sk); if (sk->type != SOCK_STREAM) goto out; @@ 
-909,40 +905,32 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) rc = llc_ui_wait_for_data(sk, sk->rcvtimeo); if (rc) goto out; + dprintk("%s: got a new connection on %02X\n", __FUNCTION__, llc_sk(sk)->addr.sllc_ssap); skb = skb_dequeue(&sk->receive_queue); - rc = -EINVAL; if (!skb->sk) goto frees; - /* attach connection to a new socket. */ - rc = llc_ui_create(newsock, sk->protocol); - if (rc) - goto frees; rc = 0; - newsk = newsock->sk; + newsk = skb->sk; + /* attach connection to a new socket. */ + llc_ui_sk_init(newsock, newsk); newsk->pair = NULL; - newsk->socket = newsock; - newsk->sleep = &newsock->wait; newsk->zapped = 0; newsk->state = TCP_ESTABLISHED; newsock->state = SS_CONNECTED; - llc_ui = llc_ui_sk(sk); - newllc_ui = llc_ui_sk(newsk); - newllc_ui->sap = llc_ui->sap; - newllc_ui->dev = llc_ui->dev; - newllc_ui->core_sk = skb->sk; - newllc_core = llc_sk(newllc_ui->core_sk); - newllc_ui->link = newllc_core->link; - newllc_core->handler = newsk; - memcpy(&newllc_ui->addr, &llc_ui->addr, sizeof(newllc_ui->addr)); - memcpy(newllc_ui->addr.sllc_dmac, newllc_core->daddr.mac, IFHWADDRLEN); - newllc_ui->addr.sllc_dsap = newllc_core->daddr.lsap; + llc = llc_sk(sk); + newllc = llc_sk(newsk); + memcpy(&newllc->addr, &llc->addr, sizeof(newllc->addr)); + memcpy(newllc->addr.sllc_dmac, newllc->daddr.mac, IFHWADDRLEN); + newllc->addr.sllc_dsap = newllc->daddr.lsap; /* put original socket back into a clean listen state. 
*/ sk->state = TCP_LISTEN; sk->ack_backlog--; llc_ui_insert_socket(newsk); skb->sk = NULL; + dprintk("%s: ok success on %02X, client on %02X\n", __FUNCTION__, + llc_sk(sk)->addr.sllc_ssap, newllc->addr.sllc_dsap); frees: kfree_skb(skb); out: @@ -967,12 +955,21 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, int size, struct sock *sk = sock->sk; struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; struct sk_buff *skb; - int rc = -ENOMEM, copied = 0; + int rc = -ENOMEM, copied = 0, timeout; int noblock = flags & MSG_DONTWAIT; + dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__, + llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); - skb = skb_recv_datagram(sk, flags, noblock, &rc); - if (!skb) + timeout = sock_rcvtimeo(sk, noblock); + rc = llc_ui_wait_for_data(sk, timeout); + if (rc) { + dprintk("%s: llc_ui_wait_for_data failed recv in %02X from %02X\n", + __FUNCTION__, llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); + goto out; + } + skb = skb_dequeue(&sk->receive_queue); + if (!skb) /* shutdown */ goto out; copied = skb->len; if (copied > size) { @@ -986,7 +983,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, int size, memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); dgram_free: - skb_free_datagram(sk, skb); /* Free the datagram. */ + kfree_skb(skb); out: release_sock(sk); return rc ? 
: copied; @@ -1006,24 +1003,23 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)msg->msg_name; int flags = msg->msg_flags; + int noblock = flags & MSG_DONTWAIT; struct net_device *dev; struct sk_buff *skb; - int rc = -EOPNOTSUPP, size = 0; + int rc = -EINVAL, size = 0; + dprintk("%s: sending from %02X to %02X\n", __FUNCTION__, llc->laddr.lsap, llc->daddr.lsap); lock_sock(sk); - if (flags & ~MSG_DONTWAIT) - goto release; - rc = -EINVAL; if (addr) { if (msg->msg_namelen < sizeof(*addr)) goto release; } else { - if (llc_ui_addr_null(&llc_ui->addr)) + if (llc_ui_addr_null(&llc->addr)) goto release; - addr = &llc_ui->addr; + addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ if (sk->zapped) { @@ -1032,7 +1028,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, int len, if (rc) goto release; } - if (!llc_ui->dev) { + if (!llc->dev) { rtnl_lock(); dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_smac); rtnl_unlock(); @@ -1040,12 +1036,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, int len, if (!dev) goto release; } else - dev = llc_ui->dev; + dev = llc->dev; size = dev->hard_header_len + len + llc_ui_header_len(sk, addr); rc = -EMSGSIZE; if (size > dev->mtu) goto release; - skb = sock_alloc_send_skb(sk, size, flags & MSG_DONTWAIT, &rc); + skb = sock_alloc_send_skb(sk, size, noblock, &rc); if (!skb) goto release; skb->sk = sk; @@ -1053,30 +1049,32 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, int len, skb_reserve(skb, dev->hard_header_len + llc_ui_header_len(sk, addr)); rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (rc) - goto release; + goto out; if (addr->sllc_test) { - rc = llc_ui_send_llc1(llc_ui->sap, skb, addr, LLC_TEST_PRIM); + rc = 
llc_ui_send_llc1(llc->sap, skb, addr, LLC_TEST_PRIM); goto out; } if (addr->sllc_xid) { - rc = llc_ui_send_llc1(llc_ui->sap, skb, addr, LLC_XID_PRIM); + rc = llc_ui_send_llc1(llc->sap, skb, addr, LLC_XID_PRIM); goto out; } if (sk->type == SOCK_DGRAM || addr->sllc_ua) { - rc = llc_ui_send_llc1(llc_ui->sap, skb, addr, LLC_DATAUNIT_PRIM); + rc = llc_ui_send_llc1(llc->sap, skb, addr, LLC_DATAUNIT_PRIM); goto out; } rc = -ENOPROTOOPT; if (!(sk->type == SOCK_STREAM && !addr->sllc_ua)) goto out; - rc = -ENOTCONN; - if (!llc_ui->core_sk) - goto out; - rc = llc_ui_send_data(llc_ui->sap, sk, skb, addr); + rc = llc_ui_send_data(sk, skb, addr, noblock); + if (rc) + dprintk("%s: llc_ui_send_data failed: %d\n", __FUNCTION__, rc); out: if (rc) - skb_free_datagram(sk, skb); + kfree_skb(skb); release: + if (rc) + dprintk("%s: failed sending from %02X to %02X: %d\n", + __FUNCTION__, llc->laddr.lsap, llc->daddr.lsap, rc); release_sock(sk); return rc ? : len; } @@ -1095,7 +1093,7 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_llc sllc; struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct llc_opt *llc = llc_sk(sk); int rc = 0; lock_sock(sk); @@ -1107,20 +1105,19 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, rc = -ENOTCONN; if (sk->state != TCP_ESTABLISHED) goto out; - if(llc_ui->dev) - sllc.sllc_arphrd = llc_ui->dev->type; - sllc.sllc_dsap = llc_sk(llc_ui->core_sk)->daddr.lsap; - memcpy(&sllc.sllc_dmac, &llc_sk(llc_ui->core_sk)->daddr.mac, - IFHWADDRLEN); + if(llc->dev) + sllc.sllc_arphrd = llc->dev->type; + sllc.sllc_dsap = llc->daddr.lsap; + memcpy(&sllc.sllc_dmac, &llc->daddr.mac, IFHWADDRLEN); } else { rc = -EINVAL; - if (!llc_ui->sap) + if (!llc->sap) goto out; - sllc.sllc_ssap = llc_ui->sap->laddr.lsap; + sllc.sllc_ssap = llc->sap->laddr.lsap; - if (llc_ui->dev) { - sllc.sllc_arphrd = llc_ui->dev->type; - memcpy(&sllc.sllc_smac, &llc_ui->dev->dev_addr, + if (llc->dev) { + 
sllc.sllc_arphrd = llc->dev->type; + memcpy(&sllc.sllc_smac, &llc->dev->dev_addr, IFHWADDRLEN); } } @@ -1160,61 +1157,56 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); - struct llc_opt *llc_core; + struct llc_opt *llc = llc_sk(sk); int rc = -EINVAL, opt; lock_sock(sk); if (level != SOL_LLC || optlen != sizeof(int)) goto out; - rc = -ENOTCONN; - if (!llc_ui->core_sk) - goto out; rc = get_user(opt, (int *)optval); if (rc) goto out; rc = -EINVAL; - llc_core = llc_sk(llc_ui->core_sk); switch (optname) { case LLC_OPT_RETRY: if (opt > LLC_OPT_MAX_RETRY) goto out; - llc_core->n2 = opt; + llc->n2 = opt; break; case LLC_OPT_SIZE: if (opt > LLC_OPT_MAX_SIZE) goto out; - llc_core->n1 = opt; + llc->n1 = opt; break; case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; - llc_core->ack_timer.expire = opt; + llc->ack_timer.expire = opt; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; - llc_core->pf_cycle_timer.expire = opt; + llc->pf_cycle_timer.expire = opt; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; - llc_core->rej_sent_timer.expire = opt; + llc->rej_sent_timer.expire = opt; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; - llc_core->busy_state_timer.expire = opt; + llc->busy_state_timer.expire = opt; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; - llc_core->k = opt; + llc->k = opt; break; case LLC_OPT_RX_WIN: if (opt > LLC_OPT_MAX_WIN) goto out; - llc_core->rw = opt; + llc->rw = opt; break; default: rc = -ENOPROTOOPT; @@ -1240,40 +1232,35 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { struct sock *sk = sock->sk; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); - struct llc_opt *llc_core; + struct llc_opt *llc = llc_sk(sk); int val = 0, len = 0, rc = -EINVAL; 
lock_sock(sk); if (level != SOL_LLC) goto out; - rc = -ENOTCONN; - if (!llc_ui->core_sk) - goto out; rc = get_user(len, optlen); if (rc) goto out; rc = -EINVAL; if (len != sizeof(int)) goto out; - llc_core = llc_sk(llc_ui->core_sk); switch (optname) { case LLC_OPT_RETRY: - val = llc_core->n2; break; + val = llc->n2; break; case LLC_OPT_SIZE: - val = llc_core->n1; break; + val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: - val = llc_core->ack_timer.expire; break; + val = llc->ack_timer.expire; break; case LLC_OPT_P_TMR_EXP: - val = llc_core->pf_cycle_timer.expire; break; + val = llc->pf_cycle_timer.expire; break; case LLC_OPT_REJ_TMR_EXP: - val = llc_core->rej_sent_timer.expire; break; + val = llc->rej_sent_timer.expire; break; case LLC_OPT_BUSY_TMR_EXP: - val = llc_core->busy_state_timer.expire; break; + val = llc->busy_state_timer.expire; break; case LLC_OPT_TX_WIN: - val = llc_core->k; break; + val = llc->k; break; case LLC_OPT_RX_WIN: - val = llc_core->rw; break; + val = llc->rw; break; default: rc = -ENOPROTOOPT; goto out; @@ -1296,7 +1283,7 @@ static void llc_ui_ind_test(struct llc_prim_if_block *prim) { struct llc_prim_test *prim_data = &prim->data->test; struct sk_buff *skb = prim_data->skb; - struct sockaddr_llc *llc_ui = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr = llc_ui_skb_cb(skb); struct sock *sk = llc_ui_find_sk_by_addr(&prim_data->daddr, &prim_data->saddr, skb->dev); if (!sk) @@ -1304,15 +1291,15 @@ static void llc_ui_ind_test(struct llc_prim_if_block *prim) if (sk->state == TCP_LISTEN) goto out_put; /* save primitive for use by the user. 
*/ - llc_ui->sllc_family = AF_LLC; - llc_ui->sllc_arphrd = skb->dev->type; - llc_ui->sllc_test = 1; - llc_ui->sllc_xid = 0; - llc_ui->sllc_ua = 0; - llc_ui->sllc_dsap = prim_data->daddr.lsap; - memcpy(llc_ui->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); - llc_ui->sllc_ssap = prim_data->saddr.lsap; - memcpy(llc_ui->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); + addr->sllc_family = AF_LLC; + addr->sllc_arphrd = skb->dev->type; + addr->sllc_test = 1; + addr->sllc_xid = 0; + addr->sllc_ua = 0; + addr->sllc_dsap = prim_data->daddr.lsap; + memcpy(addr->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); + addr->sllc_ssap = prim_data->saddr.lsap; + memcpy(addr->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); /* queue skb to the user. */ if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); @@ -1331,7 +1318,7 @@ static void llc_ui_ind_xid(struct llc_prim_if_block *prim) { struct llc_prim_xid *prim_data = &prim->data->xid; struct sk_buff *skb = prim_data->skb; - struct sockaddr_llc *llc_ui = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr = llc_ui_skb_cb(skb); struct sock *sk = llc_ui_find_sk_by_addr(&prim_data->daddr, &prim_data->saddr, skb->dev); if (!sk) @@ -1339,15 +1326,15 @@ static void llc_ui_ind_xid(struct llc_prim_if_block *prim) if (sk->state == TCP_LISTEN) goto out_put; /* save primitive for use by the user. 
*/ - llc_ui->sllc_family = AF_LLC; - llc_ui->sllc_arphrd = 0; - llc_ui->sllc_test = 0; - llc_ui->sllc_xid = 1; - llc_ui->sllc_ua = 0; - llc_ui->sllc_dsap = prim_data->daddr.lsap; - memcpy(llc_ui->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); - llc_ui->sllc_ssap = prim_data->saddr.lsap; - memcpy(llc_ui->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); + addr->sllc_family = AF_LLC; + addr->sllc_arphrd = 0; + addr->sllc_test = 0; + addr->sllc_xid = 1; + addr->sllc_ua = 0; + addr->sllc_dsap = prim_data->daddr.lsap; + memcpy(addr->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); + addr->sllc_ssap = prim_data->saddr.lsap; + memcpy(addr->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); /* queue skb to the user. */ if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); @@ -1366,7 +1353,7 @@ static void llc_ui_ind_dataunit(struct llc_prim_if_block *prim) { struct llc_prim_unit_data *prim_data = &prim->data->udata; struct sk_buff *skb = prim_data->skb; - struct sockaddr_llc *llc_ui = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr = llc_ui_skb_cb(skb); struct sock *sk = llc_ui_find_sk_by_addr(&prim_data->daddr, &prim_data->saddr, skb->dev); if (!sk) @@ -1374,15 +1361,15 @@ static void llc_ui_ind_dataunit(struct llc_prim_if_block *prim) if (sk->state == TCP_LISTEN) goto out_put; /* save primitive for use by the user. 
*/ - llc_ui->sllc_family = AF_LLC; - llc_ui->sllc_arphrd = skb->dev->type; - llc_ui->sllc_test = 0; - llc_ui->sllc_xid = 0; - llc_ui->sllc_ua = 1; - llc_ui->sllc_dsap = prim_data->daddr.lsap; - memcpy(llc_ui->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); - llc_ui->sllc_ssap = prim_data->saddr.lsap; - memcpy(llc_ui->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); + addr->sllc_family = AF_LLC; + addr->sllc_arphrd = skb->dev->type; + addr->sllc_test = 0; + addr->sllc_xid = 0; + addr->sllc_ua = 1; + addr->sllc_dsap = prim_data->daddr.lsap; + memcpy(addr->sllc_dmac, prim_data->daddr.mac, IFHWADDRLEN); + addr->sllc_ssap = prim_data->saddr.lsap; + memcpy(addr->sllc_smac, prim_data->saddr.mac, IFHWADDRLEN); /* queue skb to the user. */ if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); @@ -1400,29 +1387,36 @@ out:; static void llc_ui_ind_conn(struct llc_prim_if_block *prim) { struct llc_prim_conn *prim_data = &prim->data->conn; - struct sock* sk; + struct sock* newsk = prim_data->sk, *parent; + struct llc_opt *newllc = llc_sk(newsk); struct sk_buff *skb2; - llc_sk(prim_data->sk)->laddr.lsap = prim->sap->laddr.lsap; - sk = llc_ui_find_sk_by_addr(&llc_sk(prim_data->sk)->laddr, - &prim_data->saddr, prim_data->dev); - if (!sk) + parent = llc_ui_find_sk_by_addr(&newllc->laddr, &prim_data->saddr, + prim_data->dev); + if (!parent) { + dprintk("%s: can't find a parent :-(\n", __FUNCTION__); goto out; - if (sk->type != SOCK_STREAM || sk->state != TCP_LISTEN) + } + dprintk("%s: found parent of remote %02X, its local %02X\n", __FUNCTION__, + newllc->daddr.lsap, llc_sk(parent)->laddr.lsap); + if (parent->type != SOCK_STREAM || parent->state != TCP_LISTEN) { + dprintk("%s: bad parent :-(\n", __FUNCTION__); goto out_put; - if (prim->data->conn.status) + } + if (prim->data->conn.status) { + dprintk("%s: bad status :-(\n", __FUNCTION__); goto out_put; /* bad status. */ + } /* give this connection a link number. 
*/ - llc_sk(prim_data->sk)->link = - llc_ui_next_link_no(llc_sk(prim_data->sk)->laddr.lsap); + newllc->link = llc_ui_next_link_no(newllc->laddr.lsap); skb2 = alloc_skb(0, GFP_ATOMIC); if (!skb2) goto out_put; - skb2->sk = prim_data->sk; - skb_queue_tail(&sk->receive_queue, skb2); - sk->state_change(sk); + skb2->sk = newsk; + skb_queue_tail(&parent->receive_queue, skb2); + parent->state_change(parent); out_put: - sock_put(sk); + sock_put(parent); out:; } @@ -1435,14 +1429,13 @@ out:; static void llc_ui_ind_disc(struct llc_prim_if_block *prim) { struct llc_prim_disc *prim_data = &prim->data->disc; - struct sock* sk = llc_sk(prim_data->sk)->handler; + struct sock* sk = prim_data->sk; - if (!sk) - goto out; sock_hold(sk); - if (sk->type != SOCK_STREAM || sk->state != TCP_ESTABLISHED) + if (sk->type != SOCK_STREAM || sk->state != TCP_ESTABLISHED) { + dprintk("%s: bad socket...\n", __FUNCTION__); goto out_put; - llc_ui_sk(sk)->core_sk = NULL; + } sk->shutdown = SHUTDOWN_MASK; sk->socket->state = SS_UNCONNECTED; sk->state = TCP_CLOSE; @@ -1452,7 +1445,6 @@ static void llc_ui_ind_disc(struct llc_prim_if_block *prim) } out_put: sock_put(sk); -out:; } /** @@ -1475,8 +1467,8 @@ static int llc_ui_indicate(struct llc_prim_if_block *prim) case LLC_CONN_PRIM: llc_ui_ind_conn(prim); break; case LLC_DATA_PRIM: - printk(KERN_ERR "%s: shouldn't happen, LLC_DATA_PRIM " - "is gone for ->ind()...\n", __FUNCTION__); + dprintk("%s: shouldn't happen, LLC_DATA_PRIM " + "is gone for ->ind()...\n", __FUNCTION__); break; case LLC_DISC_PRIM: llc_ui_ind_disc(prim); break; @@ -1496,28 +1488,21 @@ static int llc_ui_indicate(struct llc_prim_if_block *prim) static void llc_ui_conf_conn(struct llc_prim_if_block *prim) { struct llc_prim_conn *prim_data = &prim->data->conn; - struct llc_opt *llc_core = llc_sk(prim_data->sk); - struct sock* sk = llc_core->handler; - struct llc_ui_opt *llc_ui = llc_ui_sk(sk); + struct sock* sk = prim_data->sk; - if (!sk) - goto out; sock_hold(sk); if (sk->type != 
SOCK_STREAM || sk->state != TCP_SYN_SENT) goto out_put; if (!prim->data->conn.status) { sk->socket->state = SS_CONNECTED; sk->state = TCP_ESTABLISHED; - llc_ui->core_sk = prim_data->sk; } else { sk->socket->state = SS_UNCONNECTED; sk->state = TCP_CLOSE; - llc_ui->core_sk = NULL; } sk->state_change(sk); out_put: sock_put(sk); -out:; } /** @@ -1529,20 +1514,16 @@ out:; static void llc_ui_conf_disc(struct llc_prim_if_block *prim) { struct llc_prim_disc *prim_data = &prim->data->disc; - struct sock* sk = llc_sk(prim_data->sk)->handler; + struct sock* sk = prim_data->sk; - if (!sk) - goto out; sock_hold(sk); if (sk->type != SOCK_STREAM || sk->state != TCP_CLOSING) goto out_put; - llc_ui_sk(sk)->core_sk = NULL; sk->socket->state = SS_UNCONNECTED; sk->state = TCP_CLOSE; sk->state_change(sk); out_put: sock_put(sk); -out:; } /** @@ -1559,8 +1540,8 @@ static int llc_ui_confirm(struct llc_prim_if_block *prim) case LLC_CONN_PRIM: llc_ui_conf_conn(prim); break; case LLC_DATA_PRIM: - printk(KERN_ERR "%s: shouldn't happen, LLC_DATA_PRIM " - "is gone for ->conf()...\n", __FUNCTION__); + dprintk("%s: shouldn't happen, LLC_DATA_PRIM " + "is gone for ->conf()...\n", __FUNCTION__); break; case LLC_DISC_PRIM: llc_ui_conf_disc(prim); break; @@ -1596,46 +1577,42 @@ static int llc_ui_get_info(char *buffer, char **start, off_t offset, int length) off_t pos = 0; off_t begin = 0; struct sock *s; - int len = sprintf(buffer, "SocketID SKt Mc local_mac_sap\t " - "remote_mac_sap\t tx_queue rx_queue st uid " - "link_no\n"); + int len = sprintf(buffer, "SKt Mc local_mac_sap " + "remote_mac_sap tx_queue rx_queue st uid " + "link\n"); /* Output the LLC socket data for the /proc filesystem */ read_lock_bh(&llc_ui_sockets_lock); for (s = llc_ui_sockets; s; s = s->next) { - struct llc_ui_opt *llc_ui = llc_ui_sk(s); - len += sprintf(buffer + len, "%p %02X %02X ", s, s->type, - !llc_mac_null(llc_ui->addr.sllc_mmac)); - if (llc_ui->sap) { - if (llc_ui->dev && - llc_mac_null(llc_ui->addr.sllc_mmac)) + 
struct llc_opt *llc = llc_sk(s); + + len += sprintf(buffer + len, "%2X %2X ", s->type, + !llc_mac_null(llc->addr.sllc_mmac)); + if (llc->sap) { + if (llc->dev && llc_mac_null(llc->addr.sllc_mmac)) llc_ui_format_mac(buffer + len, - llc_ui->dev->dev_addr); + llc->dev->dev_addr); else { - if (!llc_mac_null(llc_ui->addr.sllc_mmac)) + if (!llc_mac_null(llc->addr.sllc_mmac)) llc_ui_format_mac(buffer + len, - llc_ui->addr.sllc_mmac); + llc->addr.sllc_mmac); else sprintf(buffer + len, "00:00:00:00:00:00"); } len += MAC_FORMATTED_SIZE; len += sprintf(buffer + len, "@%02X ", - llc_ui->sap->laddr.lsap); + llc->sap->laddr.lsap); } else len += sprintf(buffer + len, "00:00:00:00:00:00@00 "); - llc_ui_format_mac(buffer + len, llc_ui->addr.sllc_dmac); + llc_ui_format_mac(buffer + len, llc->addr.sllc_dmac); len += MAC_FORMATTED_SIZE; len += sprintf(buffer + len, - "@%02X %08d:%08d %02d %-3d ", - llc_ui->addr.sllc_dsap, + "@%02X %8d %8d %2d %-3d ", + llc->addr.sllc_dsap, atomic_read(&s->wmem_alloc), atomic_read(&s->rmem_alloc), s->state, SOCK_INODE(s->socket)->i_uid); - if (llc_ui->core_sk) - len += sprintf(buffer + len, "%-7d\n", - llc_sk(llc_ui->core_sk)->link); - else - len += sprintf(buffer + len, "no_link\n"); + len += sprintf(buffer + len, "%-4d\n", llc->link); /* Are we still dumping unwanted data then discard the record */ pos = begin + len; @@ -1686,7 +1663,7 @@ static struct proto_ops SOCKOPS_WRAPPED(llc_ui_ops) = { SOCKOPS_WRAP(llc_ui, PF_LLC); static char llc_ui_banner[] __initdata = - KERN_INFO "NET4.0 IEEE 802.2 User Interface SAPs, Jay Schulist, 2001\n"; + KERN_INFO "NET4.0 IEEE 802.2 BSD sockets, Jay Schulist, 2001, Arnaldo C. Melo, 2002\n"; int __init llc_ui_init(void) { -- cgit v1.2.3 From f6224ba8646030861e84b8f7f4acb03226c5c4f0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Sep 2002 05:11:23 -0700 Subject: [NAPI]: Set SCHED before dev->open, clear if fails. Restore netif_running check to netif_rx_schedule_prep. 
--- include/linux/netdevice.h | 3 ++- net/core/dev.c | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a8b79f10637a..6ed74303fbda 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -711,7 +711,8 @@ enum { static inline int netif_rx_schedule_prep(struct net_device *dev) { - return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); + return netif_running(dev) && + !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); } /* Add interface to tail of rx poll list. This assumes that _prep has diff --git a/net/core/dev.c b/net/core/dev.c index e67776f3a9ba..be5f1c87d480 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -694,10 +694,14 @@ int dev_open(struct net_device *dev) * Call device private open method */ if (try_inc_mod_count(dev->owner)) { + set_bit(__LINK_STATE_START, &dev->state); if (dev->open) { ret = dev->open(dev); - if (ret && dev->owner) - __MOD_DEC_USE_COUNT(dev->owner); + if (ret) { + clear_bit(__LINK_STATE_START, &dev->state); + if (dev->owner) + __MOD_DEC_USE_COUNT(dev->owner); + } } } else { ret = -ENODEV; @@ -713,8 +717,6 @@ int dev_open(struct net_device *dev) */ dev->flags |= IFF_UP; - set_bit(__LINK_STATE_START, &dev->state); - /* * Initialize multicasting status */ -- cgit v1.2.3 From 37ee17224e359fdaf4fcd7fba1d8c8a5be5b34ee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Sep 2002 12:39:03 -0700 Subject: [TIGON3]: Add 5704 support. 
--- drivers/net/tg3.c | 158 ++++++++++++++++++++++++++++++++++++++++++------ drivers/net/tg3.h | 26 +++++++- drivers/pci/pci.ids | 1 + include/linux/pci_ids.h | 1 + 4 files changed, 164 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 79d2d5400418..122b5aa10204 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -149,6 +149,8 @@ static struct pci_device_id tg3_pci_tbl[] __devinitdata = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X, @@ -407,7 +409,6 @@ static int tg3_set_power_state(struct tg3 *tp, int state) tr32(GRC_LOCAL_CTRL); udelay(100); - tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x02); return 0; case 1: @@ -873,6 +874,8 @@ static int tg3_setup_copper_phy(struct tg3 *tp) tr32(MAC_MI_MODE); udelay(40); + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x02); + if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) { tg3_readphy(tp, MII_BMSR, &bmsr); tg3_readphy(tp, MII_BMSR, &bmsr); @@ -3111,9 +3114,11 @@ static void tg3_chip_reset(struct tg3 *tp) tp->misc_host_ctrl); /* Set MAX PCI retry to zero. */ - pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, - (PCISTATE_ROM_ENABLE | - PCISTATE_ROM_RETRY_ENABLE)); + val = (PCISTATE_ROM_ENABLE | PCISTATE_ROM_RETRY_ENABLE); + if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 && + (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) + val |= PCISTATE_RETRY_SAME_DMA; + pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val); pci_restore_state(tp->pdev, tp->pci_cfg_state); @@ -3127,12 +3132,34 @@ static void tg3_chip_reset(struct tg3 *tp) tw32(TG3PCI_MISC_HOST_CTRL, tp->misc_host_ctrl); } +/* tp->lock is held. 
*/ +static void tg3_stop_fw(struct tg3 *tp) +{ + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) { + u32 val; + int i; + + tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_PAUSE_FW); + val = tr32(GRC_RX_CPU_EVENT); + val |= (1 << 14); + tw32(GRC_RX_CPU_EVENT, val); + + /* Wait for RX cpu to ACK the event. */ + for (i = 0; i < 100; i++) { + if (!(tr32(GRC_RX_CPU_EVENT) & (1 << 14))) + break; + udelay(1); + } + } +} + /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp) { u32 val; int i; + tg3_stop_fw(tp); tg3_abort_hw(tp); tg3_chip_reset(tp); tg3_write_mem(tp, @@ -3152,6 +3179,17 @@ static int tg3_halt(struct tg3 *tp) return -ENODEV; } + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) { + if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE) + tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, + DRV_STATE_WOL); + else + tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, + DRV_STATE_UNLOAD); + } else + tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, + DRV_STATE_SUSPEND); + return 0; } @@ -3849,6 +3887,8 @@ static int tg3_reset_hw(struct tg3 *tp) tg3_disable_ints(tp); + tg3_stop_fw(tp); + if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) { err = tg3_abort_hw(tp); if (err) @@ -3883,6 +3923,13 @@ static int tg3_reset_hw(struct tg3 *tp) return -ENODEV; } + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) + tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, + DRV_STATE_START); + else + tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, + DRV_STATE_SUSPEND); + /* This works around an issue with Athlon chipsets on * B3 tigon3 silicon. This bit has no effect on any * other revision. @@ -3892,6 +3939,13 @@ static int tg3_reset_hw(struct tg3 *tp) tw32(TG3PCI_CLOCK_CTRL, val); tr32(TG3PCI_CLOCK_CTRL); + if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 && + (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) { + val = tr32(TG3PCI_PCISTATE); + val |= PCISTATE_RETRY_SAME_DMA; + tw32(TG3PCI_PCISTATE, val); + } + /* Clear statistics/status block in chip, and status block in ram. 
*/ for (i = NIC_SRAM_STATS_BLK; i < NIC_SRAM_STATUS_BLK + TG3_HW_STATUS_SIZE; @@ -3929,7 +3983,10 @@ static int tg3_reset_hw(struct tg3 *tp) /* Initialize MBUF/DESC pool. */ tw32(BUFMGR_MB_POOL_ADDR, NIC_SRAM_MBUF_POOL_BASE); - tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) + tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE64); + else + tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE96); tw32(BUFMGR_DMA_DESC_POOL_ADDR, NIC_SRAM_DMA_DESC_POOL_BASE); tw32(BUFMGR_DMA_DESC_POOL_SIZE, NIC_SRAM_DMA_DESC_POOL_SIZE); @@ -4195,11 +4252,25 @@ static int tg3_reset_hw(struct tg3 *tp) tr32(WDMAC_MODE); udelay(40); - tw32(RDMAC_MODE, (RDMAC_MODE_ENABLE | RDMAC_MODE_TGTABORT_ENAB | - RDMAC_MODE_MSTABORT_ENAB | RDMAC_MODE_PARITYERR_ENAB | - RDMAC_MODE_ADDROFLOW_ENAB | RDMAC_MODE_FIFOOFLOW_ENAB | - RDMAC_MODE_FIFOURUN_ENAB | RDMAC_MODE_FIFOOREAD_ENAB | - RDMAC_MODE_LNGREAD_ENAB)); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 && + (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) { + val = tr32(TG3PCI_X_CAPS); + val &= ~(PCIX_CAPS_SPLIT_MASK | PCIX_CAPS_BURST_MASK); + val |= (PCIX_CAPS_MAX_BURST_5704 << PCIX_CAPS_BURST_SHIFT); + if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) + val |= (tp->split_mode_max_reqs << + PCIX_CAPS_SPLIT_SHIFT); + tw32(TG3PCI_X_CAPS, val); + } + + val = (RDMAC_MODE_ENABLE | RDMAC_MODE_TGTABORT_ENAB | + RDMAC_MODE_MSTABORT_ENAB | RDMAC_MODE_PARITYERR_ENAB | + RDMAC_MODE_ADDROFLOW_ENAB | RDMAC_MODE_FIFOOFLOW_ENAB | + RDMAC_MODE_FIFOURUN_ENAB | RDMAC_MODE_FIFOOREAD_ENAB | + RDMAC_MODE_LNGREAD_ENAB); + if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) + val |= RDMAC_MODE_SPLIT_ENABLE; + tw32(RDMAC_MODE, val); tr32(RDMAC_MODE); udelay(40); @@ -4392,6 +4463,21 @@ static void tg3_timer(unsigned long __opaque) tp->timer_counter = tp->timer_multiplier; } + /* Heartbeat is only sent once every 120 seconds. 
*/ + if (!--tp->asf_counter) { + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) { + u32 val; + + tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_ALIVE); + tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 4); + tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, 3); + val = tr32(GRC_RX_CPU_EVENT); + val |= (1 << 14); + tw32(GRC_RX_CPU_EVENT, val); + } + tp->asf_counter = tp->asf_multiplier; + } + spin_unlock(&tp->tx_lock); spin_unlock_irq(&tp->lock); @@ -4440,6 +4526,7 @@ static int tg3_open(struct net_device *dev) } else { tp->timer_offset = HZ / 10; tp->timer_counter = tp->timer_multiplier = 10; + tp->asf_counter = tp->asf_multiplier = (10 * 120); init_timer(&tp->timer); tp->timer.expires = jiffies + tp->timer_offset; @@ -5703,6 +5790,9 @@ static int __devinit tg3_phy_probe(struct tg3 *tp) tp->pci_chip_rev_id == CHIPREV_ID_5703_A2) && (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP)) tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; + + if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) + tp->tg3_flags |= TG3_FLAG_ENABLE_ASF; } /* Now read the physical PHY_ID from the chip and verify @@ -5770,6 +5860,11 @@ static int __devinit tg3_phy_probe(struct tg3 *tp) tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x2aaa); } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) { + tg3_writephy(tp, 0x1c, 0x8d68); + tg3_writephy(tp, 0x1c, 0x8d68); + } + /* Enable Ethernet@WireSpeed */ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007); tg3_readphy(tp, MII_TG3_AUX_CTRL, &val); @@ -6085,9 +6180,16 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5702FE && grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703 && grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703S && + grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704 && grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_AC91002A1) return -ENODEV; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 && + grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5704CIOBE) { + tp->tg3_flags |= TG3_FLAG_SPLIT_MODE; + tp->split_mode_max_reqs = SPLIT_MODE_5704_MAX_REQ; + } + /* ROFL, you 
should see Broadcom's driver code implementing * this, stuff like "if (a || b)" where a and b are always * mutually exclusive. DaveM finds like 6 bugs today, hello! @@ -6177,7 +6279,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) static int __devinit tg3_get_device_address(struct tg3 *tp) { struct net_device *dev = tp->dev; - u32 hi, lo; + u32 hi, lo, mac_offset; + + if (PCI_FUNC(tp->pdev->devfn) == 0) + mac_offset = 0x7c; + else + mac_offset = 0xcc; /* First try to get it from MAC address mailbox. */ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi); @@ -6192,8 +6299,8 @@ static int __devinit tg3_get_device_address(struct tg3 *tp) dev->dev_addr[5] = (lo >> 0) & 0xff; } /* Next, try NVRAM. */ - else if (!tg3_nvram_read(tp, 0x7c, &hi) && - !tg3_nvram_read(tp, 0x80, &lo)) { + else if (!tg3_nvram_read(tp, mac_offset + 0, &hi) && + !tg3_nvram_read(tp, mac_offset + 4, &lo)) { dev->dev_addr[0] = ((hi >> 16) & 0xff); dev->dev_addr[1] = ((hi >> 24) & 0xff); dev->dev_addr[2] = ((lo >> 0) & 0xff); @@ -6321,16 +6428,26 @@ static int __devinit tg3_test_dma(struct tg3 *tp) (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) | (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT); } else { - tp->dma_rwctrl = - (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) | - (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) | - (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) | - (0x3 << DMA_RWCTRL_READ_WATER_SHIFT) | - (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) + tp->dma_rwctrl = + (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) | + (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) | + (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) | + (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) | + (0x00 << DMA_RWCTRL_MIN_DMA_SHIFT); + else + tp->dma_rwctrl = + (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) | + (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) | + (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) | + (0x3 << DMA_RWCTRL_READ_WATER_SHIFT) | + (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT); /* Wheee, some more chip bugs... 
*/ if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1 || - tp->pci_chip_rev_id == CHIPREV_ID_5703_A2) + tp->pci_chip_rev_id == CHIPREV_ID_5703_A2 || + tp->pci_chip_rev_id == CHIPREV_ID_5703_A3 || + tp->pci_chip_rev_id == CHIPREV_ID_5704_A0) tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA; } @@ -6505,6 +6622,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp) case PHY_ID_BCM5411: return "5411"; case PHY_ID_BCM5701: return "5701"; case PHY_ID_BCM5703: return "5703"; + case PHY_ID_BCM5704: return "5704"; case PHY_ID_BCM8002: return "8002"; case PHY_ID_SERDES: return "serdes"; default: return "unknown"; diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 445c5067178a..6b358ff82387 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -58,6 +58,11 @@ #define TG3PCI_MAX_LAT 0x0000003f #define TG3PCI_X_CAPS 0x00000040 #define PCIX_CAPS_RELAXED_ORDERING 0x00020000 +#define PCIX_CAPS_SPLIT_MASK 0x00700000 +#define PCIX_CAPS_SPLIT_SHIFT 20 +#define PCIX_CAPS_BURST_MASK 0x000c0000 +#define PCIX_CAPS_BURST_SHIFT 18 +#define PCIX_CAPS_MAX_BURST_5704 2 #define TG3PCI_PM_CAP_PTR 0x00000041 #define TG3PCI_X_COMMAND 0x00000042 #define TG3PCI_X_STATUS 0x00000044 @@ -109,10 +114,13 @@ #define CHIPREV_ID_5703_A0 0x1000 #define CHIPREV_ID_5703_A1 0x1001 #define CHIPREV_ID_5703_A2 0x1002 +#define CHIPREV_ID_5703_A3 0x1003 +#define CHIPREV_ID_5704_A0 0x2000 #define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12) #define ASIC_REV_5700 0x07 #define ASIC_REV_5701 0x00 #define ASIC_REV_5703 0x01 +#define ASIC_REV_5704 0x02 #define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8) #define CHIPREV_5700_AX 0x70 #define CHIPREV_5700_BX 0x71 @@ -165,6 +173,7 @@ #define PCISTATE_ROM_ENABLE 0x00000020 #define PCISTATE_ROM_RETRY_ENABLE 0x00000040 #define PCISTATE_FLAT_VIEW 0x00000100 +#define PCISTATE_RETRY_SAME_DMA 0x00002000 #define TG3PCI_CLOCK_CTRL 0x00000074 #define CLOCK_CTRL_CORECLK_DISABLE 0x00000200 #define CLOCK_CTRL_RXCLK_DISABLE 0x00000400 @@ -843,6 +852,8 @@ #define 
RDMAC_MODE_FIFOURUN_ENAB 0x00000080 #define RDMAC_MODE_FIFOOREAD_ENAB 0x00000100 #define RDMAC_MODE_LNGREAD_ENAB 0x00000200 +#define RDMAC_MODE_SPLIT_ENABLE 0x00000800 +#define RDMAC_MODE_SPLIT_RESET 0x00001000 #define RDMAC_STATUS 0x00004804 #define RDMAC_STATUS_TGTABORT 0x00000004 #define RDMAC_STATUS_MSTABORT 0x00000008 @@ -1126,6 +1137,8 @@ #define GRC_MISC_CFG_BOARD_ID_5702FE 0x00004000 #define GRC_MISC_CFG_BOARD_ID_5703 0x00000000 #define GRC_MISC_CFG_BOARD_ID_5703S 0x00002000 +#define GRC_MISC_CFG_BOARD_ID_5704 0x00000000 +#define GRC_MISC_CFG_BOARD_ID_5704CIOBE 0x00000000 #define GRC_MISC_CFG_BOARD_ID_AC91002A1 0x00018000 #define GRC_LOCAL_CTRL 0x00006808 #define GRC_LCLCTRL_INT_ACTIVE 0x00000001 @@ -1297,7 +1310,8 @@ #define NIC_SRAM_RX_BUFFER_DESC 0x00006000 /* 256 entries */ #define NIC_SRAM_RX_JUMBO_BUFFER_DESC 0x00007000 /* 256 entries */ #define NIC_SRAM_MBUF_POOL_BASE 0x00008000 -#define NIC_SRAM_MBUF_POOL_SIZE 0x00018000 +#define NIC_SRAM_MBUF_POOL_SIZE96 0x00018000 +#define NIC_SRAM_MBUF_POOL_SIZE64 0x00018000 /* Currently this is fixed. 
*/ #define PHY_ADDR 0x01 @@ -1749,6 +1763,7 @@ struct tg3 { #define TG3_FLAG_RX_CHECKSUMS 0x00000004 #define TG3_FLAG_USE_LINKCHG_REG 0x00000008 #define TG3_FLAG_USE_MI_INTERRUPT 0x00000010 +#define TG3_FLAG_ENABLE_ASF 0x00000020 #define TG3_FLAG_POLL_SERDES 0x00000080 #define TG3_FLAG_MBOX_WRITE_REORDER 0x00000100 #define TG3_FLAG_PCIX_TARGET_HWBUG 0x00000200 @@ -1772,14 +1787,20 @@ struct tg3 { #define TG3_FLAG_PAUSE_TX 0x08000000 #define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000 #define TG3_FLAG_GOT_SERDES_FLOWCTL 0x20000000 +#define TG3_FLAG_SPLIT_MODE 0x40000000 #define TG3_FLAG_INIT_COMPLETE 0x80000000 u32 msg_enable; + u32 split_mode_max_reqs; +#define SPLIT_MODE_5704_MAX_REQ 3 + struct timer_list timer; u16 timer_counter; u16 timer_multiplier; u32 timer_offset; + u16 asf_counter; + u16 asf_multiplier; struct tg3_link_config link_config; struct tg3_bufmgr_config bufmgr_config; @@ -1820,6 +1841,7 @@ struct tg3 { #define PHY_ID_BCM5411 0x60008070 #define PHY_ID_BCM5701 0x60008110 #define PHY_ID_BCM5703 0x60008160 +#define PHY_ID_BCM5704 0x60008190 #define PHY_ID_BCM8002 0x60010140 #define PHY_ID_SERDES 0xfeedbee0 #define PHY_ID_INVALID 0xffffffff @@ -1839,7 +1861,7 @@ struct tg3 { #define KNOWN_PHY_ID(X) \ ((X) == PHY_ID_BCM5400 || (X) == PHY_ID_BCM5401 || \ (X) == PHY_ID_BCM5411 || (X) == PHY_ID_BCM5701 || \ - (X) == PHY_ID_BCM5703 || \ + (X) == PHY_ID_BCM5703 || (X) == PHY_ID_BCM5704 || \ (X) == PHY_ID_BCM8002 || (X) == PHY_ID_SERDES) unsigned long regs; diff --git a/drivers/pci/pci.ids b/drivers/pci/pci.ids index 186c8fdd99ad..3290c4213660 100644 --- a/drivers/pci/pci.ids +++ b/drivers/pci/pci.ids @@ -5053,6 +5053,7 @@ 14e4 000b BCM5703 1000BaseTX 14e4 8009 BCM5703 1000BaseTX 14e4 800a BCM5703 1000BaseTX + 1648 NetXtreme BCM5704 Gigabit Ethernet 164d NetXtreme BCM5702FE Gigabit Ethernet 16a6 NetXtreme BCM5702X Gigabit Ethernet 16a7 NetXtreme BCM5703X Gigabit Ethernet diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5eea9545b69a..9a1ad256e2cc 
100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1561,6 +1561,7 @@ #define PCI_DEVICE_ID_TIGON3_5701 0x1645 #define PCI_DEVICE_ID_TIGON3_5702 0x1646 #define PCI_DEVICE_ID_TIGON3_5703 0x1647 +#define PCI_DEVICE_ID_TIGON3_5704 0x1648 #define PCI_DEVICE_ID_TIGON3_5702FE 0x164d #define PCI_DEVICE_ID_TIGON3_5702X 0x16a6 #define PCI_DEVICE_ID_TIGON3_5703X 0x16a7 -- cgit v1.2.3 From 2c66151cbc2cc68465489fdf64d16bd1c6ca4d0e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Sep 2002 21:36:11 -0700 Subject: [PATCH] sys_exit() threading improvements, BK-curr This implements the 'keep the initial thread around until every thread in the group exits' concept in a different, less intrusive way, along your suggestions. There is no exit_done completion handling anymore, freeing of the task is still done by wait4(). This has the following side-effect: detached threads/processes can only be started within a thread group, not in a standalone way. (This also fixes the bugs introduced by the ->exit_done code, which made it possible for a zombie task to be reactivated.) I've introduced the p->group_leader pointer, which can/will be used for other purposes in the future as well - since from now on the thread group leader is always existent. Right now it's used to notify the parent of the thread group leader from the last non-leader thread that exits [if the thread group leader is a zombie already]. 
--- fs/exec.c | 1 - include/linux/init_task.h | 1 + include/linux/sched.h | 6 ++++-- kernel/exit.c | 21 ++++++++++++++++----- kernel/fork.c | 9 ++++++++- kernel/signal.c | 38 +++++++++++++++++--------------------- 6 files changed, 46 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 50616244da2f..317b1c395ef2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -515,7 +515,6 @@ static inline int make_private_signals(void) atomic_set(&newsig->count, 1); newsig->group_exit = 0; newsig->group_exit_code = 0; - init_completion(&newsig->group_exit_done); memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); init_sigpending(&newsig->shared_pending); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bdf03241a009..c66cbf8d9f5c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -61,6 +61,7 @@ .parent = &tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ + .group_leader = &tsk, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\ .real_timer = { \ diff --git a/include/linux/sched.h b/include/linux/sched.h index bdce46f40af2..513c065e281c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -219,8 +219,6 @@ struct signal_struct { /* thread group exit support */ int group_exit; int group_exit_code; - - struct completion group_exit_done; }; /* @@ -316,6 +314,7 @@ struct task_struct { struct task_struct *parent; /* parent process */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ + struct task_struct *group_leader; struct list_head thread_group; /* PID hash table linkage. 
*/ @@ -827,6 +826,9 @@ static inline task_t *prev_thread(task_t *p) #define thread_group_leader(p) (p->pid == p->tgid) +#define delay_group_leader(p) \ + (p->tgid == p->pid && !list_empty(&p->thread_group)) + extern void unhash_process(struct task_struct *p); /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ diff --git a/kernel/exit.c b/kernel/exit.c index a305043a5d0d..49f279e1a004 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -583,7 +583,6 @@ static void exit_notify(void) * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ - current->state = TASK_ZOMBIE; if (current->exit_signal != -1) do_notify_parent(current, current->exit_signal); @@ -592,6 +591,8 @@ static void exit_notify(void) while (!list_empty(¤t->ptrace_children)) zap_thread(list_entry(current->ptrace_children.next,struct task_struct,ptrace_list), current, 1); BUG_ON(!list_empty(¤t->children)); + + current->state = TASK_ZOMBIE; /* * No need to unlock IRQs, we'll schedule() immediately * anyway. In the preemption case this also makes it @@ -697,9 +698,9 @@ asmlinkage long sys_exit_group(int error_code) do_exit(sig->group_exit_code); } -static inline int eligible_child(pid_t pid, int options, task_t *p) +static int eligible_child(pid_t pid, int options, task_t *p) { - if (pid>0) { + if (pid > 0) { if (p->pid != pid) return 0; } else if (!pid) { @@ -725,6 +726,12 @@ static inline int eligible_child(pid_t pid, int options, task_t *p) if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) && !(options & __WALL)) return 0; + /* + * Do not consider thread group leaders that are + * in a non-empty thread group: + */ + if (current->tgid != p->tgid && delay_group_leader(p)) + return 0; if (security_ops->task_wait(p)) return 0; @@ -781,8 +788,12 @@ repeat: current->cstime += p->stime + p->cstime; read_unlock(&tasklist_lock); retval = ru ? 
getrusage(p, RUSAGE_BOTH, ru) : 0; - if (!retval && stat_addr) - retval = put_user(p->exit_code, stat_addr); + if (!retval && stat_addr) { + if (p->sig->group_exit) + retval = put_user(p->sig->group_exit_code, stat_addr); + else + retval = put_user(p->exit_code, stat_addr); + } if (retval) goto end_wait4; retval = p->pid; diff --git a/kernel/fork.c b/kernel/fork.c index bd902cc45283..5bc7f2a07915 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -628,7 +628,6 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t atomic_set(&sig->count, 1); sig->group_exit = 0; sig->group_exit_code = 0; - init_completion(&sig->group_exit_done); memcpy(sig->action, current->sig->action, sizeof(sig->action)); sig->curr_target = NULL; init_sigpending(&sig->shared_pending); @@ -672,6 +671,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ if (clone_flags & CLONE_THREAD) clone_flags |= CLONE_SIGHAND; + /* + * Detached threads can only be started up within the thread + * group. + */ + if (clone_flags & CLONE_DETACHED) + clone_flags |= CLONE_THREAD; retval = security_ops->task_create(clone_flags); if (retval) @@ -843,6 +848,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * Let it rip! 
*/ p->tgid = p->pid; + p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); @@ -870,6 +876,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_namespace; } p->tgid = current->tgid; + p->group_leader = current->group_leader; list_add(&p->thread_group, ¤t->thread_group); spin_unlock(¤t->sig->siglock); } diff --git a/kernel/signal.c b/kernel/signal.c index 82ef92369ad1..c2dc0ca60a8b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -251,23 +251,6 @@ void __exit_sighand(struct task_struct *tsk) if (!atomic_read(&sig->count)) BUG(); spin_lock(&sig->siglock); - /* - * Do not let the thread group leader exit until all other - * threads are done: - */ - while (!list_empty(¤t->thread_group) && - current->tgid == current->pid && - atomic_read(&sig->count) > 1) { - - spin_unlock(&sig->siglock); - write_unlock_irq(&tasklist_lock); - - wait_for_completion(&sig->group_exit_done); - - write_lock_irq(&tasklist_lock); - spin_lock(&sig->siglock); - } - spin_lock(&tsk->sigmask_lock); tsk->sig = NULL; if (atomic_dec_and_test(&sig->count)) { @@ -276,10 +259,21 @@ void __exit_sighand(struct task_struct *tsk) flush_sigqueue(&sig->shared_pending); kmem_cache_free(sigact_cachep, sig); } else { - if (!list_empty(¤t->thread_group) && - atomic_read(&sig->count) == 1) - complete(&sig->group_exit_done); - __remove_thread_group(tsk, sig); + struct task_struct *leader = tsk->group_leader; + /* + * If we are the last non-leader member of the thread + * group, and the leader is zombie, then notify the + * group leader's parent process. + * + * (subtle: here we also rely on the fact that if we are the + * thread group leader then we are not zombied yet.) 
+ */ + if (atomic_read(&sig->count) == 1 && + leader->state == TASK_ZOMBIE) { + __remove_thread_group(tsk, sig); + do_notify_parent(leader, leader->exit_signal); + } else + __remove_thread_group(tsk, sig); spin_unlock(&sig->siglock); } clear_tsk_thread_flag(tsk,TIF_SIGPENDING); @@ -1096,6 +1090,8 @@ void do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; int why, status; + if (delay_group_leader(tsk)) + return; if (sig == -1) BUG(); -- cgit v1.2.3 From a83638a44f54e97c24ebc0c106ccd5ff7f5d0160 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 13 Sep 2002 05:57:02 -0700 Subject: [PATCH] readv/writev speedup This is Janet Morgan's patch which converts the readv/writev code to submit all segments for IO before waiting on them, rather than submitting each segment separately. This is a critical performance fix for O_DIRECT reads and writes. Prior to this change, O_DIRECT vectored IO was forced to wait for completion against each segment of the iovec rather than submitting all segments and waiting on the lot. ie: for ten segments, this code will be ten times faster. There will also be moderate improvements for buffered IO - smaller code paths, plus writev() only takes i_sem once. The patch ended up quite large unfortunately - turned out that the only sane way to implement this without duplicating significant amounts of code (the generic_file_write() bounds checking, all the O_DIRECT handling, etc) was to redo generic_file_read() and generic_file_write() to take an iovec/nr_segs pair rather than `buf, count'. 
New exported functions generic_file_readv() and generic_file_writev() have been added: ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos); ssize_t generic_file_writev(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t * ppos); If a driver does not use these in their file_operations then they will continue to use the old readv/writev code, which sits in a loop calling fops->read() or fops->write(). ext2, ext3, JFS and the blockdev driver are currently using this capability. Some coding cleanups were made in fs/read_write.c. Mainly: - pass "READ" or "WRITE" around to indicate the direction of the operation, rather than the (confusing, inverted) VERIFY_READ/VERIFY_WRITE. - Use the identifier `nr_segs' everywhere to indicate the iovec length rather than `count', which is often used to indicate the number of bytes in the syscall. It was confusing the heck out of me. - Some cleanups to the raw driver. - Some additional generality in fs/direct_io.c: the core `struct dio' used to be a "populate-and-go" thing. Janet has broken that up so you can initialise a struct dio once, then loop around feeding it more file segments, then wait on completion against everything. - In a couple of places we needed to handle the situation where we knew, a-priori, that the user was going to get a short read or write. File size limit exceeded, read past i_size, etc. We handled that by shortening the iovec in-place with iov_shorten(). Which is not particularly pretty, but neither were the alternatives. 
--- drivers/char/raw.c | 52 +++++++++++++++------ fs/block_dev.c | 52 ++++++++++++--------- fs/direct-io.c | 132 ++++++++++++++++++++++++++++++---------------------- fs/ext2/file.c | 2 + fs/ext2/inode.c | 8 ++-- fs/ext3/file.c | 22 +++++---- fs/ext3/inode.c | 10 ++-- fs/jfs/file.c | 2 + fs/jfs/inode.c | 8 ++-- fs/read_write.c | 109 +++++++++++++++++++++++++++---------------- include/linux/fs.h | 19 ++++---- include/linux/uio.h | 15 ++++++ kernel/ksyms.c | 4 ++ mm/filemap.c | 108 +++++++++++++++++++++++++++++++++--------- 14 files changed, 362 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 1da088fc6626..a2f05f72791d 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -201,25 +201,29 @@ out: } static ssize_t -rw_raw_dev(int rw, struct file *filp, char *buf, size_t size, loff_t *offp) +rw_raw_dev(int rw, struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *offp) { const int minor = minor(filp->f_dentry->d_inode->i_rdev); struct block_device *bdev = raw_devices[minor].binding; struct inode *inode = bdev->bd_inode; + size_t count = iov_length(iov, nr_segs); ssize_t ret = 0; - if (size == 0) - goto out; - ret = -EINVAL; - if (size < 0) - goto out; - ret = -ENXIO; - if (*offp >= inode->i_size) - goto out; + if (count == 0) + goto out; + + if ((ssize_t)count < 0) + return -EINVAL; + + if (*offp >= inode->i_size) + return -ENXIO; + + if (count + *offp > inode->i_size) { + count = inode->i_size - *offp; + nr_segs = iov_shorten((struct iovec *)iov, nr_segs, count); + } + ret = generic_file_direct_IO(rw, inode, iov, *offp, nr_segs); - if (size + *offp > inode->i_size) - size = inode->i_size - *offp; - ret = generic_file_direct_IO(rw, inode, buf, *offp, size); if (ret > 0) *offp += ret; out: @@ -227,15 +231,31 @@ out: } static ssize_t -raw_read(struct file *filp, char * buf, size_t size, loff_t *offp) +raw_read(struct file *filp, char *buf, size_t size, loff_t *offp) { - 
return rw_raw_dev(READ, filp, buf, size, offp); + struct iovec local_iov = { .iov_base = buf, .iov_len = size}; + + return rw_raw_dev(READ, filp, &local_iov, 1, offp); } static ssize_t raw_write(struct file *filp, const char *buf, size_t size, loff_t *offp) { - return rw_raw_dev(WRITE, filp, (char *)buf, size, offp); + struct iovec local_iov = { .iov_base = buf, .iov_len = size}; + + return rw_raw_dev(WRITE, filp, &local_iov, 1, offp); +} + +static ssize_t +raw_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *offp) +{ + return rw_raw_dev(READ, filp, iov, nr_segs, offp); +} + +static ssize_t +raw_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *offp) +{ + return rw_raw_dev(WRITE, filp, iov, nr_segs, offp); } static struct file_operations raw_fops = { @@ -244,6 +264,8 @@ static struct file_operations raw_fops = { .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, + .readv = raw_readv, + .writev = raw_writev, .owner = THIS_MODULE, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 7d8a089a9d0a..f5a3d314bcd4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -116,11 +116,11 @@ blkdev_get_blocks(struct inode *inode, sector_t iblock, } static int -blkdev_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count) +blkdev_direct_IO(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) { - return generic_direct_IO(rw, inode, buf, offset, - count, blkdev_get_blocks); + return generic_direct_IO(rw, inode, iov, offset, + nr_segs, blkdev_get_blocks); } static int blkdev_writepage(struct page * page) @@ -787,6 +787,14 @@ static int blkdev_reread_part(struct block_device *bdev) return res; } +static ssize_t blkdev_file_write(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + struct iovec local_iov = { .iov_base = (void *)buf, .iov_len = count }; + + return generic_file_write_nolock(file, &local_iov, 1, ppos); +} + static 
int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, unsigned long arg) { @@ -832,26 +840,28 @@ static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, } struct address_space_operations def_blk_aops = { - readpage: blkdev_readpage, - writepage: blkdev_writepage, - sync_page: block_sync_page, - prepare_write: blkdev_prepare_write, - commit_write: blkdev_commit_write, - writepages: generic_writepages, - vm_writeback: generic_vm_writeback, - direct_IO: blkdev_direct_IO, + .readpage = blkdev_readpage, + .writepage = blkdev_writepage, + .sync_page = block_sync_page, + .prepare_write = blkdev_prepare_write, + .commit_write = blkdev_commit_write, + .writepages = generic_writepages, + .vm_writeback = generic_vm_writeback, + .direct_IO = blkdev_direct_IO, }; struct file_operations def_blk_fops = { - open: blkdev_open, - release: blkdev_close, - llseek: block_llseek, - read: generic_file_read, - write: generic_file_write_nolock, - mmap: generic_file_mmap, - fsync: block_fsync, - ioctl: blkdev_ioctl, - sendfile: generic_file_sendfile, + .open = blkdev_open, + .release = blkdev_close, + .llseek = block_llseek, + .read = generic_file_read, + .write = blkdev_file_write, + .mmap = generic_file_mmap, + .fsync = block_fsync, + .ioctl = blkdev_ioctl, + .readv = generic_file_readv, + .writev = generic_file_writev, + .sendfile = generic_file_sendfile, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/fs/direct-io.c b/fs/direct-io.c index 015881a79147..fa4e46719c14 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -75,7 +75,7 @@ struct dio { */ static inline unsigned dio_pages_present(struct dio *dio) { - return dio->head - dio->tail; + return dio->tail - dio->head; } /* @@ -265,6 +265,10 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) static int dio_await_completion(struct dio *dio) { int ret = 0; + + if (dio->bio) + dio_bio_submit(dio); + while (atomic_read(&dio->bio_count)) { 
struct bio *bio = dio_await_one(dio); int ret2; @@ -523,29 +527,16 @@ out: return ret; } -/* - * The main direct-IO function. This is a library function for use by - * filesystem drivers. - */ int -generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, - size_t count, get_blocks_t get_blocks) +direct_io_worker(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks) { const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize_mask = (1 << blkbits) - 1; - const unsigned long user_addr = (unsigned long)buf; - int ret; - int ret2; + unsigned long user_addr; + int seg, ret2, ret = 0; struct dio dio; - size_t bytes; + size_t bytes, tot_bytes = 0; - /* Check the memory alignment. Blocks cannot straddle pages */ - if ((user_addr & blocksize_mask) || (count & blocksize_mask)) { - ret = -EINVAL; - goto out; - } - - /* BIO submission state */ dio.bio = NULL; dio.bvec = NULL; dio.inode = inode; @@ -553,31 +544,13 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, dio.blkbits = blkbits; dio.block_in_file = offset >> blkbits; dio.blocks_available = 0; - dio.final_block_in_request = (offset + count) >> blkbits; - /* Index into the first page of the first block */ - dio.first_block_in_page = (user_addr & (PAGE_SIZE - 1)) >> blkbits; dio.boundary = 0; dio.reap_counter = 0; dio.get_blocks = get_blocks; dio.last_block_in_bio = -1; dio.next_block_in_bio = -1; - /* Page fetching state */ - dio.curr_page = 0; - bytes = count; - dio.total_pages = 0; - if (user_addr & (PAGE_SIZE - 1)) { - dio.total_pages++; - bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); - } - - dio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; - dio.curr_user_address = user_addr; - - /* Page queue */ - dio.head = 0; - dio.tail = 0; dio.page_errors = 0; /* BIO completion state */ @@ -586,38 +559,75 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, dio.bio_list = NULL; 
dio.waiter = NULL; - ret = do_direct_IO(&dio); + for (seg = 0; seg < nr_segs; seg++) { + user_addr = (unsigned long)iov[seg].iov_base; + bytes = iov[seg].iov_len; + + /* Index into the first page of the first block */ + dio.first_block_in_page = (user_addr & (PAGE_SIZE - 1)) >> blkbits; + dio.final_block_in_request = dio.block_in_file + (bytes >> blkbits); + /* Page fetching state */ + dio.head = 0; + dio.tail = 0; + dio.curr_page = 0; + + dio.total_pages = 0; + if (user_addr & (PAGE_SIZE-1)) { + dio.total_pages++; + bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); + } + dio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; + dio.curr_user_address = user_addr; + + ret = do_direct_IO(&dio); + + if (ret) { + dio_cleanup(&dio); + break; + } + + tot_bytes += iov[seg].iov_len - ((dio.final_block_in_request - + dio.block_in_file) << blkbits); + + } /* end iovec loop */ - if (dio.bio) - dio_bio_submit(&dio); - if (ret) - dio_cleanup(&dio); ret2 = dio_await_completion(&dio); if (ret == 0) ret = ret2; if (ret == 0) ret = dio.page_errors; if (ret == 0) - ret = count - ((dio.final_block_in_request - - dio.block_in_file) << blkbits); -out: + ret = tot_bytes; + return ret; } -ssize_t -generic_file_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count) +/* + * This is a library function for use by filesystem drivers. + */ +int +generic_direct_IO(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks) { + int seg; + size_t size; + unsigned long addr; struct address_space *mapping = inode->i_mapping; - unsigned blocksize_mask; - ssize_t retval; + unsigned blocksize_mask = (1 << inode->i_blkbits) - 1; + ssize_t retval = -EINVAL; - blocksize_mask = (1 << inode->i_blkbits) - 1; - if ((offset & blocksize_mask) || (count & blocksize_mask)) { - retval = -EINVAL; + if (offset & blocksize_mask) { goto out; } + /* Check the memory alignment. 
Blocks cannot straddle pages */ + for (seg = 0; seg < nr_segs; seg++) { + addr = (unsigned long)iov[seg].iov_base; + size = iov[seg].iov_len; + if ((addr & blocksize_mask) || (size & blocksize_mask)) + goto out; + } + if (mapping->nrpages) { retval = filemap_fdatawrite(mapping); if (retval == 0) @@ -625,9 +635,21 @@ generic_file_direct_IO(int rw, struct inode *inode, char *buf, if (retval) goto out; } - retval = mapping->a_ops->direct_IO(rw, inode, buf, offset, count); + + retval = direct_io_worker(rw, inode, iov, offset, nr_segs, get_blocks); +out: + return retval; +} + +ssize_t +generic_file_direct_IO(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct address_space *mapping = inode->i_mapping; + ssize_t retval; + + retval = mapping->a_ops->direct_IO(rw, inode, iov, offset, nr_segs); if (inode->i_mapping->nrpages) invalidate_inode_pages2(inode->i_mapping); -out: return retval; } diff --git a/fs/ext2/file.c b/fs/ext2/file.c index e401b86da374..aff333ae8e5a 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -46,6 +46,8 @@ struct file_operations ext2_file_operations = { .open = generic_file_open, .release = ext2_release_file, .fsync = ext2_sync_file, + .readv = generic_file_readv, + .writev = generic_file_writev, .sendfile = generic_file_sendfile, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index d0c363f8062e..78a1b6ace494 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -619,11 +619,11 @@ ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, } static int -ext2_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count) +ext2_direct_IO(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) { - return generic_direct_IO(rw, inode, buf, - offset, count, ext2_get_blocks); + return generic_direct_IO(rw, inode, iov, + offset, nr_segs, ext2_get_blocks); } static int diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 
412cbb6334cd..6ea4b8a091b8 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -76,19 +76,21 @@ ext3_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) } struct file_operations ext3_file_operations = { - .llseek = generic_file_llseek, /* BKL held */ - .read = generic_file_read, /* BKL not held. Don't need */ - .write = ext3_file_write, /* BKL not held. Don't need */ - .ioctl = ext3_ioctl, /* BKL held */ + .llseek = generic_file_llseek, + .read = generic_file_read, + .write = ext3_file_write, + .readv = generic_file_readv, + .writev = generic_file_writev, + .ioctl = ext3_ioctl, .mmap = generic_file_mmap, - .open = ext3_open_file, /* BKL not held. Don't need */ - .release = ext3_release_file, /* BKL not held. Don't need */ - .fsync = ext3_sync_file, /* BKL held */ - .sendfile = generic_file_sendfile, /* BKL not held. Don't need */ + .open = ext3_open_file, + .release = ext3_release_file, + .fsync = ext3_sync_file, + .sendfile = generic_file_sendfile, }; struct inode_operations ext3_file_inode_operations = { - .truncate = ext3_truncate, /* BKL held */ - .setattr = ext3_setattr, /* BKL held */ + .truncate = ext3_truncate, + .setattr = ext3_setattr, }; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 38e3decdae39..681d0dc715b3 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1399,13 +1399,15 @@ static int ext3_releasepage(struct page *page, int wait) * If the O_DIRECT write is intantiating holes inside i_size and the machine * crashes then stale disk data _may_ be exposed inside the file. 
*/ -static int ext3_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count) +static int ext3_direct_IO(int rw, struct inode *inode, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) { struct ext3_inode_info *ei = EXT3_I(inode); handle_t *handle = NULL; int ret; int orphan = 0; + size_t count = iov_length(iov, nr_segs); if (rw == WRITE) { loff_t final_size = offset + count; @@ -1428,8 +1430,8 @@ static int ext3_direct_IO(int rw, struct inode *inode, char *buf, } } - ret = generic_direct_IO(rw, inode, buf, offset, - count, ext3_direct_io_get_blocks); + ret = generic_direct_IO(rw, inode, iov, offset, + nr_segs, ext3_direct_io_get_blocks); out_stop: if (handle) { diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c65adbffe130..6a69ca7d8acd 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -108,6 +108,8 @@ struct file_operations jfs_file_operations = { .write = generic_file_write, .read = generic_file_read, .mmap = generic_file_mmap, + .readv = generic_file_readv, + .writev = generic_file_writev, .sendfile = generic_file_sendfile, .fsync = jfs_fsync, }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6af76fc84b14..65d1dff1f80d 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -309,11 +309,11 @@ static int jfs_bmap(struct address_space *mapping, long block) return generic_block_bmap(mapping, block, jfs_get_block); } -static int jfs_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count) +static int jfs_direct_IO(int rw, struct inode *inode, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) { - return generic_direct_IO(rw, inode, buf, - offset, count, jfs_get_blocks); + return generic_direct_IO(rw, inode, iov, + offset, nr_segs, jfs_get_blocks); } struct address_space_operations jfs_aops = { diff --git a/fs/read_write.c b/fs/read_write.c index 6a244f612720..306ead083cf0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -286,9 +286,29 @@ asmlinkage ssize_t sys_pwrite64(unsigned int fd, 
const char *buf, return ret; } +/* + * Reduce an iovec's length in-place. Return the resulting number of segments + */ +unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) +{ + unsigned long seg = 0; + size_t len = 0; + + while (seg < nr_segs) { + seg++; + if (len + iov->iov_len >= to) { + iov->iov_len = to - len; + break; + } + len += iov->iov_len; + iov++; + } + return seg; +} + static ssize_t do_readv_writev(int type, struct file *file, const struct iovec * vector, - unsigned long count) + unsigned long nr_segs) { typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); @@ -296,73 +316,86 @@ static ssize_t do_readv_writev(int type, struct file *file, size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov=iovstack; - ssize_t ret, i; + ssize_t ret = -EINVAL; + int seg; io_fn_t fn; iov_fn_t fnv; struct inode *inode; + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned -EINVAL for zero segments, so... 
+ */ + if (nr_segs == 0) + goto out; + /* * First get the "struct iovec" from user memory and * verify all the pointers */ - ret = 0; - if (!count) - goto out_nofree; - ret = -EINVAL; - if (count > UIO_MAXIOV) - goto out_nofree; + if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) + goto out; if (!file->f_op) - goto out_nofree; - if (count > UIO_FASTIOV) { + goto out; + if (nr_segs > UIO_FASTIOV) { ret = -ENOMEM; - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); if (!iov) - goto out_nofree; + goto out; } ret = -EFAULT; - if (copy_from_user(iov, vector, count*sizeof(*vector))) + if (copy_from_user(iov, vector, nr_segs*sizeof(*vector))) goto out; /* * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an ssize_t - * The total length is fitting an ssize_t + * We should -EINVAL if an element length is not >= 0 and fitting an + * ssize_t. The total length is fitting an ssize_t * * Be careful here because iov_len is a size_t not an ssize_t */ - tot_len = 0; ret = -EINVAL; - for (i = 0 ; i < count ; i++) { + for (seg = 0 ; seg < nr_segs; seg++) { ssize_t tmp = tot_len; - ssize_t len = (ssize_t)iov[i].iov_len; + ssize_t len = (ssize_t)iov[seg].iov_len; if (len < 0) /* size_t not fitting an ssize_t .. */ goto out; tot_len += len; if (tot_len < tmp) /* maths overflow on the ssize_t */ goto out; } + if (tot_len == 0) { + ret = 0; + goto out; + } inode = file->f_dentry->d_inode; /* VERIFY_WRITE actually means a read, as we write to user space */ - ret = locks_verify_area((type == VERIFY_WRITE + ret = locks_verify_area((type == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), inode, file, file->f_pos, tot_len); - if (ret) goto out; + if (ret) + goto out; - fnv = (type == VERIFY_WRITE ? 
file->f_op->readv : file->f_op->writev); + fnv = NULL; + if (type == READ) { + fn = file->f_op->read; + fnv = file->f_op->readv; + } else { + fn = (io_fn_t)file->f_op->write; + fnv = file->f_op->writev; + } if (fnv) { - ret = fnv(file, iov, count, &file->f_pos); + ret = fnv(file, iov, nr_segs, &file->f_pos); goto out; } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = (type == VERIFY_WRITE ? file->f_op->read : - (io_fn_t) file->f_op->write); - + /* Do it by hand, with file-ops */ ret = 0; vector = iov; - while (count > 0) { + while (nr_segs > 0) { void * base; size_t len; ssize_t nr; @@ -370,7 +403,7 @@ static ssize_t do_readv_writev(int type, struct file *file, base = vector->iov_base; len = vector->iov_len; vector++; - count--; + nr_segs--; nr = fn(file, base, len, &file->f_pos); @@ -382,20 +415,18 @@ static ssize_t do_readv_writev(int type, struct file *file, if (nr != len) break; } - out: if (iov != iovstack) kfree(iov); -out_nofree: - /* VERIFY_WRITE actually means a read, as we write to user space */ - if ((ret + (type == VERIFY_WRITE)) > 0) + if ((ret + (type == READ)) > 0) dnotify_parent(file->f_dentry, - (type == VERIFY_WRITE) ? DN_MODIFY : DN_ACCESS); + (type == READ) ? 
DN_MODIFY : DN_ACCESS); return ret; } -asmlinkage ssize_t sys_readv(unsigned long fd, const struct iovec * vector, - unsigned long count) + +asmlinkage ssize_t +sys_readv(unsigned long fd, const struct iovec *vector, unsigned long nr_segs) { struct file * file; ssize_t ret; @@ -409,7 +440,7 @@ asmlinkage ssize_t sys_readv(unsigned long fd, const struct iovec * vector, (file->f_op->readv || file->f_op->read)) { ret = security_ops->file_permission (file, MAY_READ); if (!ret) - ret = do_readv_writev(VERIFY_WRITE, file, vector, count); + ret = do_readv_writev(READ, file, vector, nr_segs); } fput(file); @@ -417,8 +448,8 @@ bad_file: return ret; } -asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector, - unsigned long count) +asmlinkage ssize_t +sys_writev(unsigned long fd, const struct iovec * vector, unsigned long nr_segs) { struct file * file; ssize_t ret; @@ -432,7 +463,7 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector, (file->f_op->writev || file->f_op->write)) { ret = security_ops->file_permission (file, MAY_WRITE); if (!ret) - ret = do_readv_writev(VERIFY_READ, file, vector, count); + ret = do_readv_writev(WRITE, file, vector, nr_segs); } fput(file); diff --git a/include/linux/fs.h b/include/linux/fs.h index d58cd8b88fa8..622481a00115 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -307,8 +307,7 @@ struct address_space_operations { int (*bmap)(struct address_space *, long); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); - int (*direct_IO)(int, struct inode *, char *buf, - loff_t offset, size_t count); + int (*direct_IO)(int, struct inode *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); }; struct backing_dev_info; @@ -1245,14 +1244,18 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern ssize_t 
generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); -extern ssize_t generic_file_write_nolock(struct file *, const char *, size_t, loff_t *); +ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t); extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); -ssize_t generic_file_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count); -int generic_direct_IO(int rw, struct inode *inode, char *buf, - loff_t offset, size_t count, get_blocks_t *get_blocks); - +extern ssize_t generic_file_direct_IO(int rw, struct inode *inode, + const struct iovec *iov, loff_t offset, unsigned long nr_segs); +extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec + *iov, loff_t offset, unsigned long nr_segs, get_blocks_t *get_blocks); +extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos); +ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); diff --git a/include/linux/uio.h b/include/linux/uio.h index beaafffd3cfb..ec098c8e6793 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -34,4 +34,19 @@ struct iovec /* Beg pardon: BSD has 1024 --ANK */ #endif +/* + * Total number of bytes covered by an iovec + */ +static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) +{ + unsigned long seg; + size_t ret = 0; + + for (seg = 0; seg < nr_segs; seg++) + ret += iov[seg].iov_len; + return ret; +} + +unsigned 
long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); + #endif diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 3b03394fe14d..d69272a39175 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -343,6 +344,9 @@ EXPORT_SYMBOL(register_disk); EXPORT_SYMBOL(read_dev_sector); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL_GPL(generic_file_direct_IO); +EXPORT_SYMBOL(generic_file_readv); +EXPORT_SYMBOL(generic_file_writev); +EXPORT_SYMBOL(iov_shorten); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff --git a/mm/filemap.c b/mm/filemap.c index f66ea3911500..ea1052accdf2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1121,14 +1122,18 @@ success: * This is the "read()" routine for all filesystems * that can use the page cache directly. */ -ssize_t -generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) +static ssize_t +__generic_file_read(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { ssize_t retval; + unsigned long seg; + size_t count = iov_length(iov, nr_segs); if ((ssize_t) count < 0) return -EINVAL; + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { loff_t pos = *ppos, size; struct address_space *mapping; @@ -1141,10 +1146,13 @@ generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) goto out; /* skip atime */ size = inode->i_size; if (pos < size) { - if (pos + count > size) + if (pos + count > size) { count = size - pos; - retval = generic_file_direct_IO(READ, inode, - buf, pos, count); + nr_segs = iov_shorten((struct iovec *)iov, + nr_segs, count); + } + retval = generic_file_direct_IO(READ, inode, + iov, pos, nr_segs); if (retval > 0) *ppos = pos + retval; } @@ -1152,27 +1160,42 @@ generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) goto out; } - 
retval = -EFAULT; - if (access_ok(VERIFY_WRITE, buf, count)) { - retval = 0; + for (seg = 0; seg < nr_segs; seg++) { + if (!access_ok(VERIFY_WRITE,iov[seg].iov_base,iov[seg].iov_len)) + return -EFAULT; + } - if (count) { + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { read_descriptor_t desc; desc.written = 0; - desc.count = count; - desc.buf = buf; + desc.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; desc.error = 0; do_generic_file_read(filp,ppos,&desc,file_read_actor); - retval = desc.written; - if (!retval) + retval += desc.written; + if (!retval) { retval = desc.error; + break; + } } } out: return retval; } +ssize_t +generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) +{ + struct iovec local_iov = { .iov_base = buf, .iov_len = count }; + + return __generic_file_read(filp, &local_iov, 1, ppos); +} + static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { ssize_t written; @@ -1926,11 +1949,14 @@ filemap_copy_from_user(struct page *page, unsigned long offset, * it for writing by marking it dirty. 
* okir@monad.swb.de */ -ssize_t generic_file_write_nolock(struct file *file, const char *buf, - size_t count, loff_t *ppos) +ssize_t +generic_file_write_nolock(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { struct address_space * mapping = file->f_dentry->d_inode->i_mapping; struct address_space_operations *a_ops = mapping->a_ops; + const size_t ocount = iov_length(iov, nr_segs); + size_t count = ocount; struct inode *inode = mapping->host; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; long status = 0; @@ -1942,12 +1968,19 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, unsigned bytes; time_t time_now; struct pagevec lru_pvec; + struct iovec *cur_iov; + unsigned iov_bytes; /* Cumulative count to the end of the + current iovec */ + unsigned long seg; + char *buf; if (unlikely((ssize_t)count < 0)) return -EINVAL; - if (unlikely(!access_ok(VERIFY_READ, buf, count))) - return -EFAULT; + for (seg = 0; seg < nr_segs; seg++) { + if (!access_ok(VERIFY_READ,iov[seg].iov_base,iov[seg].iov_len)) + return -EFAULT; + } pos = *ppos; if (unlikely(pos < 0)) @@ -2045,9 +2078,13 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, mark_inode_dirty_sync(inode); } + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { - written = generic_file_direct_IO(WRITE, inode, - (char *)buf, pos, count); + if (count != ocount) + nr_segs = iov_shorten((struct iovec *)iov, + nr_segs, count); + written = generic_file_direct_IO(WRITE, inode, + iov, pos, nr_segs); if (written > 0) { loff_t end = pos + written; if (end > inode->i_size && !S_ISBLK(inode->i_mode)) { @@ -2065,6 +2102,9 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, goto out_status; } + cur_iov = (struct iovec *)iov; + iov_bytes = cur_iov->iov_len; + buf = cur_iov->iov_base; do { unsigned long index; unsigned long offset; @@ -2075,6 +2115,8 @@ ssize_t 
generic_file_write_nolock(struct file *file, const char *buf, bytes = PAGE_CACHE_SIZE - offset; if (bytes > count) bytes = count; + if (bytes + written > iov_bytes) + bytes = iov_bytes - written; /* * Bring in the user page that we will copy from _first_. @@ -2084,7 +2126,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, */ fault_in_pages_readable(buf, bytes); - page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec); + page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); if (!page) { status = -ENOMEM; break; @@ -2115,6 +2157,11 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, count -= status; pos += status; buf += status; + if (written == iov_bytes && count) { + cur_iov++; + iov_bytes += cur_iov->iov_len; + buf = cur_iov->iov_base; + } } } if (!PageReferenced(page)) @@ -2151,10 +2198,29 @@ ssize_t generic_file_write(struct file *file, const char *buf, { struct inode *inode = file->f_dentry->d_inode->i_mapping->host; int err; + struct iovec local_iov = { .iov_base = (void *)buf, .iov_len = count }; down(&inode->i_sem); - err = generic_file_write_nolock(file, buf, count, ppos); + err = generic_file_write_nolock(file, &local_iov, 1, ppos); up(&inode->i_sem); return err; } + +ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + return __generic_file_read(filp, iov, nr_segs, ppos); +} + +ssize_t generic_file_writev(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t * ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + + down(&inode->i_sem); + ret = generic_file_write_nolock(file, iov, nr_segs, ppos); + up(&inode->i_sem); + return ret; +} -- cgit v1.2.3 From acf7aa2ca0260db7d93c9924f21caeeda1fbd990 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 13 Sep 2002 05:57:07 -0700 Subject: [PATCH] Use a sync iocb for generic_file_read This adds support for synchronous iocbs and converts 
generic_file_read to use a sync iocb to call into generic_file_aio_read. The tests I've run with lmbench on a piii-866 showed no difference in file re-read speed when forced to use a completion path via aio_complete and an -EIOCBQUEUED return from generic_file_aio_read -- people with slower machines might want to test this to see if we can tune it any better. Also, a bug fix to correct a missing call into the aio code from the fork code is present. This patch sets things up for making generic_file_aio_read actually asynchronous. --- fs/aio.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/aio.h | 22 ++++++++++++++-------- include/linux/init_task.h | 30 ++++++++++++++++++++++-------- include/linux/sched.h | 5 ++++- kernel/fork.c | 4 ++++ mm/filemap.c | 29 ++++++++++++++++++++++++++--- 6 files changed, 112 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 1eedbcda29e0..0d2cf1efd48f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -30,10 +30,11 @@ #include #include #include +#include +#include #include #include -#include #if DEBUG > 1 #define dprintk printk @@ -304,10 +305,25 @@ void wait_for_all_aios(struct kioctx *ctx) schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } - set_task_state(tsk, TASK_RUNNING); + __set_task_state(tsk, TASK_RUNNING); remove_wait_queue(&ctx->wait, &wait); } +/* wait_on_sync_kiocb: + * Waits on the given sync kiocb to complete. + */ +ssize_t wait_on_sync_kiocb(struct kiocb *iocb) +{ + while (iocb->ki_users) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!iocb->ki_users) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return iocb->ki_user_data; +} + /* exit_aio: called when the last user of mm goes away. At this point, * there is no way for any new requests to be submited or any of the * io_* syscalls to be called on the context. 
However, there may be @@ -516,13 +532,36 @@ static inline struct kioctx *lookup_ioctx(unsigned long ctx_id) int aio_complete(struct kiocb *iocb, long res, long res2) { struct kioctx *ctx = iocb->ki_ctx; - struct aio_ring_info *info = &ctx->ring_info; + struct aio_ring_info *info; struct aio_ring *ring; struct io_event *event; unsigned long flags; unsigned long tail; int ret; + /* Special case handling for sync iocbs: events go directly + * into the iocb for fast handling. Note that this will not + * work if we allow sync kiocbs to be cancelled. in which + * case the usage count checks will have to move under ctx_lock + * for all cases. + */ + if (ctx == &ctx->mm->default_kioctx) { + int ret; + + iocb->ki_user_data = res; + if (iocb->ki_users == 1) { + iocb->ki_users = 0; + return 1; + } + spin_lock_irq(&ctx->ctx_lock); + iocb->ki_users--; + ret = (0 == iocb->ki_users); + spin_unlock_irq(&ctx->ctx_lock); + return 0; + } + + info = &ctx->ring_info; + /* add a completion event to the ring buffer. 
* must be done holding ctx->ctx_lock to prevent * other code from messing with the tail diff --git a/include/linux/aio.h b/include/linux/aio.h index 04f710ac0cc2..c819f731e4a2 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -1,7 +1,6 @@ #ifndef __LINUX__AIO_H #define __LINUX__AIO_H -#include #include #include @@ -21,10 +20,14 @@ struct kioctx; #define KIOCB_C_CANCELLED 0x01 #define KIOCB_C_COMPLETE 0x02 +#define KIOCB_SYNC_KEY (~0U) + #define KIOCB_PRIVATE_SIZE (16 * sizeof(long)) struct kiocb { int ki_users; + unsigned ki_key; /* id of this request */ + struct file *ki_filp; struct kioctx *ki_ctx; /* may be NULL for sync ops */ int (*ki_cancel)(struct kiocb *, struct io_event *); @@ -34,17 +37,19 @@ struct kiocb { void *ki_data; /* for use by the the file */ void *ki_user_obj; /* pointer to userland's iocb */ __u64 ki_user_data; /* user's data for completion */ - unsigned ki_key; /* id of this request */ long private[KIOCB_PRIVATE_SIZE/sizeof(long)]; }; -#define init_sync_kiocb(x, filp) \ - do { \ - (x)->ki_users = 1; \ - (x)->ki_filp = (filp); \ - (x)->ki_ctx = 0; \ - (x)->ki_cancel = NULL; \ +#define init_sync_kiocb(x, filp) \ + do { \ + struct task_struct *tsk = current; \ + (x)->ki_users = 1; \ + (x)->ki_key = KIOCB_SYNC_KEY; \ + (x)->ki_filp = (filp); \ + (x)->ki_ctx = &tsk->active_mm->default_kioctx; \ + (x)->ki_cancel = NULL; \ + (x)->ki_user_obj = tsk; \ } while (0) #define AIO_RING_MAGIC 0xa10a10a1 @@ -105,6 +110,7 @@ struct kioctx { /* prototypes */ extern unsigned aio_max_size; +extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb)); extern int FASTCALL(aio_put_req(struct kiocb *iocb)); extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2)); extern void FASTCALL(__put_ioctx(struct kioctx *ctx)); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index c66cbf8d9f5c..4bd8a09d2bbc 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -18,15 +18,29 @@ .fd_array = { NULL, } \ } 
+#define INIT_KIOCTX(name, which_mm) \ +{ \ + .users = ATOMIC_INIT(1), \ + .dead = 0, \ + .mm = &which_mm, \ + .user_id = 0, \ + .next = NULL, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ + .ctx_lock = SPIN_LOCK_UNLOCKED, \ + .reqs_active = 0U, \ + .max_reqs = ~0U, \ +} + #define INIT_MM(name) \ -{ \ - .mm_rb = RB_ROOT, \ - .pgd = swapper_pg_dir, \ - .mm_users = ATOMIC_INIT(2), \ - .mm_count = ATOMIC_INIT(1), \ - .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ - .page_table_lock = SPIN_LOCK_UNLOCKED, \ - .mmlist = LIST_HEAD_INIT(name.mmlist), \ +{ \ + .mm_rb = RB_ROOT, \ + .pgd = swapper_pg_dir, \ + .mm_users = ATOMIC_INIT(2), \ + .mm_count = ATOMIC_INIT(1), \ + .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ + .page_table_lock = SPIN_LOCK_UNLOCKED, \ + .mmlist = LIST_HEAD_INIT(name.mmlist), \ + .default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \ } #define INIT_SIGNALS(sig) { \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 513c065e281c..337b2f396589 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -169,7 +169,8 @@ struct namespace; /* Maximum number of active map areas.. 
This is a random (large) number */ #define MAX_MAP_COUNT (65536) -struct kioctx; +#include + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ rb_root_t mm_rb; @@ -202,6 +203,8 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; + + struct kioctx default_kioctx; }; extern int mmlist_nr; diff --git a/kernel/fork.c b/kernel/fork.c index 5bc7f2a07915..b210f1ee459b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -296,12 +296,16 @@ int mmlist_nr; #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +#include + static struct mm_struct * mm_init(struct mm_struct * mm) { atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); mm->page_table_lock = SPIN_LOCK_UNLOCKED; + mm->ioctx_list_lock = RW_LOCK_UNLOCKED; + mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->pgd = pgd_alloc(mm); if (mm->pgd) return mm; diff --git a/mm/filemap.c b/mm/filemap.c index ea1052accdf2..4ad7d36e71c4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1123,9 +1124,10 @@ success: * that can use the page cache directly. 
*/ static ssize_t -__generic_file_read(struct file *filp, const struct iovec *iov, +__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { + struct file *filp = iocb->ki_filp; ssize_t retval; unsigned long seg; size_t count = iov_length(iov, nr_segs); @@ -1188,12 +1190,26 @@ out: return retval; } +ssize_t +generic_file_aio_read(struct kiocb *iocb, char *buf, size_t count, loff_t *ppos) +{ + struct iovec local_iov = { .iov_base = buf, .iov_len = count }; + + return __generic_file_aio_read(iocb, &local_iov, 1, ppos); +} + ssize_t generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { struct iovec local_iov = { .iov_base = buf, .iov_len = count }; + struct kiocb kiocb; + ssize_t ret; - return __generic_file_read(filp, &local_iov, 1, ppos); + init_sync_kiocb(&kiocb, filp); + ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + return ret; } static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) @@ -2210,7 +2226,14 @@ ssize_t generic_file_write(struct file *file, const char *buf, ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { - return __generic_file_read(filp, iov, nr_segs, ppos); + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + return ret; } ssize_t generic_file_writev(struct file *file, const struct iovec *iov, -- cgit v1.2.3 From a5d2bf7b3b206f9d0c453618d02a5a0ede982c1d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Sep 2002 19:35:26 -0700 Subject: [PATCH] hide-threads-2.5.34-C1 I fixed up the 'remove thread group inferiors from the tasklist' patch. 
I think I managed to find a reasonably good construct to iterate over all threads: do_each_thread(g, p) { ... } while_each_thread(g, p); the only caveat with this is that the construct suggests a single-loop - while it's two loops internally - and 'break' will not work. I added a comment to sched.h that warns about this, but perhaps it would help more to have naming that suggests two loops: for_each_process_do_each_thread(g, p) { ... } while_each_thread(g, p); but this looks a bit too long. I don't know. We might as well use it all unrolled and no helper macros - although with the above construct it's pretty straightforward to iterate over all threads in the system. --- arch/i386/kernel/vm86.c | 9 +++++---- drivers/char/sysrq.c | 2 +- drivers/char/tty_io.c | 10 +++++----- fs/fcntl.c | 4 ++-- fs/namespace.c | 12 ++++++------ fs/proc/base.c | 2 +- fs/proc/inode.c | 4 +++- include/linux/sched.h | 27 ++++++++++++++++++--------- kernel/capability.c | 12 ++++++------ kernel/exit.c | 6 +++--- kernel/fork.c | 7 ++++--- kernel/sched.c | 7 ++++--- kernel/signal.c | 10 +++++----- kernel/suspend.c | 13 +++++++------ kernel/sys.c | 24 ++++++++++++------------ mm/oom_kill.c | 16 +++++++++------- net/ipv4/netfilter/ipt_owner.c | 17 +++++++++-------- net/ipv6/netfilter/ip6t_owner.c | 11 ++++++----- 18 files changed, 106 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 90b273f1cb5b..a0fcf591080a 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -608,16 +608,17 @@ static inline void free_vm86_irq(int irqnumber) static inline int task_valid(struct task_struct *tsk) { - struct task_struct *p; + struct task_struct *g, *p; int ret = 0; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) if ((p == tsk) && (p->sig)) { ret = 1; - break; + goto out; } - } + while_each_thread(g, p); +out: read_unlock(&tasklist_lock); return ret; } diff --git a/drivers/char/sysrq.c
b/drivers/char/sysrq.c index dc64c9071787..bb98bc235004 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -299,7 +299,7 @@ static void send_sig_all(int sig) { struct task_struct *p; - for_each_task(p) { + for_each_process(p) { if (p->mm && p->pid != 1) /* Not swapper, init nor kernel thread */ force_sig(sig, p); diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index f9fd6deb583c..9746fb9c1117 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -496,7 +496,7 @@ void do_tty_hangup(void *data) } read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { if ((tty->session > 0) && (p->session == tty->session) && p->leader) { send_sig(SIGHUP,p,1); @@ -598,7 +598,7 @@ void disassociate_ctty(int on_exit) tty->pgrp = -1; read_lock(&tasklist_lock); - for_each_task(p) + for_each_process(p) if (p->session == current->session) p->tty = NULL; read_unlock(&tasklist_lock); @@ -1223,7 +1223,7 @@ static void release_dev(struct file * filp) struct task_struct *p; read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { if (p->tty == tty || (o_tty && p->tty == o_tty)) p->tty = NULL; } @@ -1561,7 +1561,7 @@ static int tiocsctty(struct tty_struct *tty, int arg) struct task_struct *p; read_lock(&tasklist_lock); - for_each_task(p) + for_each_process(p) if (p->tty == tty) p->tty = NULL; read_unlock(&tasklist_lock); @@ -1834,7 +1834,7 @@ static void __do_SAK(void *arg) if (tty->driver.flush_buffer) tty->driver.flush_buffer(tty); read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { if ((p->tty == tty) || ((session > 0) && (p->session == session))) { printk(KERN_NOTICE "SAK: killed process %d" diff --git a/fs/fcntl.c b/fs/fcntl.c index c68baf1f81da..4a87786a0a43 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -493,7 +493,7 @@ void send_sigio(struct fown_struct *fown, int fd, int band) send_sigio_to_task(p, fown, fd, band); goto out_unlock_task; } - for_each_task(p) { + for_each_process(p) { int match = p->pid; if 
(pid < 0) match = -p->pgrp; @@ -531,7 +531,7 @@ int send_sigurg(struct fown_struct *fown) send_sigurg_to_task(p, fown); goto out_unlock_task; } - for_each_task(p) { + for_each_process(p) { int match = p->pid; if (pid < 0) match = -p->pgrp; diff --git a/fs/namespace.c b/fs/namespace.c index 79a4c5a459e8..3f2f41aa15cd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -883,11 +883,11 @@ out1: static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) { - struct task_struct *p; + struct task_struct *g, *p; struct fs_struct *fs; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { task_lock(p); fs = p->fs; if (fs) { @@ -900,7 +900,7 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) put_fs_struct(fs); } else task_unlock(p); - } + } while_each_thread(g, p); read_unlock(&tasklist_lock); } @@ -1012,7 +1012,7 @@ static void __init init_mount_tree(void) { struct vfsmount *mnt; struct namespace *namespace; - struct task_struct *p; + struct task_struct *g, *p; mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) @@ -1028,10 +1028,10 @@ static void __init init_mount_tree(void) init_task.namespace = namespace; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { get_namespace(namespace); p->namespace = namespace; - } + } while_each_thread(g, p); read_unlock(&tasklist_lock); set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); diff --git a/fs/proc/base.c b/fs/proc/base.c index 7dd33906d6d5..f82541d8e29a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1136,7 +1136,7 @@ static int get_pid_list(int index, unsigned int *pids) index--; read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { int pid = p->pid; if (!pid) continue; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2a7bb8eec4d8..86785974eeb2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -235,7 +235,9 @@ int proc_fill_super(struct super_block *s, void *data, int silent) * 
Fixup the root inode's nlink value */ read_lock(&tasklist_lock); - for_each_task(p) if (p->pid) root_inode->i_nlink++; + for_each_process(p) + if (p->pid) + root_inode->i_nlink++; read_unlock(&tasklist_lock); s->s_root = d_alloc_root(root_inode); if (!s->s_root) diff --git a/include/linux/sched.h b/include/linux/sched.h index 337b2f396589..5a7e7c21009f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -760,14 +760,16 @@ static inline void remove_wait_queue_locked(wait_queue_head_t *q, #define remove_parent(p) list_del_init(&(p)->sibling) #define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) -#define REMOVE_LINKS(p) do { \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ +#define REMOVE_LINKS(p) do { \ + if (thread_group_leader(p)) \ + list_del_init(&(p)->tasks); \ + remove_parent(p); \ } while (0) -#define SET_LINKS(p) do { \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ +#define SET_LINKS(p) do { \ + if (thread_group_leader(p)) \ + list_add_tail(&(p)->tasks,&init_task.tasks); \ + add_parent(p, (p)->parent); \ } while (0) static inline struct task_struct *eldest_child(struct task_struct *p) @@ -797,11 +799,18 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) #define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) -#define for_each_task(p) \ +#define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) -#define for_each_thread(task) \ - for (task = next_thread(current) ; task != current ; task = next_thread(task)) +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. 
+ */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) static inline task_t *next_thread(task_t *p) { diff --git a/kernel/capability.c b/kernel/capability.c index 223d22dece42..774cf612d8c5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -83,13 +83,13 @@ static inline void cap_set_pg(int pgrp, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *target; + task_t *g, *target; - for_each_task(target) { + do_each_thread(g, target) { if (target->pgrp != pgrp) continue; security_ops->capset_set(target, effective, inheritable, permitted); - } + } while_each_thread(g, target); } /* @@ -100,13 +100,13 @@ static inline void cap_set_all(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *target; + task_t *g, *target; - for_each_task(target) { + do_each_thread(g, target) { if (target == current || target->pid == 1) continue; security_ops->capset_set(target, effective, inheritable, permitted); - } + } while_each_thread(g, target); } /* diff --git a/kernel/exit.c b/kernel/exit.c index 49f279e1a004..115a7cd7b807 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -115,7 +115,7 @@ int session_of_pgrp(int pgrp) fallback = -1; read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { if (p->session <= 0) continue; if (p->pgrp == pgrp) { @@ -141,7 +141,7 @@ static int __will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_ta { struct task_struct *p; - for_each_task(p) { + for_each_process(p) { if ((p == ignored_task) || (p->pgrp != pgrp) || (p->state == TASK_ZOMBIE) || (p->parent->pid == 1)) @@ -175,7 +175,7 @@ static inline int __has_stopped_jobs(int pgrp) int retval = 0; struct task_struct * p; - for_each_task(p) { + for_each_process(p) { if (p->pgrp != pgrp) continue; if (p->state != TASK_STOPPED) diff --git a/kernel/fork.c b/kernel/fork.c index 
b210f1ee459b..dec0136c59f1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -161,7 +161,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) static int get_pid(unsigned long flags) { - struct task_struct *p; + struct task_struct *g, *p; int pid; if (flags & CLONE_IDLETASK) @@ -178,7 +178,7 @@ inside: next_safe = pid_max; read_lock(&tasklist_lock); repeat: - for_each_task(p) { + do_each_thread(g, p) { if (p->pid == last_pid || p->pgrp == last_pid || p->session == last_pid) { @@ -195,7 +195,8 @@ inside: next_safe = p->pgrp; if (p->session > last_pid && next_safe > p->session) next_safe = p->session; - } + } while_each_thread(g, p); + read_unlock(&tasklist_lock); } pid = last_pid; diff --git a/kernel/sched.c b/kernel/sched.c index b094fa97b9b6..806b30b9da73 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1838,7 +1838,7 @@ char * render_sigset_t(sigset_t *set, char *buffer) void show_state(void) { - task_t *p; + task_t *g, *p; #if (BITS_PER_LONG == 32) printk("\n" @@ -1850,14 +1850,15 @@ void show_state(void) printk(" task PC stack pid father child younger older\n"); #endif read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { /* * reset the NMI-timeout, listing all files on a slow * console might take alot of time: */ touch_nmi_watchdog(); show_task(p); - } + } while_each_thread(g, p); + read_unlock(&tasklist_lock); } diff --git a/kernel/signal.c b/kernel/signal.c index 3052e34af8cc..a897b044b07d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -938,8 +938,8 @@ int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) struct task_struct *p; retval = -ESRCH; - for_each_task(p) { - if (p->pgrp == pgrp && thread_group_leader(p)) { + for_each_process(p) { + if (p->pgrp == pgrp) { int err = send_sig_info(sig, info, p); if (retval) retval = err; @@ -976,7 +976,7 @@ kill_sl_info(int sig, struct siginfo *info, pid_t sess) retval = -ESRCH; read_lock(&tasklist_lock); - for_each_task(p) { + for_each_process(p) { if 
(p->leader && p->session == sess) { int err = send_sig_info(sig, info, p); if (retval) @@ -1020,8 +1020,8 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) struct task_struct * p; read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pid > 1 && p != current && thread_group_leader(p)) { + for_each_process(p) { + if (p->pid > 1 && p != current) { int err = send_sig_info(sig, info, p); ++count; if (err != -EPERM) diff --git a/kernel/suspend.c b/kernel/suspend.c index ae49384a38c8..2d7eeaabe127 100644 --- a/kernel/suspend.c +++ b/kernel/suspend.c @@ -204,14 +204,14 @@ void refrigerator(unsigned long flag) int freeze_processes(void) { int todo, start_time; - struct task_struct *p; + struct task_struct *g, *p; printk( "Stopping tasks: " ); start_time = jiffies; do { todo = 0; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { unsigned long flags; INTERESTING(p); if (p->flags & PF_FROZEN) @@ -224,7 +224,7 @@ int freeze_processes(void) signal_wake_up(p); spin_unlock_irqrestore(&p->sigmask_lock, flags); todo++; - } + } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); if (time_after(jiffies, start_time + TIMEOUT)) { @@ -240,18 +240,19 @@ int freeze_processes(void) void thaw_processes(void) { - struct task_struct *p; + struct task_struct *g, *p; printk( "Restarting tasks..." 
); read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { INTERESTING(p); if (p->flags & PF_FROZEN) p->flags &= ~PF_FROZEN; else printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); wake_up_process(p); - } + } while_each_thread(g, p); + read_unlock(&tasklist_lock); printk( " done\n" ); MDELAY(500); diff --git a/kernel/sys.c b/kernel/sys.c index 94976695e15c..7e0f8bea1201 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -227,7 +227,7 @@ static int proc_sel(struct task_struct *p, int which, int who) asmlinkage long sys_setpriority(int which, int who, int niceval) { - struct task_struct *p; + struct task_struct *g, *p; int error; if (which > 2 || which < 0) @@ -241,7 +241,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) niceval = 19; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { int no_nice; if (!proc_sel(p, which, who)) continue; @@ -262,8 +262,8 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) continue; } set_user_nice(p, niceval); + } while_each_thread(g, p); - } read_unlock(&tasklist_lock); return error; @@ -277,21 +277,21 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) */ asmlinkage long sys_getpriority(int which, int who) { - struct task_struct *p; + struct task_struct *g, *p; long retval = -ESRCH; if (which > 2 || which < 0) return -EINVAL; read_lock(&tasklist_lock); - for_each_task (p) { + do_each_thread(g, p) { long niceval; if (!proc_sel(p, which, who)) continue; niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; - } + } while_each_thread(g, p); read_unlock(&tasklist_lock); return retval; @@ -882,12 +882,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (p->leader) goto out; if (pgid != pid) { - struct task_struct * tmp; - for_each_task (tmp) { + struct task_struct *g, *tmp; + do_each_thread(g, tmp) { if (tmp->pgrp == pgid && tmp->session == current->session) goto ok_pgid; - } + } while_each_thread(g, tmp); goto out; } @@ 
-956,14 +956,14 @@ asmlinkage long sys_getsid(pid_t pid) asmlinkage long sys_setsid(void) { - struct task_struct * p; + struct task_struct *g, *p; int err = -EPERM; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) if (p->pgrp == current->pid) goto out; - } + while_each_thread(g, p); current->leader = 1; current->session = current->pgrp = current->pid; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index edbe91353067..afc2a7f26abd 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -116,10 +116,10 @@ static int badness(struct task_struct *p) static struct task_struct * select_bad_process(void) { int maxpoints = 0; - struct task_struct *p = NULL; + struct task_struct *g, *p; struct task_struct *chosen = NULL; - for_each_task(p) { + do_each_thread(g, p) if (p->pid) { int points = badness(p); if (points > maxpoints) { @@ -127,7 +127,7 @@ static struct task_struct * select_bad_process(void) maxpoints = points; } } - } + while_each_thread(g, p); return chosen; } @@ -166,7 +166,7 @@ void oom_kill_task(struct task_struct *p) */ static void oom_kill(void) { - struct task_struct *p, *q; + struct task_struct *g, *p, *q; read_lock(&tasklist_lock); p = select_bad_process(); @@ -176,9 +176,11 @@ static void oom_kill(void) panic("Out of memory and no killable processes...\n"); /* kill all processes that share the ->mm (i.e. 
all threads) */ - for_each_task(q) { - if(q->mm == p->mm) oom_kill_task(q); - } + do_each_thread(g, q) + if (q->mm == p->mm) + oom_kill_task(q); + while_each_thread(g, q); + read_unlock(&tasklist_lock); /* diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 7455f7efff90..acb9997276b4 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -14,12 +14,12 @@ static int match_comm(const struct sk_buff *skb, const char *comm) { - struct task_struct *p; + struct task_struct *g, *p; struct files_struct *files; int i; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { if(strncmp(p->comm, comm, sizeof(p->comm))) continue; @@ -38,7 +38,7 @@ match_comm(const struct sk_buff *skb, const char *comm) read_unlock(&files->file_lock); } task_unlock(p); - } + } while_each_thread(g, p); read_unlock(&tasklist_lock); return 0; } @@ -77,12 +77,12 @@ out: static int match_sid(const struct sk_buff *skb, pid_t sid) { - struct task_struct *p; + struct task_struct *g, *p; struct file *file = skb->sk->socket->file; int i, found=0; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { struct files_struct *files; if (p->session != sid) continue; @@ -100,9 +100,10 @@ match_sid(const struct sk_buff *skb, pid_t sid) read_unlock(&files->file_lock); } task_unlock(p); - if(found) - break; - } + if (found) + goto out; + } while_each_thread(g, p); +out: read_unlock(&tasklist_lock); return found; diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 1f31ef262f79..544543dc0a93 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -49,12 +49,12 @@ out: static int match_sid(const struct sk_buff *skb, pid_t sid) { - struct task_struct *p; + struct task_struct *g, *p; struct file *file = skb->sk->socket->file; int i, found=0; read_lock(&tasklist_lock); - for_each_task(p) { + do_each_thread(g, p) { struct files_struct *files; if (p->session != sid) 
continue; @@ -72,9 +72,10 @@ match_sid(const struct sk_buff *skb, pid_t sid) read_unlock(&files->file_lock); } task_unlock(p); - if(found) - break; - } + if (found) + goto out; + } while_each_thread(g, p); +out: read_unlock(&tasklist_lock); return found; -- cgit v1.2.3