在上章中分析了connect流程。这一章分析,服务端发送syn/ack后,客户端的处理流程。
这个流程在IPv4中的入口函数是tcp_v4_rcv。当ip层收到tcp包后,会将这个包上送到tcp_v4_rcv函数中。
这个函数首先检查数据包的合法性(数据包校验和是否正确,数据包是否完整)
然后在tcp 连接表中查找 sock。首先在established表中查找,如果在established表中查找不到,则在listener表中查找。由于我们分析的是syn/ack收包过程,这个sk应该会在established表中查找到,且sock 状态为TCP_SYN_SENT。
if (!sock_owned_by_user(sk)) {
ret = tcp_v4_do_rcv(sk, skb); //tcp_v4_rcv函数最后调用这个函数处理syn/ack报文
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
在tcp_v4_do_rcv函数中,会根据sk状态执行不同的操作,由于当前sk状态为TCP_SYN_SENT,因此直接执行tcp_rcv_state_process函数。
if (tcp_rcv_state_process(sk, skb)) { //改变tcp sk状态函数。这个函数根据当前状态以及skb包中标志位,改变sk状态
rsk = sk;
goto reset;
}
在tcp_rcv_state_process函数中,根据当前TCP sk 状态 为TCP_SYN_SENT,执行的代码为
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
tcp_mstamp_refresh(tp);
queued = tcp_rcv_synsent_state_process(sk, skb, th); //执行这个函数改变tcp状态
if (queued >= 0)
return queued;
/* Do step6 onward by hand. */
tcp_urg(sk, skb, th);
__kfree_skb(skb);
tcp_data_snd_check(sk);
return 0;
}
tcp_rcv_synsent_state_process首先判断报文是否带有ack标志,然后检查报文的合法性(ack_seq是否在 snd_una 和 snd_nxt 之间、时间戳回显 rcv_tsecr 是否合法)。然后检查是否有rst,如果有rst 则直接关闭socket,并通知用户态程序该sock 出错。
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
* first check the ACK bit
* If the ACK bit is set
* If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
* a reset (unless the RST bit is set, if so drop
* the segment and return)"
*/
if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
goto reset_and_undo;
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
/* Now ACK is acceptable.
*
* "If the RST bit is set
* If the ACK was acceptable then signal the user "error:
* connection reset", drop the segment, enter CLOSED state,
* delete TCB, and return."
*/
if (th->rst) {
tcp_reset(sk);
goto discard;
}
/* rfc793:
* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*
* See note below!
* --ANK(990513)
*/
if (!th->syn)
goto discard_and_undo;
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
* (our SYN has been ACKed), change the connection
* state to ESTABLISHED..."
*/
tcp_ecn_rcv_synack(tp, th);
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
如果整个过程中一切都ok,则调用 tcp_finish_connect函数,这个函数将tcp sk 状态改为 TCP_ESTABLISHED状态,如果开启了keepalive(SOCK_KEEPOPEN)则设置keepalive定时器。代码如下
/*
 * tcp_finish_connect - complete the active open once the handshake reply
 * has been accepted.
 * @sk:  the connecting socket
 * @skb: the received segment; may be NULL, in which case the skb-dependent
 *       steps below are skipped
 *
 * Switches the socket to TCP_ESTABLISHED, records the receive/send
 * timestamps, arms the keepalive timer when SOCK_KEEPOPEN is set, and
 * initialises pred_flags.
 */
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
/* The state change itself: the socket is now TCP_ESTABLISHED. */
tcp_set_state(sk, TCP_ESTABLISHED);
/* Record the current tcp_jiffies32 as the last-receive time. */
icsk->icsk_ack.lrcvtime = tcp_jiffies32;
/* These three steps all take the skb, so they only run when one is given. */
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
sk_mark_napi_id(sk, skb);
}
tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
 * packet.
 */
tp->lsndtime = tcp_jiffies32;
/* Arm the keepalive timer only if the socket has SOCK_KEEPOPEN set. */
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
/* With no send window scaling, turn the fast path on using snd_wnd;
 * otherwise clear pred_flags.
 */
if (!tp->rx_opt.snd_wscale)
__tcp_fast_path_on(tp, tp->snd_wnd);
else
tp->pred_flags = 0;
}
如果收到的报文带有rst标志(且ack合法),则会调用 tcp_reset函数。函数如下
/*
 * tcp_reset - tear the connection down after a reset has been received.
 * @sk: the socket that received the reset
 *
 * Picks the error code to store in sk->sk_err from the current socket
 * state, purges the write queue, calls tcp_done(), and finally invokes the
 * socket's error-report callback unless the socket is marked SOCK_DEAD.
 * A socket already in TCP_CLOSE returns right after the trace point.
 */
void tcp_reset(struct sock *sk)
{
trace_tcp_receive_reset(sk);
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
/* Reset while still connecting: reported as ECONNREFUSED. */
sk->sk_err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
sk->sk_err = EPIPE;
break;
case TCP_CLOSE:
/* Already closed: nothing to record or tear down. */
return;
default:
sk->sk_err = ECONNRESET;
}
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk); // Notify user space of the error; the concrete handler is sock_def_error_report in net/core/sock.c.
}
sock_def_error_report函数如下:
/*
 * sock_def_error_report - error-report callback for a socket.
 * @sk: the socket on which an error was recorded
 *
 * Inside an RCU read-side section, looks up the socket wait queue, wakes
 * any sleeper on it with EPOLLERR, and then calls sk_wake_async() with
 * POLL_ERR.
 */
static void sock_def_error_report(struct sock *sk)
{
struct socket_wq *wq;
/* sk->sk_wq is read via rcu_dereference(), so hold the RCU read lock. */
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
/* Only issue the wake-up when someone is actually sleeping on the queue. */
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait, EPOLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); // Normally only POLL_ERR is reported; the author has previously seen POLL_ERR together with POLL_IN and POLL_OUT, possibly because other events occurred at the same time.
rcu_read_unlock();
}
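回到tcp_rcv_synsent_state_process函数:tcp_finish_connect 之后,如果sock 没有被标记为SOCK_DEAD,则调用 sk_state_change 和 sk_wake_async 唤醒/通知阻塞在sk上的进程。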
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
}
最后调用 tcp_send_ack 发送 ack报文到服务端,整个tcp 接收syn/ack 的过程完成。
后面的函数都是一些异常处理。