代码文件
-
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-
/* Size in bytes of the request payload and of the reply buffer. */
#define length 40

/*
 * Minimal UDP "daytime" client.
 *
 * Sends one 40-byte datagram to port 13 of a fixed server, waits for a
 * single reply datagram and prints it as text.
 *
 * Returns 0 on success, -1 if the socket cannot be created or if the
 * send or receive call fails.
 */
int main(void)
{
	struct sockaddr_in serv;
	char buff[length];
	int sockfd, i;
	ssize_t n;

	/* Payload bytes are '0' + 5 + i, i.e. 0x35..0x5c: easy to spot in a capture. */
	for (i = 0; i < length; i++)
		buff[i] = (char)(i + '0' + 5);

	if ((sockfd = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
		printf("socket create error\n");
		return -1;
	}

	memset(&serv, 0, sizeof(serv));
	serv.sin_family = AF_INET;
	serv.sin_addr.s_addr = inet_addr("115.239.210.27");
	serv.sin_port = htons(13); /* daytime server */

	if (sendto(sockfd, buff, length, 0,
		   (struct sockaddr *)&serv, sizeof(serv)) < 0) {
		printf("sendto error\n");
		close(sockfd);
		return -1;
	}

	/*
	 * Never read more than the buffer can hold, and keep one byte free
	 * for the terminating NUL.
	 */
	n = recvfrom(sockfd, buff, sizeof(buff) - 1, 0, NULL, NULL);
	close(sockfd);
	if (n < 0) {
		printf("recvfrom error\n");
		return -1;
	}

	/* Terminate the reply and drop any trailing CR/LF the server appended. */
	buff[n] = '\0';
	while (n > 0 && (buff[n - 1] == '\n' || buff[n - 1] == '\r'))
		buff[--n] = '\0';

	printf("buff=%s\n", buff);
	return 0;
}
该程序是一个简单的udp程序,向服务器发送时间信息请求,如果服务器开启了相应的服务,
就会返回类似的输出:buff=07 DEC 2013 11:40:40 CST
本文主要分析sendto函数具体做了什么。
本机的信息:ip:192.168.1.109 mac:1c:65:9d:2c:fe:f7
通过路由器连入网络,路由器信息:ip:192.168.1.1 mac:00:23:cd:5b:ea:d6
发送的包有40个字节的信息,内容为0x35到0x5c
首先通过tcpdump查看这种组合下发出去的包的内容,先有个直观的感觉:
-
11:31:39.553588 IP 192.168.1.109.55527 > 115.239.210.27.13: UDP, length 40
	0x0000:  0023 cd5b ead6 1c65 9d2c fef7 0800 4500  .#.[...e.,....E.
	0x0010:  0044 0000 4000 4011 3289 c0a8 016d 73ef  .D..@.@.2....ms.
	0x0020:  d21b d8e7 000d 0030 78bf 3536 3738 393a  .......0x.56789:
	0x0030:  3b3c 3d3e 3f40 4142 4344 4546 4748 494a  ;<=>?@ABCDEFGHIJ
	0x0040:  4b4c 4d4e 4f50 5152 5354 5556 5758 595a  KLMNOPQRSTUVWXYZ
	0x0050:  5b5c                                     [\
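要读懂这串信息,需要知道L2、L3、L4的头结构:
ethernet header长度为14字节:目的MAC(6字节)、源MAC(6字节)、上层协议类型(2字节)。
对应抓包中的 0023 cd5b ead6(路由器MAC)、1c65 9d2c fef7(本机MAC)、0800(IPv4)。
ip header长度为20字节(不考虑option):版本/首部长度、TOS、总长度、标识、标志/片偏移、TTL、协议、首部校验和、源IP、目的IP。
对应抓包中的 4500 0044 0000 4000 4011 3289 c0a8 016d 73ef d21b(总长度0x44=68,协议0x11=UDP,源IP 192.168.1.109,目的IP 115.239.210.27)。
udp header长度为8个字节:源端口、目的端口、长度、校验和,各2字节。
对应抓包中的 d8e7 000d 0030 78bf(源端口55527,目的端口13,长度0x30=48)。
tcpdump抓到的总共为82个字节(L2:14 + L3:20 + L4:8 + LOAD:40)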
用户态的代码中只指定了服务器的ip地址和端口号以及负载的内容,通过内核相应的处理后,填充了L2,L3以及L4的头结构,
其中包含本地的ip地址,MAC地址,本地端口号,路由器的MAC地址,以及类型相关的内容。
这些字段的赋值涉及Linux内核的整个网络协议栈。
首先略过整个内核协议栈,手动构造发送的包,看需要做什么操作才能把包发出去。
内核调用dev_queue_xmit(skb)函数实现发包流程,因此手动构造skb包,然后调用该函数,看能否正常发出
-
static int create_init(void)
-
{
-
struct net_device *dev,*dev_tmp;
-
int hh_len;
-
int alloc_len;
-
char *data_addr;
-
struct iphdr *iph;
-
struct ethhdr *eth;
-
struct udphdr *uh;
-
struct sk_buff *skb;
-
for_each_netdev(&init_net, dev)
-
if(strncmp(dev->name,”wlan0″,5)==0)
-
dev=dev_tmp;
-
hh_len= LL_RESERVED_SPACE(dev);
-
init_dest_mac(dest_mac);
-
-
/* based on __ip_append_data */
-
alloc_len=LOAD_SIZE+L4_SIZE+L3_SIZE+hh_len+15;
-
skb=alloc_skb(alloc_len, GFP_ATOMIC);
-
if(!skb)
-
return -1;
-
skb->dev=dev;
-
skb_reserve(skb, hh_len);//date+=hh_len;tail+=hh_len
-
skb_put(skb,LOAD_SIZE+L4_SIZE+L3_SIZE);//tal+= ; skb->len+=
-
skb_set_network_header(skb, 0);
-
skb->transport_header = (skb->network_header + L3_SIZE);
-
data_addr=skb->data+L3_SIZE+L4_SIZE;
-
skb_fill_load(data_addr,LOAD_SIZE);
-
-
/* based on __ip_make_skb */
-
iph = (struct iphdr *)skb->data;
-
iph->version = 4;
-
iph->ihl = 5;
-
iph->tos = 0;
-
iph->frag_off = htons(IP_DF);
-
iph->ttl = 0x40;
-
iph->protocol = IPPROTO_UDP; //0x11
-
iph->tot_len=htons(0x44);
-
iph->saddr = SOURCE_IP;
-
iph->daddr = DEST_IP;
-
iph->id=0;
-
-
/* based on udp_send_skb */
-
uh = udp_hdr(skb);
-
uh->source = htons(SOURCE_PORT);
-
uh->dest = htons(DEST_PORT);
-
uh->len = htons(skb->len – L3_SIZE);
-
uh->check = 0;
-
-
/* based on eth_header */
-
eth = (struct ethhdr *)skb_push(skb, L2_SIZE);//skb->data -= ;skb->len +=
-
eth->h_proto =htons(ETH_P_IP); //0x0800
-
memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
-
memcpy(eth->h_dest, dest_mac, ETH_ALEN);
-
dev_queue_xmit(skb);
-
return 0;
-
}
这样构造的包是能够正常发送的,因此我们主要关注协议栈是如何自动完成上面模块中手动指定的部分。
1:net_device如何选择:用户态代码中只有目的ip,而系统中可能有多个网卡,如何决定使用哪个net_device?
通过jprobe可以确定,源IP以及net_device是在udp_sendmsg函数的这段代码中确定的:
-
/*
 * Excerpt from udp_sendmsg(): runs only when no route is available yet
 * (rt == NULL). It builds a flowi4 lookup key and asks the routing code
 * for an output route. The variables used here and the `out` label are
 * defined in the part of udp_sendmsg() not shown.
 */
if (rt == NULL) {
	struct net *net = sock_net(sk);

	/* Describe the flow: oif, mark, tos, scope, protocol, flags, addresses, ports. */
	fl4 = &fl4_stack;
	flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
			   inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
			   faddr, saddr, dport, inet->inet_sport);

	security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
	rt = ip_route_output_flow(net, fl4, sk);
	if (IS_ERR(rt)) {
		/* Lookup failed: keep the error code, clear rt, count "no route" cases. */
		err = PTR_ERR(rt);
		rt = NULL;
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
		goto out;
	}

	/* A broadcast route is refused unless the socket has SOCK_BROADCAST set. */
	err = -EACCES;
	if ((rt->rt_flags & RTCF_BROADCAST) &&
	    !sock_flag(sk, SOCK_BROADCAST))
		goto out;
	/* For a connected socket, store a reference to the route on the socket. */
	if (connected)
		sk_dst_set(sk, dst_clone(&rt->dst));
}
首先通过flowi4_init_output函数初始化一个flowi4数据结构,此时源port已经确定,但是源ip还没有:
-
struct flowi4 {
-
struct flowi_common __fl_common;
-
#define flowi4_oif __fl_common.flowic_oif // 0
-
#define flowi4_iif __fl_common.flowic_iif // 发包流程,该字段为0
-
#define flowi4_mark __fl_common.flowic_mark // 0
-
#define flowi4_tos __fl_common.flowic_tos // 0
-
#define flowi4_scope __fl_common.flowic_scope // RT_SCOPE_UNIVERSE
-
#define flowi4_proto __fl_common.flowic_proto // IPPROTO_UDP
-
#define flowi4_flags __fl_common.flowic_flags
-
#define flowi4_secid __fl_common.flowic_secid //0
-
__be32 daddr;
-
__be32 saddr;
-
union flowi_uli uli;
-
#define fl4_sport uli.ports.sport
-
#define fl4_dport uli.ports.dport
-
#define fl4_icmp_type uli.icmpt.type
-
#define fl4_icmp_code uli.icmpt.code
-
#define fl4_ipsec_spi uli.spi
-
#define fl4_mh_type uli.mht.type
-
#define fl4_gre_key uli.gre_key
-
} __attribute__((__aligned__(BITS_PER_LONG/8)));
接着以flowi4为参数进行路由查找:
-
struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
-
{
-
struct rtable *rth;
-
unsigned int hash;
-
-
if (!rt_caching(net))
-
goto slow_output;
-
-
hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
-
-
rcu_read_lock_bh();
-
for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
-
rth = rcu_dereference_bh(rth->dst.rt_next)) {
-
if (rth->rt_key_dst == flp4->daddr &&
-
rth->rt_key_src == flp4->saddr &&
-
rt_is_output_route(rth) &&
-
rth->rt_oif == flp4->flowi4_oif &&
-
rth->rt_mark == flp4->flowi4_mark &&
-
!((rth->rt_key_tos ^ flp4->flowi4_tos) &
-
(IPTOS_RT_MASK | RTO_ONLINK)) &&
-
net_eq(dev_net(rth->dst.dev), net) &&
-
!rt_is_expired(rth)) {
-
dst_use(&rth->dst, jiffies);
-
RT_CACHE_STAT_INC(out_hit);
-
rcu_read_unlock_bh();
-
if (!flp4->saddr)
-
flp4->saddr = rth->rt_src;
-
if (!flp4->daddr)
-
flp4->daddr = rth->rt_dst;
-
return rth;
-
}
-
RT_CACHE_STAT_INC(out_hlist_search);
-
}
-
rcu_read_unlock_bh();
-
-
slow_output:
-
return ip_route_output_slow(net, flp4);
-
}
首先从cache中查找,查不到再走ip_route_output_slow流程。
cache查找比较简单:利用flp4->daddr、flp4->saddr、flp4->flowi4_oif、rt_genid(net)这四个字段生成hash值,
根据这个hash值去rt_hash_table全局变量对应的hash表中查找,对比需要比较的字段,满足则返回缓存项。