DPVS synproxy
TCP协议开辟了一个比较大的内存空间(请求连接队列)来存储连接请求块。当SYN请求不断增加、请求连接数目到达上限时,会致使系统丢弃SYN连接请求。SYN cookies技术可以使服务器在半连接队列已满的情况下仍能处理新的SYN请求。
当半连接队列满时,SYN cookies并不丢弃SYN请求,而是通过加密技术来标识半连接状态。在TCP实现中,当收到客户端的SYN请求时,服务器需回复SYN+ACK包给客户端,然后客户端再发送确认包给服务器。通常服务器的初始序列号是由服务器按照一定的规律计算得到或采用随机数;而在SYN cookies中,服务器的初始序列号是由客户端IP地址、客户端端口、服务器IP地址和服务器端口、接收到的客户端初始序列号以及其他一些安全数值进行hash运算并加密后得到的,称之为cookie。当服务器遭受SYN攻击使得请求连接队列满时,服务器并不拒绝新的SYN请求,而是回复一个初始序列号为cookie的SYN+ACK包给客户端。如果收到客户端的ACK段,服务器将该ACK段的确认号(acknowledgment number)减1得到的值,与用上述那些元素进行hash运算得到的值比较;如果相等,直接完成三次握手。注意:此时并不必查看此连接是否属于请求连接队列。
Linux内核通过在启动环境中执行以下命令来启用SYN cookies:

    echo 1 > /proc/sys/net/ipv4/tcp_syncookies

三次握手示例图
synproxy说明
1. client发送SYN,LB代理了第一次握手,不转发给RS。
2. LB返回SYN+ACK数据包时,seq由SYN cookies算法生成,并且将rcv_wnd设置为0,不允许在握手阶段携带数据,由此得知不支持TCP Fast Open。
3. 当client返回ACK时,反解seq,如果与SYN cookies算法匹配,那么就是正常流量。此时LB与后端RS开启三次握手,并透传win size;由于经过LB代理,还需要记录seq差值delta。
4. 数据交互通信:LB除了正常的full-nat工作,还要补偿seq delta。
5. 连接关闭,正常清理。

client第一次握手:dp_vs_pre_routing
staticintdpvsprerouting(voidpriv,structrtembufmbuf,conststructinethookstatestate,intaf){。。。Synproxy:defencesynflood如果是传输层协议是TCP,synproxy处理,此处处理client端第一次握手(syn)包的处理if(IPPROTOTCPiph。proto){intvINETACCEPT;if(0dpvssynproxysynrcv(af,mbuf,iph,v))returnv;}returnINETACCEPT;
}dpvssynproxysynrcv
处理client侧第一次握手数据包(syn包)Synproxystep1logic:receiveclientsSyn。Checkifsynproxyisenabledforthisskb,andsendsynackbackSynproxyisenabledwhen:mbufisasynpacket,andtheserviceissynproxyenable,andipvstodropreturnfasle(notsupportednow)return0meansthecallershouldreturnatonceanduseverdictasreturnvalue,return1fornothing。intdpvssynproxysynrcv(intaf,structrtembufmbuf,conststructdpvsiphdriph,intverdict){intret;structdpvsservicesvcNULL;structtcphdrth,tcph;structdpvssynproxyopttcpopt;structnetifportdev;structetherhdreth;structetheraddrethaddr;th指向tcp首部thmbufheaderpointer(mbuf,iphlen,sizeof(tcph),tcph);if(unlikely(NULLth))gotosynrcvout;第一次握手只有syn包,并有访问svc,开启了synproxy防护if(thsyn!thack!thrst!thfin(svcdpvsservicelookup(af,iphproto,iphdaddr,thdest,0,NULL,NULL,NULL,rtelcoreid()))(svcflagsDPVSSVCFSYNPROXY)){ifservicesweightiszero(nonactiverealserver),donotinganddropthepacket如果后端服务svc权重为0,没有可用后端,返回INETDROPif(svcweight0){dpvsestatsinc(SYNPROXYNODEST);gotosynrcvout;}droppacketfromblacklist
如果在黑名单中,那么退出,返回INETDROP
if(dpvsblklstlookup(iphaf,iphproto,iphdaddr,
thdest,iphsaddr)){
gotosynrcvout;
}}else{return1;}mbufwillbereusedandetherheaderwillbeset。FIXME:tosupportnonetherpackets。if(mbufl2len!sizeof(structetherhdr))gotosynrcvout;updatestatistics更新统计信息dpvsestatsinc(SYNPROXYSYNCNT);settxoffloadflags校验assert(mbufportNETIFMAXPORTS);获取netdevice层设备,并做校验devnetifportget(mbufport);if(unlikely(!dev)){RTELOG(ERR,IPVS,s:deviceethdnotfound,func,mbufport);gotosynrcvout;}根据物理设备的硬件负载功能,设置mbuf相应标志位if(likely(dev(devflagNETIFPORTFLAGTXTCPCSUMOFFLOAD))){if(afAFINET)mbufolflags(PKTTXTCPCKSUMPKTTXIPCKSUMPKTTXIPV4);elsembufolflags(PKTTXTCPCKSUMPKTTXIPV6);}reusembuf复用mbuf并回复synack,为什么说是复用呢?因为对mbuf修改后,直接当做synack回复包返回给了clientsynproxyreusembuf(af,mbuf,th,tcpopt);setL2headerandsendthepacketoutItisnotedthatipv4xmitshouldnotusedhere,becausembufisreused。设置L2层的header,源和目的对换eth(structetherhdr)rtepktmbufprepend(mbuf,mbufl2len);if(unlikely(!eth)){RTELOG(ERR,IPVS,s:nomemory,func);gotosynrcvout;}memcpy(ethaddr,ethsaddr,sizeof(structetheraddr));memcpy(ethsaddr,ethdaddr,sizeof(structetheraddr));memcpy(ethdaddr,ethaddr,sizeof(structetheraddr));调用netifxmit发送数据包if(unlikely(EDPVSOK!(retnetifxmit(mbuf,dev)))){RTELOG(ERR,IPVS,s:netifxmitfaileds,func,dpvsstrerror(ret));shouldnotsetverdicttoINETDROPsincenetifxmitalwaysconsumethembufwhileINETDROPmeansmbufllbefreeinINETHOOK。}verdictINETSTOLEN;return0;
synrcvout:dropanddestroythepacketverdictINETDROP;return0;}
syn_proxy_reuse_mbuf
设置tcp选项计算synack包的seq,syncookies计算设置synack包的seq和ackseq交换ip和tcp首部的源,目的信息计算ip首部和tcp首部校验和Reusembufforsynproxy,calledbysynproxysynrcv()。dofollowingthings:settcpoptions,computeseqwithcookiefunc,settcpseqandackseq,exchangeipaddrandtcpport,computeiphdrandtcpcheck(HWxmitchecksumoffloadnotsupportforsyn)。staticvoidsynproxyreusembuf(intaf,structrtembufmbuf,structtcphdrth,structdpvssynproxyoptopt){uint32tisn;uint16ttmpport;intiphlen;获取ip首部长度if(AFINET6af){iphlensizeof(structip6hdr);}else{iphlenip4hdrlen(mbuf);}长度校验,确保首部长度正确if(mbufmaypull(mbuf,iphlen(thdoff2))!0){return;}dealwithtcpoptions解析并且设置tcpoptions,包括mss,windowsize,timestampsynproxyparsesetopts(mbuf,th,opt);getcookie根据syncookies算法生成synack数据包的seqif(AFINET6af){isnsynproxycookiev6initsequence(mbuf,th,opt);}else{isnsynproxycookiev4initsequence(mbuf,th,opt);}setsynackflag设置synack标志((uint8t)th)〔13〕0x12;exchageports交换dest,source端口tmpportthdest;thdestthsource;thsourcetmpport;setwindowsizetozero设置接收窗口为0,不允许握手阶段携带数据信息thwindow0;setseq(cookie)andackseq设置seq和ackseq,其中ackseq是客户端序号加1,而返回的synseq就是刚刚计算出来的cookiethackseqhtonl(ntohl(thseq)1);thseqhtonl(isn);exchageaddresses交换源和目的ip地址信息并重新计算校验和if(AFINET6af){structin6addrtmpaddr;structip6hdrip6hip6hdr(mbuf);tmpaddrip6hip6src;
ip6hip6srcip6hip6dst;
ip6hip6dsttmpaddr;
ip6hip6hlimdpvssynproxyctrlsynackttl;
if(likely(mbufolflagsPKTTXTCPCKSUM))
{
mbufl3len(void)th(void)ip6h;
mbufl4lenntohs(ip6hip6plen)sizeof(structip6hdr)mbufl3len;
thcheckip6phdrcksum(ip6h,mbufolflags,mbufl3len,IPPROTOTCP);
}
else
{
if(mbufmaypull(mbuf,mbufpktlen)!0)
{
return;
}
tcp6sendcsum((structipv6hdr)ip6h,th);
}}else{uint32ttmpaddr;structiphdriph(structiphdr)ip4hdr(mbuf);tmpaddriphsaddr;
iphsaddriphdaddr;
iphdaddrtmpaddr;
iphttldpvssynproxyctrlsynackttl;
iphtos0;
computechecksum
if(likely(mbufolflagsPKTTXTCPCKSUM))
{
mbufl3leniphlen;
mbufl4lenntohs(iphtotlen)iphlen;
thcheckip4phdrcksum((structipv4hdr)iph,mbufolflags);
}
else
{
if(mbufmaypull(mbuf,mbufpktlen)!0)
{
return;
}
tcp4sendcsum((structipv4hdr)iph,th);
}
如果硬件不支持计算csum,调用ip4sendcsum生成checksum
if(likely(mbufolflagsPKTTXIPCKSUM))
{
iphcheck0;
}
else
{
ip4sendcsum((structipv4hdr)iph);
}}}
syn_proxy_parse_set_opts
Replacetcpoptionsintcpheader,calledbysynproxyreusembuf()staticvoidsynproxyparsesetopts(structrtembufmbuf,structtcphdrth,structdpvssynproxyoptopt){mssinreceivedpacketuint16tinmss;uint32ttmp;unsignedcharptr;计算tcp选项长度intlength(thdoff4)sizeof(structtcphdr);uint16tusermssdpvssynproxyctrlinitmss;structtimespectspnow;memset(opt,,sizeof(structdpvssynproxyopt));optmssclamp536;ptr(unsignedchar)(th1);while(length0){unsignedchartmpopcodeptr;intopcodeptr;intopsize;switch(opcode){选项结束,直接返回caseTCPOPTEOL:return;NOP选项,只作填充用,因此选项长度减1,进入下一个循环处理下一个选项caseTCPOPTNOP:length;continue;default:opsizeptr;如果不是选项表结束标志也不是空操作,则选取选项长度,并检测其合法性if(opsize2)sillyoptions{return;}选项长度校验if(opsizelength){return;dontparsepartialoptions}switch(opcode){caseTCPOPTMAXSEG:用来通告最大段长度,最大段长度选项格式如下kind2len4最大段长度该选项只能出现在SYN段片段中if(opsizeTCPOLENMAXSEG){inmssntohs((uint16t)ptr);if(inmss){如果系统设置的mss小于对端通告的mss,使用较小值回复if(usermssinmss){inmssusermss;}optmssclampinmss;}字节序转换(uint16t)ptrhtons(optmssclamp);}break;窗口选项caseTCPOPTWINDOW:kind3len3位移数去窗口扩大因子选项中的位移数,将标识SYN段中包含窗口扩大因子选项的wscaleok置为1,如果选项中位移数大于14则警告if(opsizeTCPOLENWINDOW){if(dpvssynproxyctrlwscale){optwscaleok1;optsndwscale(uint8t)ptr;if(optsndwscaleDPVSSYNPROXYWSCALEMAX){RTELOG(INFO,IPVS,tcpparseoptions:Illegalwindowscalingvalueddreceived。,optsndwscale,DPVSSYNPROXYWSCALEMAX);optsndwscaleDPVSSYNPROXYWSCALEMAX;}(uint8t)ptr(uint8t)dpvssynproxyctrlwscale;}else{不支持以NOP选项填充memset(tmpopcode,TCPOPTNOP,TCPOLENWINDOW);}}break;时间戳选项caseTCPOPTTIMESTAMP:if(opsizeTCPOLENTIMESTAMP){if(dpvssynproxyctrltimestamp){memset(tspnow,0,sizeof(tspnow));clockgettime(CLOCKREALTIME,tspnow);opttstampok1;tmp(uint32t)ptr;(tmp1)tmp;tmphtonl((uint32t)(TCPOPTTIMESTAMP(tspnow)));}else{memset(tmpopcode,TCPOPTNOP,TCPOLENTIMESTAMP);}}break;caseTCPOPTSACKPERMITTED:允许SACK选项,只能出现在SYN段中,将sackok置为1,标识syn中允许sack选项。if(opsizeTCPOLENSACKPERMITTED){if(dpvssynproxyctrlsack){optsackok1;}else{memset(tmpopcode,TCPOPTNOP,TCPOLENSACKPERMITTED);}}break;}ptropsize2;lengthopsize;}}
}
client第三次握手包应答:dp_vs_in
client侧第三次握手包(ACK),在dpvsprerouting中肯定会返回ACCEPT,继续在dpvsin中处理查找连接时不会命中,调用tcp传输层tcpconnsched函数进行新连接的调度staticintdpvsin(voidpriv,structrtembufmbuf,conststructinethookstatestate,intaf){。。。。对于新建的连接,肯定是没有会话的,connsched根据请求选择一个后端realserver建立连接if(unlikely(!conn)){tryscheduleRSandcreatenewconnection调用proto中connsched接口选择一个后端rs建立连接,如果创建连接失败,返回verdictif(protconnsched(prot,iph,mbuf,conn,verdict)!EDPVSOK){RTELOG(DEBUG,IPVS,s:failtoschedule。,func);return(verdict);}onlySNATtriggersconnectionbyinsideoutsidetraffic。snat模式,则是内部服务器访问外部服务,内网服务器dpvs外网服务器(baidu),所以设置dirDPVSCONNDIROUTBOUNDif(conndestfwdmodeDPVSFWDMODESNAT){dirDPVSCONNDIROUTBOUND;}else{其余模式设置dirDPVSCONNDIRINBOUNDdirDPVSCONNDIRINBOUND;}}。。。
}
tcp_conn_sched
staticinttcpconnsched(structdpvsprotoproto,conststructdpvsiphdriph,structrtembufmbuf,structdpvsconnconn,intverdict){。。。Synproxystep2logic:receiveclients3handshackeackpacketWhensynproxydisabled,onlySYNpacketscanarrivehere。SodontjudgeSYNPROXYflaghere!IfSYNPROXYflagjudged,andsynproxygotdisbledandkeepalivedreloaded,SYNpacketsforRSmayneverbesent。如果是syncookies连接建立第三次握手数据包,则返回EDPVSPKTSTOLENif(dpvssynproxyackrcv(iphaf,mbuf,th,proto,conn,iph,verdict)0){Attention:FirstACKpacketisalsostoredinconnackmbufreturn(EDPVSPKTSTOLEN);}。。。
}
dp_vs_synproxy_ack_rcv
syncookies校验dpvsschedule新建立连接后端调度,选择一个realserversynproxysendrssyn进行LB与RS的第一次握手Synproxystep2logic:receiveclientsAckReceiveclients3handshakesackpacket,docookiecheckandthensendsyntorsaftercreatingasessionintdpvssynproxyackrcv(intaf,structrtembufmbuf,structtcphdrth,structdpvsprotopp,structdpvsconncpp,conststructdpvsiphdriph,intverdict){intres;structdpvssynproxyoptopt;structdpvsservicesvc;intrescookiecheck;Donotchecksvcsynproxyflag,asitmaybechangedaftersynproxystep1。if(!thsynthack!thrst!thfin(svcdpvsservicelookup(af,iphproto,iphdaddr,thdest,0,NULL,NULL,NULL,rtelcoreid()))){if(dpvssynproxyctrldefer!synproxyackhasdata(mbuf,iph,th)){Updatestatisticsdpvsestatsinc(SYNPROXYNULLACK);Wegetapureackwhenexpectingackpacketwithpayload,so
havetodropit
verdictINETDROP;
return(0);
}
syncookies验证,如果不匹配,那么就是攻击或是无效流量,将包丢弃。如果成功,执行synproxy第二阶段,lb调用
dpvsschedule与后端realserver建立连接
if(AFINET6af)
{
rescookiechecksynproxyv6cookiecheck(mbuf,
ntohl(thackseq)1,opt);
}
else
{
rescookiechecksynproxyv4cookiecheck(mbuf,
ntohl(thackseq)1,opt);
}
if(!rescookiecheck)
{
Updatestatistics
dpvsestatsinc(SYNPROXYBADACK);
Cookiecheckfailed,dropthepacket
RTELOG(DEBUG,IPVS,s:syncookiecheckfailedsequn,func,
ntohl(thackseq)1);
verdictINETDROP;
return(0);
}
Updatestatistics
dpvsestatsinc(SYNPROXYOKACK);
Letthevirtualserverselectarealserverfortheincomingconnetion,
andcreateaconnectionentry
dpvsschedule新建立连接后端调度,选择一个realserver
cppdpvsschedule(svc,iph,mbuf,1,0);
if(unlikely(!cpp))
{
RTELOG(WARNING,IPVS,s:ipvsschedulefailedn,func);
FIXME:Whattodowhenvirtualserviceisavailablebutnodestination
availableforanewconnetion:sendanicmpUNREACHABLE?
verdictINETDROP;
return(0);
}
Donothingbutprintaerrormsgwhenfail,becausesessionwillbe
correctlyfreedindpvsconnexpire
synproxysendrssyn完成lb与realserver建连
if(EDPVSOK!(ressynproxysendrssyn(af,th,cpp,mbuf,pp,opt)))
{
RTELOG(ERR,IPVS,s:synproxysendrssynfailedsn,
func,dpvsstrerror(res));
}
Countintheackpacket(STOLENbysynproxy)
dpvsstatsin(cpp,mbuf);
Activesessiontimer,anddecrefcnt。
Alsostealthembuf,andletcallerreturnimmediately
dpvsconnput(cpp);
verdictINETSTOLEN;
return(0);}return(1);}
syn_proxy_send_rs_syn
Createsynpacketandsendittors。Wealsostoresynmbufincpifsynretransmitionisturnedon。staticintsynproxysendrssyn(intaf,conststructtcphdrth,structdpvsconncp,structrtembufmbuf,structdpvsprotopp,structdpvssynproxyoptopt){inttcphdrsize;structrtembufsynmbuf,synmbufcloned;structrtemempoolpool;structtcphdrsynth;if(!cppacketxmit){RTELOG(WARNING,IPVS,s:packetxmitisnull,func);return(EDPVSINVAL);}Allocatembuffromdevicemempoolpoolgetmbufpool(cp,DPVSCONNDIRINBOUND);if(unlikely(!pool)){RTELOG(WARNING,IPVS,s:s,func,dpvsstrerror(EDPVSNOROUTE));return(EDPVSNOROUTE);}从内存池中分配synmbuf,用于发送到后端realserversynmbufrtepktmbufalloc(pool);if(unlikely(!synmbuf)){RTELOG(WARNING,IPVS,s:s,func,dpvsstrerror(EDPVSNOMEM));return(EDPVSNOMEM);}设置路由缓存为nullsynmbufuserdataNULL;makesurenorouteinfoReservespacefortcpheader为tcp层保留空间,包括选项,通过prepend向mbuf的headroom添加数据tcphdrsize(sizeof(structtcphdr)TCPOLENMAXSEG(opttstampok?TCPOLENTSTAMPAPPA:0)(optwscaleok?TCPOLENWSCALEALIGNED:0)SACKPERMisinthepalceofNOPNOPofTS((optsackok!opttstampok)?TCPOLENSACKPERMITTEDALIGNED:0));synth(structtcphdr)rtepktmbufprepend(synmbuf,tcphdrsize);if(!synth){rtepktmbuffree(synmbuf);RTELOG(WARNING,IPVS,s:s,func,dpvsstrerror(EDPVSNOROOM));return(EDPVSNOROOM);}Setuptcpheadermemset(synth,0,tcphdrsize);synthsourcethsource;synthdestthdest;synthseqhtonl(ntohl(thseq)1);synthackseq0;(((uint16t)synth)6)htons(((tcphdrsize2)12)THSYN0x02);FIXME:whatwindowshouldweusesynthwindowhtons(5000);synthcheck0;synthurgptr0;synthurg0;构造syn包的tcp选项synproxysynbuildoptions((uint32t)(synth1),opt);IP首部的构造if(AFINET6af){structip6hdrackip6h;structip6hdrsynip6h;Reservespaceforipv6header
synip6h(structip6hdr)rtepktmbufprepend(synmbuf,
sizeof(structip6hdr));
if(!synip6h)
{
rtepktmbuffree(synmbuf);
RTELOG(WARNING,IPVS,s:sn,func,dpvsstrerror(EDPVSNOROOM));
return(EDPVSNOROOM);
}
ackip6h(structip6hdr)ip6hdr(mbuf);
synip6hip6vfc0x60;IPv6
synip6hip6srcackip6hip6src;
synip6hip6dstackip6hip6dst;
synip6hip6plenhtons(tcphdrsize);
synip6hip6nxtNEXTHDRTCP;
synip6hip6hlimIPV6DEFAULTHOPLIMIT;
synmbufl3lensizeof(synip6h);}else{structiphdrackiph;structiphdrsyniph;Reservespaceforipv4header
syniph(structiphdr)rtepktmbufprepend(synmbuf,sizeof(structipv4hdr));
if(!syniph)
{
rtepktmbuffree(synmbuf);
RTELOG(WARNING,IPVS,s:sn,func,dpvsstrerror(EDPVSNOROOM));
return(EDPVSNOROOM);
}
ackiph(structiphdr)ip4hdr(mbuf);
((uint16t)syniph)htons((412)(58)(ackiphtos0x1E));
syniphtotlenhtons(synmbufpktlen);
syniphfragoffhtons(IPV4HDRDFFLAG);
syniphttl64;
syniphprotocolIPPROTOTCP;
syniphsaddrackiphsaddr;
syniphdaddrackiphdaddr;
synmbufl3lensizeof(syniph);
checksumisdonebyfnatinhandler
syniphcheck0;}Savesynmbufifsynretransmissionisonsynretry,主动连接时的超时重传次数,如果大于零,将构造的数据报缓存起来if(dpvssynproxyctrlsynretry0){synmbufclonedmbufcopy(synmbuf,pool);if(unlikely(!synmbufcloned)){rtepktmbuffree(synmbuf);RTELOG(WARNING,IPVS,s:s,func,dpvsstrerror(EDPVSNOMEM));return(EDPVSNOMEM);}synmbufcloneduserdataNULL;
cpsynmbufsynmbufcloned;
spdbgstats32inc(spsynsaved);
rteatomic32set(cpsynretrymax,dpvssynproxyctrlsynretry);}TODO:SaveinfoforfastresponsexmitCountinthesynpacketdpvsstatsin(cp,mbuf);Ifxmitfailed,synmbufwillbefreedcorrectly调用packetxmit发送,此处为dpvsxmitfnatcppacketxmit(pp,cp,synmbuf);return(EDPVSOK);}
RS端SYN+ACK应答:dp_vs_in
方向为DPVSCONNDIROUTBOUND此时能够查找到连接,最终会进入dpvssynproxysynackrcv逻辑staticintdpvsin(voidpriv,structrtembufmbuf,conststructinethookstatestate,intaf){if(connflagsDPVSCONNFSYNPROXY){if(dirDPVSCONNDIRINBOUND){FilteroutinackpacketwhencpisatSYNSENTstate。Dropitifnotavalidpacket,storeitotherwiseif(0dpvssynproxyfilterack(mbuf,conn,prot,iph,verdict)){dpvsstatsin(conn,mbuf);dpvsconnput(conn);return(verdict);}Reusesynproxysessions。Reusemeansupdatesynproxyseqstructandcleanackmbufetc。if(0!dpvssynproxyctrlconnreuse){if(0dpvssynproxyreuseconn(af,mbuf,conn,prot,iph,verdict)){dpvsstatsin(conn,mbuf);dpvsconnput(conn);return(verdict);}}}else{Synproxy3logic:receivesynackfromrsif(dpvssynproxysynackrcv(mbuf,conn,prot,iph。len,verdict)0){dpvsstatsout(conn,mbuf);dpvsconnput(conn);return(verdict);}}}
}dpvssynproxysynackrcv
Synproxystep3logic:receiverssSynAck。Updatesynproxyseq。deltaandsendstoredackmbufstors。intdpvssynproxysynackrcv(structrtembufmbuf,structdpvsconncp,structdpvsprotopp,intthoffset,intverdict){structtcphdrtcph,th;structdpvssynproxyackpakcettmbuf,tmbuf2;structlistheadsavembuf;structdpvsdestdestcpdest;unsignedconntimeout0;th指向tcp首部起始位置thmbufheaderpointer(mbuf,thoffset,sizeof(tcph),tcph);if(unlikely(!th)){verdictINETDROP;return(0);}
ifdefCONFIGDPVSIPVSDEBUGRTELOG(DEBUG,IPVS,s:sequacksequccccpissynproxyucpstateu,func,ntohl(thseq),ntohl(thackseq),(thsyn)?S:,(thack)?A:,(thrst)?R:,cpflagsDPVSCONNFSYNPROXY,cpstate);endifINITLISTHEAD(savembuf);判断应答包状态,必须是syn和ack包,并且开启了synproxy,当前conn连接处于DPVSTCPSSYNSENT状态if((thsyn)(thack)(!thrst)(cpflagsDPVSCONNFSYNPROXY)(cpstateDPVSTCPSSYNSENT)){更新synproxyseq。delta序列号差值cpsynproxyseq。deltantohl(cpsynproxyseq。isn)ntohl(thseq);连接状态进入ESTABLISHEDcpstateDPVSTCPSESTABLISHED;获取连接超时时间conntimeoutdpvsgetconntimeout(cp);if(unlikely((conntimeout!0)(cpprotoIPPROTOTCP))){cptimeout。tvsecconntimeout;}else{cptimeout。tvsecpptimeouttable〔cpstate〕;}dpvstimeranddelay(cptimeout,1000000);更新dest上的连接统计信息if(dest){rteatomic32inc(destactconns);rteatomic32dec(destinactconns);cpflagsDPVSCONNFINACTIVE;}Savetcpsequenceforfullnatnat,insidetooutside保存序号rsendseq和rsendackif(DPVSFWDMODENATcpdestfwdmodeDPVSFWDMODEFNATcpdestfwdmode){cprsendseqhtonl(ntohl(thseq)1);cprsendackthackseq;
ifdefCONFIGDPVSIPVSDEBUGRTELOG(DEBUG,IPVS,s:packetfromrs,sequ,acksequ,portuu,func,ntohl(thseq),ntohl(thackseq),ntohs(thsource),ntohs(thdest));endif}TODO:ipvssynproxysavefastxmitinfo?Freestoredsynmbuf,noneedforretransmitionanymoresynmbuf上保存了lbrs发起连接请求的数据报,此时连接正常完成,需要释放if(cpsynmbuf){rtepktmbuffree(cpsynmbuf);cpsynmbufNULL;spdbgstats32dec(spsynsaved);}在全局ackmbuf链表中删除自己的ackmbuf引用if(listempty(cpackmbuf)){FIXME:Maybeabughere,printerrmsgandgo。Attention:cpstatehasbeenchangedandweshouldstillDROPthesynackmbuf。RTELOG(ERR,IPVS,s:gotackmbufNULLpointer:acksavedun,func,cpacknum);verdictINETDROP;return(0);}WindowsizehasbeensettozerointhesynackpackettoClient。Ifgetmorethanoneackpackethere,itmeansclienthassentawindowprobeafteroneRTO。TheprobewillbeforwardtoRSandRSwillrespondawindowupdate。SoDPVShasnoneedtosendawindowupdate。设置窗口if(cpacknum1){synproxysendwindowupdate(tuplehashout(cp)。af,mbuf,cp,pp,th);}listforeachentrysafe(tmbuf,tmbuf2,cpackmbuf,list){listdelinit(tmbuflist);cpacknum;listaddtail(tmbuflist,savembuf);}assert(cpacknum0);调用packetxmit将缓存发送至rs侧的数据包发送至rs,其中包括第三次握手的ack数据包listforeachentrysafe(tmbuf,tmbuf2,savembuf,list){listdelinit(tmbuflist);synmbufwillbefreedcorrectlyifxmitfailed调用packetxmit将其发送至rscppacketxmit(pp,cp,tmbufmbuf);freedpvssynproxyackpakcetrtemempoolput(thisackmbufpool,tmbuf);spdbgstats32dec(spacksaved);}这个ack连接数据报不需要发送给client侧,所以此处返回dropverdictINETDROP;return(0);}elseif((thrst)(cpflagsDPVSCONNFSYNPROXY)(cpstateDPVSTCPSSYNSENT)){RTELOG(DEBUG,IPVS,s:getrstfromrs,sequacksequn,func,ntohl(thseq),ntohl(thackseq));Countthedeltaofseq如果是rst包,设置连接状态为DPVSTCPSCLOSEcpsynproxyseq。deltantohl(cpsynproxyseq。isn)ntohl(thseq);cpstateDPVSTCPSCLOSE;cptimeout。tvsecpptimeouttable〔cpstate〕;dpvstimeranddelay(cptimeout,1000000);thseqhtonl(ntohl(thseq)1);synproxyseqcsumupdate?return(1);}return(1);
}
原文链接:https://blog.csdn.net/zjx345438858/article/details/108106143