conntrack_linux.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. package netlink
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io/fs"
  8. "net"
  9. "time"
  10. "github.com/vishvananda/netlink/nl"
  11. "golang.org/x/sys/unix"
  12. )
  // ConntrackTableType selects the conntrack table that a netlink operation
  // targets. Its value is shifted into the netlink message type when a request
  // is built.
  type ConntrackTableType uint8
  // Conntrack table identifiers. The values mirror the netfilter subsystem IDs
  // from https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h
  const (
  	// ConntrackTable is the conntrack table
  	// (#define NFNL_SUBSYS_CTNETLINK 1).
  	ConntrackTable = 1

  	// ConntrackExpectTable is the conntrack expectation table
  	// (#define NFNL_SUBSYS_CTNETLINK_EXP 2).
  	ConntrackExpectTable = 2
  )
  // seekCurrent is the "seek relative to the current offset" whence value.
  // It is declared locally for backward compatibility with Go 1.6, which does
  // not have io.SeekCurrent.
  const seekCurrent = 1
  // InetFamily is the address family of a conntrack request. It is written
  // into the NfgenFamily field of the netfilter header.
  type InetFamily uint8
  29. // -L [table] [options] List conntrack or expectation table
  30. // -G [table] parameters Get conntrack or expectation
  31. // -I [table] parameters Create a conntrack or expectation
  32. // -U [table] parameters Update a conntrack
  33. // -E [table] [options] Show events
  34. // -C [table] Show counter
  35. // -S Show statistics
  36. // ConntrackTableList returns the flow list of a table of a specific family
  37. // conntrack -L [table] [options] List conntrack or expectation table
  38. //
  39. // If the returned error is [ErrDumpInterrupted], results may be inconsistent
  40. // or incomplete.
  41. func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
  42. return pkgHandle.ConntrackTableList(table, family)
  43. }
  44. // ConntrackTableFlush flushes all the flows of a specified table
  45. // conntrack -F [table] Flush table
  46. // The flush operation applies to all the family types
  47. func ConntrackTableFlush(table ConntrackTableType) error {
  48. return pkgHandle.ConntrackTableFlush(table)
  49. }
  50. // ConntrackCreate creates a new conntrack flow in the desired table
  51. // conntrack -I [table] Create a conntrack or expectation
  52. func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
  53. return pkgHandle.ConntrackCreate(table, family, flow)
  54. }
  55. // ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
  56. // conntrack -U [table] Update a conntrack
  57. func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
  58. return pkgHandle.ConntrackUpdate(table, family, flow)
  59. }
  60. // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
  61. // conntrack -D [table] parameters Delete conntrack or expectation
  62. //
  63. // Deprecated: use [ConntrackDeleteFilters] instead.
  64. func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
  65. return pkgHandle.ConntrackDeleteFilters(table, family, filter)
  66. }
  67. // ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters
  68. // conntrack -D [table] parameters Delete conntrack or expectation
  69. func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
  70. return pkgHandle.ConntrackDeleteFilters(table, family, filters...)
  71. }
  72. // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
  73. // conntrack -L [table] [options] List conntrack or expectation table
  74. //
  75. // If the returned error is [ErrDumpInterrupted], results may be inconsistent
  76. // or incomplete.
  77. func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
  78. res, executeErr := h.dumpConntrackTable(table, family)
  79. if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
  80. return nil, executeErr
  81. }
  82. // Deserialize all the flows
  83. var result []*ConntrackFlow
  84. for _, dataRaw := range res {
  85. result = append(result, parseRawData(dataRaw))
  86. }
  87. return result, executeErr
  88. }
  89. // ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
  90. // conntrack -F [table] Flush table
  91. // The flush operation applies to all the family types
  92. func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
  93. req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
  94. _, err := req.Execute(unix.NETLINK_NETFILTER, 0)
  95. return err
  96. }
  97. // ConntrackCreate creates a new conntrack flow in the desired table using the handle
  98. // conntrack -I [table] Create a conntrack or expectation
  99. func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
  100. req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE)
  101. attr, err := flow.toNlData()
  102. if err != nil {
  103. return err
  104. }
  105. for _, a := range attr {
  106. req.AddData(a)
  107. }
  108. _, err = req.Execute(unix.NETLINK_NETFILTER, 0)
  109. return err
  110. }
  111. // ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
  112. // conntrack -U [table] Update a conntrack
  113. func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
  114. req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE)
  115. attr, err := flow.toNlData()
  116. if err != nil {
  117. return err
  118. }
  119. for _, a := range attr {
  120. req.AddData(a)
  121. }
  122. _, err = req.Execute(unix.NETLINK_NETFILTER, 0)
  123. return err
  124. }
  125. // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
  126. // conntrack -D [table] parameters Delete conntrack or expectation
  127. //
  128. // Deprecated: use [Handle.ConntrackDeleteFilters] instead.
  129. func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
  130. return h.ConntrackDeleteFilters(table, family, filter)
  131. }
  132. // ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed
  133. // conntrack -D [table] parameters Delete conntrack or expectation
  134. func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
  135. var finalErr error
  136. res, err := h.dumpConntrackTable(table, family)
  137. if err != nil {
  138. if !errors.Is(err, ErrDumpInterrupted) {
  139. return 0, err
  140. }
  141. // This allows us to at least do a best effort to try to clean the
  142. // entries matching the filter.
  143. finalErr = err
  144. }
  145. var totalFilterErrors int
  146. var matched uint
  147. for _, dataRaw := range res {
  148. flow := parseRawData(dataRaw)
  149. for _, filter := range filters {
  150. if match := filter.MatchConntrackFlow(flow); match {
  151. req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
  152. // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
  153. req2.AddRawData(dataRaw[4:])
  154. if _, err = req2.Execute(unix.NETLINK_NETFILTER, 0); err == nil || errors.Is(err, fs.ErrNotExist) {
  155. matched++
  156. // flow is already deleted, no need to match on other filters and continue to the next flow.
  157. break
  158. } else {
  159. totalFilterErrors++
  160. }
  161. }
  162. }
  163. }
  164. if totalFilterErrors > 0 {
  165. finalErr = errors.Join(finalErr, fmt.Errorf("failed to delete %d conntrack flows with %d filters", totalFilterErrors, len(filters)))
  166. }
  167. return matched, finalErr
  168. }
  169. func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
  170. // Create the Netlink request object
  171. req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
  172. // Add the netfilter header
  173. msg := &nl.Nfgenmsg{
  174. NfgenFamily: uint8(family),
  175. Version: nl.NFNETLINK_V0,
  176. ResId: 0,
  177. }
  178. req.AddData(msg)
  179. return req
  180. }
  181. func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
  182. req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP)
  183. return req.Execute(unix.NETLINK_NETFILTER, 0)
  184. }
  // ProtoInfo wraps an L4-protocol structure. It roughly corresponds to the
  // __nfct_protoinfo union found in
  // libnetfilter_conntrack/include/internal/object.h.
  // Currently only protocol names and the TCP state are supported.
  type ProtoInfo interface {
  	// Protocol returns the name of the L4 protocol.
  	Protocol() string
  }
  // ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union.
  // Only the TCP state is currently supported.
  type ProtoInfoTCP struct {
  	// State is the TCP connection-tracking state.
  	State uint8
  }

  // Protocol returns "tcp".
  func (*ProtoInfoTCP) Protocol() string {
  	return "tcp"
  }
  198. func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) {
  199. ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{})
  200. ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{})
  201. ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State))
  202. ctProtoInfoTCP.AddChild(ctProtoInfoTCPState)
  203. ctProtoInfo.AddChild(ctProtoInfoTCP)
  204. return []*nl.RtAttr{ctProtoInfo}, nil
  205. }
  // ProtoInfoSCTP carries no fields; only the protocol name is supported.
  type ProtoInfoSCTP struct{}

  // Protocol returns "sctp".
  func (*ProtoInfoSCTP) Protocol() string {
  	return "sctp"
  }
  // ProtoInfoDCCP carries no fields; only the protocol name is supported.
  type ProtoInfoDCCP struct{}

  // Protocol returns "dccp".
  func (*ProtoInfoDCCP) Protocol() string {
  	return "dccp"
  }
  // IPTuple holds one direction of a conntrack flow.
  //
  // The full conntrack flow structure is very complicated and can be found in:
  // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
  // For the time being, this structure carries only the base information of a
  // flow direction.
  type IPTuple struct {
  	Bytes    uint64 // byte counter for this direction
  	DstIP    net.IP // destination address
  	DstPort  uint16 // destination port
  	Packets  uint64 // packet counter for this direction
  	Protocol uint8  // L4 protocol number
  	SrcIP    net.IP // source address
  	SrcPort  uint16 // source port
  }
  226. // toNlData generates the inner fields of a nested tuple netlink datastructure
  227. // does not generate the "nested"-flagged outer message.
  228. func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) {
  229. var srcIPsFlag, dstIPsFlag int
  230. if family == nl.FAMILY_V4 {
  231. srcIPsFlag = nl.CTA_IP_V4_SRC
  232. dstIPsFlag = nl.CTA_IP_V4_DST
  233. } else if family == nl.FAMILY_V6 {
  234. srcIPsFlag = nl.CTA_IP_V6_SRC
  235. dstIPsFlag = nl.CTA_IP_V6_DST
  236. } else {
  237. return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family)
  238. }
  239. ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil)
  240. ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP)
  241. ctTupleIP.AddChild(ctTupleIPSrc)
  242. ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP)
  243. ctTupleIP.AddChild(ctTupleIPDst)
  244. ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil)
  245. ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol})
  246. ctTupleProto.AddChild(ctTupleProtoNum)
  247. ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort))
  248. ctTupleProto.AddChild(ctTupleProtoSrcPort)
  249. ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort))
  250. ctTupleProto.AddChild(ctTupleProtoDstPort, )
  251. return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil
  252. }
  253. type ConntrackFlow struct {
  254. FamilyType uint8
  255. Forward IPTuple
  256. Reverse IPTuple
  257. Mark uint32
  258. Zone uint16
  259. TimeStart uint64
  260. TimeStop uint64
  261. TimeOut uint32
  262. Labels []byte
  263. ProtoInfo ProtoInfo
  264. }
  265. func (s *ConntrackFlow) String() string {
  266. // conntrack cmd output:
  267. // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100
  268. // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec)
  269. start := time.Unix(0, int64(s.TimeStart))
  270. stop := time.Unix(0, int64(s.TimeStop))
  271. timeout := int32(s.TimeOut)
  272. res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ",
  273. nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
  274. s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
  275. s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
  276. s.Mark)
  277. if len(s.Labels) > 0 {
  278. res += fmt.Sprintf("labels=0x%x ", s.Labels)
  279. }
  280. if s.Zone != 0 {
  281. res += fmt.Sprintf("zone=%d ", s.Zone)
  282. }
  283. res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout)
  284. return res
  285. }
  286. // toNlData generates netlink messages representing the flow.
  287. func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) {
  288. var payload []*nl.RtAttr
  289. // The message structure is built as follows:
  290. // <len, NLA_F_NESTED|CTA_TUPLE_ORIG>
  291. // <len, NLA_F_NESTED|CTA_TUPLE_IP>
  292. // <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
  293. // <IP>
  294. // <len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
  295. // <IP>
  296. // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
  297. // <len, CTA_PROTO_NUM>
  298. // <uint8>
  299. // <len, CTA_PROTO_SRC_PORT>
  300. // <BEuint16>
  301. // <len, CTA_PROTO_DST_PORT>
  302. // <BEuint16>
  303. // <len, NLA_F_NESTED|CTA_TUPLE_REPLY>
  304. // <len, NLA_F_NESTED|CTA_TUPLE_IP>
  305. // <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
  306. // <IP>
  307. // <len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
  308. // <IP>
  309. // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
  310. // <len, CTA_PROTO_NUM>
  311. // <uint8>
  312. // <len, CTA_PROTO_SRC_PORT>
  313. // <BEuint16>
  314. // <len, CTA_PROTO_DST_PORT>
  315. // <BEuint16>
  316. // <len, CTA_STATUS>
  317. // <uint64>
  318. // <len, CTA_MARK>
  319. // <BEuint64>
  320. // <len, CTA_TIMEOUT>
  321. // <BEuint64>
  322. // <len, NLA_F_NESTED|CTA_PROTOINFO>
  323. // CTA_TUPLE_ORIG
  324. ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil)
  325. forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType)
  326. if err != nil {
  327. return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err)
  328. }
  329. for _, a := range forwardFlowAttrs {
  330. ctTupleOrig.AddChild(a)
  331. }
  332. // CTA_TUPLE_REPLY
  333. ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil)
  334. reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType)
  335. if err != nil {
  336. return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err)
  337. }
  338. for _, a := range reverseFlowAttrs {
  339. ctTupleReply.AddChild(a)
  340. }
  341. ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark))
  342. ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut))
  343. payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout)
  344. if s.ProtoInfo != nil {
  345. switch p := s.ProtoInfo.(type) {
  346. case *ProtoInfoTCP:
  347. attrs, err := p.toNlData()
  348. if err != nil {
  349. return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err)
  350. }
  351. payload = append(payload, attrs...)
  352. default:
  353. return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil")
  354. }
  355. }
  356. return payload, nil
  357. }
  358. // This method parse the ip tuple structure
  359. // The message structure is the following:
  360. // <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
  361. // <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
  362. // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
  363. // <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
  364. // <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
  365. func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 {
  366. for i := 0; i < 2; i++ {
  367. _, t, _, v := parseNfAttrTLV(reader)
  368. switch t {
  369. case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
  370. tpl.SrcIP = v
  371. case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
  372. tpl.DstIP = v
  373. }
  374. }
  375. // Get total length of nested protocol-specific info.
  376. _, _, protoInfoTotalLen := parseNfAttrTL(reader)
  377. _, t, l, v := parseNfAttrTLV(reader)
  378. // Track the number of bytes read.
  379. protoInfoBytesRead := uint16(nl.SizeofNfattr) + l
  380. if t == nl.CTA_PROTO_NUM {
  381. tpl.Protocol = uint8(v[0])
  382. }
  383. // We only parse TCP & UDP headers. Skip the others.
  384. if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP {
  385. // skip the rest
  386. bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
  387. reader.Seek(int64(bytesRemaining), seekCurrent)
  388. return tpl.Protocol
  389. }
  390. // Skip 3 bytes of padding
  391. reader.Seek(3, seekCurrent)
  392. protoInfoBytesRead += 3
  393. for i := 0; i < 2; i++ {
  394. _, t, _ := parseNfAttrTL(reader)
  395. protoInfoBytesRead += uint16(nl.SizeofNfattr)
  396. switch t {
  397. case nl.CTA_PROTO_SRC_PORT:
  398. parseBERaw16(reader, &tpl.SrcPort)
  399. protoInfoBytesRead += 2
  400. case nl.CTA_PROTO_DST_PORT:
  401. parseBERaw16(reader, &tpl.DstPort)
  402. protoInfoBytesRead += 2
  403. }
  404. // Skip 2 bytes of padding
  405. reader.Seek(2, seekCurrent)
  406. protoInfoBytesRead += 2
  407. }
  408. // Skip any remaining/unknown parts of the message
  409. bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
  410. reader.Seek(int64(bytesRemaining), seekCurrent)
  411. return tpl.Protocol
  412. }
  413. func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
  414. isNested, attrType, len = parseNfAttrTL(r)
  415. value = make([]byte, len)
  416. binary.Read(r, binary.BigEndian, &value)
  417. return isNested, attrType, len, value
  418. }
  419. func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
  420. binary.Read(r, nl.NativeEndian(), &len)
  421. len -= nl.SizeofNfattr
  422. binary.Read(r, nl.NativeEndian(), &attrType)
  423. isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
  424. attrType = attrType & (nl.NLA_F_NESTED - 1)
  425. return isNested, attrType, len
  426. }
  427. // skipNfAttrValue seeks `r` past attr of length `len`.
  428. // Maintains buffer alignment.
  429. // Returns length of the seek performed.
  430. func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 {
  431. len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1)
  432. r.Seek(int64(len), seekCurrent)
  433. return len
  434. }
  // parseBERaw16 reads a big-endian uint16 from r into *v.
  // If fewer than 2 bytes remain, they are consumed and *v is left unchanged.
  func parseBERaw16(r *bytes.Reader, v *uint16) {
  	var buf [2]byte
  	if n, _ := r.Read(buf[:]); n < len(buf) {
  		return
  	}
  	*v = binary.BigEndian.Uint16(buf[:])
  }
  // parseBERaw32 reads a big-endian uint32 from r into *v.
  // If fewer than 4 bytes remain, they are consumed and *v is left unchanged.
  func parseBERaw32(r *bytes.Reader, v *uint32) {
  	var buf [4]byte
  	if n, _ := r.Read(buf[:]); n < len(buf) {
  		return
  	}
  	*v = binary.BigEndian.Uint32(buf[:])
  }
  // parseBERaw64 reads a big-endian uint64 from r into *v.
  // If fewer than 8 bytes remain, they are consumed and *v is left unchanged.
  func parseBERaw64(r *bytes.Reader, v *uint64) {
  	var buf [8]byte
  	if n, _ := r.Read(buf[:]); n < len(buf) {
  		return
  	}
  	*v = binary.BigEndian.Uint64(buf[:])
  }
  444. func parseRaw32(r *bytes.Reader, v *uint32) {
  445. binary.Read(r, nl.NativeEndian(), v)
  446. }
  447. func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
  448. for i := 0; i < 2; i++ {
  449. switch _, t, _ := parseNfAttrTL(r); t {
  450. case nl.CTA_COUNTERS_BYTES:
  451. parseBERaw64(r, &bytes)
  452. case nl.CTA_COUNTERS_PACKETS:
  453. parseBERaw64(r, &packets)
  454. default:
  455. return
  456. }
  457. }
  458. return
  459. }
  460. // when the flow is alive, only the timestamp_start is returned in structure
  461. func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) {
  462. var numTimeStamps int
  463. oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp
  464. if readSize == uint16(oneItem) {
  465. numTimeStamps = 1
  466. } else if readSize == 2*uint16(oneItem) {
  467. numTimeStamps = 2
  468. } else {
  469. return
  470. }
  471. for i := 0; i < numTimeStamps; i++ {
  472. switch _, t, _ := parseNfAttrTL(r); t {
  473. case nl.CTA_TIMESTAMP_START:
  474. parseBERaw64(r, &tstart)
  475. case nl.CTA_TIMESTAMP_STOP:
  476. parseBERaw64(r, &tstop)
  477. default:
  478. return
  479. }
  480. }
  481. return
  482. }
  483. func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) {
  484. binary.Read(r, binary.BigEndian, &s)
  485. r.Seek(nl.SizeofNfattr - 1, seekCurrent)
  486. return s
  487. }
  488. // parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr.
  489. func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) {
  490. p := new(ProtoInfoTCP)
  491. bytesRead := 0
  492. for bytesRead < int(attrLen) {
  493. _, t, l := parseNfAttrTL(r)
  494. bytesRead += nl.SizeofNfattr
  495. switch t {
  496. case nl.CTA_PROTOINFO_TCP_STATE:
  497. p.State = parseProtoInfoTCPState(r)
  498. bytesRead += nl.SizeofNfattr
  499. default:
  500. bytesRead += int(skipNfAttrValue(r, l))
  501. }
  502. }
  503. return p
  504. }
  505. func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) {
  506. bytesRead := 0
  507. for bytesRead < int(attrLen) {
  508. _, t, l := parseNfAttrTL(r)
  509. bytesRead += nl.SizeofNfattr
  510. switch t {
  511. case nl.CTA_PROTOINFO_TCP:
  512. p = parseProtoInfoTCP(r, l)
  513. bytesRead += int(l)
  514. // No inner fields of DCCP / SCTP currently supported.
  515. case nl.CTA_PROTOINFO_DCCP:
  516. p = new(ProtoInfoDCCP)
  517. skipped := skipNfAttrValue(r, l)
  518. bytesRead += int(skipped)
  519. case nl.CTA_PROTOINFO_SCTP:
  520. p = new(ProtoInfoSCTP)
  521. skipped := skipNfAttrValue(r, l)
  522. bytesRead += int(skipped)
  523. default:
  524. skipped := skipNfAttrValue(r, l)
  525. bytesRead += int(skipped)
  526. }
  527. }
  528. return p
  529. }
  // parseTimeOut reads the big-endian 32-bit timeout value from r.
  // It returns 0 when fewer than 4 bytes remain.
  func parseTimeOut(r *bytes.Reader) (ttimeout uint32) {
  	binary.Read(r, binary.BigEndian, &ttimeout)
  	return ttimeout
  }
  // parseConnectionMark reads the big-endian 32-bit connection mark from r.
  // It returns 0 when fewer than 4 bytes remain.
  func parseConnectionMark(r *bytes.Reader) (mark uint32) {
  	binary.Read(r, binary.BigEndian, &mark)
  	return mark
  }
  538. func parseConnectionLabels(r *bytes.Reader) (label []byte) {
  539. label = make([]byte, 16) // netfilter defines 128 bit labels value
  540. binary.Read(r, nl.NativeEndian(), &label)
  541. return
  542. }
  543. func parseConnectionZone(r *bytes.Reader) (zone uint16) {
  544. parseBERaw16(r, &zone)
  545. r.Seek(2, seekCurrent)
  546. return
  547. }
  548. func parseRawData(data []byte) *ConntrackFlow {
  549. s := &ConntrackFlow{}
  550. // First there is the Nfgenmsg header
  551. // consume only the family field
  552. reader := bytes.NewReader(data)
  553. binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
  554. // skip rest of the Netfilter header
  555. reader.Seek(3, seekCurrent)
  556. // The message structure is the following:
  557. // <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
  558. // <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
  559. // flow information of the forward flow
  560. // <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
  561. // <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
  562. // flow information of the reverse flow
  563. for reader.Len() > 0 {
  564. if nested, t, l := parseNfAttrTL(reader); nested {
  565. switch t {
  566. case nl.CTA_TUPLE_ORIG:
  567. if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
  568. parseIpTuple(reader, &s.Forward)
  569. }
  570. case nl.CTA_TUPLE_REPLY:
  571. if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
  572. parseIpTuple(reader, &s.Reverse)
  573. } else {
  574. // Header not recognized skip it
  575. skipNfAttrValue(reader, l)
  576. }
  577. case nl.CTA_COUNTERS_ORIG:
  578. s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
  579. case nl.CTA_COUNTERS_REPLY:
  580. s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
  581. case nl.CTA_TIMESTAMP:
  582. s.TimeStart, s.TimeStop = parseTimeStamp(reader, l)
  583. case nl.CTA_PROTOINFO:
  584. s.ProtoInfo = parseProtoInfo(reader, l)
  585. default:
  586. skipNfAttrValue(reader, l)
  587. }
  588. } else {
  589. switch t {
  590. case nl.CTA_MARK:
  591. s.Mark = parseConnectionMark(reader)
  592. case nl.CTA_LABELS:
  593. s.Labels = parseConnectionLabels(reader)
  594. case nl.CTA_TIMEOUT:
  595. s.TimeOut = parseTimeOut(reader)
  596. case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE:
  597. skipNfAttrValue(reader, l)
  598. case nl.CTA_ZONE:
  599. s.Zone = parseConnectionZone(reader)
  600. default:
  601. skipNfAttrValue(reader, l)
  602. }
  603. }
  604. }
  605. return s
  606. }
  607. // Conntrack parameters and options:
  608. // -n, --src-nat ip source NAT ip
  609. // -g, --dst-nat ip destination NAT ip
  610. // -j, --any-nat ip source or destination NAT ip
  611. // -m, --mark mark Set mark
  612. // -c, --secmark secmark Set selinux secmark
  613. // -e, --event-mask eventmask Event mask, eg. NEW,DESTROY
  614. // -z, --zero Zero counters while listing
  615. // -o, --output type[,...] Output format, eg. xml
  616. // -l, --label label[,...] conntrack labels
  617. // Common parameters and options:
  618. // -s, --src, --orig-src ip Source address from original direction
  619. // -d, --dst, --orig-dst ip Destination address from original direction
  620. // -r, --reply-src ip Source address from reply direction
  621. // -q, --reply-dst ip Destination address from reply direction
  622. // -p, --protonum proto Layer 4 Protocol, eg. 'tcp'
  623. // -f, --family proto Layer 3 Protocol, eg. 'ipv6'
  624. // -t, --timeout timeout Set timeout
  625. // -u, --status status Set status, eg. ASSURED
  626. // -w, --zone value Set conntrack zone
  627. // --orig-zone value Set zone for original direction
  628. // --reply-zone value Set zone for reply direction
  629. // -b, --buffer-size Netlink socket buffer size
  630. // --mask-src ip Source mask address
  631. // --mask-dst ip Destination mask address
  632. // Layer 4 Protocol common parameters and options:
  633. // TCP, UDP, SCTP, UDPLite and DCCP
  634. // --sport, --orig-port-src port Source port in original direction
  635. // --dport, --orig-port-dst port Destination port in original direction
  // ConntrackFilterType identifies which attribute of a flow a filter entry
  // applies to.
  type ConntrackFilterType uint8

  // Filter types.
  const (
  	ConntrackOrigSrcIP     = iota // -orig-src ip Source address from original direction
  	ConntrackOrigDstIP            // -orig-dst ip Destination address from original direction
  	ConntrackReplySrcIP           // --reply-src ip Reply Source IP
  	ConntrackReplyDstIP           // --reply-dst ip Reply Destination IP
  	ConntrackReplyAnyIP           // Match source or destination reply IP
  	ConntrackOrigSrcPort          // --orig-port-src port Source port in original direction
  	ConntrackOrigDstPort          // --orig-port-dst port Destination port in original direction
  	ConntrackMatchLabels          // --label label1,label2 Labels used in entry
  	ConntrackUnmatchLabels        // --label label1,label2 Labels not used in entry
  )

  // Legacy aliases of the reply filter types.
  const (
  	// ConntrackNatSrcIP is an alias of ConntrackReplySrcIP.
  	//
  	// Deprecated: use ConntrackReplySrcIP instead.
  	ConntrackNatSrcIP = ConntrackReplySrcIP

  	// ConntrackNatDstIP is an alias of ConntrackReplyDstIP.
  	//
  	// Deprecated: use ConntrackReplyDstIP instead.
  	ConntrackNatDstIP = ConntrackReplyDstIP

  	// ConntrackNatAnyIP is an alias of ConntrackReplyAnyIP.
  	//
  	// Deprecated: use ConntrackReplyAnyIP instead.
  	ConntrackNatAnyIP = ConntrackReplyAnyIP
  )
  652. type CustomConntrackFilter interface {
  653. // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches
  654. // the filter or false otherwise
  655. MatchConntrackFlow(flow *ConntrackFlow) bool
  656. }
  657. type ConntrackFilter struct {
  658. ipNetFilter map[ConntrackFilterType]*net.IPNet
  659. portFilter map[ConntrackFilterType]uint16
  660. protoFilter uint8
  661. labelFilter map[ConntrackFilterType][][]byte
  662. zoneFilter *uint16
  663. }
  664. // AddIPNet adds a IP subnet to the conntrack filter
  665. func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error {
  666. if ipNet == nil {
  667. return fmt.Errorf("Filter attribute empty")
  668. }
  669. if f.ipNetFilter == nil {
  670. f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet)
  671. }
  672. if _, ok := f.ipNetFilter[tp]; ok {
  673. return errors.New("Filter attribute already present")
  674. }
  675. f.ipNetFilter[tp] = ipNet
  676. return nil
  677. }
  678. // AddIP adds an IP to the conntrack filter
  679. func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
  680. if ip == nil {
  681. return fmt.Errorf("Filter attribute empty")
  682. }
  683. return f.AddIPNet(tp, NewIPNet(ip))
  684. }
  685. // AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it
  686. func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error {
  687. switch f.protoFilter {
  688. // TCP, UDP, DCCP, SCTP, UDPLite
  689. case 6, 17, 33, 132, 136:
  690. default:
  691. return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter)
  692. }
  693. if f.portFilter == nil {
  694. f.portFilter = make(map[ConntrackFilterType]uint16)
  695. }
  696. if _, ok := f.portFilter[tp]; ok {
  697. return errors.New("Filter attribute already present")
  698. }
  699. f.portFilter[tp] = port
  700. return nil
  701. }
  702. // AddProtocol adds the Layer 4 protocol to the conntrack filter
  703. func (f *ConntrackFilter) AddProtocol(proto uint8) error {
  704. if f.protoFilter != 0 {
  705. return errors.New("Filter attribute already present")
  706. }
  707. f.protoFilter = proto
  708. return nil
  709. }
  710. // AddLabels adds the provided list (zero or more) of labels to the conntrack filter
  711. // ConntrackFilterType here can be either:
  712. // 1. ConntrackMatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0)
  713. // against the list of provided labels. If `flow.Labels` contains ALL the provided labels
  714. // it is considered a match. This can be used when you want to match flows that contain
  715. // one or more labels.
  716. // 2. ConntrackUnmatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0)
  717. // against the list of provided labels. If `flow.Labels` does NOT contain ALL the provided labels
  718. // it is considered a match. This can be used when you want to match flows that don't contain
  719. // one or more labels.
  720. func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error {
  721. if len(labels) == 0 {
  722. return errors.New("Invalid length for provided labels")
  723. }
  724. if f.labelFilter == nil {
  725. f.labelFilter = make(map[ConntrackFilterType][][]byte)
  726. }
  727. if _, ok := f.labelFilter[tp]; ok {
  728. return errors.New("Filter attribute already present")
  729. }
  730. f.labelFilter[tp] = labels
  731. return nil
  732. }
  733. // AddZone adds a zone to the conntrack filter
  734. func (f *ConntrackFilter) AddZone(zone uint16) error {
  735. if f.zoneFilter != nil {
  736. return errors.New("Filter attribute already present")
  737. }
  738. f.zoneFilter = &zone
  739. return nil
  740. }
  741. // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
  742. // false otherwise
  743. func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
  744. if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil {
  745. // empty filter always not match
  746. return false
  747. }
  748. // -p, --protonum proto Layer 4 Protocol, eg. 'tcp'
  749. if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter {
  750. // different Layer 4 protocol always not match
  751. return false
  752. }
  753. // Conntrack zone filter
  754. if f.zoneFilter != nil && *f.zoneFilter != flow.Zone {
  755. return false
  756. }
  757. match := true
  758. // IP conntrack filter
  759. if len(f.ipNetFilter) > 0 {
  760. // -orig-src ip Source address from original direction
  761. if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found {
  762. match = match && elem.Contains(flow.Forward.SrcIP)
  763. }
  764. // -orig-dst ip Destination address from original direction
  765. if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found {
  766. match = match && elem.Contains(flow.Forward.DstIP)
  767. }
  768. // -src-nat ip Source NAT ip
  769. if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found {
  770. match = match && elem.Contains(flow.Reverse.SrcIP)
  771. }
  772. // -dst-nat ip Destination NAT ip
  773. if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found {
  774. match = match && elem.Contains(flow.Reverse.DstIP)
  775. }
  776. // Match source or destination reply IP
  777. if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found {
  778. match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP))
  779. }
  780. }
  781. // Layer 4 Port filter
  782. if len(f.portFilter) > 0 {
  783. // -orig-port-src port Source port from original direction
  784. if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found {
  785. match = match && elem == flow.Forward.SrcPort
  786. }
  787. // -orig-port-dst port Destination port from original direction
  788. if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found {
  789. match = match && elem == flow.Forward.DstPort
  790. }
  791. }
  792. // Label filter
  793. if len(f.labelFilter) > 0 {
  794. if len(flow.Labels) > 0 {
  795. // --label label1,label2 in conn entry;
  796. // every label passed should be contained in flow.Labels for a match to be true
  797. if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found {
  798. for _, label := range elem {
  799. match = match && (bytes.Contains(flow.Labels, label))
  800. }
  801. }
  802. // --label label1,label2 in conn entry;
  803. // every label passed should be not contained in flow.Labels for a match to be true
  804. if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found {
  805. for _, label := range elem {
  806. match = match && !(bytes.Contains(flow.Labels, label))
  807. }
  808. }
  809. } else {
  810. // flow doesn't contain labels, so it doesn't contain or notContain any provided matches
  811. match = false
  812. }
  813. }
  814. return match
  815. }
// Compile-time check that *ConntrackFilter implements CustomConntrackFilter.
var _ CustomConntrackFilter = (*ConntrackFilter)(nil)