1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
20 * Copyright (C) 2014 Cloudius Systems, Ltd.
24 * Ceph - scalable distributed file system
26 * Copyright (C) 2015 XSky <haomai@xsky.com>
28 * Author: Haomai Wang <haomaiwang@gmail.com>
30 * This is free software; you can redistribute it and/or
31 * modify it under the terms of the GNU Lesser General Public
32 * License version 2.1, as published by the Free Software
33 * Foundation. See file COPYING.
37 #ifndef CEPH_MSG_IP_H_
38 #define CEPH_MSG_IP_H_
40 #include <arpa/inet.h>
41 #include <unordered_map>
48 #include "msg/async/Event.h"
49 #include "common/Throttle.h"
51 #include "array_map.h"
53 #include "IPChecksum.h"
57 #include "PacketUtil.h"
61 template <ip_protocol_num ProtoNum>
64 template <typename InetTraits>
68 using address_type = ipv4_address;
69 using inet_type = ipv4_l4<ip_protocol_num::tcp>;
73 ethernet_address e_dst;
74 ip_protocol_num proto_num;
76 using packet_provider_type = std::function<Tub<l4packet> ()>;
// Feeds the TCP pseudo-header fields into the running checksum `csum`.
// The values are summed in this order: source address, destination
// address, a zero byte, the TCP protocol number, and `len`.
77 static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
78 csum.sum_many(src.ip, dst.ip, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
80 static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
83 template <ip_protocol_num ProtoNum>
88 ipv4_l4(ipv4& inet) : _inet(inet) {}
89 void register_packet_provider(ipv4_traits::packet_provider_type func);
90 void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
// Virtual destructor so that implementations can be destroyed through
// an ip_protocol pointer.
95 virtual ~ip_protocol() {}
// Pure virtual: takes a packet together with its `from` and `to` IPv4
// addresses. Every implementation must override it.
96 virtual void received(Packet p, ipv4_address from, ipv4_address to) = 0;
// Default implementation: ignores all three arguments (out_hash_data and
// p are left untouched) and returns true.
97 virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) { return true; }
100 template <typename InetTraits>
102 using ipaddr = typename InetTraits::address_type;
103 using inet_type = typename InetTraits::inet_type;
109 uint16_t foreign_port;
// Two connection ids are equal only when all four fields match:
// local address, foreign address, local port and foreign port.
111 bool operator==(const l4connid& x) const {
112 return local_ip == x.local_ip
113 && foreign_ip == x.foreign_ip
114 && local_port == x.local_port
115 && foreign_port == x.foreign_port;
// Computes a hash of this connection id under `rss_key`.
// The hash input is built from, in this order: foreign address, local
// address, foreign port, local port. Each value is passed through hton()
// before being appended. The result is whatever toeplitz_hash() returns
// for that input and `rss_key`.
// NOTE(review): not declared const although the visible body only reads
// members -- confirm whether const callers need it.
118 uint32_t hash(const rss_key_type& rss_key) {
119 forward_hash hash_data;
120 hash_data.push_back(hton(foreign_ip.ip));
121 hash_data.push_back(hton(local_ip.ip));
122 hash_data.push_back(hton(foreign_port));
123 hash_data.push_back(hton(local_port));
124 return toeplitz_hash(rss_key, hash_data);
128 class ipv4_tcp final : public ip_protocol {
129 ipv4_l4<ip_protocol_num::tcp> _inet_l4;
130 std::unique_ptr<tcp<ipv4_traits>> _tcp;
132 ipv4_tcp(ipv4& inet, EventCenter *c);
134 virtual void received(Packet p, ipv4_address from, ipv4_address to);
135 virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) override;
140 enum class msg_type : uint8_t {
148 } __attribute__((packed));
153 using ipaddr = ipv4_address;
154 using inet_type = ipv4_l4<ip_protocol_num::icmp>;
// Stores the context and the L4 layer reference, constructs the
// _queue_space throttle, and registers a packet provider with _inet.
// NOTE(review): 212992 is passed as the throttle's third argument --
// presumably the byte budget for queued packets; confirm against Throttle.
155 explicit icmp(CephContext *c, inet_type& inet)
156 : cct(c), _inet(inet), _queue_space(c, "DPDK::icmp::_queue_space", 212992) {
// The provider captures `this`, so it must not be invoked after this
// object is destroyed.
157 _inet.register_packet_provider([this] {
158 Tub<ipv4_traits::l4packet> l4p;
// Hand out the oldest queued packet, if any, and give its length back
// to _queue_space via put().
159 if (!_packetq.empty()) {
160 l4p = std::move(_packetq.front());
161 _packetq.pop_front();
162 _queue_space.put(l4p->p.len());
167 void received(Packet p, ipaddr from, ipaddr to);
171 // ipv4_l4<ip_protocol_num::icmp>
173 circular_buffer<ipv4_traits::l4packet> _packetq;
174 Throttle _queue_space;
177 class ipv4_icmp final : public ip_protocol {
179 ipv4_l4<ip_protocol_num::icmp> _inet_l4;
// Builds the ICMP L4 layer on top of `inet` and then the icmp handler on
// top of that layer.
// NOTE(review): _icmp is initialised with a reference to _inet_l4, so
// _inet_l4 presumably has to be declared before _icmp -- confirm the
// member declaration order.
182 ipv4_icmp(CephContext *c, ipv4& inet) : cct(c), _inet_l4(inet), _icmp(c, _inet_l4) {}
// Hands the packet (moved) and both addresses straight to _icmp.received().
183 virtual void received(Packet p, ipv4_address from, ipv4_address to) {
184 _icmp.received(std::move(p), from, to);
191 struct ip_packet_filter {
192 virtual ~ip_packet_filter() {};
193 virtual void handle(Packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
196 struct ipv4_frag_id {
200 uint16_t identification;
// Two fragment ids are equal only when source address, destination
// address, identification and protocol all match.
202 bool operator==(const ipv4_frag_id& x) const {
203 return src_ip == x.src_ip &&
204 dst_ip == x.dst_ip &&
205 identification == x.identification &&
206 protocol == x.protocol;
// Hash functor for ipv4_frag_id.
// It privately inherits the std::hash specialisations for the field
// types and combines the four per-field hashes with XOR.
210 struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
211 private std::hash<uint16_t>, private std::hash<uint8_t> {
212 size_t operator()(const ipv4_frag_id& id) const noexcept {
// Aliases select which inherited operator() is called for each field.
213 using h1 = std::hash<ipv4_address>;
214 using h2 = std::hash<uint16_t>;
215 using h3 = std::hash<uint8_t>;
// XOR is commutative and both addresses use the same hasher, so swapping
// src_ip and dst_ip yields the same hash value.
216 return h1::operator()(id.src_ip) ^
217 h1::operator()(id.dst_ip) ^
218 h2::operator()(id.identification) ^
219 h3::operator()(id.protocol);
224 using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>;
230 using address_type = ipv4_address;
231 using proto_type = uint16_t;
// Returns the address constructed from 0xffffffff (all bits set).
232 static address_type broadcast_address() { return ipv4_address(0xffffffff); }
// Returns eth_protocol_num::ipv4 converted to proto_type.
233 static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
239 std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
240 Tub<uint64_t> frag_timefd;
241 EventCallbackRef frag_handler;
244 ipv4_address _host_address;
245 ipv4_address _gw_address;
246 ipv4_address _netmask;
248 subscription<Packet, ethernet_address> _rx_packets;
251 array_map<ip_protocol*, 256> _l4;
252 ip_packet_filter *_packet_filter;
255 ipv4_packet_merger data;
257 uint32_t mem_size = 0;
258 // a fragment with MF == 0 indicates it is the last fragment
259 bool last_frag_received = false;
261 Packet get_assembled_packet(ethernet_address from, ethernet_address to);
262 int32_t merge(ip_hdr &h, uint16_t offset, Packet p);
265 std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
266 std::list<ipv4_frag_id> _frags_age;
267 static utime_t _frag_timeout;
268 static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
269 static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
270 uint32_t _frag_mem = 0;
271 circular_buffer<l3_protocol::l3packet> _packetq;
272 unsigned _pkt_provider_idx = 0;
273 PerfCounters *perf_logger;
276 int handle_received_packet(Packet p, ethernet_address from);
277 bool forward(forward_hash& out_hash_data, Packet& p, size_t off);
278 Tub<l3_protocol::l3packet> get_packet();
// Returns true when `a` and _host_address agree on every bit that is set
// in _netmask. The XOR leaves only the differing bits, and the AND keeps
// those covered by the mask.
279 bool in_my_netmask(ipv4_address a) const {
280 return !((a.ip ^ _host_address.ip) & _netmask.ip);
282 void frag_limit_mem();
// Removes the entry for `frag_id` from _frags and subtracts
// `dropped_size` from the _frag_mem counter.
// NOTE(review): _frag_mem is unsigned, so a dropped_size larger than the
// current counter would wrap around -- confirm callers guarantee it
// cannot.
283 void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size) {
284 _frags.erase(frag_id);
285 _frag_mem -= dropped_size;
// Creates a time event on `center` with frag_handler as its callback and
// stores the returned value in frag_timefd.
// NOTE(review): tp is an absolute time (now + _frag_timeout), and
// to_nsec() / 1000 presumably converts it to microseconds. Confirm
// whether create_time_event() expects an absolute deadline or a delay
// relative to the current time; if it expects a delay, the timer would
// fire far later than _frag_timeout.
287 void frag_arm(utime_t now) {
288 auto tp = now + _frag_timeout;
289 frag_timefd.construct(center->create_time_event(tp.to_nsec() / 1000, frag_handler));
292 auto now = ceph_clock_now();
293 frag_timefd.construct(center->create_time_event(now.to_nsec() / 1000, frag_handler));
300 explicit ipv4(CephContext *c, EventCenter *cen, interface* netif);
304 void set_host_address(ipv4_address ip) {
306 _arp.set_self_addr(ip);
308 ipv4_address host_address() {
309 return _host_address;
311 void set_gw_address(ipv4_address ip) {
314 ipv4_address gw_address() const {
317 void set_netmask_address(ipv4_address ip) {
320 ipv4_address netmask_address() const {
323 interface *netif() const {
326 // TODO or something. Should perhaps truly be a list
327 // of filters. With ordering. And blackjack. Etc.
328 // But for now, a simple single raw pointer suffices
329 void set_packet_filter(ip_packet_filter *f) {
332 ip_packet_filter * packet_filter() const {
333 return _packet_filter;
335 void send(ipv4_address to, ip_protocol_num proto_num, Packet p, ethernet_address e_dst);
336 tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
337 void register_l4(proto_type id, ip_protocol* handler);
338 const hw_features& get_hw_features() const;
339 static bool needs_frag(Packet& p, ip_protocol_num proto_num, hw_features hw_features) {
340 if (p.len() + ipv4_hdr_len_min <= hw_features.mtu)
343 if ((proto_num == ip_protocol_num::tcp && hw_features.tx_tso))
348 void learn(ethernet_address l2, ipv4_address l3) {
// Appends `func` to _pkt_providers, moving it into the vector.
351 void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
352 _pkt_providers.push_back(std::move(func));
354 void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
357 template <ip_protocol_num ProtoNum>
358 inline void ipv4_l4<ProtoNum>::register_packet_provider(
359 ipv4_traits::packet_provider_type func) {
360 _inet.register_packet_provider([func] {
363 (*l4p).proto_num = ProtoNum;
// Thin forwarder: passes `to`, the packet and the callback on to
// _inet.wait_l2_dst_address(). `p` and `cb` are moved, not copied.
369 template <ip_protocol_num ProtoNum>
370 inline void ipv4_l4<ProtoNum>::wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb) {
371 _inet.wait_l2_dst_address(to, std::move(p), std::move(cb));
// Constants for decoding the 16-bit `frag` field.
// mf, df and reserved are bit positions (used as shift counts by mf()
// and df()); offset_shift is the left-shift amount applied by offset().
382 enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
391 hdr.len = ::hton(len);
393 hdr.frag = ::hton(frag);
394 hdr.csum = ::hton(csum);
395 hdr.src_ip.ip = ::hton(src_ip.ip);
396 hdr.dst_ip.ip = ::hton(dst_ip.ip);
401 hdr.len = ::ntoh(len);
403 hdr.frag = ::ntoh(frag);
404 hdr.csum = ::ntoh(csum);
405 hdr.src_ip = src_ip.ntoh();
406 hdr.dst_ip = dst_ip.ntoh();
// NOTE(review): these accessors presumably expect `frag` to be in host
// byte order already -- confirm against the callers.
// True when bit 13 (frag_bits::mf) of `frag` is set.
410 bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
// True when bit 14 (frag_bits::df) of `frag` is set.
411 bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
// Returns `frag` shifted left by 3. Converting back to uint16_t drops
// the three top bits, i.e. the positions named reserved, df and mf.
412 uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
413 } __attribute__((packed));
// Hash functor for l4connid.
// It privately inherits std::hash for the address type and for uint16_t,
// and XORs the hashes of the two addresses and the two ports.
415 template <typename InetTraits>
416 struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
417 size_t operator()(const l4connid<InetTraits>& id) const noexcept {
// Aliases select which inherited operator() is called for each field.
418 using h1 = std::hash<ipaddr>;
419 using h2 = std::hash<uint16_t>;
// XOR is commutative, so swapping the local and foreign endpoints yields
// the same hash value.
420 return h1::operator()(id.local_ip)
421 ^ h1::operator()(id.foreign_ip)
422 ^ h2::operator()(id.local_port)
423 ^ h2::operator()(id.foreign_port);
427 #endif /* CEPH_MSG_IP_H_ */