libnet/
route.rs

1// Copyright 2024 Oxide Computer Company
2
3//!
4//! This file contains machinery for interacting with the illumos router socket.
5//! This socket has the address family AF_ROUTE. Packets exchanged over an
//! AF_ROUTE socket have a special header described by [`sys::rt_msghdr`].
7//!
8//! The structure of an AF_ROUTE message is the following.
9//!
10//! ```text
11//! rt_msghdr: 74 bytes
12//! route_addr_element_1: N bytes
13//! route_addr_element_2: N bytes
14//! ...
15//! route_addr_element_N: N bytes
16//! ```
17//!
18//! In the rt_msghdr there is a field `addrs` which is a bitmask that identifies
19//! what address elements are present in the message. Members of this bitfield
20//! are constants with the name format `RTA_<address name>`. These include
21//!
22//! ```text
23//! RTA_DST =       1
24//! RTA_GATEWAY =   (1<<1)
25//! RTA_NETMASK =   (1<<2)
26//! RTA_GENMASK =   (1<<3)
27//! RTA_IFP =       (1<<4)
28//! RTA_IFA =       (1<<5)
29//! RTA_AUTHOR =    (1<<6)
30//! RTA_BRD =       (1<<7)
31//! RTA_SRC =       (1<<8)
32//! RTA_DELAY =     (1<<9)
33//! ```
34//!
//! Address elements always appear in the order they are defined in the bitmask.
//! For example, a message containing RTA_DST, RTA_GENMASK, and RTA_AUTHOR will
//! always be structured as
38//!
39//! ```text
//! rt_msghdr
//! RTA_DST
//! RTA_GENMASK
//! RTA_AUTHOR
44//! ```
45//!
46
47use crate::{
48    sys::{
49        self, addr_family_t, rt_msghdr, RTA_AUTHOR, RTA_BRD, RTA_DELAY,
50        RTA_DST, RTA_GATEWAY, RTA_GENMASK, RTA_IFA, RTA_IFP, RTA_NETMASK,
51        RTA_SRC,
52    },
53    IpNet,
54};
55use std::mem::size_of;
56use std::slice::from_raw_parts;
57use std::{
58    io::{Read, Write},
59    time::Duration,
60};
61
62use libc::{sockaddr, sockaddr_in, sockaddr_in6, AF_INET, AF_INET6, AF_ROUTE};
63
64use socket2::{Domain, Socket, Type};
65use std::net::{
66    IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6,
67};
68use thiserror::Error;
69
70// These items are missing from non-illumos platforms (such as Linux).
71// We define them by hand below so that cargo-check/cargo-clippy are still
72// functional when not run on an illumos machine.
73#[cfg(target_os = "illumos")]
74use libc::{sockaddr_dl, AF_LINK};
75
/// Hand-written link-layer socket address used when the crate is built for a
/// target other than illumos, where `libc::sockaddr_dl` is not imported.
///
/// `#[repr(C)]` keeps the field order and layout fixed because values of this
/// type are copied to and from raw routing-socket buffers elsewhere in this
/// file.
#[cfg(not(target_os = "illumos"))]
#[repr(C)]
struct sockaddr_dl {
    /// Address family; this file sets it to `AF_LINK` when serializing.
    sdl_family: u16,
    /// Interface index.
    sdl_index: u16,
    // NOTE(review): `sdl_type`, `sdl_alen` and `sdl_slen` are never read or
    // written by name in this file; their meaning should be confirmed against
    // the illumos headers.
    sdl_type: u8,
    /// Number of bytes at the start of `sdl_data` that hold the interface name.
    sdl_nlen: u8,
    sdl_alen: u8,
    sdl_slen: u8,
    /// Raw data area; the interface name occupies the first `sdl_nlen` bytes.
    sdl_data: [i8; 244],
}
87
/// Stand-in for `libc::AF_LINK` when the crate is built for a target other
/// than illumos.
// NOTE(review): 25 presumably mirrors the illumos definition — confirm against
// the illumos socket headers.
#[cfg(not(target_os = "illumos"))]
const AF_LINK: std::os::raw::c_int = 25;
90
/// Errors returned by the routing-socket functions in this file.
#[derive(Error, Debug)]
pub enum Error {
    /// The named functionality is not implemented.
    #[error("{0} not implemented")]
    NotImplemented(String),
    /// A failure described by a free-form message, e.g. a short write on the
    /// routing socket or a required element missing from a reply.
    #[error("system error {0}")]
    SystemError(String),
    /// A caller-supplied argument was rejected, e.g. an interface name that
    /// is too long.
    #[error("bad argument: {0}")]
    BadArgument(String),
    /// The object already exists.
    #[error("exists")]
    Exists,
    /// The route does not exist.
    #[error("route does not exist")]
    DoesNotExist,
    /// Insufficient resources to complete the operation.
    #[error("insufficient resources")]
    InsufficientResources,
    /// Insufficient permissions to complete the operation.
    #[error("insufficient permissions")]
    InsufficientPermissions,
    /// An I/O error from the underlying socket; converted automatically from
    /// [`std::io::Error`] by `?`.
    #[error("io error {0}")]
    IoError(#[from] std::io::Error),
}
110
/// A routing table entry as reported by the routing socket.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Route {
    /// Destination address of the route.
    pub dest: IpAddr,
    /// Prefix length of the destination, i.e. the number of leading one bits
    /// in the route's netmask (despite the field name, not the mask itself).
    pub mask: u32,
    /// Gateway address of the route.
    pub gw: IpAddr,
    /// Delay value reported with the route, or 0 when none was reported.
    pub delay: u32,
    /// Name of the interface associated with the route, when known.
    pub ifx: Option<String>,
}
119
/// The decoded address elements of a single routing message.
///
/// Each field corresponds to one `RTA_*` bit of the message header's `addrs`
/// mask and is `None` when that element was absent or could not be decoded.
/// Fields are declared in the same order the elements are parsed.
#[derive(Default, Debug)]
pub struct RtMsg {
    /// `RTA_DST` element.
    pub dst: Option<SocketAddr>,
    /// `RTA_GATEWAY` element.
    pub gw: Option<SocketAddr>,
    /// `RTA_NETMASK` element.
    pub mask: Option<SocketAddr>,
    /// `RTA_GENMASK` element.
    pub genmask: Option<SocketAddr>,
    /// `RTA_IFP` element, decoded as a link-layer address.
    pub ifp: Option<SocketDlAddr>,
    /// `RTA_IFA` element.
    pub ifa: Option<SocketAddr>,
    /// `RTA_AUTHOR` element.
    pub author: Option<SocketAddr>,
    /// `RTA_BRD` element.
    pub brd: Option<SocketAddr>,
    /// `RTA_SRC` element.
    pub src: Option<SocketAddr>,
    /// `RTA_DELAY` element, decoded as a native-endian `u32`.
    pub delay: Option<u32>,
}
133
/// The parts of a link-layer socket address (`sockaddr_dl`) that this crate
/// exposes: the interface index and the interface name.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SocketDlAddr {
    /// Interface index (`sdl_index`).
    pub index: u16,
    /// Interface name, decoded lossily as UTF-8 from the address data.
    pub name: String,
}
139
140unsafe fn read_msg(buf: &[u8]) -> (RtMsg, &[u8]) {
141    let hdr = buf.as_ptr() as *const rt_msghdr;
142    let buf = &buf[std::mem::size_of::<rt_msghdr>()..];
143
144    let (dst, buf) = get_addr_element(hdr, buf, RTA_DST as i32);
145    let (gw, buf) = get_addr_element(hdr, buf, RTA_GATEWAY as i32);
146    let (mask, buf) = get_addr_element(hdr, buf, RTA_NETMASK as i32);
147    let (genmask, buf) = get_addr_element(hdr, buf, RTA_GENMASK as i32);
148    let (ifp, buf) = get_dladdr_element(hdr, buf, RTA_IFP as i32);
149    let (ifa, buf) = get_addr_element(hdr, buf, RTA_IFA as i32);
150    let (author, buf) = get_addr_element(hdr, buf, RTA_AUTHOR as i32);
151    let (brd, buf) = get_addr_element(hdr, buf, RTA_BRD as i32);
152    let (src, buf) = get_addr_element(hdr, buf, RTA_SRC as i32);
153    let (delay, buf) = get_u32_element(hdr, buf, RTA_DELAY as i32);
154
155    (
156        RtMsg {
157            dst,
158            gw,
159            mask,
160            genmask,
161            ifp,
162            ifa,
163            author,
164            brd,
165            src,
166            delay,
167        },
168        buf,
169    )
170}
171
172unsafe fn get_u32_element(
173    hdr: *const rt_msghdr,
174    buf: &[u8],
175    rta: i32,
176) -> (Option<u32>, &[u8]) {
177    if ((*hdr).addrs & rta) == 0 {
178        return (None, buf);
179    }
180    let value = *(buf.as_ptr() as *const u32);
181    (Some(value), &buf[4..])
182}
183
184unsafe fn get_dladdr_element(
185    hdr: *const rt_msghdr,
186    buf: &[u8],
187    rta: i32,
188) -> (Option<SocketDlAddr>, &[u8]) {
189    if ((*hdr).addrs & rta) == 0 {
190        return (None, buf);
191    }
192    let off = std::mem::size_of::<sockaddr_dl>();
193    if buf.len() < off {
194        return (None, buf);
195    }
196
197    let sa = &*(buf.as_ptr() as *mut sockaddr_dl);
198    let index = sa.sdl_index;
199    let mut name = String::new();
200    let len = sa.sdl_nlen as usize;
201    if len > 0 {
202        let data: &[u8] =
203            std::slice::from_raw_parts(sa.sdl_data.as_ptr() as *const u8, len);
204        name = String::from_utf8_lossy(data).to_string();
205    }
206
207    (Some(SocketDlAddr { index, name }), &buf[off..])
208}
209
210unsafe fn get_addr_element(
211    hdr: *const rt_msghdr,
212    buf: &[u8],
213    rta: i32,
214) -> (Option<SocketAddr>, &[u8]) {
215    if ((*hdr).addrs & rta) == 0 {
216        return (None, buf);
217    }
218
219    let dst = buf.as_ptr() as *mut sockaddr;
220    match (*dst).sa_family as i32 {
221        libc::AF_INET => {
222            let dst = dst as *const sockaddr_in;
223            let off = std::mem::size_of::<sockaddr_in>();
224            (
225                Some(
226                    SocketAddrV4::new(
227                        Ipv4Addr::from((*dst).sin_addr.s_addr.to_be()),
228                        (*dst).sin_port,
229                    )
230                    .into(),
231                ),
232                &buf[off..],
233            )
234        }
235        libc::AF_INET6 => {
236            let dst = dst as *const sockaddr_in6;
237            let off = std::mem::size_of::<sockaddr_in6>();
238            if buf.len() < off {
239                return (None, buf);
240            }
241            (
242                Some(
243                    SocketAddrV6::new(
244                        Ipv6Addr::from((*dst).sin6_addr.s6_addr),
245                        (*dst).sin6_port,
246                        (*dst).sin6_flowinfo,
247                        (*dst).sin6_scope_id,
248                    )
249                    .into(),
250                ),
251                &buf[off..],
252            )
253        }
254        _ => (None, buf),
255    }
256}
257
258pub fn get_route(
259    destination: IpNet,
260    timeout: Option<Duration>,
261) -> Result<Route, Error> {
262    let mut sock = Socket::new(Domain::from(AF_ROUTE), Type::RAW, None)?;
263    sock.set_read_timeout(timeout)?;
264    let mut msglen = size_of::<rt_msghdr>();
265    let flags = match destination {
266        IpNet::V4(p) => {
267            if p.is_host_net() {
268                msglen += size_of::<sockaddr_in>();
269                sys::RTF_HOST as i32
270            } else {
271                msglen += size_of::<sockaddr_in>() * 2;
272                0i32
273            }
274        }
275        IpNet::V6(p) => {
276            if p.is_host_net() {
277                msglen += size_of::<sockaddr_in6>();
278                sys::RTF_HOST as i32
279            } else {
280                msglen += size_of::<sockaddr_in6>() * 2;
281                0i32
282            }
283        }
284    };
285
286    let mut req = rt_msghdr {
287        addrs: (RTA_DST | RTA_IFP) as i32,
288        typ: sys::RTM_GET as u8,
289        version: sys::RTM_VERSION as u8,
290        pid: std::process::id() as i32,
291        msglen: msglen as u16,
292        flags,
293        ..Default::default()
294    };
295    if flags == 0 {
296        req.addrs |= RTA_NETMASK as i32;
297    }
298
299    let mut buf: Vec<u8> = Vec::new();
300    buf.extend_from_slice(unsafe {
301        from_raw_parts(
302            (&req as *const rt_msghdr) as *const u8,
303            size_of::<rt_msghdr>(),
304        )
305    });
306    serialize_addr(&mut buf, destination.addr());
307    if flags == 0 {
308        serialize_addr(&mut buf, destination.mask_addr());
309    }
310
311    let n = sock.write(&buf)?;
312    if n < buf.len() {
313        return Err(Error::SystemError(format!(
314            "short write: {} < {}",
315            n,
316            buf.len()
317        )));
318    }
319
320    let mut buf: [u8; 10240] = [0; 10240];
321    let n = sock.read(&mut buf)?;
322    let buf = &buf[..n];
323
324    let (msg, _b) = unsafe { read_msg(buf) };
325
326    let dest = msg
327        .dst
328        .ok_or(Error::SystemError("missing destination".into()))?;
329    let mask = msg.mask.ok_or(Error::SystemError("missing mask".into()))?;
330    let gw = msg.gw.ok_or(Error::SystemError("missing gateway".into()))?;
331    let ifx = match msg.ifp {
332        Some(ifp) => Some(ifp.name),
333        None => None,
334    };
335
336    Ok(Route {
337        dest: dest.ip(),
338        mask: match mask {
339            SocketAddr::V4(s) => u32::from(*s.ip()).leading_ones(),
340            SocketAddr::V6(s) => u128::from(*s.ip()).leading_ones(),
341        },
342        gw: gw.ip(),
343        delay: 0,
344        ifx,
345    })
346}
347
348pub fn get_routes() -> Result<Vec<Route>, Error> {
349    let mut sock = Socket::new(Domain::from(AF_ROUTE), Type::RAW, None)?;
350
351    let req = rt_msghdr {
352        addrs: (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_DELAY | RTA_IFP)
353            as i32,
354        ..Default::default()
355    };
356    let req_data = unsafe {
357        std::slice::from_raw_parts(
358            (&req as *const rt_msghdr) as *const u8,
359            std::mem::size_of::<rt_msghdr>(),
360        )
361    };
362    let n = sock.write(req_data)?;
363    if n < std::mem::size_of::<rt_msghdr>() {
364        return Err(Error::SystemError(format!(
365            "get routes: short write: {} < {}",
366            n,
367            std::mem::size_of::<rt_msghdr>(),
368        )));
369    }
370
371    let mut buf: [u8; 10240] = [0; 10240];
372    let n = sock.read(&mut buf)?;
373    let mut buf = &buf[..n];
374
375    let mut result = Vec::new();
376    loop {
377        if buf.len() < std::mem::size_of::<rt_msghdr>() {
378            break;
379        }
380        let (msg, b) = unsafe { read_msg(buf) };
381        buf = b;
382        let dest = match msg.dst {
383            Some(d) => d.ip(),
384            None => continue,
385        };
386        let mask = match msg.mask {
387            Some(m) => match m {
388                SocketAddr::V4(s) => u32::from(*s.ip()).leading_ones(),
389                SocketAddr::V6(s) => u128::from(*s.ip()).leading_ones(),
390            },
391            None => continue,
392        };
393        let gw = match msg.gw {
394            Some(d) => d.ip(),
395            None => continue,
396        };
397        let delay = msg.delay.unwrap_or(0);
398        let ifx = match msg.ifp {
399            Some(ifp) => Some(ifp.name.clone()),
400            None => None,
401        };
402
403        let r = Route {
404            dest,
405            mask,
406            gw,
407            delay,
408            ifx,
409        };
410        result.push(r);
411    }
412
413    Ok(result)
414}
415
416pub fn add_route(
417    destination: IpNet,
418    gateway: IpAddr,
419    interface: Option<String>,
420) -> Result<(), Error> {
421    mod_route(destination, gateway, interface, sys::RTM_ADD as u8)
422}
423
424pub fn ensure_route_present(
425    destination: IpNet,
426    gateway: IpAddr,
427    interface: Option<String>,
428) -> Result<(), Error> {
429    match add_route(destination, gateway, interface) {
430        Err(Error::IoError(e)) => {
431            if e.kind() == std::io::ErrorKind::AlreadyExists {
432                Ok(())
433            } else {
434                Err(Error::IoError(e))
435            }
436        }
437        result => result,
438    }
439}
440
441pub fn delete_route(
442    destination: IpNet,
443    gateway: IpAddr,
444    interface: Option<String>,
445) -> Result<(), Error> {
446    mod_route(destination, gateway, interface, sys::RTM_DELETE as u8)
447}
448
449fn mod_route(
450    destination: IpNet,
451    gateway: IpAddr,
452    interface: Option<String>,
453    cmd: u8,
454) -> Result<(), Error> {
455    let mut sock = Socket::new(Domain::from(AF_ROUTE), Type::RAW, None)?;
456    let mut msglen = size_of::<rt_msghdr>();
457    match destination {
458        IpNet::V4(_) => {
459            msglen += size_of::<sockaddr_in>() * 2;
460        }
461        IpNet::V6(_) => {
462            msglen += size_of::<sockaddr_in6>() * 2;
463        }
464    };
465    match gateway {
466        IpAddr::V4(_) => {
467            msglen += size_of::<sockaddr_in>();
468        }
469        IpAddr::V6(_) => {
470            msglen += size_of::<sockaddr_in6>();
471        }
472    };
473
474    let flags = (sys::RTF_GATEWAY | sys::RTF_STATIC) as i32;
475    let mut addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK) as i32;
476    if interface.is_some() {
477        addrs |= sys::RTA_IFP as i32;
478        msglen += size_of::<sockaddr_dl>();
479    }
480
481    let req = rt_msghdr {
482        typ: cmd,
483        msglen: msglen as u16,
484        version: sys::RTM_VERSION as u8,
485        addrs,
486        pid: std::process::id() as i32,
487        flags,
488        ..Default::default()
489    };
490
491    let mut buf: Vec<u8> = Vec::new();
492    buf.extend_from_slice(unsafe {
493        from_raw_parts(
494            (&req as *const rt_msghdr) as *const u8,
495            size_of::<rt_msghdr>(),
496        )
497    });
498
499    serialize_addr(&mut buf, destination.addr());
500    serialize_addr(&mut buf, gateway);
501    serialize_addr(&mut buf, destination.mask_addr());
502
503    if let Some(ifp) = interface {
504        serialize_dladdr(&mut buf, &ifp, destination.addr())?;
505    }
506
507    let n = sock.write(&buf)?;
508    if n < buf.len() {
509        return Err(Error::SystemError(format!(
510            "short write: {} < {}",
511            n,
512            buf.len()
513        )));
514    }
515
516    Ok(())
517}
518
519fn serialize_addr(buf: &mut Vec<u8>, a: IpAddr) {
520    match a {
521        IpAddr::V4(a) => {
522            let sa = sockaddr_in {
523                #[cfg(target_os = "macos")]
524                sin_len: 0,
525                sin_family: AF_INET as addr_family_t,
526                sin_port: 0,
527                sin_addr: libc::in_addr {
528                    s_addr: u32::from(a).to_be(),
529                },
530                sin_zero: [0; 8],
531            };
532            buf.extend_from_slice(unsafe {
533                from_raw_parts(
534                    (&sa as *const sockaddr_in) as *const u8,
535                    size_of::<sockaddr_in>(),
536                )
537            });
538        }
539        IpAddr::V6(a) => {
540            let sa = unsafe {
541                sockaddr_in6 {
542                    sin6_family: AF_INET6 as addr_family_t,
543                    sin6_port: 0,
544                    sin6_flowinfo: 0,
545                    sin6_addr: libc::in6_addr {
546                        s6_addr: a.octets(),
547                    },
548                    sin6_scope_id: 0,
549                    ..std::mem::zeroed()
550                }
551            };
552            buf.extend_from_slice(unsafe {
553                from_raw_parts(
554                    (&sa as *const sockaddr_in6) as *const u8,
555                    size_of::<sockaddr_in6>(),
556                )
557            });
558        }
559    };
560}
561
562fn serialize_dladdr(
563    buf: &mut Vec<u8>,
564    ifname: &str,
565    ip: IpAddr,
566) -> Result<(), Error> {
567    let bs = ifname.as_bytes();
568    if bs.len() > 244 {
569        return Err(Error::BadArgument("ifname too long".into()));
570    }
571
572    let proto = match ip {
573        IpAddr::V4(_) => AF_INET,
574        IpAddr::V6(_) => AF_INET6,
575    } as u16;
576    let ifnum = crate::ioctl::get_ifnum(ifname, proto)
577        .map_err(|x| Error::SystemError(x.to_string()))?;
578    let mut sa = unsafe {
579        sockaddr_dl {
580            sdl_family: AF_LINK as u16,
581            sdl_index: ifnum as u16,
582            sdl_nlen: bs.len() as u8,
583            ..std::mem::zeroed()
584        }
585    };
586    for (i, b) in bs.iter().enumerate() {
587        sa.sdl_data[i] = *b as i8;
588    }
589    buf.extend_from_slice(unsafe {
590        from_raw_parts(
591            (&sa as *const sockaddr_dl) as *const u8,
592            size_of::<sockaddr_dl>(),
593        )
594    });
595    Ok(())
596}