API for the Patched Kernel

The patched kernel supports some new commands for the setsockopt() and getsockopt() system calls. They can be used to control connection classification/forwarding and socket-level traffic.

1.  API for Connection Classification and Forwarding

To use the extended commands, you should include tcp_fw_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.

1.1. Data Structure

    struct content_fw_branch_user {
        __u16 redir_port;     ---- outgoing port, host byte order
        __u16 target;         ---- reserved for further optimization
        __u16 ptn_len;        ---- strlen(pattern)
        char* pattern;        ---- string to be found in the packet
    };

    struct dice_fw_branch_user {
        __u16 redir_port;     ---- outgoing port, host byte order
        __u16 redir_chance;   ---- forwarding probability
    };

   
    struct address_fw_branch_user {
        __u16 redir_port;        ---- outgoing port, host byte order
        struct in_addr src;      ---- source IP address, network byte order
        struct in_addr src_mask; ---- subnet mask, network byte order
    };
   
    #define TCP_DICE_FW    1
    #define TCP_CONTENT_FW 2
    #define TCP_ADDR_FW    3
   
    struct tcp_fw_user {
        __u16 type;           ---- rule type, one of the above TCP_xxx_FW constants
        __u16 port;           ---- incoming port, host byte order
        __u16 def_port;       ---- default outgoing port, host byte order
        union {
            struct content_fw_branch_user cfb_u;
            struct dice_fw_branch_user    dfb_u;
            struct address_fw_branch_user afb_u;
        } u;
    };

1.2 Add a new rule

    Step 1. create an empty rule:

    struct tcp_fw_user fwu;
    fwu.type = TCP_xxx_FW;
    fwu.port = incoming port number;
    fwu.def_port = default outgoing port number;

    setsockopt(sk, IPPROTO_TCP, TCP_FW_ADD_RULE, (char*)&fwu, sizeof(fwu));

    Step 2. add conditions, take address based rule as an example:

    fwu.type = TCP_ADDR_FW;
    fwu.port = incoming port number;
    fwu.u.afb_u.redir_port = outgoing port number;
    inet_aton(source addr, &fwu.u.afb_u.src);
    inet_aton(subnet mask, &fwu.u.afb_u.src_mask);
   

    setsockopt(sk, IPPROTO_TCP, TCP_FW_ADD_BRANCH, (char*)&fwu, sizeof(fwu));

1.3  Manipulate conditions

    1.  remove a condition,  take address-based rule as an example:

    fwu.type = TCP_ADDR_FW;
    fwu.port = incoming port number;
    fwu.u.afb_u.redir_port = outgoing port number;
    inet_aton(source addr, &fwu.u.afb_u.src);
    inet_aton(subnet mask, &fwu.u.afb_u.src_mask);
   

    setsockopt(sk, IPPROTO_TCP, TCP_FW_DEL_BRANCH, (char*)&fwu, sizeof(fwu));

    2.  remove all the conditions of a rule:

    fwu.type = TCP_ADDR_FW;
    fwu.port = incoming port number;
   

    setsockopt(sk, IPPROTO_TCP, TCP_FW_FLUSH_BRANCH, (char*)&fwu, sizeof(fwu));

1.4  Manipulate rules

    1.  change the default outgoing port

    fwu.type = TCP_ADDR_FW;
    fwu.port = incoming port number;
    fwu.def_port = new default outgoing port number;

   
    setsockopt(sk, IPPROTO_TCP, TCP_FW_EDIT_RULE, (char*)&fwu, sizeof(fwu));

    2.  remove a rule:

    fwu.type = TCP_ADDR_FW;
    fwu.port = incoming port number;
   

    setsockopt(sk, IPPROTO_TCP, TCP_FW_DEL_RULE, (char*)&fwu, sizeof(fwu));

    3.  remove all the rules of one type

    fwu.type = TCP_ADDR_FW;
    

    setsockopt(sk, IPPROTO_TCP, TCP_FW_DEL_RULE, (char*)&fwu, sizeof(fwu));

2.  API for socket level traffic control

To use the extended commands, you should include socket_ex_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.

2.1  Data Structure

    struct quota {
        __u16 whole;
        __u16 frac;
    };

    This structure represents a fractional number approximately, since floating-point operations are not easy to perform in the kernel. The maximum value of frac is 1000.

    struct sock_ex_user {
        __u16 port;            ---- port number, host byte order  
        union {
            struct quota frac;  
            __u32 integer;
        } quota;               ---- quota for this port, meaning depends on the type of the group
        short nonblock;              
    };

    The nonblock field controls what happens when the process tries to accept/output more than allowed. A non-zero value means that an error will be returned to the process; otherwise, the process will be blocked. It should be non-zero for services started by inetd or xinetd, because the inetd or xinetd process is responsible for starting more than one type of service and must never be blocked. It should also be non-zero for applications that use one thread to service multiple connections.

    #define SKEX_ACCEPT_ABSOLUTE     1
    #define SKEX_ACCEPT_RATIO        2
    #define SKEX_OUTPUT_ABSOLUTE     3
    #define SKEX_OUTPUT_ABSOLUTE_FTP 4
    struct sock_group_user {
        __u16 gid;                   ---- group id
        __u16 type;                  ---- socket group type, one of the above constants
        __u16 sock_cnt;              ---- how many listening ports in this group
        struct sock_ex_user *socks;  ---- one sock_ex_user structure for one port in this group
    };

2.2   Create a new group

    1.  create an absolute rate based accept group. Only one port is allowed in each group.

    struct sock_group_user *sg;
    int sock;
    sg = (struct sock_group_user*)malloc(sizeof(*sg));
    sg->socks = (struct sock_ex_user*)malloc(sizeof(struct sock_ex_user));
    sg->type = SKEX_ACCEPT_ABSOLUTE;
    sg->sock_cnt = 1;

    sg->socks[0].port = port number;
    sg->socks[0].quota.frac.whole = (__u16)rate;
    sg->socks[0].quota.frac.frac = (__u16)FRAC_PART(rate);
    sg->socks[0].nonblock = 0 or 1;

    sock = socket(AF_INET, SOCK_STREAM, 0);
    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));

    2.  create a relative ratio based accept group. In this example, 2 ports are in the group and their accept ratio is a : b.

    struct sock_group_user *sg;
    sg = (struct sock_group_user*)malloc(sizeof(*sg));
    sg->socks = (struct sock_ex_user*)malloc(sizeof(struct sock_ex_user) * 2);
    sg->type = SKEX_ACCEPT_RATIO;
    sg->sock_cnt = 2;

    sg->socks[0].port = port number1;
    sg->socks[0].quota.frac.whole = (__u16)a;
    sg->socks[0].quota.frac.frac = (__u16)FRAC_PART(a);
    sg->socks[0].nonblock = 0 or 1;

    sg->socks[1].port = port number2;
    sg->socks[1].quota.frac.whole = (__u16)b;
    sg->socks[1].quota.frac.frac = (__u16)FRAC_PART(b);
    sg->socks[1].nonblock = 0 or 1;

    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));

    3.  create an absolute rate based output group / absolute rate based output group for FTP. Only one port is allowed in each group.

    struct sock_group_user *sg;
    sg = (struct sock_group_user*)malloc(sizeof(*sg));
    sg->socks = (struct sock_ex_user*)malloc(sizeof(struct sock_ex_user));
    sg->type = SKEX_OUTPUT_ABSOLUTE or SKEX_OUTPUT_ABSOLUTE_FTP;
    sg->sock_cnt = 1;

    sg->socks[0].port = port number;
    sg->socks[0].quota.integer = rate;
    sg->socks[0].nonblock = 0 or 1;

    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));

When setsockopt returns successfully, sg->gid is the group id, which is needed to manage this group.

2.3  Edit an existing group

    Editing an existing group is the same as creating a new one except that sg->gid should be set to the group id:

    sg->gid = group id;
    initialize other fields
    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_EDIT_GROUP, (char*)sg, sizeof(*sg));

2.4  Remove an existing group

    int gid = group id; /* must use int instead of __u16 */
    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_DEL_GROUP, (char*)&gid, sizeof(gid));

2.5  Remove all the groups

    int useless; /* must use int */
    setsockopt(sock, IPPROTO_TCP, TCP_SKEX_FLUSH_LIST, (char*)&useless, sizeof(useless));

3.  API for Network Status Report

The patched kernel can also report some network status data related to a listening port. To use the extended commands, you should include socket_ex_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.

3.1  Data Structure

    struct port_stat_user {
        __u64 bout, bin;    /* bytes sent out, received via this port */
        __u32 conn;         /* connections accepted via this port */
        __u32 accept_delay; /* total delay of the accepted connections, unit is 0.01s */
        __u16 ref_count;    /* internal information */
        __u16 conn_waiting; /* number of connections waiting to be accepted */
    };

    struct netmeter{
        __u16 port;        /* listen port number, host byte order */
        struct port_stat_user stat;
    };
 

The delay mentioned above is the time from the moment the 3-way handshake is complete until the moment the connection is accepted by the application.

3.2  Collect Network Status

    struct netmeter nm;
    int sock;

    sock = socket(AF_INET, SOCK_STREAM, 0);
    nm.port = port number;
    socklen_t len = sizeof(nm);
    getsockopt(sock, IPPROTO_TCP, TCP_SKEX_STAT, (char*)&nm, &len);