The patched kernel supports some new commands for the setsockopt() and getsockopt() system calls. They can be used to control connection classification/forwarding and socket-level traffic.
To use the extended commands, you should include tcp_fw_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.
1.1. Data Structure
struct
content_fw_branch_user {
__u16 redir_port;
---- outgoing port, host byte order
__u16 target;
---- reserved for further optimization
__u16 ptn_len;
---- strlen(pattern)
char*
pattern; ---- string to be found in
the packet
};
struct dice_fw_branch_user {
__u16 redir_port;
---- outgoing port, host byte order
__u16 redir_chance;
---- forwarding probability
};
struct
address_fw_branch_user {
__u16 redir_port;
---- outgoing port, host byte order
struct
in_addr src; ---- source IP address, network byte order
struct in_addr src_mask;
---- subnet mask, network byte order
};
#define TCP_DICE_FW 1
#define TCP_CONTENT_FW 2
#define TCP_ADDR_FW 3
struct tcp_fw_user{
__u16 type;
---- rule type, one of the above TCP_xxx_FW constants
__u16 port;
---- incoming port, host byte order
__u16 def_port;
---- default outgoing port, host byte order
union {
struct content_fw_branch_user cfb_u;
struct dice_fw_branch_user dfb_u;
struct address_fw_branch_user afb_u;
} u;
};
1.2 Add a new rule
Step 1. create an empty rule:
struct tcp_fw_user
fwu;
fwu.type = TCP_xxx_FW;
fwu.port = incoming port number;
fwu.def_port = default outgoing port number;
setsockopt(sk,
IPPROTO_TCP, TCP_FW_ADD_RULE, (char*)&fwu, sizeof(fwu));
Step 2. add conditions, take address based rule as an example:
fwu.type =
TCP_ADDR_FW;
fwu.port = incoming port number;
fwu.u.afb_u.redir_port = outgoing port number;
inet_aton(source addr, &fwu.u.afb_u.src);
inet_aton(subnet mask, &fwu.u.afb_u.src_mask);
setsockopt(sk,
IPPROTO_TCP, TCP_FW_ADD_BRANCH, (char*)&fwu, sizeof(fwu));
1.3 Manipulate conditions
1. remove a condition, take address-based rule as an example:
fwu.type =
TCP_ADDR_FW;
fwu.port = incoming port number;
fwu.u.afb_u.redir_port = outgoing port number;
inet_aton(source addr, &fwu.u.afb_u.src);
inet_aton(subnet mask, &fwu.u.afb_u.src_mask);
setsockopt(sk,
IPPROTO_TCP, TCP_FW_DEL_BRANCH, (char*)&fwu, sizeof(fwu));
2. remove all the conditions of a rule:
fwu.type =
TCP_ADDR_FW;
fwu.port = incoming port number;
setsockopt(sk,
IPPROTO_TCP, TCP_FW_FLUSH_BRANCH, (char*)&fwu, sizeof(fwu));
1.4 Manipulate rules
1. change the default outgoing port
fwu.type =
TCP_ADDR_FW;
fwu.port = incoming port number;
fwu.def_port = new default outgoing port number;
setsockopt(sk,
IPPROTO_TCP, TCP_FW_EDIT_RULE, (char*)&fwu, sizeof(fwu));
2. remove a rule:
fwu.type =
TCP_ADDR_FW;
fwu.port = incoming port number;
setsockopt(sk,
IPPROTO_TCP, TCP_FW_DEL_RULE, (char*)&fwu, sizeof(fwu));
3. remove all the rules of one type
fwu.type =
TCP_ADDR_FW;
setsockopt(sk,
IPPROTO_TCP, TCP_FW_DEL_RULE, (char*)&fwu, sizeof(fwu));
To use the extended commands, you should include socket_ex_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.
2.1 Data Structure
struct
quota {
__u16 whole;
__u16 frac;
};
This structure represents a fractional value approximately, since floating-point arithmetic is not readily available in the kernel. The maximum value of frac is 1000.
struct
sock_ex_user {
__u16 port;
---- port number, host byte order
union {
struct quota
frac;
__u32
integer;
} quota;
---- quota for this port, meaning depends on the type of the group
short
nonblock;
};
The nonblock field controls what happens when the process tries to accept/output more than allowed. A non-zero value means that an error will be returned to the process. Otherwise, the process will be blocked. It should be non-zero for services started by inetd or xinetd, because the inetd or xinetd process is responsible for starting more than one type of service and must never be blocked. It should also be non-zero for applications that use one thread to service multiple connections.
#define
SKEX_ACCEPT_ABSOLUTE 1
#define SKEX_ACCEPT_RATIO
2
#define SKEX_OUTPUT_ABSOLUTE 3
#define SKEX_OUTPUT_ABSOLUTE_FTP 4
struct sock_group_user {
__u16 gid;
---- group id
__u16 type;
---- socket group type, one of the above constants
__u16 sock_cnt;
---- number of listening ports in this group
struct sock_ex_user *socks;
---- one sock_ex_user structure for one port in this group
};
2.2 Create a new group
1. create an absolute rate based accept group. Only one port is allowed in each group.
struct
sock_group_user *sg;
int sock;
sg = (struct sock_group_user*)malloc(sizeof(*sg));
sg->socks = (struct sock_ex_user*)malloc(sizeof(struct
sock_ex_user));
sg->type = SKEX_ACCEPT_ABSOLUTE;
sg->sock_cnt = 1;
sg->socks[0].port = port number;
sg->socks[0].quota.frac.whole = (__u16)rate;
sg->socks[0].quota.frac.frac = (__u16)FRAC_PART(rate);
sg->socks[0].nonblock = 0 or 1;
sock = socket(AF_INET, SOCK_STREAM, 0);
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));
2. create a relative ratio based accept group. In this example, 2 ports are in the group and their accept ratio is a : b.
struct
sock_group_user *sg;
sg = (struct sock_group_user*)malloc(sizeof(*sg));
sg->socks = (struct sock_ex_user*)malloc(sizeof(struct
sock_ex_user) * 2);
sg->type = SKEX_ACCEPT_RATIO;
sg->sock_cnt = 2;
sg->socks[0].port = port number1;
sg->socks[0].quota.frac.whole = (__u16)a;
sg->socks[0].quota.frac.frac = (__u16)FRAC_PART(a);
sg->socks[0].nonblock = 0 or 1;
sg->socks[1].port = port number2;
sg->socks[1].quota.frac.whole = (__u16)b;
sg->socks[1].quota.frac.frac = (__u16)FRAC_PART(b);
sg->socks[1].nonblock = 0 or 1;
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));
3. create an absolute rate based output group / absolute rate based output group for FTP. Only one port is allowed in each group.
struct
sock_group_user *sg;
sg = (struct sock_group_user*)malloc(sizeof(*sg));
sg->socks = (struct sock_ex_user*)malloc(sizeof(struct
sock_ex_user));
sg->type = SKEX_OUTPUT_ABSOLUTE or SKEX_OUTPUT_ABSOLUTE_FTP;
sg->sock_cnt = 1;
sg->socks[0].port = port number;
sg->socks[0].quota.integer = rate;
sg->socks[0].nonblock = 0 or 1;
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_ADD_GROUP, (char*)sg, sizeof(*sg));
When setsockopt returns successfully, sg->gid holds the group id, which is needed to manage this group.
2.3 Edit an existing group
Editing an existing group is the same as creating a new one, except that sg->gid should be set to the group id:
sg->gid = group id;
initialize other fields
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_EDIT_GROUP, (char*)sg,
sizeof(*sg));
2.4 Remove an existing group
int gid = group id;
/* must use int instead of __u16
*/
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_DEL_GROUP, (char*)&gid,
sizeof(gid));
2.5 Remove all the groups
int useless; /*
must use int
*/
setsockopt(sock, IPPROTO_TCP, TCP_SKEX_FLUSH_LIST,
(char*)&useless, sizeof(useless));
The patched kernel can also report some network status data related to a listening port. To use the extended commands, you should include socket_ex_user.h, which is located at $(NEW_KERNEL_SRC)/include/linux, where $(NEW_KERNEL_SRC) is the directory for the new kernel source.
3.1 Data Structure
struct
port_stat_user {
__u64 bout, bin;
/* bytes sent out, received via this port */
__u32 conn;
/* connections accepted via this port */
__u32 accept_delay;
/* total delay of the accepted connections, unit is 0.01s
*/
__u16 ref_count;
/* internal information */
__u16 conn_waiting;
/* number of connections waiting to be accepted */
};
struct netmeter{
__u16 port;
/* listen port number, host byte order */
struct
port_stat_user stat;
};
The delay mentioned above is the time from the moment the 3-way handshake completes until the moment the connection is accepted by the application.
3.2 Collect Network Status
struct netmeter nm;
int sock;
socklen_t len;
sock = socket(AF_INET, SOCK_STREAM, 0);
nm.port = port number;
len = sizeof(nm);
getsockopt(sock, IPPROTO_TCP, TCP_SKEX_STAT, (char*)&nm, &len);