beeseek-devs team mailing list archive
-
beeseek-devs team
-
Mailing list archive
-
Message #00171
[Merge] lp:~andrea.corbellini/beeseek/sniffer into lp:beeseek
Andrea Corbellini has proposed merging lp:~andrea.corbellini/beeseek/sniffer into lp:beeseek.
Requested reviews:
Lorenzo Allegrucci (l-allegrucci)
BeeSeek Developers (beeseek-devs)
This branch adds the TCP/IP packets sniffer. It's fully written in C, asynchronous and optimized for low memory usage and light CPU load; also privacy is respected.
First, a quick overview of what it does. Basically, every sniffed packet is checked to see if it looks like an HTTP request. If so, it sends the most important information of the request to the analyzer. Here are more details:
* The packets sniffed are just the ones with the destination port 80 (HTTP). [include/handler.h: Bf_PCAP_FILTER_EXP]
* When a packet is caught, a parser checks if it starts with "GET" or "HEAD" (we don't care about other methods such as POST or PUT). [src/parser.c: BfHTTPRequest_ReadRequestLine]
* In case of a positive match, it assumes that the packet contains an HTTP request and looks for the URI.
* It then scans all headers looking for 'Host'. [src/parser.c: BfHTTPRequest_ReadHeader]
* If both the URI and the Host have been found, it sends them to the analyzer. [src/sender.c: BfSender_SendReq]
This implementation has however some problems:
* The parser assumes that every HTTP request fits in just one packet. If, for example, the request line and the headers are in two different packets, the request is lost.
* The parser assumes also that for every packet there's just one request. If in a packet there are two or more requests, only the first one is considered.
* Finally, the parser assumes that every HTTP request starts at the beginning of a packet.
These problems may seem critical, but in practice they are not that important: every web browser I've used sends each request in a single packet. Also, fixing the problems above would slow down the application and slightly increase the memory usage.
Although the sniffer application is finished and may be used, it needs some tuning. In include/parser.h the two constants BfHTTPRequest_URL_SIZE and BfHTTPRequest_HOST_SIZE should be set to a reasonable size (currently, URL_SIZE is too small). Also, unit tests are missing, but I'll work on them as soon as this branch is approved.
--
https://code.launchpad.net/~andrea.corbellini/beeseek/sniffer/+merge/28393
Your team BeeSeek Developers is requested to review the proposed merge of lp:~andrea.corbellini/beeseek/sniffer into lp:beeseek.
=== modified file '.bzrignore'
--- .bzrignore 2010-03-21 12:19:12 +0000
+++ .bzrignore 2010-06-24 11:51:28 +0000
@@ -1,1 +1,2 @@
beeseek/_version_info.py
+sniffer/beeseek-sniffer
=== added directory 'sniffer'
=== added file 'sniffer/Makefile'
--- sniffer/Makefile 1970-01-01 00:00:00 +0000
+++ sniffer/Makefile 2010-06-24 11:51:28 +0000
@@ -0,0 +1,15 @@
+# Copyright 2010 BeeSeek Developers. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+# Define DEBUG (e.g. `make DEBUG=1`) to build with debugging symbols and with
+# the BfDebug trace messages compiled in.
+ifdef DEBUG
+    GCC := gcc -g -DBfDebug
+else
+    GCC := gcc
+endif
+
+# These targets do not name files, so they must always run when requested.
+.PHONY: build clean
+
+build: beeseek-sniffer
+
+# The binary depends on every source and header so that it is rebuilt when
+# one of them changes. Libraries are listed after the sources: the linker
+# resolves symbols from left to right, so a leading -lpcap can be dropped
+# before any object file has referenced it.
+beeseek-sniffer: $(wildcard src/*.c) $(wildcard include/*.h)
+	$(GCC) $(GCC_OPTS) -Wall -Iinclude/ -o beeseek-sniffer src/*.c -lpcap
+
+clean:
+	rm -f beeseek-sniffer
=== added directory 'sniffer/include'
=== added file 'sniffer/include/handler.h'
--- sniffer/include/handler.h 1970-01-01 00:00:00 +0000
+++ sniffer/include/handler.h 2010-06-24 11:51:28 +0000
@@ -0,0 +1,22 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+#include <pcap.h>
+
+/* A sniffed TCP/IP packet, reduced to its two endpoints and its payload. */
+typedef struct {
+ /* Client-side address and TCP port, stored as read from the IP source
+ address and the TCP source port (the port is converted with ntohs() only
+ when it is printed). */
+ u_int32_t client_addr;
+ u_int16_t client_port;
+ /* Server-side address and TCP port (the port comes from the TCP
+ destination field and is stored the same way). */
+ u_int32_t server_addr;
+ u_int16_t server_port;
+ /* Start of the TCP payload. It points into the captured packet rather than
+ at a private copy, and the parser advances it as it consumes bytes. */
+ char *data;
+ /* Number of payload bytes still available at `data`. */
+ unsigned int data_len;
+} BfPacket;
+
+/* libpcap filter expression: only TCP-over-IP packets sent to port 80. */
+#define Bf_PCAP_FILTER_EXP "tcp and ip and dst port 80"
+/* Snapshot length handed to pcap_open_live(), i.e. the maximum number of
+   bytes captured from each packet. */
+#define Bf_PCAP_BUF_SIZE 8192
+
+/* Fill `packet` from the raw captured bytes; returns NULL when the packet has
+   nothing to parse. The name matches the definition in src/handler.c. */
+BfPacket *BfPacket_Init(BfPacket *packet, const u_char *packet_data);
+/* Return a printable "client->server" description of the packet endpoints. */
+char *BfPacket_Repr(BfPacket *packet);
+/* pcap_loop() callback invoked for every captured packet. */
+void BfSniffer_HandlePacket(u_char *args, const struct pcap_pkthdr *header,
+                            const u_char *packet_data);
+/* Open `device_name` for capture and run the sniffing loop. */
+int BfSniffer_SniffDevice(const char *device_name);
=== added file 'sniffer/include/parser.h'
--- sniffer/include/parser.h 1970-01-01 00:00:00 +0000
+++ sniffer/include/parser.h 2010-06-24 11:51:28 +0000
@@ -0,0 +1,14 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+/* Size limits of the two fields kept for every request. The parser discards
+   a request whose URL is BfHTTPRequest_URL_SIZE bytes or longer, or whose
+   host name is longer than BfHTTPRequest_HOST_SIZE bytes. */
+#define BfHTTPRequest_URL_SIZE 200
+#define BfHTTPRequest_HOST_SIZE 20
+
+/* The parts of an HTTP request that are forwarded to the analyzer. Both
+   fields are NUL-terminated strings (hence the extra byte in each array). */
+typedef struct {
+ /* Request target copied from the request line. */
+ char url[BfHTTPRequest_URL_SIZE+1];
+ /* Value of the Host header. */
+ char host[BfHTTPRequest_HOST_SIZE+1];
+} BfHTTPRequest;
+
+/* Parse the payload of `packet` and, when both the URL and the Host header
+   are found, send them to the analyzer. Returns 0 when nothing is sent,
+   otherwise the result of BfSender_SendReq(). */
+int BfHTTPRequest_ParsePacket(BfPacket *packet);
+/* Parse a GET/HEAD request line and store its URL in `request`. Returns 0 on
+   success and -1 when the line is not accepted. */
+int BfHTTPRequest_ReadRequestLine(BfHTTPRequest *request, BfPacket *packet);
+/* Parse one header line. Returns 1 when the Host value was stored in
+   `request`, 0 when the caller should go on with the next line, and -1 when
+   parsing must stop. */
+int BfHTTPRequest_ReadHeader(BfHTTPRequest *request, BfPacket *packet);
=== added file 'sniffer/include/sender.h'
--- sniffer/include/sender.h 1970-01-01 00:00:00 +0000
+++ sniffer/include/sender.h 2010-06-24 11:51:28 +0000
@@ -0,0 +1,16 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+extern int BfSender_Socket;
+
+#define BfSender_DEFAULT_PORT 7222
+
+int BfSender_Connect(char *address, int port);
+int BfSender_SendRaw(char *data);
+int BfSender_SendReq(BfHTTPRequest *request);
+void BfSender_Close(void);
+
+#define BfSender_MSG_CONNECT "PUT /api/sniffer-interface HTTP/1.1\r\n\r\n"
+#define BfSender_MSG_PAGE_TMPL "http://%s%s\n"
+#define BfSender_MSG_PAGE_SIZE \
+ BfHTTPRequest_URL_SIZE + BfHTTPRequest_HOST_SIZE + 8
=== added file 'sniffer/include/sniffer.h'
--- sniffer/include/sniffer.h 1970-01-01 00:00:00 +0000
+++ sniffer/include/sniffer.h 2010-06-24 11:51:28 +0000
@@ -0,0 +1,8 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+/* Command used to launch the program (argv[0]). It is defined in src/app.c
+   and used as the prefix of the messages printed by the sniffer. */
+extern char *Bf_ProgramName;
+
+#include "handler.h"
+#include "parser.h"
+#include "sender.h"
=== added directory 'sniffer/src'
=== added file 'sniffer/src/app.c'
--- sniffer/src/app.c 1970-01-01 00:00:00 +0000
+++ sniffer/src/app.c 2010-06-24 11:51:28 +0000
@@ -0,0 +1,68 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sniffer.h"
+
+/* The command used to launch the application (i.e. `argv[0]`). It is set at
+   the very beginning of main() and prefixes every message that is printed. */
+char *Bf_ProgramName;
+
+/* Print the usage summary and the list of options to standard output. */
+static void
+Bf_PrintUsage(void)
+{
+    printf("Usage: %s DEVICE HOST\n\n", Bf_ProgramName);
+    /* The option list needs no formatting, so it is written in one call. */
+    fputs("Options:\n"
+          " -h, --help show this help message and exit\n", stdout);
+}
+
+/* Signal handler for SIGINT and SIGQUIT: close the connection to the server,
+   report that the sniffer stopped and terminate with a success status.
+   NOTE(review): printf() and exit() are not async-signal-safe; consider
+   setting a flag and leaving the capture loop from the main flow instead. */
+static void
+Bf_HandleQuit(int sig)
+{
+    /* Every signal routed here is handled in the same way. */
+    (void)sig;
+
+    BfSender_Close();
+    printf("%s: sniffer stopped\n", Bf_ProgramName);
+    exit(EXIT_SUCCESS);
+}
+
+/* Program entry point.
+ *
+ * Expected command line: PROGRAM DEVICE HOST. The exit status is 2 for a
+ * command line error, 0 after printing the help, 1 when the connection to
+ * the server fails; otherwise it is whatever BfSniffer_SniffDevice() returns.
+ */
+int
+main(int argc, char **argv)
+{
+    int help_requested;
+
+    Bf_ProgramName = argv[0];
+
+    /* Validate the command line; every rejection also prints the usage. */
+    if (argc == 1) {
+        fprintf(stderr, "%s: error: missing interface name\n", Bf_ProgramName);
+        Bf_PrintUsage();
+        return 2;
+    }
+    if (argc == 2) {
+        /* A lone argument is valid only when it asks for the help. */
+        help_requested = !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help");
+        if (!help_requested) {
+            fprintf(stderr, "%s: error: no destination host\n",
+                    Bf_ProgramName);
+            Bf_PrintUsage();
+            return 2;
+        }
+        Bf_PrintUsage();
+        return 0;
+    }
+    if (argc > 3) {
+        fprintf(stderr, "%s: error: too many arguments\n", Bf_ProgramName);
+        Bf_PrintUsage();
+        return 2;
+    }
+
+    /* Connect to the analyzer before anything is sniffed. */
+    if (BfSender_Connect(argv[2], BfSender_DEFAULT_PORT) < 0)
+        return 1;
+
+    /* From now on an interruption has to close the connection cleanly. */
+    signal(SIGINT, Bf_HandleQuit);
+    signal(SIGQUIT, Bf_HandleQuit);
+
+    /* Sniff until the capture loop terminates. */
+    return BfSniffer_SniffDevice(argv[1]);
+}
=== added file 'sniffer/src/handler.c'
--- sniffer/src/handler.c 1970-01-01 00:00:00 +0000
+++ sniffer/src/handler.c 2010-06-24 11:51:28 +0000
@@ -0,0 +1,98 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <pcap.h>
+#include <stdlib.h>
+#include "sniffer.h"
+
+
+/* Fill `packet` with the endpoints and the TCP payload of a captured frame.
+ *
+ * `packet_data` must point to the beginning of an Ethernet frame that carries
+ * an IPv4 packet with a TCP segment. The payload is not copied:
+ * `packet->data` points into `packet_data`. Addresses and ports are stored
+ * exactly as they appear in the headers (network byte order).
+ *
+ * Returns `packet`, or NULL when the segment does not have the PSH flag set
+ * or when the IP total length is shorter than the two headers.
+ *
+ * NOTE(review): the captured length is not available here, so the header
+ * reads and the payload length are not checked against it.
+ */
+BfPacket *
+BfPacket_Init(BfPacket *packet, const u_char *packet_data)
+{
+    const struct iphdr *ip;
+    const struct tcphdr *tcp;
+    unsigned int ip_hdr_len;
+    unsigned int tcp_hdr_len;
+    unsigned int total_len;
+
+    /* Consume the Ethernet header (we don't need it). */
+    packet_data += 14;
+    /* Read the IP and TCP headers; both lengths are in 32-bit words. */
+    ip = (const struct iphdr *)packet_data;
+    ip_hdr_len = ip->ihl * 4;
+    packet_data += ip_hdr_len;
+    tcp = (const struct tcphdr *)packet_data;
+    tcp_hdr_len = tcp->doff * 4;
+    packet_data += tcp_hdr_len;
+
+    if (tcp->psh == 0)
+        /* This packet has no data. */
+        return NULL;
+
+    /* A total length smaller than the headers would make the unsigned
+       payload length wrap around to a huge value. */
+    total_len = ntohs(ip->tot_len);
+    if (total_len < ip_hdr_len + tcp_hdr_len)
+        return NULL;
+
+    /* Populate the BfPacket. The server is the destination of the packet. */
+    packet->client_addr = ip->saddr;
+    packet->client_port = tcp->source;
+    packet->server_addr = ip->daddr;
+    packet->server_port = tcp->dest;
+    packet->data = (char *)packet_data;
+    packet->data_len = total_len - ip_hdr_len - tcp_hdr_len;
+
+    return packet;
+}
+
+/* Return a string representing the sender and the receiver of a packet in a
+   human-readable form ("a.b.c.d:port->a.b.c.d:port").
+
+   The string lives in a static buffer: it is overwritten by the next call
+   and must not be freed. */
+char *
+BfPacket_Repr(BfPacket *packet)
+{
+    /* Longest result: "255.255.255.255:65535->255.255.255.255:65535", that
+       is 44 characters plus the terminator. */
+    static char repr[45];
+    /* Convert to host order so that the most significant byte is always the
+       first octet, whatever the endianness of the machine. */
+    const unsigned long client = ntohl(packet->client_addr);
+    const unsigned long server = ntohl(packet->server_addr);
+
+    snprintf(repr, sizeof(repr), "%lu.%lu.%lu.%lu:%u->%lu.%lu.%lu.%lu:%u",
+             (client >> 24) & 0xFF, (client >> 16) & 0xFF,
+             (client >> 8) & 0xFF, client & 0xFF,
+             (unsigned int)ntohs(packet->client_port),
+             (server >> 24) & 0xFF, (server >> 16) & 0xFF,
+             (server >> 8) & 0xFF, server & 0xFF,
+             (unsigned int)ntohs(packet->server_port));
+    return repr;
+}
+
+/* pcap_loop() callback: turn the captured bytes into a BfPacket and pass it
+   to the HTTP parser. Packets rejected by BfPacket_Init() are ignored; a
+   negative result from the parser terminates the program with status 1.
+   `args` and `header` are not used. */
+void
+BfSniffer_HandlePacket(u_char *args, const struct pcap_pkthdr *header,
+                       const u_char *packet_data)
+{
+    BfPacket packet;
+    int parse_status;
+
+    if (BfPacket_Init(&packet, packet_data) != NULL) {
+#ifdef BfDebug
+        printf("%s: debug: [%s] new packet with data received\n",
+               Bf_ProgramName, BfPacket_Repr(&packet));
+#endif
+        parse_status = BfHTTPRequest_ParsePacket(&packet);
+        if (parse_status < 0)
+            exit(1);
+    }
+}
+
+/* Start sniffing packets from the device running the PCAP loop.
+ *
+ * The device is opened in promiscuous mode with a one second read timeout
+ * and Bf_PCAP_FILTER_EXP is installed before any packet is handled.
+ *
+ * Returns 1 when the device cannot be opened or the filter cannot be
+ * compiled or installed; otherwise the value returned by pcap_loop().
+ */
+int
+BfSniffer_SniffDevice(const char *device_name)
+{
+    pcap_t *handler;
+    char errbuf[PCAP_ERRBUF_SIZE];
+    struct bpf_program filter_program;
+    int status;
+
+    /* Open the Ethernet device. */
+    handler = pcap_open_live(device_name, Bf_PCAP_BUF_SIZE, 1, 1000, errbuf);
+    if (handler == NULL) {
+        fprintf(stderr, "%s: error: cannot open device: %s\n",
+                Bf_ProgramName, errbuf);
+        return 1;
+    }
+
+    /* Apply the filter expression. Without it every packet on the device
+       would reach the parser, so a failure here is fatal. */
+    if (pcap_compile(handler, &filter_program, Bf_PCAP_FILTER_EXP, 0, 0) < 0) {
+        fprintf(stderr, "%s: error: cannot compile filter: %s\n",
+                Bf_ProgramName, pcap_geterr(handler));
+        pcap_close(handler);
+        return 1;
+    }
+    status = pcap_setfilter(handler, &filter_program);
+    /* The compiled program is no longer needed once it has been installed. */
+    pcap_freecode(&filter_program);
+    if (status < 0) {
+        fprintf(stderr, "%s: error: cannot install filter: %s\n",
+                Bf_ProgramName, pcap_geterr(handler));
+        pcap_close(handler);
+        return 1;
+    }
+
+    /* Handle the packets. */
+    printf("%s: sniffing\n", Bf_ProgramName);
+    status = pcap_loop(handler, -1, BfSniffer_HandlePacket, NULL);
+    pcap_close(handler);
+    return status;
+}
=== added file 'sniffer/src/parser.c'
--- sniffer/src/parser.c 1970-01-01 00:00:00 +0000
+++ sniffer/src/parser.c 2010-06-24 11:51:28 +0000
@@ -0,0 +1,167 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include "sniffer.h"
+
+/* Parse a BfPacket and, when it holds a request with a Host header, send the
+   resulting BfHTTPRequest to the analyzer.
+
+   Returns 0 when nothing is sent (not a request, or no usable Host header);
+   otherwise the value returned by BfSender_SendReq(). */
+int
+BfHTTPRequest_ParsePacket(BfPacket *packet)
+{
+    /* Requests are assumed to start at the beginning of a packet. This is
+       not guaranteed, but most browsers behave this way, so almost every
+       request should be caught. */
+
+    BfHTTPRequest request;
+    int header_status;
+
+    if (BfHTTPRequest_ReadRequestLine(&request, packet) < 0)
+        return 0;
+
+    /* Go through the header lines until Host is found or parsing stops. */
+    while (packet->data_len > 0) {
+        header_status = BfHTTPRequest_ReadHeader(&request, packet);
+        if (header_status == -1)
+            return 0;
+        if (header_status != 0)
+            return BfSender_SendReq(&request);
+    }
+    return 0;
+}
+
+/* Read the request line and put the parsed data into the given `request`.
+ *
+ * Only GET and HEAD requests are accepted. On success the URL is copied
+ * (NUL-terminated) into `request->url`, the whole line is consumed from
+ * `packet` and 0 is returned. -1 is returned when the payload is too short,
+ * uses another method, has no line terminator or no space after the URL, or
+ * when the URL is empty or too long; in that case the method name may
+ * already have been consumed from `packet`.
+ */
+int
+BfHTTPRequest_ReadRequestLine(BfHTTPRequest *request, BfPacket *packet)
+{
+    char *newline;
+    char *space;
+    unsigned int method_len;
+    unsigned int line_len;
+    int url_len;
+
+    if (packet->data_len < 6)
+        /* This line is too short to be a request line. */
+        return -1;
+
+    /* Read the request method. At least 6 bytes are available, so both
+       comparisons stay inside the payload. */
+    if (memcmp(packet->data, "GET ", 4) == 0)
+        method_len = 4;
+    else if (memcmp(packet->data, "HEAD ", 5) == 0)
+        method_len = 5;
+    else
+        /* We don't care about methods other than GET and HEAD. */
+        return -1;
+    packet->data += method_len;
+    packet->data_len -= method_len;
+
+    /* Get the end of the line. The result of memchr() is checked before any
+       arithmetic: subtracting from a NULL pointer is undefined. */
+    newline = memchr(packet->data, '\n', packet->data_len);
+    if (newline == NULL)
+        return -1;
+    line_len = (unsigned int)(newline - packet->data) + 1;
+
+    /* The URL ends at the first space of the line. */
+    space = memchr(packet->data, ' ', line_len);
+    if (space == NULL || space == packet->data)
+        return -1;
+    url_len = (int)(space - packet->data);
+    if (url_len >= BfHTTPRequest_URL_SIZE) {
+        fprintf(stderr,
+                "%s: error: [%s] URL too long (%d bytes), request discarded\n",
+                Bf_ProgramName, BfPacket_Repr(packet), url_len);
+        return -1;
+    }
+
+    /* Put the URL into the request, adding the NUL terminator. */
+    memcpy(request->url, packet->data, url_len);
+    request->url[url_len] = '\0';
+#ifdef BfDebug
+    printf("%s: debug: [%s] caught request: %s\n", Bf_ProgramName,
+           BfPacket_Repr(packet), request->url);
+#endif
+    /* Consume the bytes used. */
+    packet->data += line_len;
+    packet->data_len -= line_len;
+    return 0;
+}
+
+/* Read a header and, if it's Host, put it into the request.
+ *
+ * Returns 1 when the Host value was stored (NUL-terminated) in
+ * `request->host`, 0 when the line was consumed without yielding a host
+ * (another header, or a Host header whose value is empty or continues on the
+ * next line), and -1 when parsing must stop: fewer than 7 bytes left, no
+ * line terminator, a line without ':' (e.g. the blank line ending the
+ * headers) or a host name that is too long.
+ *
+ * A line is always consumed when 0 is returned, so a caller that loops on
+ * this function is guaranteed to make progress. The captured bytes are only
+ * read, never modified.
+ */
+int
+BfHTTPRequest_ReadHeader(BfHTTPRequest *request, BfPacket *packet)
+{
+    static const char host_name[] = "host";
+    char *newline;
+    char *line_end;
+    char *colon;
+    char *host;
+    char *host_end;
+    unsigned int consumed;
+    int host_len;
+    int is_host;
+    int i;
+
+    /* TODO Skip lines starting with a space. */
+
+    if (packet->data_len < 7)
+        /* This line is too short to be a Host header (probably headers are
+           finished). */
+        return -1;
+
+    /* Get the end of the line. */
+    newline = memchr(packet->data, '\n', packet->data_len);
+    if (newline == NULL)
+        return -1;
+    consumed = (unsigned int)(newline - packet->data) + 1;
+
+    /* `line_end` is one past the last character of the line content; the
+       carriage return is optional. */
+    line_end = newline;
+    if (line_end > packet->data && line_end[-1] == '\r')
+        line_end--;
+
+    /* Look for the header name-value separator. */
+    colon = memchr(packet->data, ':', (size_t)(line_end - packet->data));
+    if (colon == NULL)
+        /* This is not a header. */
+        return -1;
+
+    /* The name must be exactly "host" (in any case): a prefix such as "h"
+       or an empty name does not count. */
+    is_host = (colon - packet->data == 4);
+    for (i = 0; is_host && i < 4; i++)
+        if (tolower((unsigned char)packet->data[i]) != host_name[i])
+            is_host = 0;
+
+    /* Trim the blank space on both sides of the value. */
+    host = colon + 1;
+    host_end = line_end;
+    if (is_host) {
+        while (host < host_end &&
+               (host[0] == ' ' || host[0] == '\t' || host[0] == '\r'))
+            host++;
+        while (host_end > host &&
+               (host_end[-1] == ' ' || host_end[-1] == '\t' ||
+                host_end[-1] == '\r'))
+            host_end--;
+    }
+
+    if (!is_host || host == host_end) {
+        /* Either another header, or a Host header whose value is not
+           specified or continues on the next line. Consume the bytes of
+           the line and return. */
+        packet->data += consumed;
+        packet->data_len -= consumed;
+        return 0;
+    }
+
+    /* Check the host name length. */
+    host_len = (int)(host_end - host);
+    if (host_len > BfHTTPRequest_HOST_SIZE) {
+        fprintf(stderr,
+                "%s: error: [%s] host name too long (%d bytes), "
+                "request discarded\n", Bf_ProgramName, BfPacket_Repr(packet),
+                host_len);
+        return -1;
+    }
+
+    /* Put the host name into the request, adding the NUL terminator. */
+    memcpy(request->host, host, host_len);
+    request->host[host_len] = '\0';
+#ifdef BfDebug
+    printf("%s: debug: [%s] found host: %s\n", Bf_ProgramName,
+           BfPacket_Repr(packet), request->host);
+#endif
+    /* Consume the bytes used. */
+    packet->data += consumed;
+    packet->data_len -= consumed;
+    return 1;
+}
=== added file 'sniffer/src/sender.c'
--- sniffer/src/sender.c 1970-01-01 00:00:00 +0000
+++ sniffer/src/sender.c 2010-06-24 11:51:28 +0000
@@ -0,0 +1,124 @@
+/* Copyright 2010 BeeSeek Developers. This software is licensed under the
+ GNU Affero General Public License version 3 (see the file LICENSE). */
+
+#include <errno.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "sniffer.h"
+
+/* Socket used to send data to the BeeSeek server. It is created by
+   BfSender_Connect() and shut down and closed by BfSender_Close(). */
+int BfSender_Socket;
+
+
+/* Connect to the BeeSeek server.
+ *
+ * Creates BfSender_Socket, resolves `address`, connects to `port`, sends
+ * BfSender_MSG_CONNECT and reads the first 12 bytes of the response, whose
+ * status code must be 2XX.
+ *
+ * Returns 0 on success. On failure an error message is printed, the socket
+ * (if it was created) is closed and -1 is returned.
+ */
+int
+BfSender_Connect(char *address, int port)
+{
+    struct hostent *host;
+    struct sockaddr_in server_addr;
+    /* Enough for "HTTP/1.1 " followed by the three status digits. */
+    char data[12];
+    int bytes_recvd;
+    int data_len = 0;
+
+#ifdef BfDebug
+    printf("%s: debug: connecting to server at %s:%d\n", Bf_ProgramName,
+           address, port);
+#endif
+
+    /* Create the socket. */
+    BfSender_Socket = socket(AF_INET, SOCK_STREAM, 0);
+    if (BfSender_Socket < 0) {
+        fprintf(stderr, "%s: error: cannot create socket: %s\n",
+                Bf_ProgramName, strerror(errno));
+        return -1;
+    }
+
+    /* Get the destination host. The resolver reports its failures through
+       h_errno, not errno. */
+    host = gethostbyname(address);
+    if (host == NULL) {
+        fprintf(stderr, "%s: error: cannot connect to server: %s\n",
+                Bf_ProgramName, hstrerror(h_errno));
+        goto fail;
+    }
+    if (host->h_addrtype != AF_INET ||
+        host->h_length != (int)sizeof(server_addr.sin_addr)) {
+        fprintf(stderr, "%s: error: cannot connect to server: "
+                "not an IPv4 address\n", Bf_ProgramName);
+        goto fail;
+    }
+
+    /* Insert the address information. The address is copied byte by byte
+       because the resolver buffer has no alignment guarantee. */
+    memset(&server_addr, 0, sizeof(server_addr));
+    server_addr.sin_family = AF_INET;
+    memcpy(&server_addr.sin_addr, host->h_addr_list[0],
+           sizeof(server_addr.sin_addr));
+    server_addr.sin_port = htons(port);
+
+    /* Connect to the server. */
+    if (connect(BfSender_Socket, (struct sockaddr *) &server_addr,
+                sizeof(server_addr)) < 0) {
+        fprintf(stderr, "%s: error: cannot connect to server: %s\n",
+                Bf_ProgramName, strerror(errno));
+        goto fail;
+    }
+
+    /* Initialize the HTTP communication with the server. */
+    if (BfSender_SendRaw(BfSender_MSG_CONNECT) < 0)
+        goto fail;
+
+    /* Receive the response from the server; recv() may deliver it in
+       several pieces. */
+    while (data_len < (int)sizeof(data)) {
+        bytes_recvd = recv(BfSender_Socket, data + data_len,
+                           sizeof(data) - data_len, 0);
+        if (bytes_recvd < 0) {
+            fprintf(stderr, "%s: error: cannot get response from server: %s\n",
+                    Bf_ProgramName, strerror(errno));
+            goto fail;
+        }
+        else if (bytes_recvd == 0) {
+            fprintf(stderr, "%s: error: server dropped the connection\n",
+                    Bf_ProgramName);
+            goto fail;
+        }
+        data_len += bytes_recvd;
+    }
+    /* The status code of the response should be 2XX (e.g. "HTTP/1.1 200 OK").
+     */
+    if (data[9] != '2') {
+        fprintf(stderr, "%s: error: bad response from server: %c%c%c\n",
+                Bf_ProgramName, data[9], data[10], data[11]);
+        goto fail;
+    }
+    return 0;
+
+fail:
+    /* Do not leak the descriptor when the connection cannot be used. */
+    close(BfSender_Socket);
+    BfSender_Socket = -1;
+    return -1;
+}
+
+/* Send the NUL-terminated string `data` to the server, calling send() again
+   until every byte has been accepted. SIGPIPE is suppressed with
+   MSG_NOSIGNAL. Returns 0 on success; on error a message is printed and -1
+   is returned. */
+int
+BfSender_SendRaw(char *data)
+{
+    size_t remaining;
+    int sent;
+
+    for (remaining = strlen(data); remaining > 0; remaining -= sent) {
+        sent = send(BfSender_Socket, data, remaining, MSG_NOSIGNAL);
+        if (sent < 0) {
+            fprintf(stderr, "%s: error: cannot send message to server: %s\n",
+                    Bf_ProgramName, strerror(errno));
+            return -1;
+        }
+        /* Skip what was accepted; the rest goes out at the next round. */
+        data += sent;
+    }
+    return 0;
+}
+
+/* Format `request` with BfSender_MSG_PAGE_TMPL and send it to the server.
+ *
+ * Returns the result of BfSender_SendRaw(). A message that does not fit in
+ * the buffer is reported and discarded, and 0 is returned so that a single
+ * oversized request does not stop the sniffer.
+ */
+int
+BfSender_SendReq(BfHTTPRequest *request)
+{
+    char data[BfSender_MSG_PAGE_SIZE];
+    int written;
+
+    /* The buffer has no spare room, so the write is bounded explicitly
+       instead of relying on the limits enforced by the parser. */
+    written = snprintf(data, sizeof(data), BfSender_MSG_PAGE_TMPL,
+                       request->host, request->url);
+    if (written < 0 || (size_t)written >= sizeof(data)) {
+        fprintf(stderr, "%s: error: message too long, request discarded\n",
+                Bf_ProgramName);
+        return 0;
+    }
+    return BfSender_SendRaw(data);
+}
+
+/* Close the connection to the BeeSeek server: shut the socket down in both
+   directions and then release the descriptor. */
+void
+BfSender_Close(void)
+{
+    const int server_socket = BfSender_Socket;
+
+    shutdown(server_socket, SHUT_RDWR);
+    close(server_socket);
+#ifdef BfDebug
+    printf("%s: debug: connection to the server closed\n", Bf_ProgramName);
+#endif
+}
Follow ups