tpotce/docker/p0f/fp_http.c
Marco Ochse 0d5d80b1e3 include docker repos
... skip emobility since it is a dev repo
2017-10-13 18:58:14 +00:00

1415 lines
30 KiB
C

/*
p0f - HTTP fingerprinting
-------------------------
Copyright (C) 2012 by Michal Zalewski <lcamtuf@coredump.cx>
Distributed under the terms and conditions of GNU LGPL.
*/
#define _FROM_FP_HTTP
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <time.h>
#include <netinet/in.h>
#include <sys/types.h>
#include "types.h"
#include "config.h"
#include "debug.h"
#include "alloc-inl.h"
#include "process.h"
#include "readfp.h"
#include "p0f.h"
#include "tcp.h"
#include "hash.h"
#include "fp_http.h"
#include "languages.h"
static u8** hdr_names; /* List of header names by ID */
static u32 hdr_cnt; /* Number of headers registered */
static u32* hdr_by_hash[SIG_BUCKETS]; /* Hashed header names */
static u32 hbh_cnt[SIG_BUCKETS]; /* Number of headers in bucket */
/* Signatures aren't bucketed due to the complex matching used; but we use
Bloom filters to go through them quickly. */
static struct http_sig_record* sigs[2];
static u32 sig_cnt[2];
static struct ua_map_record* ua_map; /* Mappings between U-A and OS */
static u32 ua_map_cnt;
#define SLOF(_str) (u8*)_str, strlen((char*)_str)
/* Ghetto Bloom filter 4-out-of-64 bitmask generator for adding 32-bit header
IDs to a set. We expect around 10 members in a set. */
static inline u64 bloom4_64(u32 val) {
u32 hash = hash32(&val, 4, hash_seed);
u64 ret;
ret = (1LL << (hash & 63));
ret ^= (1LL << ((hash >> 8) & 63));
ret ^= (1LL << ((hash >> 16) & 63));
ret ^= (1LL << ((hash >> 24) & 63));
return ret;
}
/* Look up or register new header */
static s32 lookup_hdr(u8* name, u32 len, u8 create) {
u32 bucket = hash32(name, len, hash_seed) % SIG_BUCKETS;
u32* p = hdr_by_hash[bucket];
u32 i = hbh_cnt[bucket];
while (i--) {
if (!memcmp(hdr_names[*p], name, len) /* ASAN won't like this... */ &&
!hdr_names[*p][len]) return *p;
p++;
}
/* Not found! */
if (!create) return -1;
hdr_names = DFL_ck_realloc(hdr_names, (hdr_cnt + 1) * sizeof(u8*));
hdr_names[hdr_cnt] = DFL_ck_memdup_str(name, len);
hdr_by_hash[bucket] = DFL_ck_realloc(hdr_by_hash[bucket],
(hbh_cnt[bucket] + 1) * 4);
hdr_by_hash[bucket][hbh_cnt[bucket]++] = hdr_cnt++;
return hdr_cnt - 1;
}
/* Pre-register essential headers. */
void http_init(void) {
u32 i;
/* Do not change - other code depends on the ordering of first 6 entries. */
lookup_hdr(SLOF("User-Agent"), 1); /* 0 */
lookup_hdr(SLOF("Server"), 1); /* 1 */
lookup_hdr(SLOF("Accept-Language"), 1); /* 2 */
lookup_hdr(SLOF("Via"), 1); /* 3 */
lookup_hdr(SLOF("X-Forwarded-For"), 1); /* 4 */
lookup_hdr(SLOF("Date"), 1); /* 5 */
#define HDR_UA 0
#define HDR_SRV 1
#define HDR_AL 2
#define HDR_VIA 3
#define HDR_XFF 4
#define HDR_DAT 5
i = 0;
while (req_optional[i].name) {
req_optional[i].id = lookup_hdr(SLOF(req_optional[i].name), 1);
i++;
}
i = 0;
while (resp_optional[i].name) {
resp_optional[i].id = lookup_hdr(SLOF(resp_optional[i].name), 1);
i++;
}
i = 0;
while (req_skipval[i].name) {
req_skipval[i].id = lookup_hdr(SLOF(req_skipval[i].name), 1);
i++;
}
i = 0;
while (resp_skipval[i].name) {
resp_skipval[i].id = lookup_hdr(SLOF(resp_skipval[i].name), 1);
i++;
}
i = 0;
while (req_common[i].name) {
req_common[i].id = lookup_hdr(SLOF(req_common[i].name), 1);
i++;
}
i = 0;
while (resp_common[i].name) {
resp_common[i].id = lookup_hdr(SLOF(resp_common[i].name), 1);
i++;
}
}
/* Find match for a signature. */
static void http_find_match(u8 to_srv, struct http_sig* ts, u8 dupe_det) {
struct http_sig_record* gmatch = NULL;
struct http_sig_record* ref = sigs[to_srv];
u32 cnt = sig_cnt[to_srv];
while (cnt--) {
struct http_sig* rs = ref->sig;
u32 ts_hdr = 0, rs_hdr = 0;
if (rs->http_ver != -1 && rs->http_ver != ts->http_ver) goto next_sig;
/* Check that all the headers listed for the p0f.fp signature (probably)
appear in the examined traffic. */
if ((ts->hdr_bloom4 & rs->hdr_bloom4) != rs->hdr_bloom4) goto next_sig;
/* Confirm the ordering and values of headers (this is relatively
slow, hence the Bloom filter first). */
while (rs_hdr < rs->hdr_cnt) {
u32 orig_ts = ts_hdr;
while (rs->hdr[rs_hdr].id != ts->hdr[ts_hdr].id &&
ts_hdr < ts->hdr_cnt) ts_hdr++;
if (ts_hdr == ts->hdr_cnt) {
if (!rs->hdr[rs_hdr].optional) goto next_sig;
/* If this is an optional header, check that it doesn't appear
anywhere else. */
for (ts_hdr = 0; ts_hdr < ts->hdr_cnt; ts_hdr++)
if (rs->hdr[rs_hdr].id == ts->hdr[ts_hdr].id) goto next_sig;
ts_hdr = orig_ts;
rs_hdr++;
continue;
}
if (rs->hdr[rs_hdr].value &&
(!ts->hdr[ts_hdr].value ||
!strstr((char*)ts->hdr[ts_hdr].value,
(char*)rs->hdr[rs_hdr].value))) goto next_sig;
ts_hdr++;
rs_hdr++;
}
/* Check that the headers forbidden in p0f.fp don't appear in the traffic.
We first check if they seem to appear in ts->hdr_bloom4, and only if so,
we do a full check. */
for (rs_hdr = 0; rs_hdr < rs->miss_cnt; rs_hdr++) {
u64 miss_bloom4 = bloom4_64(rs->miss[rs_hdr]);
if ((ts->hdr_bloom4 & miss_bloom4) != miss_bloom4) continue;
/* Okay, possible instance of a banned header - scan list... */
for (ts_hdr = 0; ts_hdr < ts->hdr_cnt; ts_hdr++)
if (rs->miss[rs_hdr] == ts->hdr[ts_hdr].id) goto next_sig;
}
/* When doing dupe detection, we want to allow a signature with additional
banned headers to precede one with fewer, or with a different set. */
if (dupe_det) {
if (rs->miss_cnt > ts->miss_cnt) goto next_sig;
for (rs_hdr = 0; rs_hdr < rs->miss_cnt; rs_hdr++) {
for (ts_hdr = 0; ts_hdr < ts->miss_cnt; ts_hdr++)
if (rs->miss[rs_hdr] == ts->miss[ts_hdr]) break;
/* One of the reference headers doesn't appear in current sig! */
if (ts_hdr == ts->miss_cnt) goto next_sig;
}
}
/* Whoa, a match. */
if (!ref->generic) {
ts->matched = ref;
if (rs->sw && ts->sw && !strstr((char*)ts->sw, (char*)rs->sw))
ts->dishonest = 1;
return;
} else if (!gmatch) gmatch = ref;
next_sig:
ref = ref + 1;
}
/* A generic signature is the best we could find. */
if (!dupe_det && gmatch) {
ts->matched = gmatch;
if (gmatch->sig->sw && ts->sw && !strstr((char*)ts->sw,
(char*)gmatch->sig->sw)) ts->dishonest = 1;
}
}
/* Register new HTTP signature. */
void http_register_sig(u8 to_srv, u8 generic, s32 sig_class, u32 sig_name,
u8* sig_flavor, u32 label_id, u32* sys, u32 sys_cnt,
u8* val, u32 line_no) {
struct http_sig* hsig;
struct http_sig_record* hrec;
u8* nxt;
hsig = DFL_ck_alloc(sizeof(struct http_sig));
sigs[to_srv] = DFL_ck_realloc(sigs[to_srv], sizeof(struct http_sig_record) *
(sig_cnt[to_srv] + 1));
hrec = &sigs[to_srv][sig_cnt[to_srv]];
if (val[1] != ':') FATAL("Malformed signature in line %u.", line_no);
/* http_ver */
switch (*val) {
case '0': break;
case '1': hsig->http_ver = 1; break;
case '*': hsig->http_ver = -1; break;
default: FATAL("Bad HTTP version in line %u.", line_no);
}
val += 2;
/* horder */
while (*val != ':') {
u32 id;
u8 optional = 0;
if (hsig->hdr_cnt >= HTTP_MAX_HDRS)
FATAL("Too many headers listed in line %u.", line_no);
nxt = val;
if (*nxt == '?') { optional = 1; val++; nxt++; }
while (isalnum(*nxt) || *nxt == '-' || *nxt == '_') nxt++;
if (val == nxt)
FATAL("Malformed header name in line %u.", line_no);
id = lookup_hdr(val, nxt - val, 1);
hsig->hdr[hsig->hdr_cnt].id = id;
hsig->hdr[hsig->hdr_cnt].optional = optional;
if (!optional) hsig->hdr_bloom4 |= bloom4_64(id);
val = nxt;
if (*val == '=') {
if (val[1] != '[')
FATAL("Missing '[' after '=' in line %u.", line_no);
val += 2;
nxt = val;
while (*nxt && *nxt != ']') nxt++;
if (val == nxt || !*nxt)
FATAL("Malformed signature in line %u.", line_no);
hsig->hdr[hsig->hdr_cnt].value = DFL_ck_memdup_str(val, nxt - val);
val = nxt + 1;
}
hsig->hdr_cnt++;
if (*val == ',') val++; else if (*val != ':')
FATAL("Malformed signature in line %u.", line_no);
}
val++;
/* habsent */
while (*val != ':') {
u32 id;
if (hsig->miss_cnt >= HTTP_MAX_HDRS)
FATAL("Too many headers listed in line %u.", line_no);
nxt = val;
while (isalnum(*nxt) || *nxt == '-' || *nxt == '_') nxt++;
if (val == nxt)
FATAL("Malformed header name in line %u.", line_no);
id = lookup_hdr(val, nxt - val, 1);
hsig->miss[hsig->miss_cnt] = id;
val = nxt;
hsig->miss_cnt++;
if (*val == ',') val++; else if (*val != ':')
FATAL("Malformed signature in line %u.", line_no);
}
val++;
/* exp_sw */
if (*val) {
if (strchr((char*)val, ':'))
FATAL("Malformed signature in line %u.", line_no);
hsig->sw = DFL_ck_strdup(val);
}
http_find_match(to_srv, hsig, 1);
if (hsig->matched)
FATAL("Signature in line %u is already covered by line %u.",
line_no, hsig->matched->line_no);
hrec->class_id = sig_class;
hrec->name_id = sig_name;
hrec->flavor = sig_flavor;
hrec->label_id = label_id;
hrec->sys = sys;
hrec->sys_cnt = sys_cnt;
hrec->line_no = line_no;
hrec->generic = generic;
hrec->sig = hsig;
sig_cnt[to_srv]++;
}
/* Register new HTTP signature. */
void http_parse_ua(u8* val, u32 line_no) {
u8* nxt;
while (*val) {
u32 id;
u8* name = NULL;
nxt = val;
while (*nxt && (isalnum(*nxt) || strchr(NAME_CHARS, *nxt))) nxt++;
if (val == nxt)
FATAL("Malformed system name in line %u.", line_no);
id = lookup_name_id(val, nxt - val);
val = nxt;
if (*val == '=') {
if (val[1] != '[')
FATAL("Missing '[' after '=' in line %u.", line_no);
val += 2;
nxt = val;
while (*nxt && *nxt != ']') nxt++;
if (val == nxt || !*nxt)
FATAL("Malformed signature in line %u.", line_no);
name = DFL_ck_memdup_str(val, nxt - val);
val = nxt + 1;
}
ua_map = DFL_ck_realloc(ua_map, (ua_map_cnt + 1) *
sizeof(struct ua_map_record));
ua_map[ua_map_cnt].id = id;
if (!name) ua_map[ua_map_cnt].name = fp_os_names[id];
else ua_map[ua_map_cnt].name = name;
ua_map_cnt++;
if (*val == ',') val++;
}
}
/* Dump a HTTP signature. */
static u8* dump_sig(u8 to_srv, struct http_sig* hsig) {
u32 i;
u8 had_prev = 0;
struct http_id* list;
u8 tmp[HTTP_MAX_SHOW + 1];
u32 tpos;
static u8* ret;
u32 rlen = 0;
u8* val;
#define RETF(_par...) do { \
s32 _len = snprintf(NULL, 0, _par); \
if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \
ret = DFL_ck_realloc_kb(ret, rlen + _len + 1); \
snprintf((char*)ret + rlen, _len + 1, _par); \
rlen += _len; \
} while (0)
RETF("%u:", hsig->http_ver);
for (i = 0; i < hsig->hdr_cnt; i++) {
if (hsig->hdr[i].id >= 0) {
u8 optional = 0;
/* Check the "optional" list. */
list = to_srv ? req_optional : resp_optional;
while (list->name) {
if (list->id == hsig->hdr[i].id) break;
list++;
}
if (list->name) optional = 1;
RETF("%s%s%s", had_prev ? "," : "", optional ? "?" : "",
hdr_names[hsig->hdr[i].id]);
had_prev = 1;
if (!(val = hsig->hdr[i].value)) continue;
/* Next, make sure that the value is not on the ignore list. */
if (optional) continue;
list = to_srv ? req_skipval : resp_skipval;
while (list->name) {
if (list->id == hsig->hdr[i].id) break;
list++;
}
if (list->name) continue;
/* Looks like it's not on the list, so let's output a cleaned-up version
up to HTTP_MAX_SHOW. */
tpos = 0;
while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 &&
val[tpos] != ']' && val[tpos] != '|') {
tmp[tpos] = val[tpos];
tpos++;
}
tmp[tpos] = 0;
if (!tpos) continue;
RETF("=[%s]", tmp);
} else {
RETF("%s%s", had_prev ? "," : "", hsig->hdr[i].name);
had_prev = 1;
if (!(val = hsig->hdr[i].value)) continue;
tpos = 0;
while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 &&
val[tpos] != ']') { tmp[tpos] = val[tpos]; tpos++; }
tmp[tpos] = 0;
if (!tpos) continue;
RETF("=[%s]", tmp);
}
}
RETF(":");
list = to_srv ? req_common : resp_common;
had_prev = 0;
while (list->name) {
for (i = 0; i < hsig->hdr_cnt; i++)
if (hsig->hdr[i].id == list->id) break;
if (i == hsig->hdr_cnt) {
RETF("%s%s", had_prev ? "," : "", list->name);
had_prev = 1;
}
list++;
}
RETF(":");
if ((val = hsig->sw)) {
tpos = 0;
while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 &&
val[tpos] != ']') { tmp[tpos] = val[tpos]; tpos++; }
tmp[tpos] = 0;
if (tpos) RETF("%s", tmp);
}
return ret;
}
/* Dump signature flags. */
static u8* dump_flags(struct http_sig* hsig, struct http_sig_record* m) {
static u8* ret;
u32 rlen = 0;
RETF("");
if (hsig->dishonest) RETF(" dishonest");
if (!hsig->sw) RETF(" anonymous");
if (m && m->generic) RETF(" generic");
#undef RETF
if (*ret) return ret + 1; else return (u8*)"none";
}
/* Score signature differences. For unknown signatures, the presumption is that
they identify apps, so the logic is quite different from TCP. */
static void score_nat(u8 to_srv, struct packet_flow* f) {
struct http_sig_record* m = f->http_tmp.matched;
struct host_data* hd;
struct http_sig* ref;
u8 score = 0, diff_already = 0;
u16 reason = 0;
if (to_srv) {
hd = f->client;
ref = hd->http_req_os;
} else {
hd = f->server;
ref = hd->http_resp;
/* If the signature is for a different port, don't read too much into it. */
if (hd->http_resp_port != f->srv_port) ref = NULL;
}
if (!m) {
/* No match. The user is probably running another app; this is only of
interest if a server progresses from known to unknown. We can't
compare two unknown server sigs with that much confidence. */
if (!to_srv && ref && ref->matched) {
DEBUG("[#] HTTP server signature changed from known to unknown.\n");
score += 4;
reason |= NAT_TO_UNK;
}
goto header_check;
}
if (m->class_id == -1) {
/* Got a match for an application signature. Make sure it runs on the
OS we have on file... */
verify_tool_class(to_srv, f, m->sys, m->sys_cnt);
/* ...and check for inconsistencies in server behavior. */
if (!to_srv && ref && ref->matched) {
if (ref->matched->name_id != m->name_id) {
DEBUG("[#] Name on the matched HTTP server signature changes.\n");
score += 8;
reason |= NAT_APP_LB;
} else if (ref->matched->label_id != m->label_id) {
DEBUG("[#] Label on the matched HTTP server signature changes.\n");
score += 2;
reason |= NAT_APP_LB;
}
}
} else {
/* Ooh, spiffy: a match for an OS signature! There will be about two uses
for this code, ever. */
if (ref && ref->matched) {
if (ref->matched->name_id != m->name_id) {
DEBUG("[#] Name on the matched HTTP OS signature changes.\n");
score += 8;
reason |= NAT_OS_SIG;
diff_already = 1;
} else if (ref->matched->name_id != m->label_id) {
DEBUG("[#] Label on the matched HTTP OS signature changes.\n");
score += 2;
reason |= NAT_OS_SIG;
}
} else if (ref) {
DEBUG("[#] HTTP OS signature changed from unknown to known.\n");
score += 4;
reason |= NAT_TO_UNK;
}
/* If we haven't pointed out anything major yet, also complain if the
signature doesn't match host data. */
if (!diff_already && hd->last_name_id != m->name_id) {
DEBUG("[#] Matched HTTP OS signature different than host data.\n");
score += 4;
reason |= NAT_OS_SIG;
}
}
/* If we have determined that U-A looks legit, but the OS doesn't match,
that's a clear sign of trouble. */
if (to_srv && m->class_id == -1 && f->http_tmp.sw && !f->http_tmp.dishonest) {
u32 i;
for (i = 0; i < ua_map_cnt; i++)
if (strstr((char*)f->http_tmp.sw, (char*)ua_map[i].name)) break;
if (i != ua_map_cnt) {
if (ua_map[i].id != hd->last_name_id) {
DEBUG("[#] Otherwise plausible User-Agent points to another OS.\n");
score += 4;
reason |= NAT_APP_UA;
if (!hd->bad_sw) hd->bad_sw = 1;
} else {
DEBUG("[#] User-Agent OS value checks out.\n");
hd->bad_sw = 0;
}
}
}
header_check:
/* Okay, some last-resort checks. This is obviously concerning: */
if (f->http_tmp.via) {
DEBUG("[#] Explicit use of Via or X-Forwarded-For.\n");
score += 8;
reason |= NAT_APP_VIA;
}
/* Last but not least, see what happened to 'Date': */
if (ref && !to_srv && ref->date && f->http_tmp.date) {
s64 recv_diff = ((s64)f->http_tmp.recv_date) - ref->recv_date;
s64 hdr_diff = ((s64)f->http_tmp.date) - ref->date;
if (hdr_diff < -HTTP_MAX_DATE_DIFF ||
hdr_diff > recv_diff + HTTP_MAX_DATE_DIFF) {
DEBUG("[#] HTTP 'Date' distance too high (%lld in %lld sec).\n",
hdr_diff, recv_diff);
score += 4;
reason |= NAT_APP_DATE;
} else {
DEBUG("[#] HTTP 'Date' distance seems fine (%lld in %lld sec).\n",
hdr_diff, recv_diff);
}
}
add_nat_score(to_srv, f, reason, score);
}
/* Look up HTTP signature, create an observation. */
static void fingerprint_http(u8 to_srv, struct packet_flow* f) {
struct http_sig_record* m;
u8* lang = NULL;
http_find_match(to_srv, &f->http_tmp, 0);
start_observation(to_srv ? "http request" : "http response", 4, to_srv, f);
if ((m = f->http_tmp.matched)) {
OBSERVF((m->class_id < 0) ? "app" : "os", "%s%s%s",
fp_os_names[m->name_id], m->flavor ? " " : "",
m->flavor ? m->flavor : (u8*)"");
} else add_observation_field("app", NULL);
if (f->http_tmp.lang && isalpha(f->http_tmp.lang[0]) &&
isalpha(f->http_tmp.lang[1]) && !isalpha(f->http_tmp.lang[2])) {
u8 lh = LANG_HASH(f->http_tmp.lang[0], f->http_tmp.lang[1]);
u8 pos = 0;
while (languages[lh][pos]) {
if (f->http_tmp.lang[0] == languages[lh][pos][0] &&
f->http_tmp.lang[1] == languages[lh][pos][1]) break;
pos += 2;
}
if (!languages[lh][pos]) add_observation_field("lang", NULL);
else add_observation_field("lang",
(lang = (u8*)languages[lh][pos + 1]));
} else add_observation_field("lang", (u8*)"none");
add_observation_field("params", dump_flags(&f->http_tmp, m));
add_observation_field("raw_sig", dump_sig(to_srv, &f->http_tmp));
score_nat(to_srv, f);
/* Save observations needed to score future responses. */
if (!to_srv) {
/* For server response, always store the signature. */
ck_free(f->server->http_resp);
f->server->http_resp = ck_memdup(&f->http_tmp, sizeof(struct http_sig));
f->server->http_resp->hdr_cnt = 0;
f->server->http_resp->sw = NULL;
f->server->http_resp->lang = NULL;
f->server->http_resp->via = NULL;
f->server->http_resp_port = f->srv_port;
if (lang) f->server->language = lang;
if (m) {
if (m->class_id != -1) {
/* If this is an OS signature, update host record. */
f->server->last_class_id = m->class_id;
f->server->last_name_id = m->name_id;
f->server->last_flavor = m->flavor;
f->server->last_quality = (m->generic * P0F_MATCH_GENERIC);
} else {
/* Otherwise, record app data. */
f->server->http_name_id = m->name_id;
f->server->http_flavor = m->flavor;
if (f->http_tmp.dishonest) f->server->bad_sw = 2;
}
}
} else {
if (lang) f->client->language = lang;
if (m) {
if (m->class_id != -1) {
/* Client request - only OS sig is of any note. */
ck_free(f->client->http_req_os);
f->client->http_req_os = ck_memdup(&f->http_tmp,
sizeof(struct http_sig));
f->client->http_req_os->hdr_cnt = 0;
f->client->http_req_os->sw = NULL;
f->client->http_req_os->lang = NULL;
f->client->http_req_os->via = NULL;
f->client->last_class_id = m->class_id;
f->client->last_name_id = m->name_id;
f->client->last_flavor = m->flavor;
f->client->last_quality = (m->generic * P0F_MATCH_GENERIC);
} else {
/* Record app data for the API. */
f->client->http_name_id = m->name_id;
f->client->http_flavor = m->flavor;
if (f->http_tmp.dishonest) f->client->bad_sw = 2;
}
}
}
}
/* Free up any allocated strings in http_sig. */
void free_sig_hdrs(struct http_sig* h) {
u32 i;
for (i = 0; i < h->hdr_cnt; i++) {
if (h->hdr[i].name) ck_free(h->hdr[i].name);
if (h->hdr[i].value) ck_free(h->hdr[i].value);
}
}
/* Parse HTTP date field. */
static u32 parse_date(u8* str) {
struct tm t;
if (!strptime((char*)str, "%a, %d %b %Y %H:%M:%S %Z", &t)) {
DEBUG("[#] Invalid 'Date' field ('%s').\n", str);
return 0;
}
return mktime(&t);
}
/* Parse name=value pairs into a signature. */
static u8 parse_pairs(u8 to_srv, struct packet_flow* f, u8 can_get_more) {
u32 plen = to_srv ? f->req_len : f->resp_len;
u32 off;
/* Try to parse name: value pairs. */
while ((off = f->http_pos) < plen) {
u8* pay = to_srv ? f->request : f->response;
u32 nlen, vlen, vstart;
s32 hid;
u32 hcount;
/* Empty line? Dispatch for fingerprinting! */
if (pay[off] == '\r' || pay[off] == '\n') {
f->http_tmp.recv_date = get_unix_time();
fingerprint_http(to_srv, f);
/* If this is a request, flush the collected signature and prepare
for parsing the response. If it's a response, just shut down HTTP
parsing on this flow. */
if (to_srv) {
f->http_req_done = 1;
f->http_pos = 0;
free_sig_hdrs(&f->http_tmp);
memset(&f->http_tmp, 0, sizeof(struct http_sig));
return 1;
} else {
f->in_http = -1;
return 0;
}
}
/* Looks like we're getting a header value. See if we have room for it. */
if ((hcount = f->http_tmp.hdr_cnt) >= HTTP_MAX_HDRS) {
DEBUG("[#] Too many HTTP headers in a %s.\n", to_srv ? "request" :
"response");
f->in_http = -1;
return 0;
}
/* Try to extract header name. */
nlen = 0;
while ((isalnum(pay[off]) || pay[off] == '-' || pay[off] == '_') &&
off < plen && nlen <= HTTP_MAX_HDR_NAME) {
off++;
nlen++;
}
if (off == plen) {
if (!can_get_more) {
DEBUG("[#] End of HTTP %s before end of headers.\n",
to_srv ? "request" : "response");
f->in_http = -1;
}
return can_get_more;
}
/* Empty, excessively long, or non-':'-followed header name? */
if (!nlen || pay[off] != ':' || nlen > HTTP_MAX_HDR_NAME) {
DEBUG("[#] Invalid HTTP header encountered (len = %u, char = 0x%02x).\n",
nlen, pay[off]);
f->in_http = -1;
return 0;
}
/* At this point, header name starts at f->http_pos, and has nlen bytes.
Skip ':' and a subsequent whitespace next. */
off++;
if (off < plen && isblank(pay[off])) off++;
vstart = off;
vlen = 0;
/* Find the next \n. */
while (off < plen && vlen <= HTTP_MAX_HDR_VAL && pay[off] != '\n') {
off++;
vlen++;
}
if (vlen > HTTP_MAX_HDR_VAL) {
DEBUG("[#] HTTP %s header value length exceeded.\n",
to_srv ? "request" : "response");
f->in_http = -1;
return -1;
}
if (off == plen) {
if (!can_get_more) {
DEBUG("[#] End of HTTP %s before end of headers.\n",
to_srv ? "request" : "response");
f->in_http = -1;
}
return can_get_more;
}
/* If party is using \r\n terminators, go back one char. */
if (pay[off - 1] == '\r') vlen--;
/* Header value starts at vstart, and has vlen bytes (may be zero). Record
this in the signature. */
hid = lookup_hdr(pay + f->http_pos, nlen, 0);
f->http_tmp.hdr[hcount].id = hid;
if (hid < 0) {
/* Header ID not found, store literal value. */
f->http_tmp.hdr[hcount].name = ck_memdup_str(pay + f->http_pos, nlen);
} else {
/* Found - update Bloom filter. */
f->http_tmp.hdr_bloom4 |= bloom4_64(hid);
}
/* If there's a value, store that too. For U-A and Server, also update
'sw'; and for requests, collect Accept-Language. */
if (vlen) {
u8* val = ck_memdup_str(pay + vstart, vlen);
f->http_tmp.hdr[hcount].value = val;
if (to_srv) {
switch (hid) {
case HDR_UA: f->http_tmp.sw = val; break;
case HDR_AL: f->http_tmp.lang = val; break;
case HDR_VIA:
case HDR_XFF: f->http_tmp.via = val; break;
}
} else {
switch (hid) {
case HDR_SRV: f->http_tmp.sw = val; break;
case HDR_DAT: f->http_tmp.date = parse_date(val); break;
case HDR_VIA:
case HDR_XFF: f->http_tmp.via = val; break;
}
}
}
/* Moving on... */
f->http_tmp.hdr_cnt++;
f->http_pos = off + 1;
}
if (!can_get_more) {
DEBUG("[#] End of HTTP %s before end of headers.\n",
to_srv ? "request" : "response");
f->in_http = -1;
}
return can_get_more;
}
/* Examine request or response; returns 1 if more data needed and plausibly can
be read. Note that the buffer is always NUL-terminated. */
u8 process_http(u8 to_srv, struct packet_flow* f) {
/* Already decided this flow is not worth tracking? */
if (f->in_http < 0) return 0;
if (to_srv) {
u8* pay = f->request;
u8 can_get_more = (f->req_len < MAX_FLOW_DATA);
u32 off;
/* Request done, but pending response? */
if (f->http_req_done) return 1;
if (!f->in_http) {
u8 chr;
u8* sig_at;
/* Ooh, new flow! */
if (f->req_len < 15) return can_get_more;
/* Scan until \n, or until binary data spotted. */
off = f->http_pos;
/* We only care about GET and HEAD requests at this point. */
if (!off && strncmp((char*)pay, "GET /", 5) &&
strncmp((char*)pay, "HEAD /", 6)) {
DEBUG("[#] Does not seem like a GET / HEAD request.\n");
f->in_http = -1;
return 0;
}
while (off < f->req_len && off < HTTP_MAX_URL &&
(chr = pay[off]) != '\n') {
if (chr != '\r' && (chr < 0x20 || chr > 0x7f)) {
DEBUG("[#] Not HTTP - character 0x%02x encountered.\n", chr);
f->in_http = -1;
return 0;
}
off++;
}
/* Newline too far or too close? */
if (off == HTTP_MAX_URL || off < 14) {
DEBUG("[#] Not HTTP - newline offset %u.\n", off);
f->in_http = -1;
return 0;
}
/* Not enough data yet? */
if (off == f->req_len) {
f->http_pos = off;
if (!can_get_more) {
DEBUG("[#] Not HTTP - no opening line found.\n");
f->in_http = -1;
}
return can_get_more;
}
sig_at = pay + off - 8;
if (pay[off - 1] == '\r') sig_at--;
/* Bad HTTP/1.x signature? */
if (strncmp((char*)sig_at, "HTTP/1.", 7)) {
DEBUG("[#] Not HTTP - bad signature.\n");
f->in_http = -1;
return 0;
}
f->http_tmp.http_ver = (sig_at[7] == '1');
f->in_http = 1;
f->http_pos = off + 1;
DEBUG("[#] HTTP detected.\n");
}
return parse_pairs(1, f, can_get_more);
} else {
u8* pay = f->response;
u8 can_get_more = (f->resp_len < MAX_FLOW_DATA);
u32 off;
/* Response before request? Bail out. */
if (!f->in_http || !f->http_req_done) {
f->in_http = -1;
return 0;
}
if (!f->http_gotresp1) {
u8 chr;
if (f->resp_len < 13) return can_get_more;
/* Scan until \n, or until binary data spotted. */
off = f->http_pos;
while (off < f->resp_len && off < HTTP_MAX_URL &&
(chr = pay[off]) != '\n') {
if (chr != '\r' && (chr < 0x20 || chr > 0x7f)) {
DEBUG("[#] Invalid HTTP response - character 0x%02x encountered.\n",
chr);
f->in_http = -1;
return 0;
}
off++;
}
/* Newline too far or too close? */
if (off == HTTP_MAX_URL || off < 13) {
DEBUG("[#] Invalid HTTP response - newline offset %u.\n", off);
f->in_http = -1;
return 0;
}
/* Not enough data yet? */
if (off == f->resp_len) {
f->http_pos = off;
if (!can_get_more) {
DEBUG("[#] Invalid HTTP response - no opening line found.\n");
f->in_http = -1;
}
return can_get_more;
}
/* Bad HTTP/1.x signature? */
if (strncmp((char*)pay, "HTTP/1.", 7)) {
DEBUG("[#] Invalid HTTP response - bad signature.\n");
f->in_http = -1;
return 0;
}
f->http_tmp.http_ver = (pay[7] == '1');
f->http_pos = off + 1;
DEBUG("[#] HTTP response starts correctly.\n");
}
return parse_pairs(0, f, can_get_more);
}
}