/* Take the current entry from the queue, fuzz it for a while. This
   function is a tad too long... returns 0 if fuzzed successfully, 1 if
   skipped or bailed out. */

static u8 fuzz_one(char** argv) {

  s32 len, fd, temp_len, i, j;
  u8  *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
  u64 havoc_queued, orig_hit_cnt, new_hit_cnt;
  u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1;

  u8  ret_val = 1, doing_det = 0;

  u8  a_collect[MAX_AUTO_EXTRA];
  u32 a_len = 0;
/* Uncomment this to use instrumentation data to record newly discovered paths,
   but do not use them as seeds for fuzzing. This is useful for conveniently
   measuring coverage that could be attained by a "dumb" fuzzing algorithm: */

// #define IGNORE_FINDS
#else

  if (pending_favored) {

    /* If we have any favored, non-fuzzed new arrivals in the queue,
       possibly skip to them at the expense of already-fuzzed or non-favored
       cases. */

    if ((queue_cur->was_fuzzed || !queue_cur->favored) &&
        UR(100) < SKIP_TO_NEW_PROB) return 1;

  }
If the pending-favored check above does not skip the entry, execution falls through to the next branch. That branch is entered when:
there are no pending favored seeds left in the queue;
we are not in dumb_mode (i.e., the no-instrumentation mode);
the current queue entry is not favored;
the queue holds more than 10 test cases in total.
In that case, if:
more than one full queue cycle has already been completed, and
the current queue entry has never been fuzzed,
then the entry is skipped with 75% probability (SKIP_NFAV_NEW_PROB); otherwise it is skipped with 95% probability (SKIP_NFAV_OLD_PROB).
else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) {

    /* Otherwise, still possibly skip non-favored cases, albeit less often.
       The odds of skipping stuff are higher for already-fuzzed inputs and
       lower for never-fuzzed entries. */

    if (queue_cycle > 1 && !queue_cur->was_fuzzed) {

      if (UR(100) < SKIP_NFAV_NEW_PROB) return 1;

    } else {

      if (UR(100) < SKIP_NFAV_OLD_PROB) return 1;

    }

  }

#endif /* ^IGNORE_FINDS */

  if (not_on_tty) {
    ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
         current_entry, queued_paths, unique_crashes);
    fflush(stdout);
  }
Coverage-based Greybox Fuzzing as Markov Chain (AFLFast)
Fetching the input
/* Map the test case into memory. */

  fd = open(queue_cur->fname, O_RDONLY);

  if (fd < 0) PFATAL("Unable to open '%s'", queue_cur->fname);

  len = queue_cur->len;

  orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

  if (orig_in == MAP_FAILED) PFATAL("Unable to mmap '%s'", queue_cur->fname);

  close(fd);

  /* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every
     single byte anyway, so it wouldn't give us any performance or memory usage
     benefits. */

  out_buf = ck_alloc_nozero(len);

  subseq_tmouts = 0;

  cur_depth = queue_cur->depth;
/*******************************************
   * CALIBRATION (only if failed earlier on) *
   *******************************************/

  if (queue_cur->cal_failed) {

    u8 res = FAULT_TMOUT;

    if (queue_cur->cal_failed < CAL_CHANCES) {

      /* Reset exec_cksum to tell calibrate_case to re-execute the testcase
         avoiding the usage of an invalid trace_bits.
         For more info: https://github.com/AFLplusplus/AFLplusplus/pull/425 */

      queue_cur->exec_cksum = 0;

      res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0);

      if (res == FAULT_ERROR)
        FATAL("Unable to execute target application");

    }
/* Skip right away if -d is given, if we have done deterministic fuzzing on
     this entry ourselves (was_fuzzed), or if it has gone through deterministic
     testing in earlier, resumed runs (passed_det). */

  if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det)
    goto havoc_stage;
/* Skip deterministic fuzzing if exec path checksum puts this out of scope
     for this master instance. */

  if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
    goto havoc_stage;

  doing_det = 1;
/* While flipping the least significant bit in every byte, pull off an extra
     trick to detect possible syntax tokens. In essence, the idea is that if
     you have a binary blob like this:

     xxxxxxxxIHDRxxxxxxxx

     ...and changing the leading and trailing bytes causes variable or no
     changes in program flow, but touching any character in the "IHDR" string
     always produces the same, distinctive path, it's highly likely that
     "IHDR" is an atomically-checked magic value of special significance to
     the fuzzed format.

     We do this here, rather than as a separate stage, because it's a nice
     way to keep the operation approximately "free" (i.e., no extra execs).

     Empirically, performing the check when flipping the least significant bit
     is advantageous, compared to doing it at the time of more disruptive
     changes, where the program flow may be affected in more violent ways.

     The caveat is that we won't generate dictionaries in the -d mode or -S
     mode - but that's probably a fair trade-off.

     This won't work particularly well with paths that exhibit variable
     behavior, but fails gracefully, so we'll carry out the checks anyway. */
if (!dumb_mode && (stage_cur & 7) == 7) {

      u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

      if (stage_cur == stage_max - 1 && cksum == prev_cksum) {

        /* If at end of file and we are still collecting a string, grab the
           final character and force output. */

        if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
        a_len++;

        if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
          maybe_add_auto(a_collect, a_len);

      } else if (cksum != prev_cksum) {

        /* Otherwise, if the checksum has changed, see if we have something
           worthwhile queued up, and collect that if the answer is yes. */

        if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
          maybe_add_auto(a_collect, a_len);

        a_len = 0;
        prev_cksum = cksum;

      }

      /* Continue collecting string, but only if the bit flip actually made
         any difference - we don't want no-op tokens. */

      if (cksum != queue_cur->exec_cksum) {

        if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
        a_len++;

      }

    }

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP1]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP1] += stage_max;
/* Effector map setup. These macros calculate:

     EFF_APOS      - position of a particular file offset in the map.
     EFF_ALEN      - length of a map with a particular number of bytes.
     EFF_SPAN_ALEN - map span for a sequence of bytes. */

#define EFF_APOS(_p)          ((_p) >> EFF_MAP_SCALE2)
#define EFF_REM(_x)           ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
#define EFF_ALEN(_l)          (EFF_APOS(_l) + !!EFF_REM(_l))
#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)

  /* Initialize effector map for the next step (see comments below). Always
     flag first and last byte as doing something. */

  eff_map    = ck_alloc(EFF_ALEN(len));
  eff_map[0] = 1;

  if (EFF_APOS(len - 1) != 0) {
    eff_map[EFF_APOS(len - 1)] = 1;
    eff_cnt++;
  }
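To make the arithmetic concrete, here is a minimal standalone sketch. It assumes the stock config.h value EFF_MAP_SCALE2 = 3 (one effector bucket per 8 input bytes) and a hypothetical 20-byte test case; it only evaluates the macros, nothing else:

#include <stdio.h>
#include <stdint.h>

/* Assumed stock config.h value: each bucket covers 2^3 = 8 input bytes. */
#define EFF_MAP_SCALE2 3

#define EFF_APOS(_p)          ((_p) >> EFF_MAP_SCALE2)
#define EFF_REM(_x)           ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
#define EFF_ALEN(_l)          (EFF_APOS(_l) + !!EFF_REM(_l))
#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)

int main(void) {
  uint32_t len = 20;  /* hypothetical 20-byte test case */
  printf("EFF_APOS(19)        = %u\n", EFF_APOS(19));         /* 2: last byte lives in bucket 2 */
  printf("EFF_ALEN(20)        = %u\n", EFF_ALEN(len));        /* 3: two full buckets + one partial */
  printf("EFF_SPAN_ALEN(6, 4) = %u\n", EFF_SPAN_ALEN(6, 4));  /* 2: bytes 6..9 straddle buckets 0 and 1 */
  return 0;
}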
/* We also use this stage to pull off a simple trick: we identify bytes that
       seem to have no effect on the current execution path even when fully
       flipped - and we skip them during more expensive deterministic stages,
       such as arithmetics or known ints. */

    if (!eff_map[EFF_APOS(stage_cur)]) {

      u32 cksum;

      /* If in dumb mode or if the file is very short, just flag everything
         without wasting time on checksums. */

      if (!dumb_mode && len >= EFF_MIN_LEN)
        cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
      else
        cksum = ~queue_cur->exec_cksum;

      if (cksum != queue_cur->exec_cksum) {
        eff_map[EFF_APOS(stage_cur)] = 1;
        eff_cnt++;
      }

    }

    out_buf[stage_cur] ^= 0xFF;

  }

  /* If the effector map is more than EFF_MAX_PERC dense, just flag the whole
     thing as worth fuzzing, since we wouldn't be saving much time anyway. */

  if (eff_cnt != EFF_ALEN(len) &&
      eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) {

    memset(eff_map, 1, EFF_ALEN(len));

    blocks_eff_select += EFF_ALEN(len);

  } else {

    blocks_eff_select += eff_cnt;

  }

  blocks_eff_total += EFF_ALEN(len);

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP8]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP8] += stage_max;
bitflip 16/8
With a step of 1 byte, flip 2 adjacent bytes (16 bits) at a time:
/* Two walking bytes. */

  if (len < 2) goto skip_bitflip;

  stage_name  = "bitflip 16/8";
  stage_short = "flip16";
  stage_cur   = 0;
  stage_max   = len - 1;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 1; i++) {
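The loop body is elided in this excerpt; in stock AFL it looks roughly as sketched below - consult the effector map for both affected bytes, XOR the 16-bit word in place, run the target, and then restore the word:

    /* Sketch of the omitted loop body (abridged). */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
      stage_max--;
      continue;
    }

    stage_cur_byte = i;

    *(u16*)(out_buf + i) ^= 0xFFFF;   /* flip 16 bits in place      */

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
    stage_cur++;

    *(u16*)(out_buf + i) ^= 0xFFFF;   /* restore the original bytes */

  }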
/* Setting 8-bit integers. */

  for (i = 0; i < len; i++) {

    u8 orig = out_buf[i];
First, consult the effector map and skip bytes that have no observed effect:
/* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)]) {
      stage_max -= sizeof(interesting_8);
      continue;
    }

    stage_cur_byte = i;
Then, for each byte, iterate over every candidate value in interesting_8, which is why stage_max = len * sizeof(interesting_8):
for (j = 0; j < sizeof(interesting_8); j++) {
Skip values that could already have been produced by the bitflip or arithmetic stages:
/* Skip if the value could be a product of bitflips or arithmetics. */

      if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
          could_be_arith(orig, (u8)interesting_8[j], 1)) {
        stage_max--;
        continue;
      }
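The deduplication helpers are defined elsewhere in afl-fuzz.c. As a rough sketch of the idea, could_be_bitflip() checks whether the XOR difference between the old and new value is a contiguous run of 1, 2 or 4 bits, or a byte-aligned run of 8/16/32 bits - exactly the patterns the earlier deterministic bitflip stages already exercised:

/* Rough sketch of could_be_bitflip() as it appears in stock AFL. */

static u8 could_be_bitflip(u32 xor_val) {

  u32 sh = 0;

  if (!xor_val) return 1;               /* no change at all */

  /* Shift right until the first set bit. */

  while (!(xor_val & 1)) { sh++; xor_val >>= 1; }

  /* 1-, 2-, and 4-bit patterns are OK anywhere. */

  if (xor_val == 1 || xor_val == 3 || xor_val == 15) return 1;

  /* 8-, 16-, and 32-bit patterns are OK only if the shift factor is
     divisible by 8, since that's the stepover for these ops. */

  if (sh & 7) return 0;

  if (xor_val == 0xff || xor_val == 0xffff || xor_val == 0xffffffff)
    return 1;

  return 0;

}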
/* Extras are sorted by size, from smallest to largest. This means that we
       don't have to worry about restoring the buffer in between writes at a
       particular offset determined by the outer loop. */

    for (j = 0; j < extras_cnt; j++) {
/* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also skip
         them if there's no room to insert the payload, if the token is
         redundant, or if its entire span has no bytes set in the effector
         map. */

      if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
          extras[j].len > len - i ||
          !memcmp(extras[j].data, out_buf + i, extras[j].len) ||
          !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {

        stage_max--;
        continue;

      }

      last_len = extras[j].len;
      memcpy(out_buf + i, extras[j].data, last_len);

      if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

      stage_cur++;

    }

    /* Restore all the clobbered memory. */
    memcpy(out_buf + i, in_buf + i, last_len);

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_EXTRAS_UO]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_EXTRAS_UO] += stage_max;
user extras (insert)
Starting from offset 0, insert each user-supplied token into the original file in turn:
/* Insertion of user-supplied extras. */

  stage_name  = "user extras (insert)";
  stage_short = "ext_UI";
  stage_cur   = 0;
  stage_max   = extras_cnt * (len + 1);

  orig_hit_cnt = new_hit_cnt;

  ex_tmp = ck_alloc(len + MAX_DICT_FILE);

  for (i = 0; i <= len; i++) {

    stage_cur_byte = i;

    for (j = 0; j < extras_cnt; j++) {

      if (len + extras[j].len > MAX_FILE) {
        stage_max--;
        continue;
      }

      /* Insert token */
      memcpy(ex_tmp + i, extras[j].data, extras[j].len);

      /* Copy tail */
      memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);

      if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
        ck_free(ex_tmp);
        goto abandon_entry;
      }

      stage_cur++;

    }

    /* Copy head */
    ex_tmp[i] = out_buf[i];

  }

  ck_free(ex_tmp);

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_EXTRAS_UI]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_EXTRAS_UI] += stage_max;
skip_extras:

  /* If we made this to here without jumping to havoc_stage or abandon_entry,
     we're properly done with deterministic steps and can mark it as such
     in the .state/ directory. */

  if (!queue_cur->passed_det) mark_as_det_done(queue_cur);

  /****************
   * RANDOM HAVOC *
   ****************/

havoc_stage:

  stage_cur_byte = -1;

  /* The havoc stage mutation code is also invoked when splicing files; if the
     splice_cycle variable is set, generate different descriptions and such. */

  if (!splice_cycle) {

    stage_name  = "havoc";
    stage_short = "havoc";
    stage_max   = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
                  perf_score / havoc_div / 100;

  } else {

    static u8 tmp[32];

    perf_score = orig_perf;

    sprintf(tmp, "splice %u", splice_cycle);
    stage_name  = tmp;
    stage_short = "splice";
    stage_max   = SPLICE_HAVOC * perf_score / havoc_div / 100;

  }

  if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN;

  temp_len = len;

  orig_hit_cnt = queued_paths + unique_crashes;

  havoc_queued = queued_paths;

  /* We essentially just do several thousand runs (depending on perf_score)
     where we take the input file and make random stacked tweaks. */

  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));

    stage_cur_val = use_stacking;

    for (i = 0; i < use_stacking; i++) {
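As a rough sanity check of the stage_max formula, the sketch below plugs in what I understand to be the stock config.h constants (HAVOC_CYCLES_INIT = 1024, HAVOC_CYCLES = 256, SPLICE_HAVOC = 32, HAVOC_MIN = 16, HAVOC_STACK_POW2 = 7) together with a hypothetical perf_score of 100 and havoc_div of 1:

#include <stdio.h>
#include <stdint.h>

/* Assumed stock config.h values (vanilla AFL). */
#define HAVOC_CYCLES_INIT 1024
#define HAVOC_CYCLES      256
#define SPLICE_HAVOC      32
#define HAVOC_MIN         16
#define HAVOC_STACK_POW2  7

int main(void) {
  uint32_t perf_score = 100, havoc_div = 1;   /* hypothetical, typical values */

  /* Deterministic stages were just run (doing_det == 1). */
  uint32_t det_max    = HAVOC_CYCLES_INIT * perf_score / havoc_div / 100;  /* 1024 */
  /* Entry re-fuzzed later without deterministic stages. */
  uint32_t nondet_max = HAVOC_CYCLES * perf_score / havoc_div / 100;       /* 256  */
  /* Splicing pass. */
  uint32_t splice_max = SPLICE_HAVOC * perf_score / havoc_div / 100;       /* 32   */

  printf("havoc (after det): %u\n", det_max    < HAVOC_MIN ? HAVOC_MIN : det_max);
  printf("havoc (no det):    %u\n", nondet_max < HAVOC_MIN ? HAVOC_MIN : nondet_max);
  printf("splice havoc:      %u\n", splice_max < HAVOC_MIN ? HAVOC_MIN : splice_max);

  /* use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2)) picks a power of two
     between 2 and 128 stacked mutations per run. */
  printf("stacked tweaks:    %u..%u\n", 1u << 1, 1u << HAVOC_STACK_POW2);
  return 0;
}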
#ifndef IGNORE_FINDS

  /************
   * SPLICING *
   ************/

  /* This is a last-resort strategy triggered by a full round with no findings.
     It takes the current input file, randomly selects another input, and
     splices them together at some offset, then relies on the havoc
     code to mutate that blob. */

retry_splicing:

  if (use_splicing && splice_cycle++ < SPLICE_CYCLES &&
      queued_paths > 1 && queue_cur->len > 1) {

    struct queue_entry* target;
    u32 tid, split_at;
    u8* new_buf;
    s32 f_diff, l_diff;

    /* First of all, if we've modified in_buf for havoc, let's clean that
       up... */

    if (in_buf != orig_in) {
      ck_free(in_buf);
      in_buf = orig_in;
      len = queue_cur->len;
    }

    /* Pick a random queue entry and seek to it. Don't splice with yourself. */

    do { tid = UR(queued_paths); } while (tid == current_entry);

    splicing_with = tid;
    target = queue;

    while (tid >= 100) { target = target->next_100; tid -= 100; }
    while (tid--) target = target->next;

    /* Make sure that the target has a reasonable length. */

    while (target && (target->len < 2 || target == queue_cur)) {
      target = target->next;
      splicing_with++;
    }

    if (!target) goto retry_splicing;

    /* Read the testcase into a new buffer. */

    fd = open(target->fname, O_RDONLY);

    if (fd < 0) PFATAL("Unable to open '%s'", target->fname);

    new_buf = ck_alloc_nozero(target->len);

    ck_read(fd, new_buf, target->len, target->fname);

    close(fd);

    /* Find a suitable splicing location, somewhere between the first and
       the last differing byte. Bail out if the difference is just a single
       byte or so. */

    locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);

    if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
      ck_free(new_buf);
      goto retry_splicing;
    }

    /* Split somewhere between the first and last differing byte. */

    split_at = f_diff + UR(l_diff - f_diff);

    /* Do the thing. */

    len = target->len;
    memcpy(new_buf, in_buf, split_at);
    in_buf = new_buf;

    ck_free(out_buf);
    out_buf = ck_alloc_nozero(len);
    memcpy(out_buf, in_buf, len);

    goto havoc_stage;

  }

#endif /* !IGNORE_FINDS */
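locate_diffs() is a small helper defined elsewhere in afl-fuzz.c. Roughly, it scans both buffers once and reports the first and last byte positions where they disagree (or -1 if the buffers are identical over the compared length):

/* Rough sketch of the locate_diffs() helper from stock AFL. */

static void locate_diffs(u8* ptr1, u8* ptr2, u32 len, s32* first, s32* last) {

  s32 f_loc = -1;
  s32 l_loc = -1;
  u32 pos;

  for (pos = 0; pos < len; pos++) {

    if (*(ptr1++) != *(ptr2++)) {

      if (f_loc == -1) f_loc = pos;   /* remember the first mismatch */
      l_loc = pos;                    /* keep updating the last one  */

    }

  }

  *first = f_loc;
  *last  = l_loc;

}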
Cleanup
ret_val = 0;

abandon_entry:

  splicing_with = -1;

  /* Update pending_not_fuzzed count if we made it through the calibration
     cycle and have not seen this entry before. */

  if (!stop_soon && !queue_cur->cal_failed && !queue_cur->was_fuzzed) {
    queue_cur->was_fuzzed = 1;
    pending_not_fuzzed--;
    if (queue_cur->favored) pending_favored--;
  }

  munmap(orig_in, queue_cur->len);

  if (in_buf != orig_in) ck_free(in_buf);
  ck_free(out_buf);
  ck_free(eff_map);

  return ret_val;

#undef FLIP_BIT

}
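For completeness, the FLIP_BIT macro being undefined here is declared near the start of the bitflip stages (not shown in this excerpt); in stock AFL it is roughly:

/* Flip bit number _b in buffer _ar (bit 0 is the most significant bit of byte 0). */

#define FLIP_BIT(_ar, _b) do { \
    u8* _arf = (u8*)(_ar); \
    u32 _bf  = (_b); \
    _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \
  } while (0)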