afl-fuzz.c 的代码阅读。

总结

calculate_score

/* Calculate case desirability score to adjust the length of havoc fuzzing.
   A helper function for fuzz_one(). Maybe some of these constants should
   go into config.h. */
 
static u32 calculate_score(struct queue_entry* q) {
 
  u32 avg_exec_us = total_cal_us / total_cal_cycles;
  u32 avg_bitmap_size = total_bitmap_size / total_bitmap_entries;
  u32 perf_score = 100;
 
  /* Adjust score based on execution speed of this path, compared to the
     global average. Multiplier ranges from 0.1x to 3x. Fast inputs are
     less expensive to fuzz, so we're giving them more air time. */
 
  if (q->exec_us * 0.1 > avg_exec_us) perf_score = 10;
  else if (q->exec_us * 0.25 > avg_exec_us) perf_score = 25;
  else if (q->exec_us * 0.5 > avg_exec_us) perf_score = 50;
  else if (q->exec_us * 0.75 > avg_exec_us) perf_score = 75;
  else if (q->exec_us * 4 < avg_exec_us) perf_score = 300;
  else if (q->exec_us * 3 < avg_exec_us) perf_score = 200;
  else if (q->exec_us * 2 < avg_exec_us) perf_score = 150;
 
  /* Adjust score based on bitmap size. The working theory is that better
     coverage translates to better targets. Multiplier from 0.25x to 3x. */
 
  if (q->bitmap_size * 0.3 > avg_bitmap_size) perf_score *= 3;
  else if (q->bitmap_size * 0.5 > avg_bitmap_size) perf_score *= 2;
  else if (q->bitmap_size * 0.75 > avg_bitmap_size) perf_score *= 1.5;
  else if (q->bitmap_size * 3 < avg_bitmap_size) perf_score *= 0.25;
  else if (q->bitmap_size * 2 < avg_bitmap_size) perf_score *= 0.5;
  else if (q->bitmap_size * 1.5 < avg_bitmap_size) perf_score *= 0.75;
 
  /* Adjust score based on handicap. Handicap is proportional to how late
     in the game we learned about this path. Latecomers are allowed to run
     for a bit longer until they catch up with the rest. */
 
  if (q->handicap >= 4) {
 
    perf_score *= 4;
    q->handicap -= 4;
 
  } else if (q->handicap) {
 
    perf_score *= 2;
    q->handicap--;
 
  }
 
  /* Final adjustment based on input depth, under the assumption that fuzzing
     deeper test cases is more likely to reveal stuff that can't be
     discovered with traditional fuzzers. */
 
  switch (q->depth) {
 
    case 0 ... 3:   break;
    case 4 ... 7:   perf_score *= 2; break;
    case 8 ... 13:  perf_score *= 3; break;
    case 14 ... 25: perf_score *= 4; break;
    default:        perf_score *= 5;
 
  }
 
  /* Make sure that we don't go over limit. */
 
  if (perf_score > HAVOC_MAX_MULT * 100) perf_score = HAVOC_MAX_MULT * 100;
 
  return perf_score;
 
}

classify_counts

#ifdef WORD_SIZE_64
 
static inline void classify_counts(u64* mem) {
 
  u32 i = MAP_SIZE >> 3;
 
  while (i--) {
 
    /* Optimize for sparse bitmaps. */
 
    if (unlikely(*mem)) {
 
      u16* mem16 = (u16*)mem;
 
      mem16[0] = count_class_lookup16[mem16[0]];
      mem16[1] = count_class_lookup16[mem16[1]];
      mem16[2] = count_class_lookup16[mem16[2]];
      mem16[3] = count_class_lookup16[mem16[3]];
 
    }
 
    mem++;
 
  }
 
}
 
#else
 
static inline void classify_counts(u32* mem) {
 
  u32 i = MAP_SIZE >> 2;
 
  while (i--) {
 
    /* Optimize for sparse bitmaps. */
 
    if (unlikely(*mem)) {
 
      u16* mem16 = (u16*)mem;
 
      mem16[0] = count_class_lookup16[mem16[0]];
      mem16[1] = count_class_lookup16[mem16[1]];
 
    }
 
    mem++;
 
  }
 
}
 
#endif /* ^WORD_SIZE_64 */

common_fuzz_stuff

Tldr

这个函数会处理输入,将输入写入文件并执行,在执行后处理错误情况。如果出错返回 1。

/* Write a modified test case, run program, process results. Handle
   error conditions, returning 1 if it's time to bail out. This is
   a helper function for fuzz_one(). */
 
EXP_ST u8 common_fuzz_stuff(char** argv, u8* out_buf, u32 len) {
 
  u8 fault;

如果有后处理逻辑则处理输入:

  if (post_handler) {
 
    out_buf = post_handler(out_buf, &len);
    if (!out_buf || !len) return 0;
 
  }

将输入写入文件并调用 run_target 执行:

  write_to_testcase(out_buf, len);
 
  fault = run_target(argv, exec_tmout);

处理错误情况:

  • 如果程序停了就返回 1;
  • 如果错误代码是 FAULT_TMOUT(超时错误):
    • subseq_tmouts(连续超时数量)加一;
    • 如果连续超时数量大于 TMOUT_LIMIT(默认为 250),将 cur_skipped_paths 加一并返回 1。
  • 如果没有超时,就将 subseq_tmouts 设置为 0;
  if (stop_soon) return 1;
 
  if (fault == FAULT_TMOUT) {
 
    if (subseq_tmouts++ > TMOUT_LIMIT) {
      cur_skipped_paths++;
      return 1;
    }
 
  } else subseq_tmouts = 0;

skip_requested 参数在 handle_skipreq 中设置,用户向程序发送 SIGUSR1 信号时触发,如果触发了 SIGUSR1 信号,跳过后续的处理并对 cur_skipped_paths 加一,之后返回 1。

  /* Users can hit us with SIGUSR1 to request the current input
     to be abandoned. */
 
  if (skip_requested) {
 
     skip_requested = 0;
     cur_skipped_paths++;
     return 1;
 
  }

调用 save_if_interesting 继续处理其他错误,如果输入发现了新路径,就对 queued_discovered 加一。

  /* This handles FAULT_ERROR for us: */
 
  queued_discovered += save_if_interesting(argv, out_buf, len, fault);

更新界面。

  if (!(stage_cur % stats_update_freq) || stage_cur + 1 == stage_max)
    show_stats();
 
  return 0;
 
}

fuzz_one

fuzz_one

handle_skipreq

处理 SIGUSR1 信号

/* Handle skip request (SIGUSR1). */
static void handle_skipreq(int sig) {
  skip_requested = 1;
}

handle_timeout

处理 SIGALRM 信号,如果是以子进程的方式启动的,就 kill 掉子进程;如果是以 fork server 形式启动的,则 kill 掉 forkserver 启动的进程。

/* Handle timeout (SIGALRM). */
 
static void handle_timeout(int sig) {
 
  if (child_pid > 0) {
 
    child_timed_out = 1; 
    kill(child_pid, SIGKILL);
 
  } else if (child_pid == -1 && forksrv_pid > 0) {
 
    child_timed_out = 1; 
    kill(forksrv_pid, SIGKILL);
 
  }
 
}

init_forkserver

Todo

load_extras

Todo

maybe_add_auto

添加自动识别的 tokens。

  • 跳过相同字节的情况;
  • 跳过和内置有趣值相同的情况;
  • 跳过和 extra 数组中已有 token 相同的情况;
/* Maybe add automatic extra. */
 
static void maybe_add_auto(u8* mem, u32 len) {
 
  u32 i;

如果收集到了足够的 token 或者用户不想自动识别就不收集;

  /* Allow users to specify that they don't want auto dictionaries. */
 
  if (!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS) return;

跳过相同字节的情况:如果所有的字节都相同就跳过;

  /* Skip runs of identical bytes. */
 
  for (i = 1; i < len; i++)
    if (mem[0] ^ mem[i]) break;
 
  if (i == len) return;

跳过和内置有趣值相同的情况,包括内置有趣值取反的情况,仅比较 len=2len=4 的情况:

  /* Reject builtin interesting values. */
 
  if (len == 2) {
 
    i = sizeof(interesting_16) >> 1;
 
    while (i--) 
      if (*((u16*)mem) == interesting_16[i] ||
          *((u16*)mem) == SWAP16(interesting_16[i])) return;
 
  }
 
  if (len == 4) {
 
    i = sizeof(interesting_32) >> 2;
 
    while (i--) 
      if (*((u32*)mem) == interesting_32[i] ||
          *((u32*)mem) == SWAP32(interesting_32[i])) return;
 
  }

检查 extra 数组,跳过与 extra 数组中已有 token 相同的情况。

因为 extra 数组中的值是按照长度从小到大排序的,因此可以快速判断

  /* Reject anything that matches existing extras. Do a case-insensitive
     match. We optimize by exploiting the fact that extras[] are sorted
     by size. */
 
  for (i = 0; i < extras_cnt; i++)
    if (extras[i].len >= len) break;
 
  for (; i < extras_cnt && extras[i].len == len; i++)
    if (!memcmp_nocase(extras[i].data, mem, len)) return;

检查 a_extra 数组,如果发现这个 token 已经被发现过了,就给这个 token 的计数器加一,并跳转到排序逻辑:

  /* Last but not least, check a_extras[] for matches. There are no
     guarantees of a particular sort order. */
 
  auto_changed = 1;
 
  for (i = 0; i < a_extras_cnt; i++) {
 
    if (a_extras[i].len == len && !memcmp_nocase(a_extras[i].data, mem, len)) {
 
      a_extras[i].hit_cnt++;
      goto sort_a_extras;
 
    }
 
  }

在跳过了众多可能重复的情况后,这里到达了添加新 token 的逻辑。如果发现的 token 太多,就随机从列表的后半部分替换掉一个 token。

  /* At this point, looks like we're dealing with a new entry. So, let's
     append it if we have room. Otherwise, let's randomly evict some other
     entry from the bottom half of the list. */
 
  if (a_extras_cnt < MAX_AUTO_EXTRAS) {
 
    a_extras = ck_realloc_block(a_extras, (a_extras_cnt + 1) *
                                sizeof(struct extra_data));
 
    a_extras[a_extras_cnt].data = ck_memdup(mem, len);
    a_extras[a_extras_cnt].len  = len;
    a_extras_cnt++;
 
  } else {
 
    i = MAX_AUTO_EXTRAS / 2 +
        UR((MAX_AUTO_EXTRAS + 1) / 2);
 
    ck_free(a_extras[i].data);
 
    a_extras[i].data    = ck_memdup(mem, len);
    a_extras[i].len     = len;
    a_extras[i].hit_cnt = 0;
 
  }

最后对自动发现的 token 数据先按照命中次数排序,再按照 token 长度排序。在这里的细节是:

  • 对于命中次数排序,会对 a_extras_cnt 数量的 token 进行排序,默认 a_extras_cnt 小于 500(USE_AUTO_EXTRAS*10);
  • 对于 token 长度排序,会对 MIN(USE_AUTO_EXTRAS, a_extras_cnt) 数量的 token 排序,默认 USE_AUTO_EXTRAS 值为 50,因此最多对前 50 个 token 按照长度排序。

这就是为什么在发现 token 的时候会随机驱逐 a_extras 后半部分的 token,事实上前 50 个 token 才是最有用的。

sort_a_extras:
 
  /* First, sort all auto extras by use count, descending order. */
 
  qsort(a_extras, a_extras_cnt, sizeof(struct extra_data),
        compare_extras_use_d);
 
  /* Then, sort the top USE_AUTO_EXTRAS entries by size. */
 
  qsort(a_extras, MIN(USE_AUTO_EXTRAS, a_extras_cnt),
        sizeof(struct extra_data), compare_extras_len);
 
}

run_target

Tldr

执行目标程序并监控执行状态,更新覆盖率信息。

/* Execute target application, monitoring for timeouts. Return status
   information. The called program will update trace_bits[]. */
 
static u8 run_target(char** argv, u32 timeout) {
 
  static struct itimerval it;
  static u32 prev_timed_out = 0;
  static u64 exec_ms = 0;
 
  int status = 0;
  u32 tb4;
 
  child_timed_out = 0;

清空共享内存,并设置内存屏障

  /* After this memset, trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */
 
  memset(trace_bits, 0, MAP_SIZE);
  MEM_BARRIER();

如果在 dumb 模式下或没有开启 forkserver,就 fork 出一个子进程,然后让子进程 execv 目标程序。这一段代码和 init_forkserver 中的代码有一些重复:

  /* If we're running in "dumb" mode, we can't rely on the fork server
     logic compiled into the target program, so we will just keep calling
     execve(). There is a bit of code duplication between here and 
     init_forkserver(), but c'est la vie. */
 
  if (dumb_mode == 1 || no_forkserver) {
    child_pid = fork();
    if (child_pid < 0) PFATAL("fork() failed");
    if (!child_pid) {
      struct rlimit r;
      if (mem_limit) {
        r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
#ifdef RLIMIT_AS
        setrlimit(RLIMIT_AS, &r); /* Ignore errors */
#else
        setrlimit(RLIMIT_DATA, &r); /* Ignore errors */
#endif /* ^RLIMIT_AS */
      }
      r.rlim_max = r.rlim_cur = 0;
      setrlimit(RLIMIT_CORE, &r); /* Ignore errors */
      /* Isolate the process and configure standard descriptors. If out_file is
         specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */
      setsid();
      dup2(dev_null_fd, 1);
      dup2(dev_null_fd, 2);
      if (out_file) {
        dup2(dev_null_fd, 0);
      } else {
        dup2(out_fd, 0);
        close(out_fd);
      }
 
      /* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */
 
      close(dev_null_fd);
      close(out_dir_fd);
      close(dev_urandom_fd);
      close(fileno(plot_file));
 
      /* Set sane defaults for ASAN if nothing else specified. */
 
      setenv("ASAN_OPTIONS", "abort_on_error=1:"
                             "detect_leaks=0:"
                             "symbolize=0:"
                             "allocator_may_return_null=1", 0);
 
      setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                             "symbolize=0:"
                             "msan_track_origins=0", 0);
 
      execv(target_path, argv);

如果执行失败,就将 trace_bits 标记为 EXEC_FAIL_SIG

      /* Use a distinctive bitmap value to tell the parent about execv()
         falling through. */
 
      *(u32*)trace_bits = EXEC_FAIL_SIG;
      exit(0);
 
    }
 
  }

如果不在 dumb 模式,向 fsrv_ctl_fd(控制管道)写入 prev_timed_out 的值,接下来从 fsrv_st_fd(状态管道)读子进程的 pid:

  else {
 
    s32 res;
 
    /* In non-dumb mode, we have the fork server up and running, so simply
       tell it to have at it, and then read back PID. */
 
    if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {
 
      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");
 
    }
 
    if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4) {
 
      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");
 
    }
 
    if (child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");
 
  }

配置超时,如果超时了,那么 AFL 会发送 SIGALRM 信号,进入到 handle_timeout 的处理逻辑。

  /* Configure timeout, as requested by user, then wait for child to terminate. */
 
  it.it_value.tv_sec = (timeout / 1000);
  it.it_value.tv_usec = (timeout % 1000) * 1000;
 
  setitimer(ITIMER_REAL, &it, NULL);

等待子进程结束:

  /* The SIGALRM handler simply kills the child_pid and sets child_timed_out. */
 
  if (dumb_mode == 1 || no_forkserver) {
    if (waitpid(child_pid, &status, 0) <= 0) PFATAL("waitpid() failed");
  }

如果不在 dumb 模式且是 forkserver,会从状态管道读取程序的退出原因:

  else {
 
    s32 res;
 
    if ((res = read(fsrv_st_fd, &status, 4)) != 4) {
 
      if (stop_soon) return 0;
      RPFATAL(res, "Unable to communicate with fork server (OOM?)");
 
    }
 
  }

如果子进程未停止,则将子进程 pid 设置为 0。

  if (!WIFSTOPPED(status)) child_pid = 0;

取消定时器:

  getitimer(ITIMER_REAL, &it);
  exec_ms = (u64) timeout - (it.it_value.tv_sec * 1000 +
                             it.it_value.tv_usec / 1000);
 
  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;
 
  setitimer(ITIMER_REAL, &it, NULL);

执行次数加一:

  total_execs++;

设置内存屏障,处理覆盖率信息,这里通过 WORD_SIZE_64 调用处理不同架构宽度下的 classify_counts 函数:

  /* Any subsequent operations on trace_bits must not be moved by the
     compiler below this point. Past this location, trace_bits[] behave
     very normally and do not have to be treated as volatile. */
 
  MEM_BARRIER();
 
  tb4 = *(u32*)trace_bits;
 
#ifdef WORD_SIZE_64
  classify_counts((u64*)trace_bits);
#else
  classify_counts((u32*)trace_bits);
#endif /* ^WORD_SIZE_64 */

设置 prev_timed_out 的值为 child_timed_out

  prev_timed_out = child_timed_out;

根据 status 的值向调用者返回结果:

  • WIFSIGNALED(status) 若为异常结束子进程返回的状态,则为真
    • WTERMSIG(status)取得子进程因信号而中止的信号代码
      • 如果child_timed_out为1,且状态码为SIGKILL,则返回FAULT_TMOUT
      • 否则返回 FAULT_CRASH
  /* Report outcome to caller. */
 
  if (WIFSIGNALED(status) && !stop_soon) {
    kill_signal = WTERMSIG(status);
    if (child_timed_out && kill_signal == SIGKILL) return FAULT_TMOUT;
    return FAULT_CRASH;
  }

处理使用 MSAN 时会遇到的特殊情况:

  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
     must use a special exit code. */
 
  if (uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
    kill_signal = 0;
    return FAULT_CRASH;
  }
  • 如果是 dumb_mode,且 trace_bitsEXEC_FAIL_SIG,就返回 FAULT_ERROR
  if ((dumb_mode == 1 || no_forkserver) && tb4 == EXEC_FAIL_SIG)
    return FAULT_ERROR;
  • 如果 timeout 小于等于 exec_tmout,且 slowest_exec_ms 小于 exec_ms,设置 slowest_exec_ms 等于 exec_ms
  /* It makes sense to account for the slowest units only if the testcase was run
  under the user defined timeout. */
  if (!(timeout > exec_tmout) && (slowest_exec_ms < exec_ms)) {
    slowest_exec_ms = exec_ms;
  }
  • 返回 FAULT_NONE
  return FAULT_NONE;
 
}

save_if_interesting

Todo

trim_case

/* Trim all new test cases to save cycles when doing deterministic checks. The
   trimmer uses power-of-two increments somewhere between 1/16 and 1/1024 of
   file size, to keep the stage short and sweet. */
 
static u8 trim_case(char** argv, struct queue_entry* q, u8* in_buf) {
 
  static u8 tmp[64];
  static u8 clean_trace[MAP_SIZE];
 
  u8  needs_write = 0, fault = 0;
  u32 trim_exec = 0;
  u32 remove_len;
  u32 len_p2;
 
  /* Although the trimmer will be less useful when variable behavior is
     detected, it will still work to some extent, so we don't check for
     this. */
 
  if (q->len < 5) return 0;
 
  stage_name = tmp;
  bytes_trim_in += q->len;
 
  /* Select initial chunk len, starting with large steps. */
 
  len_p2 = next_p2(q->len);
 
  remove_len = MAX(len_p2 / TRIM_START_STEPS, TRIM_MIN_BYTES);
 
  /* Continue until the number of steps gets too high or the stepover
     gets too small. */
 
  while (remove_len >= MAX(len_p2 / TRIM_END_STEPS, TRIM_MIN_BYTES)) {
 
    u32 remove_pos = remove_len;
 
    sprintf(tmp, "trim %s/%s", DI(remove_len), DI(remove_len));
 
    stage_cur = 0;
    stage_max = q->len / remove_len;
 
    while (remove_pos < q->len) {
 
      u32 trim_avail = MIN(remove_len, q->len - remove_pos);
      u32 cksum;
 
      write_with_gap(in_buf, q->len, remove_pos, trim_avail);
 
      fault = run_target(argv, exec_tmout);
      trim_execs++;
 
      if (stop_soon || fault == FAULT_ERROR) goto abort_trimming;
 
      /* Note that we don't keep track of crashes or hangs here; maybe TODO? */
 
      cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
 
      /* If the deletion had no impact on the trace, make it permanent. This
         isn't perfect for variable-path inputs, but we're just making a
         best-effort pass, so it's not a big deal if we end up with false
         negatives every now and then. */
 
      if (cksum == q->exec_cksum) {
 
        u32 move_tail = q->len - remove_pos - trim_avail;
 
        q->len -= trim_avail;
        len_p2  = next_p2(q->len);
 
        memmove(in_buf + remove_pos, in_buf + remove_pos + trim_avail, 
                move_tail);
 
        /* Let's save a clean trace, which will be needed by
           update_bitmap_score once we're done with the trimming stuff. */
 
        if (!needs_write) {
 
          needs_write = 1;
          memcpy(clean_trace, trace_bits, MAP_SIZE);
 
        }
 
      } else remove_pos += remove_len;
 
      /* Since this can be slow, update the screen every now and then. */
 
      if (!(trim_exec++ % stats_update_freq)) show_stats();
      stage_cur++;
 
    }
 
    remove_len >>= 1;
 
  }
 
  /* If we have made changes to in_buf, we also need to update the on-disk
     version of the test case. */
 
  if (needs_write) {
 
    s32 fd;
 
    unlink(q->fname); /* ignore errors */
 
    fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);
 
    if (fd < 0) PFATAL("Unable to create '%s'", q->fname);
 
    ck_write(fd, in_buf, q->len, q->fname);
    close(fd);
 
    memcpy(trace_bits, clean_trace, MAP_SIZE);
    update_bitmap_score(q);
 
  }
 
abort_trimming:
 
  bytes_trim_out += q->len;
  return fault;
 
}

参考资料