blob: ec547b9d0844ca7db5edb6ffbf7d16be6d1edfee [file]
#!/bin/bash
# Run libFuzzer targets with persistent corpus and dictionary management.
#
# Usage: iree-bazel-fuzz [options] <target> [-- fuzzer-args...]
#
# Examples:
# iree-bazel-fuzz //runtime/src/iree/base/internal:unicode_fuzz
# iree-bazel-fuzz //path/to:target -- -max_total_time=60 -jobs=8
# iree-bazel-fuzz -m //path/to:target # minimize corpus first
# Abort on the first unhandled command failure.
set -e
# Source shared library.
# SCRIPT_DIR is the directory containing this script; iree-bazel-lib is loaded
# from that same directory and provides the iree_* helpers used below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/iree-bazel-lib"
iree_bazel_init "iree-bazel-fuzz"
# Expand combined short flags (e.g., -nv -> -n -v).
# NOTE(review): the helper's output is re-parsed by eval, so it is presumably
# emitted shell-quoted - confirm in iree-bazel-lib before changing it.
eval "set -- $(iree_expand_combined_flags "$@")"
# Fuzz-specific globals.
# Root for per-target corpus/ and artifacts/ directories; overridable through
# the IREE_FUZZ_CACHE environment variable.
FUZZ_CACHE_DIR="${IREE_FUZZ_CACHE:-${HOME}/.cache/iree-fuzz-cache}"
# Root for per-target .dict files; overridable through IREE_FUZZ_CORPUS.
FUZZ_CORPUS_DIR="${IREE_FUZZ_CORPUS:-${HOME}/.cache/iree-fuzz-corpus}"
# Set to 1 by -m/--minimize.
FUZZ_MINIMIZE=0
# Set to 1 by -d/--dict_only.
FUZZ_DICT_ONLY=0
FUZZ_MIN_USES=10 # Minimum usage count for dict entries
FUZZ_AUTO_MINIMIZE=1000 # Auto-minimize when corpus exceeds this count
# Print the command-line help text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no
# variable or command expansion.
show_help() {
  cat << 'EOF'
iree-bazel-fuzz - Run libFuzzer targets with persistent corpus/dict
USAGE
iree-bazel-fuzz [options] <target> [-- fuzzer-args...]
OPTIONS
-n, --dry_run Show command without executing
-v, --verbose Show command before executing
-m, --minimize Minimize corpus with set_cover_merge before fuzzing
-d, --dict_only Update dict from corpus without new fuzzing
-h, --help Show this help
NOTE: Short flags can be combined: -nv is equivalent to -n -v
ARGUMENTS
target Bazel fuzz target (required)
fuzzer-args Arguments passed to libFuzzer (after --)
EXAMPLES
# Basic fuzzing (Ctrl+C to stop)
iree-bazel-fuzz //runtime/src/iree/base/internal:unicode_fuzz
# Time-limited with parallel workers
iree-bazel-fuzz //path/to:target -- -max_total_time=60 -jobs=8
# Minimize corpus before fuzzing
iree-bazel-fuzz -m //path/to:target
# Just update dictionary from existing corpus
iree-bazel-fuzz -d //path/to:target
# Overnight fuzzing: all fuzz targets under a path (Ctrl+C to stop)
iree-bazel-fuzz //runtime/src/iree/tokenizer/... -- -jobs=128
# Time-limited multi-target fuzzing
iree-bazel-fuzz //runtime/... -- -jobs=64 -max_total_time=3600
CORPUS & DICTIONARY
Corpus: ~/.cache/iree-fuzz-cache/<relative-path>/<target>/corpus/
Artifacts: ~/.cache/iree-fuzz-cache/<relative-path>/<target>/artifacts/
Dictionary: ~/.cache/iree-fuzz-corpus/<relative-path>/<target>.dict
Paths mirror the source tree structure. Artifacts (crash-*, slow-unit-*)
persist across runs. Dictionary entries (Uses >= 10) are auto-appended.
Corpus is auto-minimized when it exceeds 1000 files.
Set IREE_FUZZ_CACHE / IREE_FUZZ_CORPUS to relocate the two root directories.
COMMON FUZZER FLAGS
-max_total_time=N Stop after N seconds
-jobs=N Run N parallel fuzzing jobs
-workers=M Use M worker processes (default: min(jobs, cores/2))
-dict=file Load additional dictionary
SEE ALSO
iree-bazel-build, iree-bazel-run, iree-bazel-test
EOF
}
# Extract target name from bazel label.
#   "//foo/bar:baz" -> "baz"
#   "//foo/bar"     -> "bar"  (Bazel shorthand for "//foo/bar:bar")
#   "baz"           -> "baz"
# Arguments:
#   $1 - bazel label
# Outputs:
#   The target name on stdout.
get_target_name() {
  local target="${1}"
  if [[ "${target}" == *:* ]]; then
    # Explicit name: everything after the last colon.
    printf '%s\n' "${target##*:}"
  else
    # No colon: the name is the last package path component. Without this the
    # whole label (slashes included) would be used as a directory name.
    printf '%s\n' "${target##*/}"
  fi
}
# Get the repo-relative directory for a target (e.g., "runtime/src/iree/base").
# Uses bazel query to find source file location.
# Arguments:
#   $1 - bazel label
# Globals:
#   IREE_BAZEL_WORKTREE_DIR (read) - prefix stripped from the reported path
# Outputs:
#   The directory of the first reported source file, relative to the worktree,
#   on stdout. Prints nothing when the query yields no location.
get_target_relative_dir() {
  local target="${1}"
  local BAZEL_BIN
  BAZEL_BIN=$(iree_get_bazel_command)

  # Query for source files of this target; only the first result is used.
  # Query errors are discarded, so a failed query looks like "no location".
  local src_location
  src_location=$("${BAZEL_BIN}" query "labels(srcs, ${target})" --output=location 2>/dev/null | head -1)

  if [[ -n "${src_location}" ]]; then
    # Extract directory from "path/to/file.cc:1:1" and make it relative.
    local src_file="${src_location%%:*}"
    local src_dir
    src_dir=$(dirname "${src_file}")
    # Make path relative to worktree root. The prefix is quoted so that glob
    # characters in the worktree path ([, *, ?) are matched literally.
    printf '%s\n' "${src_dir#"${IREE_BAZEL_WORKTREE_DIR}/"}"
  fi
}
# Compute where a target's dictionary lives under FUZZ_CORPUS_DIR.
# Prints "<FUZZ_CORPUS_DIR>/<relative-dir>/<target-name>.dict" and creates the
# containing directory; the .dict file itself may not exist yet. Prints nothing
# when the target's relative directory cannot be determined.
find_dict_file() {
  local target="${1}"
  local name
  local rel_dir
  name=$(get_target_name "${target}")
  rel_dir=$(get_target_relative_dir "${target}")

  # Without a relative directory there is no place to put the dictionary.
  [[ -n "${rel_dir}" ]] || return 0

  local parent="${FUZZ_CORPUS_DIR}/${rel_dir}"
  mkdir -p "${parent}"
  echo "${parent}/${name}.dict"
}
# Parse recommended dictionary from fuzzer output.
# Filters to entries with usage count >= threshold.
# Converts octal escapes (\NNN) to hex escapes (\xNN) for libFuzzer compatibility.
parse_dict_entries() {
local output_file="${1}"
local min_uses="${2}"
# Extract lines between "Recommended dictionary" and "End of recommended"
# Format: "\NNN\NNN" # Uses: NNN (libFuzzer outputs octal, but only parses hex)
sed -n '/^###### Recommended dictionary/,/^###### End of recommended/p' "${output_file}" \
| grep -E '^"' \
| while IFS= read -r line; do
# Extract usage count
local uses
uses=$(echo "${line}" | grep -oE 'Uses: [0-9]+' | grep -oE '[0-9]+')
if [[ -n "${uses}" ]] && [[ "${uses}" -ge "${min_uses}" ]]; then
# Output just the string part (before #), converting octal to hex.
# \NNN (octal) -> \xNN (hex)
echo "${line}" | sed 's/ *#.*//' | perl -pe 's/\\([0-7]{3})/sprintf("\\x%02x", oct($1))/ge'
fi
done
}
# Merge new dict entries into dict file (creates file if needed).
#
# Arguments:
#   $1 - dictionary file to update (may not exist yet)
#   $2 - newline-separated candidate entries
# Candidates that are not wrapped in double quotes are skipped. Existing bare
# entries (lines starting with ") and the valid candidates are combined, sorted
# byte-wise and de-duplicated. Every other line of the existing file (comments,
# blank lines, named kw="value" entries) is kept and written ahead of them.
# The file is only rewritten when at least one previously unseen entry exists.
# Returns 0 on success, 1 if a scratch directory cannot be created.
merge_dict_entries() {
  local dict_file="${1}"
  local new_entries="${2}"

  if [[ -z "${new_entries}" ]]; then
    iree_debug "No new dictionary entries to add"
    return 0
  fi

  # Scratch files live in one private directory that is removed explicitly on
  # every exit path (a RETURN trap would stay installed after this function).
  local work_dir
  work_dir=$(mktemp -d) || return 1
  local valid_entries="${work_dir}/valid"
  local existing_entries="${work_dir}/existing"
  local merged_entries="${work_dir}/merged"

  # Filter new entries to only valid ones (start with " and end with ").
  # Use printf to avoid bash escape interpretation.
  printf '%s\n' "${new_entries}" | while IFS= read -r entry; do
    if [[ "${entry}" =~ ^\".*\"$ ]]; then
      printf '%s\n' "${entry}"
    else
      # Loop stdout is the entries file; keep diagnostics out of it.
      iree_debug "Skipping malformed dict entry: ${entry}" >&2
    fi
  done > "${valid_entries}"

  if [[ ! -s "${valid_entries}" ]]; then
    iree_debug "No valid dictionary entries to add"
    rm -rf -- "${work_dir}"
    return 0
  fi

  # Unique entries already in the file (none if the file is missing).
  { grep -E '^"' "${dict_file}" 2>/dev/null || true; } \
    | LC_ALL=C sort -u > "${existing_entries}"
  # Combine existing entries with new entries, sort, dedupe.
  LC_ALL=C sort -u "${existing_entries}" "${valid_entries}" > "${merged_entries}"

  # Both counts are over de-duplicated sets, so the difference is exactly the
  # number of entries not previously present.
  local old_count new_count added_count
  old_count=$(wc -l < "${existing_entries}")
  new_count=$(wc -l < "${merged_entries}")
  added_count=$((new_count - old_count))

  if [[ "${added_count}" -gt 0 ]]; then
    # Preserve all non-entry lines from the original file (if it exists), then
    # add the sorted entries. Written to a sibling file and renamed into place.
    {
      grep -vE '^"' "${dict_file}" 2>/dev/null || true
      cat "${merged_entries}"
    } > "${dict_file}.new"
    mv "${dict_file}.new" "${dict_file}"
    iree_info "Added ${added_count} new dictionary entries to ${dict_file}"
  else
    iree_debug "No new unique dictionary entries"
  fi

  rm -rf -- "${work_dir}"
}
# Run corpus minimization using set_cover_merge.
#
# Arguments:
#   $1 - fuzzer binary to invoke
#   $2 - corpus directory to minimize in place
# The binary is run with -set_cover_merge=1 into a scratch directory; only when
# that succeeds and produced at least one file is the corpus replaced.
# Returns 0 when the corpus was replaced or left untouched, 1 when the scratch
# directory could not be created or the minimized files could not be installed
# (in which case they are kept in the scratch directory for recovery).
minimize_corpus() {
  local binary="${1}"
  local corpus_dir="${2}"

  local corpus_count
  corpus_count=$(ls -1 "${corpus_dir}" 2>/dev/null | wc -l)
  if [[ "${corpus_count}" -eq 0 ]]; then
    iree_debug "Corpus is empty, nothing to minimize"
    return 0
  fi
  iree_info "Minimizing corpus (${corpus_count} files)..."

  # The scratch directory is removed explicitly on each exit path (a RETURN
  # trap would stay installed after this function).
  local temp_corpus
  temp_corpus=$(mktemp -d) || return 1

  # Fuzzer output during the merge is discarded.
  if ! "${binary}" -set_cover_merge=1 "${temp_corpus}" "${corpus_dir}" 2>/dev/null; then
    iree_warn "Corpus minimization failed"
    rm -rf -- "${temp_corpus}"
    return 0
  fi

  local new_count
  new_count=$(ls -1 "${temp_corpus}" | wc -l)
  if [[ "${new_count}" -eq 0 ]]; then
    # Never trade a non-empty corpus for an empty one.
    iree_warn "Corpus minimization produced no files; keeping existing corpus"
    rm -rf -- "${temp_corpus}"
    return 0
  fi

  # Use find for both the delete and the move to avoid "argument list too
  # long" with large corpora; -exec ... {} + batches the mv invocations.
  find "${corpus_dir}" -mindepth 1 -delete
  if ! find "${temp_corpus}" -mindepth 1 -maxdepth 1 \
      -exec sh -c 'mv -- "$@" "$0"' "${corpus_dir}/" {} +; then
    iree_warn "Failed to install minimized corpus; files kept in ${temp_corpus}"
    return 1
  fi
  rm -rf -- "${temp_corpus}"
  iree_info "Corpus minimized: ${corpus_count} -> ${new_count} files"
}
# Parse arguments.
# Everything before a bare "--" is interpreted here: known flags set their
# globals, the first non-flag word becomes TARGET, and anything else is
# forwarded to bazel. Everything after "--" is collected verbatim for libFuzzer.
TARGET=""
BAZEL_ARGS=()
FUZZER_ARGS=()
PARSING_FUZZER_ARGS=0
while [[ $# -gt 0 ]]; do
  if [[ "${PARSING_FUZZER_ARGS}" == "1" ]]; then
    FUZZER_ARGS+=("${1}")
  else
    case "${1}" in
      -h|--help)
        show_help
        exit 0
        ;;
      --agent-md|--agent_md)
        iree_show_agent_md
        exit 0
        ;;
      -n|--dry_run|--dry-run)
        IREE_BAZEL_DRY_RUN=1
        ;;
      -v|--verbose)
        IREE_BAZEL_VERBOSE=1
        ;;
      -m|--minimize)
        FUZZ_MINIMIZE=1
        ;;
      -d|--dict_only|--dict-only)
        FUZZ_DICT_ONLY=1
        ;;
      --)
        PARSING_FUZZER_ARGS=1
        ;;
      -*)
        # Unrecognized flags are passed through to bazel.
        BAZEL_ARGS+=("${1}")
        ;;
      *)
        # First positional word is the target; later ones go to bazel.
        if [[ -z "${TARGET}" ]]; then
          TARGET="${1}"
        else
          BAZEL_ARGS+=("${1}")
        fi
        ;;
    esac
  fi
  # Every non-exiting path consumes exactly one word.
  shift
done

# Target is required.
if [[ -z "${TARGET}" ]]; then
  iree_error "Target is required"
  echo ""
  show_help
  exit 1
fi

# Set up worktree.
iree_setup_worktree
#===----------------------------------------------------------------------===#
# Multi-target pattern support (e.g., //path/to/...)
#===----------------------------------------------------------------------===#
# When TARGET contains "...", every matching label that ends in "_fuzz" is
# built once and then fuzzed by its own background instance of this script.
# This branch always ends in an exit, so the single-target code further down
# is never reached from here.
if [[ "${TARGET}" == *"..."* ]]; then
  # Expand pattern to all fuzz targets.
  BAZEL_BIN=$(iree_get_bazel_command)
  iree_info "Discovering fuzz targets matching: ${TARGET}"

  # Query for all targets ending in _fuzz under the pattern.
  FUZZ_TARGETS=()
  while IFS= read -r target; do
    if [[ "${target}" == *"_fuzz" ]]; then
      FUZZ_TARGETS+=("${target}")
    fi
  done < <("${BAZEL_BIN}" query "${TARGET}" 2>/dev/null | sort)

  if [[ ${#FUZZ_TARGETS[@]} -eq 0 ]]; then
    iree_error "No fuzz targets found matching: ${TARGET}"
    exit 1
  fi
  iree_info "Found ${#FUZZ_TARGETS[@]} fuzz targets"

  # Dry-run: just list targets.
  if iree_is_dry_run; then
    iree_info "Would run these fuzz targets:"
    for target in "${FUZZ_TARGETS[@]}"; do
      echo " ${target}"
    done
    iree_info "Fuzzer args: ${FUZZER_ARGS[*]:-<none>}"
    exit 0
  fi

  # Build all targets first.
  iree_info "Building all fuzz targets..."
  iree_bazel_build_default_configs
  BUILD_ARGS=("${IREE_BAZEL_DEFAULT_CONFIGS[@]}" "--config=fuzzer" "${BAZEL_ARGS[@]}")
  if ! iree_bazel_build_quiet "${TARGET}" "${BUILD_ARGS[@]}"; then
    iree_error "Build failed"
    exit 1
  fi

  # Track results using indexed arrays (bash 3 compatible).
  # FUZZ_PIDS[i] is the background process fuzzing FUZZ_TARGETS_RUNNING[i].
  declare -a FUZZ_PIDS=()
  declare -a FUZZ_TARGETS_RUNNING=()
  CRASHED_TARGETS=()
  CLEAN_TARGETS=()

  # Trap Ctrl+C to stop all fuzzers gracefully.
  # Signals every child, waits for all of them, prints the results collected
  # so far, and exits 0.
  cleanup_multi() {
    echo ""
    iree_info "Stopping all fuzzers..."
    for pid in "${FUZZ_PIDS[@]}"; do
      kill -INT "${pid}" 2>/dev/null || true
    done
    wait 2>/dev/null || true
    echo ""
    iree_info "=== Fuzzing Summary ==="
    iree_info "Total targets: ${#FUZZ_TARGETS[@]}"
    if [[ ${#CRASHED_TARGETS[@]} -gt 0 ]]; then
      iree_warn "Crashed (${#CRASHED_TARGETS[@]}):"
      for t in "${CRASHED_TARGETS[@]}"; do
        echo " - ${t}"
      done
    fi
    if [[ ${#CLEAN_TARGETS[@]} -gt 0 ]]; then
      iree_info "Clean (${#CLEAN_TARGETS[@]}): ${#CLEAN_TARGETS[@]} targets"
    fi
    exit 0
  }
  trap cleanup_multi INT TERM

  # Build forwarded options for child processes.
  declare -a FORWARD_ARGS=()
  [[ "${FUZZ_MINIMIZE}" == "1" ]] && FORWARD_ARGS+=("-m")
  [[ "${FUZZ_DICT_ONLY}" == "1" ]] && FORWARD_ARGS+=("-d")
  [[ "${IREE_BAZEL_VERBOSE}" == "1" ]] && FORWARD_ARGS+=("-v")
  FORWARD_ARGS+=("${BAZEL_ARGS[@]}")

  # Run all fuzzers. They each manage their own corpus/artifacts.
  iree_info "Starting ${#FUZZ_TARGETS[@]} fuzzers..."
  echo ""
  for target in "${FUZZ_TARGETS[@]}"; do
    # Spawn fuzzer in subshell that ignores SIGINT (parent handles cleanup).
    (
      trap '' INT
      exec "${SCRIPT_DIR}/iree-bazel-fuzz" "${FORWARD_ARGS[@]}" "${target}" -- "${FUZZER_ARGS[@]}"
    ) &
    FUZZ_PIDS+=($!)
    FUZZ_TARGETS_RUNNING+=("${target}")
  done

  # Wait for all fuzzers and collect results.
  # This loop runs at top level, so "local" must not be used here: outside a
  # function it fails, and with "set -e" that would abort the script right
  # after the fuzzers were spawned. The arrays are indexed directly instead.
  for i in "${!FUZZ_PIDS[@]}"; do
    if wait "${FUZZ_PIDS[$i]}"; then
      CLEAN_TARGETS+=("${FUZZ_TARGETS_RUNNING[$i]}")
    else
      CRASHED_TARGETS+=("${FUZZ_TARGETS_RUNNING[$i]}")
    fi
  done

  # Final summary.
  echo ""
  iree_info "=== Fuzzing Complete ==="
  iree_info "Total targets: ${#FUZZ_TARGETS[@]}"
  if [[ ${#CRASHED_TARGETS[@]} -gt 0 ]]; then
    iree_warn "Crashed (${#CRASHED_TARGETS[@]}):"
    for t in "${CRASHED_TARGETS[@]}"; do
      echo " - ${t}"
    done
  fi
  iree_info "Clean: ${#CLEAN_TARGETS[@]} targets"
  exit 0
fi
# Extract target info for directory structure.
TARGET_NAME=$(get_target_name "${TARGET}")
TARGET_RELATIVE_DIR=$(get_target_relative_dir "${TARGET}")

# Set up corpus and artifact directories with repo-relative paths.
# Structure: ~/.cache/iree-fuzz-cache/<relative-path>/<target>/corpus/
#            ~/.cache/iree-fuzz-cache/<relative-path>/<target>/artifacts/
if [[ -n "${TARGET_RELATIVE_DIR}" ]]; then
  FUZZ_TARGET_DIR="${FUZZ_CACHE_DIR}/${TARGET_RELATIVE_DIR}/${TARGET_NAME}"
else
  # Fallback if we can't determine relative path.
  FUZZ_TARGET_DIR="${FUZZ_CACHE_DIR}/${TARGET_NAME}"
fi
CORPUS_DIR="${FUZZ_TARGET_DIR}/corpus"
ARTIFACT_DIR="${FUZZ_TARGET_DIR}/artifacts"

# Find dictionary file.
# Empty when the target's relative directory could not be determined.
DICT_FILE=$(find_dict_file "${TARGET}")

# Set up bazel args with fuzzer config.
iree_bazel_build_default_configs
BAZEL_ARGS=("${IREE_BAZEL_DEFAULT_CONFIGS[@]}" "--config=fuzzer" "${BAZEL_ARGS[@]}")

# Create corpus and artifact directories.
mkdir -p "${CORPUS_DIR}" "${ARTIFACT_DIR}"

# Verbose/dry-run output.
if iree_is_verbose || iree_is_dry_run; then
  iree_info "Target: ${TARGET}"
  iree_info "Corpus: ${CORPUS_DIR}"
  iree_info "Artifacts: ${ARTIFACT_DIR}"
  if [[ -n "${DICT_FILE}" ]]; then
    iree_info "Dict: ${DICT_FILE}"
  fi
  iree_info "Bazel args: ${BAZEL_ARGS[*]}"
  iree_info "Fuzzer args: ${FUZZER_ARGS[*]:-<none>}"
fi
if iree_is_dry_run; then
  exit 0
fi

# Build the target.
# "cmd || exit $?" propagates the build's own status. Inside
# "if ! cmd; then ... fi" the value of $? is that of the negated condition
# (always 0), which would report a failed build as success.
iree_debug "Building ${TARGET}..."
iree_bazel_build_quiet "${TARGET}" "${BAZEL_ARGS[@]}" || exit $?

# Get the binary path.
BINARY_PATH=$(iree_bazel_get_binary_path "${TARGET}" "${BAZEL_ARGS[@]}")
if [[ -z "${BINARY_PATH}" ]] || [[ ! -x "${BINARY_PATH}" ]]; then
  iree_error "Could not find built binary for ${TARGET}"
  exit 1
fi
iree_debug "Binary: ${BINARY_PATH}"

# Minimize corpus if requested or if corpus is too large.
CORPUS_COUNT=$(ls -1 "${CORPUS_DIR}" 2>/dev/null | wc -l)
if [[ "${FUZZ_MINIMIZE}" == "1" ]]; then
  minimize_corpus "${BINARY_PATH}" "${CORPUS_DIR}"
elif [[ "${CORPUS_COUNT}" -gt "${FUZZ_AUTO_MINIMIZE}" ]]; then
  iree_info "Corpus has ${CORPUS_COUNT} files (threshold: ${FUZZ_AUTO_MINIMIZE}), auto-minimizing..."
  minimize_corpus "${BINARY_PATH}" "${CORPUS_DIR}"
fi

# Dict-only mode: just run briefly to generate dictionary recommendations.
if [[ "${FUZZ_DICT_ONLY}" == "1" ]]; then
  if [[ -z "${DICT_FILE}" ]]; then
    iree_error "No .dict file found for ${TARGET}"
    exit 1
  fi
  iree_info "Running corpus to extract dictionary entries..."
  OUTPUT_FILE=$(mktemp)
  trap "rm -f '${OUTPUT_FILE}'" EXIT
  # Run with existing corpus, short time limit.
  # Output is shown live and also captured for parsing.
  "${BINARY_PATH}" "${CORPUS_DIR}" -max_total_time=5 2>&1 | tee "${OUTPUT_FILE}"
  # Extract and merge dictionary entries.
  NEW_ENTRIES=$(parse_dict_entries "${OUTPUT_FILE}" "${FUZZ_MIN_USES}")
  merge_dict_entries "${DICT_FILE}" "${NEW_ENTRIES}"
  exit 0
fi
# Per-run log directory, keyed by target name and this shell's PID.
FUZZ_LOG_DIR="/tmp/iree-bazel-fuzz/${TARGET_NAME}/$$"
mkdir -p "${FUZZ_LOG_DIR}"

# Base fuzzer command: binary, corpus directory, and a persistent location for
# crash/slow-unit artifacts.
FUZZ_CMD=(
  "${BINARY_PATH}"
  "${CORPUS_DIR}"
  "-artifact_prefix=${ARTIFACT_DIR}/"
)

# Pass the dictionary only once it exists on disk.
if [[ -n "${DICT_FILE}" ]]; then
  if [[ -f "${DICT_FILE}" ]]; then
    FUZZ_CMD+=("-dict=${DICT_FILE}")
    iree_info "Using dictionary: ${DICT_FILE}"
  else
    iree_debug "Dictionary will be created at: ${DICT_FILE}"
  fi
fi

# Scan the user's fuzzer args: note an explicit -timeout=, and remember the
# leading digits of the last -max_total_time= value.
HAS_TIMEOUT=0
MAX_TOTAL_TIME=0
for arg in "${FUZZER_ARGS[@]}"; do
  case "${arg}" in
    -timeout=*)
      HAS_TIMEOUT=1
      ;;
    -max_total_time=[0-9]*)
      MAX_TOTAL_TIME="${arg#-max_total_time=}"
      MAX_TOTAL_TIME="${MAX_TOTAL_TIME%%[!0-9]*}"
      ;;
  esac
done

# With a total time limit but no explicit per-input timeout, derive one:
# twice the total time, but never below 5 seconds.
if [[ "${HAS_TIMEOUT}" -eq 0 && "${MAX_TOTAL_TIME}" -gt 0 ]]; then
  DEFAULT_TIMEOUT=$((MAX_TOTAL_TIME * 2))
  (( DEFAULT_TIMEOUT >= 5 )) || DEFAULT_TIMEOUT=5
  FUZZ_CMD+=("-timeout=${DEFAULT_TIMEOUT}")
  iree_debug "Auto-setting timeout=${DEFAULT_TIMEOUT}s (2x max_total_time)"
fi

# User-supplied fuzzer args go last.
FUZZ_CMD+=("${FUZZER_ARGS[@]}")
# Cleanup and dict update on exit.
# CLEANUP_DONE guards against the handler running twice (INT then EXIT).
CLEANUP_DONE=0

# Exit handler for a single-target run.
# Harvests recommended dictionary entries from every log in FUZZ_LOG_DIR,
# reports corpus and artifact counts, removes the log directory on success (or
# summarizes errors and keeps it on failure), then exits with the status that
# was current when the handler was entered.
# Globals read: DICT_FILE, FUZZ_LOG_DIR, FUZZ_MIN_USES, CORPUS_DIR, ARTIFACT_DIR.
cleanup() {
  local exit_code=$?
  # Prevent running twice (INT then EXIT).
  if [[ "${CLEANUP_DONE}" -eq 1 ]]; then
    exit "${exit_code}"
  fi
  CLEANUP_DONE=1
  trap - EXIT INT TERM

  # Extract dictionary entries from the log files. Each log is parsed on its
  # own, so with several log files (main.log plus any fuzz-N.log) every
  # recommended-dictionary section contributes, not only the first one found.
  if [[ -n "${DICT_FILE}" ]] && [[ -d "${FUZZ_LOG_DIR}" ]]; then
    local log_file new_entries
    new_entries=$(
      for log_file in "${FUZZ_LOG_DIR}"/*.log; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [[ -f "${log_file}" ]] || continue
        parse_dict_entries "${log_file}" "${FUZZ_MIN_USES}"
      done
    ) || true
    if [[ -n "${new_entries}" ]]; then
      # A failed merge must not prevent the rest of the report.
      merge_dict_entries "${DICT_FILE}" "${new_entries}" \
        || iree_warn "Failed to update dictionary: ${DICT_FILE}"
    fi
  fi

  # Report corpus size.
  local corpus_count
  corpus_count=$(ls -1 "${CORPUS_DIR}" 2>/dev/null | wc -l)
  iree_info "Corpus: ${corpus_count} files in ${CORPUS_DIR}"

  # Check for crash/leak/timeout artifacts in persistent location.
  local artifact_count
  artifact_count=$(ls -1 "${ARTIFACT_DIR}" 2>/dev/null | wc -l)
  if [[ "${artifact_count}" -gt 0 ]]; then
    iree_warn "Artifacts (${artifact_count} files): ${ARTIFACT_DIR}"
    # List at most five names, then summarize the remainder.
    ls -1 "${ARTIFACT_DIR}" | head -5 | while read -r artifact; do
      echo " ${artifact}"
    done
    if [[ "${artifact_count}" -gt 5 ]]; then
      echo " ... and $((artifact_count - 5)) more"
    fi
  fi

  # Clean up log directory on success, keep on failure for debugging.
  if [[ "${exit_code}" -eq 0 ]]; then
    rm -rf "${FUZZ_LOG_DIR}"
  else
    # Show error messages from logs on failure.
    local errors
    errors=$(grep -h -E '(ERROR|FATAL|ASAN|SUMMARY:|ParseDictionaryFile:)' "${FUZZ_LOG_DIR}"/*.log 2>/dev/null | head -20 || true)
    if [[ -n "${errors}" ]]; then
      iree_error "Fuzzer failed:"
      echo "${errors}" >&2
    fi
    iree_info "Logs and artifacts: ${FUZZ_LOG_DIR}"
  fi
  exit "${exit_code}"
}
# Install the handler before starting the fuzzer so that normal exit, Ctrl+C
# and TERM all go through cleanup.
trap cleanup EXIT INT TERM
# Run the fuzzer from log directory so fuzz-N.log files go there.
# Output goes to log files only - we just show status.
iree_info "Starting fuzzer (Ctrl+C to stop)..."
iree_info "Logs: ${FUZZ_LOG_DIR}"
cd "${FUZZ_LOG_DIR}"
# Last command of the script: its exit status is what cleanup captures and
# re-exits with. stdout and stderr are both written to main.log.
"${FUZZ_CMD[@]}" > main.log 2>&1