#!/usr/bin/env bash
##
# Linux Malware Detect v2.0.1
#             (C) 2002-2026, R-fx Networks <proj@rfxn.com>
#             (C) 2026, Ryan MacDonald <ryan@rfxn.com>
# This program may be freely redistributed under the terms of the GNU GPL v2
##
# hookscan.sh — unified hook scanning API for ModSecurity, FTP, Exim, and third-party integrations (see hookscan(1))

# Built-in defaults. conf.maldet.hookscan may override the allowlisted keys.

# Options forwarded to maldet through --config-option
quarantine_hits="1"
quarantine_clean="0"
scan_clamscan="auto"
scan_yara="auto"
scan_tmpdir_paths=""

# Scan execution
hookscan_timeout="30"             # handed to timeout(1) when it is available
hookscan_fail_open="1"            # 1 = answer "clean" on timeout / scan error
hookscan_escalate_hits="0"        # accepted from config; not otherwise referenced here
hookscan_service_users="apache,nginx,www-data,nobody,proftpd,exim"

# Non-root callers
hookscan_user_rate_limit="60"     # generic-mode scans per hour; 0 = unlimited
hookscan_user_show_signames="1"   # 0 = mask signature names in generic output

# --list / --stdin input caps
hookscan_list_max_bytes="1048576"
hookscan_list_max_entries="10000"

# Exported before maldet is invoked
hscan="1"

# Output formatting functions (per-mode)

# _output_clean — emit the "clean" verdict in the format of the active mode.
# Globals read: hscan_mode, file
# modsec/exim/generic answer on stdout; ftp/proftpd write to syslog via logger.
# Any other mode produces no output.
_output_clean() {
	if [[ "$hscan_mode" == "modsec" ]]; then
		echo "1 maldet: OK"
	elif [[ "$hscan_mode" == "ftp" ]]; then
		logger -t maldet-ftpscan "clean: $file"
	elif [[ "$hscan_mode" == "proftpd" ]]; then
		logger -t maldet-proftpdscan "clean: $file"
	elif [[ "$hscan_mode" == "exim" ]]; then
		echo "maldet: clean"
	elif [[ "$hscan_mode" == "generic" ]]; then
		echo "CLEAN: $file"
	fi
}

# _output_infected — emit the "infected" verdict in the format of the active mode.
# Arguments:    $1 - signature name (falls back to MALWARE)
# Globals read: hscan_mode, file, _is_root, hookscan_user_show_signames
# In generic mode the printed name is replaced by MALWARE-DETECTED when the
# caller is non-root and hookscan_user_show_signames is 0; only the printed
# text is masked.
_output_infected() {
	local _signame="${1:-MALWARE}"
	local _shown

	if [[ "$hscan_mode" == "modsec" ]]; then
		echo "0 maldet: $_signame $file"
	elif [[ "$hscan_mode" == "ftp" ]]; then
		logger -t maldet-ftpscan "infected: $_signame $file"
	elif [[ "$hscan_mode" == "proftpd" ]]; then
		logger -t maldet-proftpdscan "infected: $_signame $file"
	elif [[ "$hscan_mode" == "exim" ]]; then
		echo "maldet: $_signame"
	elif [[ "$hscan_mode" == "generic" ]]; then
		_shown="$_signame"
		if [[ "$_is_root" == "0" && "${hookscan_user_show_signames:-1}" == "0" ]]; then
			_shown="MALWARE-DETECTED"
		fi
		echo "INFECTED: $_shown $file"
	fi
}

# _output_error — report a scan failure in the format of the active mode.
# Arguments:    $1 - reason text (falls back to "unknown error")
# Globals read: hscan_mode, file
# modsec and exim print nothing here: their answer on the error path is
# produced by the caller according to the fail-open / fail-closed setting.
_output_error() {
	local _why="${1:-unknown error}"

	if [[ "$hscan_mode" == "ftp" ]]; then
		logger -t maldet-ftpscan "error: $_why $file"
	elif [[ "$hscan_mode" == "proftpd" ]]; then
		logger -t maldet-proftpdscan "error: $_why $file"
	elif [[ "$hscan_mode" == "generic" ]]; then
		echo "ERROR: $_why"
	fi
}

# _hook_rate_check — per-UID scan throttle for non-root generic-mode callers.
#
# Globals read: _is_root, _is_service_user, hscan_mode,
#               hookscan_user_rate_limit, tmpdir, inspath
# State file:   <tmpdir>/.hook_rate_<uid> holding "<window_start_epoch> <count>"
# Returns:      0 when the scan may proceed (also when the caller is exempt or
#               the state file cannot be used — the limiter is fail-open)
# Exits:        1, after printing an ERROR line, once the limit for the
#               current one-hour window has been reached
_hook_rate_check() {
	# Exempt: root, service users, non-generic modes, unlimited
	[ "$_is_root" == "1" ] && return 0
	[ "$_is_service_user" == "1" ] && return 0
	[ "$hscan_mode" != "generic" ] && return 0

	local _int_pat='^[0-9]{1,15}$'
	local _limit="${hookscan_user_rate_limit:-60}"
	# A non-numeric limit would make every integer test below error out and
	# silently disable the limiter; fall back to the built-in default instead
	[[ "$_limit" =~ $_int_pat ]] || _limit=60
	[ "$_limit" -eq 0 ] && return 0

	local _uid _countfile _now _ts="" _count=""
	_uid=$(id -u)
	_countfile="${tmpdir:-$inspath/tmp}/.hook_rate_${_uid}"
	_now=$(date +%s)

	# The path is predictable; never read or write through a planted symlink
	if [ -L "$_countfile" ]; then
		logger -t maldet-hookscan "rate limit state is a symlink, ignoring: $_countfile"
		return 0
	fi

	if [ -f "$_countfile" ]; then
		{ read -r _ts _count < "$_countfile"; } 2>/dev/null
	fi

	# The file content is untrusted. Anything other than two plain integers
	# restarts the window, which also keeps arbitrary text out of the
	# arithmetic expansion below (e.g. "x[$(cmd)]" would otherwise execute).
	if [[ ! "$_ts" =~ $_int_pat || ! "$_count" =~ $_int_pat ]]; then
		_ts="$_now"
		_count=0
	elif [ "$_ts" -gt "$_now" ] || [ "$_ts" -lt "$((_now - 3600))" ]; then
		# Timestamp from the future, or window older than one hour
		_ts="$_now"
		_count=0
	fi

	if [ "$_count" -ge "$_limit" ]; then
		logger -t maldet-hookscan "rate limit exceeded for UID $_uid (${_count}/${_limit} scans/hr)"
		command -v elog_event >/dev/null 2>&1 && \
			elog_event "threshold_exceeded" "warn" "hook rate limit exceeded" "uid=$_uid" "count=$_count" "limit=$_limit"
		echo "ERROR: rate limit exceeded (${_count}/${_limit} scans/hour)"
		exit 1
	fi

	# 10# forces base 10 so a zero-padded count is not parsed as octal.
	# The window start (_ts) is written back unchanged: rewriting it to "now"
	# on every scan would keep the window from ever expiring for a caller
	# that scans at least once an hour.
	_count=$((10#$_count + 1))
	{ printf '%s %s\n' "$_ts" "$_count" > "$_countfile"; } 2>/dev/null  # safe: fail-open if tmpdir not writable
	return 0
}

# Mode dispatch
# Accepted invocation forms:
#   hookscan.sh /abs/path              lone absolute path, implies modsec
#   hookscan.sh MODE PATH              MODE: modsec|ftp|proftpd|exim|generic
#   hookscan.sh generic --list FILE    scan the paths listed in FILE
#   hookscan.sh generic --stdin        scan the paths read from stdin
# An unrecognised MODE is logged and handled as modsec.

hscan_mode="modsec"
_list_mode=""
_list_source=""

# Without arguments there is nothing to scan
if [[ $# -eq 0 ]]; then
	_output_clean
	exit 0
fi

case "$1" in
	/*)
		# Lone absolute path: mode stays at the modsec default
		file="$1"
		;;
	*)
		if [[ $# -lt 2 ]]; then
			# A single non-absolute argument is kept as the scan target;
			# the filename validation further down refuses relative paths
			hscan_mode="modsec"
			file="$1"
		else
			case "$1" in
				generic)
					hscan_mode="generic"
					# Only generic mode understands --list / --stdin
					if [[ "$2" == "--list" ]]; then
						if [[ $# -lt 3 ]]; then
							echo "ERROR: --list requires a file path argument"
							exit 1
						fi
						_list_mode="list"
						_list_source="$3"
					elif [[ "$2" == "--stdin" ]]; then
						_list_mode="stdin"
					else
						file="$2"
					fi
					;;
				modsec|ftp|proftpd|exim)
					hscan_mode="$1"
					file="$2"
					;;
				*)
					logger -t maldet-hookscan "unknown mode '$1', defaulting to modsec"
					hscan_mode="modsec"
					file="$2"
					;;
			esac
		fi
		;;
esac

# pure-ftpd detection: a set UPLOAD_VUSER switches a modsec-mode call to ftp
if [[ "$hscan_mode" == "modsec" && -n "${UPLOAD_VUSER:-}" ]]; then
	hscan_mode="ftp"
fi

export hscan_mode

# Characters treated as shell metacharacters (the set includes the double
# quote and the backtick). ANSI-C $'...' quoting lets the backtick appear
# literally without starting a command substitution. Kept at top level
# because the config parser and the list validation test against it as well.
metachar_pat=$'[;|&$(){}`"<>]'

# Filename validation (single-file mode only)
# A refused name is logged and answered with an "infected" verdict; a path
# that does not resolve to an existing regular file is answered as clean.

if [[ -z "$_list_mode" ]]; then
	dotdot_pat='\.\.'
	_refusal=""

	# Checks run in this order; the first one that trips decides the log text
	if [[ "$file" == *[![:print:]]* ]]; then
		_refusal="rejected filename with non-printable characters"
	elif [[ "$file" =~ $metachar_pat ]]; then
		_refusal="rejected filename with shell metacharacters: $file"
	elif [[ "$file" =~ $dotdot_pat ]]; then
		_refusal="rejected filename with path traversal: $file"
	elif [[ "$file" != /* ]]; then
		_refusal="rejected relative path: $file"
	fi

	if [[ -n "$_refusal" ]]; then
		logger -t maldet-hookscan "$_refusal"
		_output_infected "MALWARE"
		exit 0
	fi

	# readlink -e rather than realpath -e (CentOS 6 compatibility); it prints
	# nothing when the path is missing or is a dangling symlink
	_canonical=$(readlink -e "$file" 2>/dev/null)
	[[ -n "$_canonical" ]] || { _output_clean; exit 0; }
	file="$_canonical"

	# Only regular files are scanned
	[[ -f "$file" ]] || { _output_clean; exit 0; }
fi

inspath="${inspath:-/usr/share/maldet}"
intcnf="$inspath/internals/internals.conf"
if [ -f "$intcnf" ]; then
	# shellcheck disable=SC1090
	source "$intcnf"
fi

# Config parser (inline allowlist)
#
# _hookscan_load_conf FILE — read key=value lines from FILE and assign the
# allowlisted keys to the globals of the same name. The file is parsed, never
# sourced.
#   - blank lines and lines starting with '#' are skipped
#   - a trailing CR (CRLF files) and whitespace around the line, the key and
#     the value are removed
#   - one pair of surrounding double quotes, then single quotes, is stripped
#   - values matching $metachar_pat are refused
#   - integer-only keys and hookscan_timeout are refused when malformed, so a
#     typo keeps the previous value instead of breaking the integer tests and
#     the timeout(1) call that consume them
#   - unknown keys are logged and ignored
# A missing FILE is not an error. The last line is processed even when it has
# no trailing newline.
_hookscan_load_conf() {
	local _cnf="$1" _line _key _val
	local _uint_pat='^[0-9]+$'
	# Integer or decimal with an optional s/m/h/d unit suffix
	local _duration_pat='^[0-9]+(\.[0-9]+)?[smhd]?$'

	[ -f "$_cnf" ] || return 0

	while IFS= read -r _line || [ -n "$_line" ]; do
		# Drop CR from CRLF files, then trim leading/trailing whitespace
		_line="${_line%$'\r'}"
		_line="${_line#"${_line%%[![:space:]]*}"}"
		_line="${_line%"${_line##*[![:space:]]}"}"

		# Skip comments and blank lines
		case "$_line" in
			\#*|"") continue ;;
		esac

		# Split on the first '=' and tolerate "key = value" spacing
		_key="${_line%%=*}"
		_val="${_line#*=}"
		_key="${_key%"${_key##*[![:space:]]}"}"
		_val="${_val#"${_val%%[![:space:]]*}"}"

		# Strip surrounding double quotes from value
		_val="${_val#\"}"
		_val="${_val%\"}"
		# Strip surrounding single quotes from value
		_val="${_val#\'}"
		_val="${_val%\'}"

		# Reject metachar values
		if [[ "$_val" =~ $metachar_pat ]]; then
			logger -t maldet-hookscan "rejected metachar in config value: $_key"
			continue
		fi

		# Type checks for values that feed integer tests or timeout(1)
		case "$_key" in
			hookscan_user_rate_limit|hookscan_list_max_bytes|hookscan_list_max_entries)
				if [[ ! "$_val" =~ $_uint_pat ]]; then
					logger -t maldet-hookscan "rejected non-numeric config value: $_key"
					continue
				fi
				;;
			hookscan_timeout)
				if [[ ! "$_val" =~ $_duration_pat ]]; then
					logger -t maldet-hookscan "rejected non-numeric config value: $_key"
					continue
				fi
				;;
		esac

		case "$_key" in
			quarantine_hits)        quarantine_hits="$_val" ;;
			quarantine_clean)       quarantine_clean="$_val" ;;
			scan_clamscan)          scan_clamscan="$_val" ;;
			scan_yara)              scan_yara="$_val" ;;
			scan_tmpdir_paths)      scan_tmpdir_paths="$_val" ;;
			hookscan_timeout)       hookscan_timeout="$_val" ;;
			hookscan_user_rate_limit) hookscan_user_rate_limit="$_val" ;;
			hookscan_user_show_signames) hookscan_user_show_signames="$_val" ;;
			hookscan_service_users) hookscan_service_users="$_val" ;;
			hookscan_fail_open)     hookscan_fail_open="$_val" ;;
			hookscan_list_max_bytes) hookscan_list_max_bytes="$_val" ;;
			hookscan_list_max_entries) hookscan_list_max_entries="$_val" ;;
			hookscan_escalate_hits) hookscan_escalate_hits="$_val" ;;
			*)
				logger -t maldet-hookscan "unknown config key ignored: $_key"
				;;
		esac
	done < "$_cnf"
}

hookcnf="$inspath/conf.maldet.hookscan"
_hookscan_load_conf "$hookcnf"

# Optional audit trail: elog_lib is loaded only when the variable names an
# existing file. Root gets an audit file under $logdir (the directory is
# created with mode 750 when missing); any other caller gets the audit file
# disabled.
if [[ -f "${elog_lib:-}" ]]; then
	ELOG_APP="maldet"
	ELOG_STDOUT="never"
	ELOG_AUDIT_FILE=""
	if [ "$(id -u)" -eq 0 ]; then
		ELOG_AUDIT_FILE="$logdir/audit.log"
		if [[ ! -d "$logdir" ]]; then
			command mkdir -m 750 "$logdir" 2>/dev/null  # safe: a failure here is tolerated
		fi
	fi
	export ELOG_APP ELOG_STDOUT ELOG_AUDIT_FILE
	# shellcheck disable=SC1090
	source "$elog_lib"
	elog_init 2>/dev/null  # safe: stderr suppressed for missing log dir
fi

# Caller identity resolution
#
# Sets:
#   _is_root          1 when the effective UID is 0, else 0
#   _user_home        canonical home directory of a non-root caller ("" for
#                     root or when the passwd lookup yields nothing)
#   _is_service_user  1 when a non-root caller is named in the comma-separated
#                     hookscan_service_users list

# _hook_canonical_dir DIR — print DIR with symlinks resolved (readlink -e).
# When DIR cannot be resolved it is printed with trailing slashes removed.
# Prints nothing for an empty argument.
_hook_canonical_dir() {
	local _dir="$1" _real
	[ -n "$_dir" ] || return 0
	_real=$(readlink -e "$_dir" 2>/dev/null)
	if [ -z "$_real" ]; then
		_real="$_dir"
		while [[ "$_real" == */ && "$_real" != "/" ]]; do
			_real="${_real%/}"
		done
	fi
	printf '%s\n' "$_real"
}

_is_root=1
_user_home=""
_is_service_user=0

if [ "$(id -u)" != "0" ]; then
	_is_root=0
	_current_user=$(whoami)
	_user_home=$(getent passwd "$_current_user" | cut -d: -f6)
	# Scan targets are canonicalised with readlink -e before they are compared
	# against the home directory, so the home directory must be canonical too;
	# otherwise a symlinked home (or one recorded with a trailing slash) would
	# cause every file the user owns to be refused.
	_user_home=$(_hook_canonical_dir "$_user_home")

	# Check if current user is in the service users whitelist
	_saved_ifs="$IFS"
	IFS=","
	for _svc_user in $hookscan_service_users; do
		if [ "$_current_user" == "$_svc_user" ]; then
			_is_service_user=1
			break
		fi
	done
	IFS="$_saved_ifs"
fi

# Rate limit check (non-root generic mode only)
_hook_rate_check

# Homedir restriction for single-file scans by ordinary users.
# NOTE(review): when no home directory could be resolved the restriction is
# skipped altogether — confirm that this fail-open behaviour is intended.
if [ -z "$_list_mode" ] && [ "$_is_root" == "0" ] && [ "$_is_service_user" == "0" ] && [ -n "$_user_home" ]; then
	case "$file" in
		"$_user_home"/*)
			# Allowed: file is inside user's homedir
			;;
		*)
			logger -t maldet-hookscan "rejected non-root scan outside homedir: $file"
			_output_clean
			exit 0
			;;
	esac
fi

# File list processing (generic --list / --stdin)
#
# Flow when _list_mode is set:
#   1. --stdin: spool stdin (capped at hookscan_list_max_bytes) to a temp file
#      --list:  require a regular, text-typed file within the size cap
#   2. validate each line (rules 1-9 below) and write the resolved paths of
#      the accepted lines to a second temp file
#   3. run maldet --hook-scan -f on the validated list
#   4. print one "INFECTED: <sig> <path>" or "CLEAN: <path>" line per accepted
#      path, using the entries appended to hook.hits.log during the scan
# Exit status: 0 no hits, 2 at least one hit, 1 on error or timeout.
# This section always exits; nothing below it runs in list mode.

# _hook_list_cleanup — remove whichever list-mode temp files exist so far.
# Called on every exit path taken after the first temp file is created.
_hook_list_cleanup() {
	local _tmpf
	for _tmpf in "${_stdin_tmp:-}" "${_validated_list:-}" "${_infected_paths:-}"; do
		[ -n "$_tmpf" ] && command rm -f "$_tmpf"
	done
	return 0
}

if [ -n "$_list_mode" ]; then
	tmpdir="${tmpdir:-$inspath/tmp}"
	_stdin_tmp=""
	_validated_list=""
	_infected_paths=""

	if [ "$_list_mode" == "stdin" ]; then
		# Reject interactive terminal input
		if [ -t 0 ]; then
			echo "ERROR: --stdin requires piped input, not terminal"
			exit 1
		fi
		# Without this check a non-writable tmpdir would leave the name
		# empty and every later redirection would fail noisily
		_stdin_tmp=$(mktemp "$tmpdir/hookscan_stdin.XXXXXX") || {
			echo "ERROR: cannot create temporary file in $tmpdir"
			exit 1
		}
		# Read stdin with a size limit (no read timeout is applied here)
		head -c "${hookscan_list_max_bytes:-1048576}" > "$_stdin_tmp"
		_list_source="$_stdin_tmp"
	fi

	# Step 1: File type check (--list only)
	if [ "$_list_mode" == "list" ]; then
		# Reject if not a regular file
		if [ ! -f "$_list_source" ]; then
			echo "ERROR: list file does not exist or is not a regular file"
			exit 1
		fi

		# Reject if not text (binary, device, FIFO, etc.)
		_ftype=$(file -b --mime-type "$_list_source" 2>/dev/null)  # safe: fallback below
		case "${_ftype:-text/plain}" in
			text/plain|text/x-*|application/x-empty|inode/x-empty) ;;  # allowed text types
			*)
				logger -t maldet-hookscan "rejected non-text list file: $_ftype"
				echo "ERROR: list file is not a text file (detected: $_ftype)"
				exit 1
				;;
		esac

		# Reject if oversized
		_fsize=$(stat -c %s "$_list_source" 2>/dev/null || stat -f %z "$_list_source" 2>/dev/null)  # safe: FreeBSD fallback
		if [ "${_fsize:-0}" -gt "${hookscan_list_max_bytes:-1048576}" ]; then
			logger -t maldet-hookscan "rejected oversized list file: ${_fsize} bytes"
			echo "ERROR: list file exceeds maximum size"
			exit 1
		fi
	fi

	# Step 2: Per-line validation
	_validated_list=$(mktemp "$tmpdir/hookscan_list.XXXXXX") || {
		_hook_list_cleanup
		echo "ERROR: cannot create temporary file in $tmpdir"
		exit 1
	}
	_line_num=0
	_rejected=0
	_accepted=0
	_path_valid_pat='^[a-zA-Z0-9/_. @+~%=,{}-]+$'

	# "|| [ -n ... ]" keeps a final line that lacks a trailing newline
	while IFS= read -r _line || [ -n "$_line" ]; do
		_line_num=$((_line_num + 1))

		# Skip empty lines and comments
		[ -z "$_line" ] && continue
		[[ "$_line" == "#"* ]] && continue

		# RULE 1: Must be absolute path (starts with /)
		if [[ "$_line" != /* ]]; then
			logger -t maldet-hookscan "list:$_line_num: rejected relative path"
			_rejected=$((_rejected + 1))
			continue
		fi

		# RULE 2: Printable characters only (no null, no control chars)
		case "$_line" in
			*[![:print:]]*)
				logger -t maldet-hookscan "list:$_line_num: rejected non-printable chars"
				_rejected=$((_rejected + 1))
				continue
				;;
		esac

		# RULE 3: No shell metacharacters
		if [[ "$_line" =~ $metachar_pat ]]; then
			logger -t maldet-hookscan "list:$_line_num: rejected shell metacharacters"
			_rejected=$((_rejected + 1))
			continue
		fi

		# RULE 4: No path traversal components
		if [[ "$_line" == *".."* ]]; then
			logger -t maldet-hookscan "list:$_line_num: rejected path traversal"
			_rejected=$((_rejected + 1))
			continue
		fi

		# RULE 5: Path format — only characters valid in file paths (positive allowlist)
		if [[ ! "$_line" =~ $_path_valid_pat ]]; then
			logger -t maldet-hookscan "list:$_line_num: rejected invalid path characters"
			_rejected=$((_rejected + 1))
			continue
		fi

		# RULE 6: Resolve symlinks and verify existence
		_resolved=$(readlink -e "$_line" 2>/dev/null)  # safe: non-existent path returns empty
		if [ -z "$_resolved" ]; then
			# File does not exist or dangling symlink — skip silently
			continue
		fi

		# RULE 7: Must be a regular file (not directory, device, socket, FIFO)
		if [ ! -f "$_resolved" ]; then
			logger -t maldet-hookscan "list:$_line_num: rejected non-regular file"
			_rejected=$((_rejected + 1))
			continue
		fi

		# RULE 8: Usermode homedir restriction (same as single-file)
		if [ "$_is_root" == "0" ] && [ "$_is_service_user" == "0" ] && [ -n "$_user_home" ]; then
			case "$_resolved" in
				"$_user_home"/*)
					;;
				*)
					logger -t maldet-hookscan "list:$_line_num: rejected outside homedir"
					_rejected=$((_rejected + 1))
					continue
					;;
			esac
		fi

		# RULE 9: Line count cap (defense against unbounded lists)
		if [ "$_accepted" -ge "${hookscan_list_max_entries:-10000}" ]; then
			logger -t maldet-hookscan "list: max entries reached ($_accepted), truncating"
			break
		fi

		printf '%s\n' "$_resolved" >> "$_validated_list"
		_accepted=$((_accepted + 1))
	done < "$_list_source"

	# Reject if zero valid entries survived validation
	if [ "$_accepted" -eq 0 ]; then
		_hook_list_cleanup
		echo "ERROR: no valid file paths in list ($_rejected rejected)"
		exit 1
	fi

	logger -t maldet-hookscan "list: $_accepted accepted, $_rejected rejected"

	# Step 3: Dispatch validated list to maldet
	cd "$tmpdir" || {
		_hook_list_cleanup
		exit 1
	}

	export hscan

	_scan_output=""
	_scan_rc=0

	# Snapshot hook.hits.log line count before scan (for post-processing)
	_hits_log="${sessdir:-$inspath/sess}/hook.hits.log"
	_hits_before=0
	if [ -f "$_hits_log" ]; then
		_hits_before=$(wc -l < "$_hits_log")
	fi

	_timeout_cmd=$(command -v timeout 2>/dev/null)  # safe: fallback to no-timeout path below
	if [ -n "$_timeout_cmd" ]; then
		_scan_output=$("$_timeout_cmd" "$hookscan_timeout" \
			"$inspath/maldet" --hook-scan \
			--config-option "quarantine_hits=$quarantine_hits,quarantine_clean=$quarantine_clean,scan_clamscan=$scan_clamscan,scan_yara=$scan_yara,scan_tmpdir_paths=$scan_tmpdir_paths" \
			-f "$_validated_list" 2>/dev/null) || _scan_rc=$?  # safe: stderr from maldet is logged internally
	else
		# No timeout command available — run without timeout protection
		_scan_output=$("$inspath/maldet" --hook-scan \
			--config-option "quarantine_hits=$quarantine_hits,quarantine_clean=$quarantine_clean,scan_clamscan=$scan_clamscan,scan_yara=$scan_yara,scan_tmpdir_paths=$scan_tmpdir_paths" \
			-f "$_validated_list" 2>/dev/null) || _scan_rc=$?  # safe: stderr from maldet is logged internally
	fi

	# Step 4: Post-process output to per-file STATUS lines
	_worst_rc=0

	# Handle timeout/error from maldet
	case "$_scan_rc" in
		124|137)
			# 124 is timeout(1)'s own status; 137 is 128+9 (SIGKILL)
			echo "ERROR: scan timeout after ${hookscan_timeout}s"
			logger -t maldet-hookscan "list scan timed out after ${hookscan_timeout}s"
			_hook_list_cleanup
			exit 1
			;;
		0|2)
			# Scan completed — collect infected files from hook.hits.log
			# New hits are lines added since _hits_before
			_infected_paths=$(mktemp "$tmpdir/hookscan_infected.XXXXXX") || {
				_hook_list_cleanup
				echo "ERROR: cannot create temporary file in $tmpdir"
				exit 1
			}
			if [ -f "$_hits_log" ]; then
				# Extract paths (field 2, tab-delimited) from new entries
				tail -n +"$((_hits_before + 1))" "$_hits_log" 2>/dev/null \
					| awk -F'\t' '!/^#/ && NF>0 {print $2}' > "$_infected_paths"  # safe: file existence checked above
			fi

			# Output per-file STATUS lines
			while IFS= read -r _vpath; do
				[ -z "$_vpath" ] && continue
				# Check if this path is in the infected list
				if grep -qxF "$_vpath" "$_infected_paths" 2>/dev/null; then  # safe: empty file returns no match
					# Extract sig name for this path
					_sig=""
					if [ -f "$_hits_log" ]; then
						_sig=$(tail -n +"$((_hits_before + 1))" "$_hits_log" 2>/dev/null \
							| awk -F'\t' -v path="$_vpath" '!/^#/ && $2 == path {print $1; exit}')  # safe: file existence guarded
					fi
					# Mask signame for non-root when configured
					if [ "$_is_root" == "0" ] && [ "${hookscan_user_show_signames:-1}" == "0" ]; then
						_sig="MALWARE-DETECTED"
					fi
					echo "INFECTED: ${_sig:-MALWARE-DETECTED} $_vpath"
					_worst_rc=2
				else
					echo "CLEAN: $_vpath"
				fi
			done < "$_validated_list"
			;;
		*)
			# Other scan error
			echo "ERROR: scan error (rc=$_scan_rc)"
			logger -t maldet-hookscan "list scan error rc=$_scan_rc"
			_worst_rc=1
			;;
	esac

	# Cleanup temp files
	_hook_list_cleanup

	exit "$_worst_rc"
fi

# Scan dispatch (single file)
# maldet runs under timeout(1) when that command is available so a runaway
# scan cannot hold the hook forever. hscan is exported here; hscan_mode was
# exported earlier, so the maldet child sees both.

# _hook_scan_sig — print the signature name from the first
# "0 maldet: SIG PATH" line of _scan_output (prints nothing if there is none).
_hook_scan_sig() {
	local _hit
	_hit=$(echo "$_scan_output" | grep -o "^0 maldet: [^ ]* " | head -1)
	_hit="${_hit#0 maldet: }"
	printf '%s' "${_hit% }"
}

# _hook_emit_hit — report an infection; generic mode additionally exits 2.
_hook_emit_hit() {
	_sig=$(_hook_scan_sig)
	_output_infected "${_sig:-MALWARE}"
	if [ "$hscan_mode" == "generic" ]; then
		exit 2
	fi
}

# _hook_emit_failure TAG — answer for a scan that did not complete.
# With hookscan_fail_open=1 modsec/exim receive their "clean" reply and
# generic exits 1; otherwise modsec/exim receive a reply carrying TAG and
# generic exits 2. Other modes print nothing here.
_hook_emit_failure() {
	local _tag="$1"
	if [ "$hookscan_fail_open" == "1" ]; then
		# Only output clean for modes that need stdout (modsec, exim)
		case "$hscan_mode" in
			modsec) echo "1 maldet: OK" ;;
			exim) echo "maldet: clean" ;;
			generic) exit 1 ;;
		esac
	else
		case "$hscan_mode" in
			modsec) echo "0 maldet: $_tag $file" ;;
			exim) echo "maldet: $_tag" ;;
			generic) exit 2 ;;
		esac
	fi
}

tmpdir="${tmpdir:-$inspath/tmp}"
export hscan

_scan_output=""
_scan_rc=0

cd "$tmpdir" || exit 1

# Assemble the maldet command once; prefix it with timeout when present
_scan_cmd=(
	"$inspath/maldet" --hook-scan
	--config-option "quarantine_hits=$quarantine_hits,quarantine_clean=$quarantine_clean,scan_clamscan=$scan_clamscan,scan_yara=$scan_yara,scan_tmpdir_paths=$scan_tmpdir_paths"
	-a "$file"
)
_timeout_cmd=$(command -v timeout 2>/dev/null)  # safe: empty result means "run without timeout"
if [ -n "$_timeout_cmd" ]; then
	_scan_cmd=("$_timeout_cmd" "$hookscan_timeout" "${_scan_cmd[@]}")
fi
_scan_output=$("${_scan_cmd[@]}" 2>/dev/null) || _scan_rc=$?  # safe: stderr from maldet is logged internally

# Exit code dispatch
# maldet --hook-scan outputs:
#   "0 maldet: SIG PATH"  for infections
#   "1 maldet: OK"         for clean

case "$_scan_rc" in
	0)
		# Completed: the output decides between hit and clean
		if echo "$_scan_output" | grep -q "^0 maldet:"; then
			_hook_emit_hit
		else
			_output_clean
		fi
		;;
	2)
		# maldet exit 2 = malware found
		_hook_emit_hit
		;;
	124|137)
		# Timeout
		_output_error "scan timeout after ${hookscan_timeout}s"
		logger -t maldet-hookscan "scan timed out after ${hookscan_timeout}s: $file"
		_hook_emit_failure "TIMEOUT"
		;;
	*)
		# Other error
		_output_error "scan error (rc=$_scan_rc)"
		logger -t maldet-hookscan "scan error rc=$_scan_rc: $file"
		_hook_emit_failure "ERROR"
		;;
esac

exit 0
