Files
kiri-mail-server/src/ContentFilter.php
T
2026-06-28 19:42:35 +08:00

171 lines
4.4 KiB
PHP

<?php
declare(strict_types=1);
namespace Kiri\MailServer;
/**
 * Content filter: basic heuristic spam detection for a mail message.
 *
 * Checks performed:
 *  - empty subject / (almost) empty body
 *  - common spam keywords
 *  - HTML-only message (top-level Content-Type is text/html)
 *  - excessive number of links
 *  - sender domain mismatch (From header vs. MAIL FROM envelope address)
 *  - markup-heavy body with almost no text
 *  - mostly upper-case body
 */
class ContentFilter
{
    /** @var list<string> Lower-case phrases; each one found adds 0.5 to the score. */
    private const SPAM_KEYWORDS = [
        'viagra', 'cialis', 'casino', 'lottery', 'winner',
        'click here', 'act now', 'limited time', '100% free',
        'earn money', 'work from home', 'make money fast',
        'unsubscribe', 'opt out', 'not spam',
    ];

    /** @var int Maximum number of links before the message is penalised. */
    private const MAX_LINKS = 20;

    /** @var float Score at or above which isSpam() reports a message as spam. */
    private const SPAM_THRESHOLD = 3.0;

    /**
     * Analyse a message and return its spam score.
     *
     * Reads the `subject`, `body`, `headers` and `from` properties of the message.
     *
     * @param MailMessage $message      The message to inspect.
     * @param string      $envelopeFrom The SMTP MAIL FROM address.
     * @return float Spam score, capped at 10.0 and rounded to one decimal (higher = more suspicious).
     */
    public function analyze(MailMessage $message, string $envelopeFrom): float
    {
        $subject = $message->subject;
        $body = $message->body;
        $score = 0.0;

        // Empty subject; a whitespace-only subject carries no information either.
        if (is_string($subject) && trim($subject) === '') {
            $score += 1.5;
        }

        // (Almost) empty body; surrounding whitespace does not count as content.
        if (strlen(trim($body)) < 10) {
            $score += 2.0;
        }

        // Keyword hits in subject and body.
        $score += $this->checkKeywords($subject . ' ' . $body);

        // HTML-only message (no multipart/alternative wrapper offering a text part).
        $contentType = $this->contentTypeOf($message);
        if (stripos($contentType, 'text/html') !== false
            && stripos($contentType, 'multipart/alternative') === false
        ) {
            $score += 1.0;
        }

        // Too many links.
        if ($this->countLinks($body) > self::MAX_LINKS) {
            $score += 2.0;
        }

        // From header domain differs from the envelope sender domain.
        if ($this->isFromSpoofed($message->from, $envelopeFrom)) {
            $score += 3.0;
        }

        // Long body that is almost entirely markup.
        if (strlen(strip_tags($body)) < 20 && strlen($body) > 200) {
            $score += 2.0;
        }

        // Mostly upper-case ASCII letters. Counting matches instead of stripping
        // characters avoids handling a null result from preg_replace().
        $letters = (int) preg_match_all('/[A-Za-z]/', $body);
        if ($letters > 50) {
            $upper = (int) preg_match_all('/[A-Z]/', $body);
            if ($upper / $letters > 0.5) {
                $score += 1.5;
            }
        }

        return round(min($score, 10.0), 1);
    }

    /**
     * Whether the message's score reaches the spam threshold.
     *
     * @param MailMessage $message      The message to inspect.
     * @param string      $envelopeFrom The SMTP MAIL FROM address.
     */
    public function isSpam(MailMessage $message, string $envelopeFrom): bool
    {
        return $this->analyze($message, $envelopeFrom) >= self::SPAM_THRESHOLD;
    }

    /**
     * Look up the Content-Type header value.
     *
     * The two common spellings are tried directly first; if neither is present
     * and the header collection is iterable, it is scanned case-insensitively
     * because header field names are case-insensitive.
     *
     * @return string The header value, or '' when absent or not a string.
     */
    private function contentTypeOf(MailMessage $message): string
    {
        $headers = $message->headers;
        $value = $headers['content-type'] ?? $headers['Content-Type'] ?? null;

        if ($value === null && is_iterable($headers)) {
            foreach ($headers as $name => $candidate) {
                if (is_string($name) && strcasecmp($name, 'content-type') === 0) {
                    $value = $candidate;
                    break;
                }
            }
        }

        return is_string($value) ? $value : '';
    }

    /**
     * Score spam keywords: 0.5 for every distinct keyword contained in the text.
     *
     * Matching is a case-insensitive substring search (ASCII case folding).
     */
    private function checkKeywords(string $text): float
    {
        $text = strtolower($text);
        $score = 0.0;
        foreach (self::SPAM_KEYWORDS as $keyword) {
            if (str_contains($text, $keyword)) {
                $score += 0.5;
            }
        }

        return $score;
    }

    /**
     * Count occurrences of "http://" or "https://" (case-insensitive).
     */
    private function countLinks(string $content): int
    {
        // preg_match_all() returns false on a PCRE failure; treat that as zero
        // links rather than violating the int return type.
        return (int) preg_match_all('/https?:\/\//i', $content);
    }

    /**
     * Detect a sender domain mismatch between the From header and MAIL FROM.
     *
     * Returns false when either value is empty or no domain can be extracted
     * from it, so missing data is never reported as a mismatch.
     */
    private function isFromSpoofed(string $fromHeader, string $envelopeFrom): bool
    {
        if ($envelopeFrom === '' || $fromHeader === '') {
            return false;
        }

        $fromDomain = $this->extractDomain($fromHeader);
        if ($fromDomain === null) {
            return false;
        }

        $envelopeDomain = $this->extractDomain($envelopeFrom);
        if ($envelopeDomain === null) {
            return false;
        }

        return strtolower($fromDomain) !== strtolower($envelopeDomain);
    }

    /**
     * Extract the domain part of an e-mail address or From header value.
     *
     * Accepts bare addresses ("a@b.c"), bracketed addresses ("<a@b.c>") and
     * display-name forms ('"Name" <a@b.c>'). The address inside the last
     * angle-bracket pair wins, so an "@" inside the display name cannot hide
     * the real sender domain. Without brackets, parenthesised comments are
     * removed first. The domain is whatever follows the last "@".
     *
     * @return string|null The domain, or null when none can be determined.
     */
    private function extractDomain(string $address): ?string
    {
        if (preg_match('/<([^<>]*)>[^<>]*$/', $address, $match) === 1) {
            $address = $match[1];
        } else {
            // "user@example.com (Comment)": drop the trailing comment.
            $address = preg_replace('/\([^()]*\)/', '', $address) ?? $address;
        }

        // Also strips stray brackets and line-ending whitespace (CR/LF/tab).
        $address = trim($address, "<> \t\r\n\0\x0B");

        $at = strrpos($address, '@');
        if ($at === false) {
            return null;
        }

        // A trailing dot (fully-qualified form) names the same domain.
        $domain = rtrim(trim(substr($address, $at + 1)), '.');

        return $domain !== '' ? $domain : null;
    }
}