<?php

declare(strict_types=1);

namespace Kiri\MailServer;

/**
 * Content filter — basic heuristic spam detection for mail content.
 *
 * Signals that contribute to the score:
 *  - empty subject / (nearly) empty body
 *  - common spam keywords
 *  - HTML-only message (Content-Type is text/html, not multipart/alternative)
 *  - excessive number of links
 *  - From header domain differing from the MAIL FROM envelope domain
 *  - body that is almost entirely markup
 *  - body dominated by upper-case letters
 */
class ContentFilter
{
    /** @var list<string> High-risk spam phrases, kept lower-case so they can be matched against lower-cased text. */
    private const SPAM_KEYWORDS = [
        'viagra', 'cialis', 'casino', 'lottery', 'winner',
        'click here', 'act now', 'limited time', '100% free',
        'earn money', 'work from home', 'make money fast',
        'unsubscribe', 'opt out', 'not spam',
    ];

    /** @var int Maximum number of links; more than this is treated as a spam signal. */
    private const MAX_LINKS = 20;

    /** @var float Score at or above which a message is classified as spam. */
    private const SPAM_THRESHOLD = 3.0;

    /**
     * Analyse a message and return its spam score.
     *
     * Reads the `subject`, `body`, `from` and `headers` properties of the message.
     * Each heuristic adds a fixed weight; the total is capped at 10.0 and rounded
     * to one decimal place.
     *
     * @param MailMessage $message      The message to inspect.
     * @param string      $envelopeFrom The SMTP MAIL FROM address.
     *
     * @return float Spam score in the range 0-10 (higher is more suspicious).
     */
    public function analyze(MailMessage $message, string $envelopeFrom): float
    {
        $score = 0.0;

        // Empty subject. Trimmed so a whitespace-only subject also counts as empty.
        if (trim($message->subject) === '') {
            $score += 1.5;
        }

        // (Nearly) empty body: fewer than 10 bytes once surrounding whitespace is removed.
        if (strlen(trim($message->body)) < 10) {
            $score += 2.0;
        }

        // Keyword matches in subject and body.
        $score += $this->checkKeywords($message->subject . ' ' . $message->body);

        // HTML-only message (no plain-text alternative). The two original key spellings
        // are tried first; header names are case-insensitive, so any other casing
        // (e.g. "Content-type") is resolved by the fallback scan.
        $contentType = $message->headers['content-type']
            ?? $message->headers['Content-Type']
            ?? $this->headerValue($message->headers, 'content-type');
        if (stripos($contentType, 'text/html') !== false && stripos($contentType, 'multipart/alternative') === false) {
            $score += 1.0;
        }

        // Too many links.
        if ($this->countLinks($message->body) > self::MAX_LINKS) {
            $score += 2.0;
        }

        // From header domain does not match the envelope sender domain.
        // NOTE(review): this is an exact domain comparison; senders that use a separate
        // bounce domain will be flagged — confirm this is the intended policy.
        if ($this->isFromSpoofed($message->from, $envelopeFrom)) {
            $score += 3.0;
        }

        // Large body that is almost entirely HTML tags.
        if ($this->isMarkupOnly($message->body)) {
            $score += 2.0;
        }

        // Body dominated by upper-case letters.
        if ($this->isMostlyUppercase($message->body)) {
            $score += 1.5;
        }

        return round(min($score, 10.0), 1);
    }

    /**
     * Whether the message is spam, i.e. its score reaches SPAM_THRESHOLD.
     *
     * @param MailMessage $message      The message to inspect.
     * @param string      $envelopeFrom The SMTP MAIL FROM address.
     */
    public function isSpam(MailMessage $message, string $envelopeFrom): bool
    {
        return $this->analyze($message, $envelopeFrom) >= self::SPAM_THRESHOLD;
    }

    /**
     * Score the text against SPAM_KEYWORDS.
     *
     * @param string $text Text to scan (matched case-insensitively).
     *
     * @return float 0.5 for every distinct keyword that occurs at least once.
     */
    private function checkKeywords(string $text): float
    {
        $haystack = strtolower($text);
        $score = 0.0;

        foreach (self::SPAM_KEYWORDS as $keyword) {
            if (str_contains($haystack, $keyword)) {
                $score += 0.5;
            }
        }

        return $score;
    }

    /**
     * Count the occurrences of "http://" or "https://" (case-insensitive) in the content.
     *
     * @param string $content Raw message body.
     */
    private function countLinks(string $content): int
    {
        // preg_match_all() returns false on a PCRE error; the cast keeps the declared
        // int return type valid under strict_types instead of raising a TypeError.
        return (int) preg_match_all('/https?:\/\//i', $content);
    }

    /**
     * Look up a header value by name, ignoring the case of the header name.
     *
     * @param mixed  $headers Header collection; only iterable collections are scanned.
     * @param string $name    Header name to look for.
     *
     * @return string The first matching string value, or '' when nothing usable is found.
     */
    private function headerValue(mixed $headers, string $name): string
    {
        if (!is_iterable($headers)) {
            return '';
        }

        foreach ($headers as $key => $value) {
            if (is_string($key) && strcasecmp($key, $name) === 0) {
                return is_string($value) ? $value : '';
            }
        }

        return '';
    }

    /**
     * Whether a body longer than 200 bytes has fewer than 20 bytes of visible text.
     *
     * Whitespace is ignored because strip_tags() leaves the indentation and line
     * breaks between tags behind, which would otherwise be counted as text.
     *
     * @param string $body Raw message body.
     */
    private function isMarkupOnly(string $body): bool
    {
        if (strlen($body) <= 200) {
            return false;
        }

        $stripped = strip_tags($body);
        // preg_replace() yields null on a PCRE error; fall back to the unmodified text.
        $visible = preg_replace('/\s+/', '', $stripped) ?? $stripped;

        return strlen($visible) < 20;
    }

    /**
     * Whether more than half of the ASCII letters in the body are upper-case.
     *
     * Bodies with 50 or fewer ASCII letters are never flagged.
     *
     * @param string $body Raw message body.
     */
    private function isMostlyUppercase(string $body): bool
    {
        // Counting matches avoids building intermediate strings and the null that
        // preg_replace() can return on failure.
        $letters = (int) preg_match_all('/[A-Za-z]/', $body);
        if ($letters <= 50) {
            return false;
        }

        $upper = (int) preg_match_all('/[A-Z]/', $body);

        return ($upper / $letters) > 0.5;
    }

    /**
     * Detect a mismatch between the From header domain and the MAIL FROM envelope domain.
     *
     * @param string $fromHeader   Value of the From header.
     * @param string $envelopeFrom The SMTP MAIL FROM address.
     *
     * @return bool True only when both domains can be extracted and they differ;
     *              false when either input is empty or has no extractable domain.
     */
    private function isFromSpoofed(string $fromHeader, string $envelopeFrom): bool
    {
        if ($envelopeFrom === '' || $fromHeader === '') {
            return false;
        }

        $fromDomain = $this->extractDomain($fromHeader);
        if ($fromDomain === null) {
            return false;
        }

        $envelopeDomain = $this->extractDomain($envelopeFrom);
        if ($envelopeDomain === null) {
            return false;
        }

        // extractDomain() already lower-cases, so a plain comparison is case-insensitive.
        return $fromDomain !== $envelopeDomain;
    }

    /**
     * Extract the lower-cased domain from an address or mailbox string.
     *
     * Accepts bare addresses ("user@example.com"), bracketed addresses
     * ("<user@example.com>") and mailboxes with a display name and/or comments
     * ('"Name" <user@example.com> (note)').
     *
     * @param string $address Address or mailbox string.
     *
     * @return string|null The domain, or null when none can be found (including "<>").
     */
    private function extractDomain(string $address): ?string
    {
        // Drop parenthesised comments so an "@" inside them cannot be mistaken for the address.
        $address = preg_replace('/\([^()]*\)/', '', $address) ?? $address;

        // Prefer the content of the last <...> pair: a display name may itself contain
        // "@" or angle brackets, and the real address is the final bracketed part.
        if (preg_match('/<([^<>]*)>[^<>]*$/', $address, $match) === 1) {
            $address = $match[1];
        }

        $address = trim($address, "<> \t\r\n");

        // Split on the LAST "@": a quoted local part is allowed to contain "@".
        $at = strrpos($address, '@');
        if ($at === false) {
            return null;
        }

        $domain = substr($address, $at + 1);
        // Cut at the first delimiter that cannot belong to a domain.
        $domain = substr($domain, 0, strcspn($domain, " \t\r\n>,;"));
        // Ignore a trailing root dot and normalise case.
        $domain = strtolower(rtrim($domain, '.'));

        return $domain === '' ? null : $domain;
    }
}