* Jerome Lavigne
 * Luca Lizzeri
 *
 * Adapted from Kitten's Spaminator
 * http://mookitty.co.uk/devblog/category/kittens-spaminator/
 *
 * $Id$
 ******************************************************************************/

/**
 * Strike-based comment spam filter.
 *
 * Each check in isSpam() may add "strikes" to the comment being examined;
 * once the running total reaches $strikes the comment is treated as spam and
 * handed to _exit().
 */
class SpaminatorPort {

    /** Number of strikes at which a comment is considered spam. */
    var $strikes = 5;
    /** Patterns matched against the comment text; a match is worth 10 strikes. */
    var $regexes = array(
        '/[0-9]{1,4}.*ewqrb2/i',
        '/^byob.*[0-9]{1,4}/i');
    /** Seconds to sleep before answering a detected spammer. */
    var $nap_time = 2;
    /** Minimum number of seconds expected between two comments from one IP. */
    var $crap_flood = 60;
    /** Minimum user_level a user needs to receive the "spammer caught" mail. */
    var $user_level = 3;
    /** Value returned by isSpam() for comments that pass every check. */
    var $comment_status = '1';

    // Restored from a markup-stripped copy ('/]*?href=/i'); only referenced by
    // the commented-out link counter in isSpam().
    var $_links_re = '/<a[^>]*?href=/i';
    var $_frontend;
    var $_db;
    var $_options;
    var $_db_tables;
    var $_mail_helper;
    /** Running strike total for the comment currently being checked. */
    var $_strikes = 0;
    /** Link limit read from the 'comment_max_links' option; 0 disables the check. */
    var $_max_links = 0;
    /** Lower-cased spam words read from the 'moderation_keys' option. */
    var $_word_list = array();
    /** IP prefixes read from the 'moderation_keys' option. */
    var $_ip_list = array();
    /** Lower-cased emails of previously approved commenters, stored as array keys. */
    var $_email_list = array();

    /**
     * Binds the filter to the frontend and loads the word list, IP list, link
     * limit and approved-email whitelist from the database.
     *
     * @param object $frontend object providing db, options, tables,
     *                         where_date, getMailHelper() and getUserPerms()
     */
    function SpaminatorPort(&$frontend) {
        $this->_frontend =& $frontend;
        $this->_db =& $frontend->db;
        $this->_options =& $frontend->options;
        $this->_db_tables =& $frontend->tables;
        $this->_mail_helper =& $frontend->getMailHelper();

        // get relevant options
        $q = "select option_name, option_value from {$this->_db_tables['options']} where option_name in ('moderation_keys', 'comment_max_links')";
        $this->_db->query($q);
        $rows = $this->_db->all();
        if (!is_array($rows))
            $rows = array();
        foreach ($rows as $row) {
            switch ($row['option_name']) {
                case 'moderation_keys':
                    // build wordlist and iplist, one entry per line
                    $wordlist = str_replace("\r", '', $row['option_value']);
                    foreach (explode("\n", $wordlist) as $word) {
                        $word = strtolower(trim($word));
                        if (empty($word))
                            continue;
                        if (preg_match('/^([0-9]{1,3}\.?){2,4}/', $word))
                            $this->_ip_list[] = $word;
                        else
                            $this->_word_list[] = $word;
                    }
                    break;
                case 'comment_max_links':
                    $this->_max_links = (int)$row['option_value'];
                    break;
            }
        }

        // build approved emails list, MySQL specific
        $this->_db->query("select comment_author_email from {$this->_db_tables['comments']} where comment_approved='1' and comment_date_gmt < ('{$this->_frontend->where_date}' - interval 1 day)");
        $rows = $this->_db->all();
        $this->_email_list = array();
        if (is_array($rows)) {
            // all() yields one array per row, so pull the column out; storing
            // the rows themselves would never compare equal to an email string.
            foreach ($rows as $row) {
                $approved = strtolower(trim($row['comment_author_email']));
                // a blank address must not whitelist every comment without an email
                if ($approved !== '')
                    $this->_email_list[$approved] = true;
            }
        }
    }

    /**
     * Adds strikes to the running total.
     *
     * @param int $count number of strikes to add
     * @return bool true once the total has reached the $strikes threshold
     */
    function _addStrike($count) {
        $this->_strikes += $count;
        return $this->_strikes >= $this->strikes;
    }

    /**
     * Handles a comment that reached the strike threshold.
     *
     * Always sleeps for $nap_time seconds first. When the 'comment_spam_kill'
     * option is set, optionally mails the privileged users, prints a rejection
     * (plain text, or an XML error document when $xml_output is true) and ends
     * the script. Otherwise returns '0'.
     *
     * @param string $user       comment author name
     * @param string $email      comment author email
     * @param string $url        comment author url
     * @param string $text       comment body
     * @param bool   $xml_output true to answer with an XML error document
     * @return string '0' when the comment is not killed
     */
    function _exit($user, $email, $url, $text, $xml_output=false) {
        // sleep for a bit
        sleep($this->nap_time);

        if ($this->_options['comment_spam_kill']) {

            // check if we have to send email
            if ($this->_options['comment_spam_mail']) {
                $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
                // not every client sends a User-Agent header
                $client = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

                $body = "A comment has been killed.\r\n\r\n";
                $body .= "The details:\r\n";
                $body .= "Strikes : " . $this->_strikes . "/" . $this->strikes . "\r\n";
                $body .= "IP Addr : ".$_SERVER['REMOTE_ADDR']."\r\n";
                $body .= "Referer : " . $referer ."\r\n";
                $body .= "Client : " . $client."\r\n";
                $body .= "Request : " . $_SERVER['REQUEST_METHOD']." ". $_SERVER['REQUEST_URI']."\r\n";
                $body .= "Email : $email\r\n";
                $body .= "Author : $user\r\n";
                $body .= "Body:\r\n";
                $body .= "$text\r\n\r\n";
                $body .= "--\r\nThis email has been sent because you have comment_spam_kill and comment_spam_mail set to true in your config\r\n";

                $users =& $this->_frontend->getUserPerms();
                $recipients = array();
                if (is_array($users)) {
                    foreach ($users as $user_id=>$user_data) {
                        // Notify users at or above $user_level, plus
                        // administrators and editors. (An earlier comparison
                        // against an undefined $author_id has been dropped.)
                        $privileged =
                            (isset($user_data['user_level']) && $user_data['user_level'] >= $this->user_level)
                            || isset($user_data['capabilities']['administrator'])
                            || isset($user_data['capabilities']['editor']);
                        if ($privileged)
                            $recipients[] = $user_data['user_email'];
                    }
                }

                // nothing to send if nobody qualifies
                if (count($recipients) > 0) {
                    $to = implode(', ', $recipients);
                    $shortname_hdr = $this->_mail_helper->encoded2hdr($this->_options['shortname']);
                    $this->_mail_helper->send_mail(
                        $to,
                        "[$shortname_hdr] Spammer caught!",
                        $body,
                        "From: " . $this->_options['mail_from']);
                }
            }

            if (!$xml_output) {
                // Make sure the bot thinks that spam was posted:
                header( "HTTP/1.0 200 OK" );
                // Tell humans something else.
                // NOTE: the line breaks inside these literals are part of the output.
                echo "Sorry, you are banned from commenting on this blog.

";
                echo "Either your comment content was found to contain spam, or
";
                echo "your IP address (or a subnet of your IP address) has spammed this blog before.";
                echo "

Strike count: " . $this->_strikes . "

";
                exit;
            } else {
                // Trackback error response. The markup of this statement was
                // lost in a tag-stripped copy and has been rebuilt following
                // the TrackBack response format.
                header('Content-Type: text/xml');
                echo '<?xml version="1.0" encoding="' . $this->_options['charset'] . '"?>' .
                    "\n<response><error>1</error><message>" .
                    "The system thinks your trackback is spam!" .
                    "</message></response>\n";
                exit;
            }
        }

        return '0';
    }

    /**
     * Runs every spam check against a comment.
     *
     * @param string $user       comment author name
     * @param string $email      comment author email
     * @param string $url        comment author url
     * @param string $text       comment body
     * @param bool   $xml_output passed through to _exit()
     * @return string $comment_status when the comment passes, otherwise the
     *                result of _exit() (which may end the script instead)
     */
    function isSpam($user, $email, $url, $text, $xml_output=false) {
        // comment attributes should have already been checked for non-emptyness

        // every comment starts with a clean slate
        $this->_strikes = 0;

        $email = strtolower($email);

        // check if in whitelist
        if (isset($this->_email_list[$email]))
            $this->_strikes -= 3;

        // check for regex match
        foreach ($this->regexes as $regex) {
            if (preg_match($regex, $text) && $this->_addStrike(10))
                return $this->_exit($user, $email, $url, $text, $xml_output);
        }

        // check referrer
        if (isset($_SERVER['HTTP_REFERER']) && strpos($_SERVER['HTTP_REFERER'], $this->_options['url']) === false && $this->_addStrike(2))
            return $this->_exit($user, $email, $url, $text, $xml_output);

        // check ip: listed entries are prefixes of the client address.
        // strpos() returns false when there is no match and false == 0, so the
        // comparison has to be strict.
        $ip = $_SERVER['REMOTE_ADDR'];
        foreach ($this->_ip_list as $spam_ip) {
            if (strpos($ip, $spam_ip) === 0 && $this->_addStrike(5))
                return $this->_exit($user, $email, $url, $text, $xml_output);
        }

        // check for flooding
        // defence in depth: only address characters may reach the query
        $sql_ip = preg_replace('/[^0-9a-fA-F:.]/', '', $ip);
        $q = "select max(UNIX_TIMESTAMP(comment_date)) as tstamp from " . $this->_db_tables['comments'] .
            " where comment_author_IP='$sql_ip'";
        $this->_db->query($q);
        $row = $this->_db->next();
        if (!empty($row)) {
            $t = (int)$row['tstamp'];
            if (($t + $this->crap_flood > time()) && $this->_addStrike(3))
                return $this->_exit($user, $email, $url, $text, $xml_output);
        }

        // check number of links; a limit of 0 means "no limit", otherwise
        // every comment (even one without links) would collect strikes
        //$links = preg_match_all($this->_links_re, $text, $matches);
        if ($this->_max_links > 0) {
            $link_count = preg_match_all('#https?://#i', $text, $link_matches);
            if ($link_count >= $this->_max_links && $this->_addStrike(2))
                return $this->_exit($user, $email, $url, $text, $xml_output);
        }

        // check for email, user, url, text in wordlist
        // start with smaller fields as they're faster to check.
        // Kept as (value, strikes) pairs: using the values as array keys would
        // merge identical fields and turn numeric strings into integers.
        $fields = array(
            array($user, 1),
            array($email, 1),
            array($url, 2),
            array($text, 1),
        );
        foreach ($fields as $field) {
            // the word list is lower-cased, so compare against lower-cased input
            $value = strtolower((string)$field[0]);
            $strike_value = $field[1];
            $value_len = strlen($value);
            if ($value_len == 0)
                continue;
            foreach ($this->_word_list as $spam_word) {
                $spam_len = strlen($spam_word);
                if ($spam_len < 3)
                    continue;
                // the shorter string is searched for inside the longer one, so
                // a field that is a fragment of a spam word counts as well
                if ($spam_len > $value_len)
                    $hit = strpos($spam_word, $value) !== false;
                else
                    $hit = strpos($value, $spam_word) !== false;
                if ($hit && $this->_addStrike($strike_value))
                    return $this->_exit($user, $email, $url, $text, $xml_output);
            }
        }

        // straight from spaminator
        // Useless numeric encoding is a pretty good spam indicator:
        // Extract entities:
        if (preg_match_all('/&#(\d+);/', $text, $chars)) {
            foreach ($chars[1] as $char) {
                // If it's an encoded char in the normal ASCII set, reject
                if ($char < 128 && $this->_addStrike(1))
                    return $this->_exit($user, $email, $url, $text, $xml_output);
            }
        }

        return $this->comment_status;
    }
}

require_once 'classes/LightPressPlugin.php';

/**
 * LightPress plugin that runs SpaminatorPort on the 'insert_comment' hook.
 */
class CommentSpam extends LightPressPlugin {

    var $constructor_args = array();
    var $default_context = 24; // LP_CONTEXT_POST | LP_CONTEXT_STATIC
    var $description = 'Check for spam on comment insertion';
    var $active = true;
    var $hooks = array('insert_comment');

    var $_spaminator;

    /**
     * @param object $frontend  frontend handed on to the parent plugin and to the filter
     * @param mixed  $args      plugin arguments, handed on to the parent plugin
     * @param bool   $dummy_run when true the spam filter is not created
     */
    function CommentSpam(&$frontend, $args, $dummy_run=false) {
        $this->LightPressPlugin($frontend, $args, $dummy_run);
        if (!$dummy_run) {
            // plain assignment: "=& new" is a parse error on PHP 7+
            $this->_spaminator = new SpaminatorPort($frontend);
        }
    }

    /**
     * Stores the filter's verdict in $c['approved'].
     *
     * @param string $hook name of the hook being run
     * @param array  $c    comment data; author, email, url and comment are
     *                     read, type is optional
     */
    function run($hook, &$c) {
        // no filter exists after a dummy run
        if (!is_object($this->_spaminator))
            return;
        $is_trackback = isset($c['type']) && $c['type'] == 'trackback';
        $c['approved'] = $this->_spaminator->isSpam(
            $c['author'], $c['email'], $c['url'], $c['comment'], $is_trackback);
    }
}

?>