-
Notifications
You must be signed in to change notification settings - Fork 13
/
update-phishtank-rules
executable file
·205 lines (182 loc) · 7.67 KB
/
update-phishtank-rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env php
<?PHP
/*****************************************************
* ____ _ _ _ _____ _ _ _ _ __
* | _ \| |__ (_)___| |_|_ _|/ \ | \ | | |/ /
* | |_) | '_ \| / __| '_ \| | / _ \ | \| | ' /
* | __/| | | | \__ \ | | | |/ ___ \| |\ | . \
* |_| |_| |_|_|___/_| |_|_/_/ \_\_| \_|_|\_\
* SpamAssassin Rules - Update/Build script
*
* Contact: Kawaiipantsu
* Github: https://github.com/kawaiipantsu/spamassassin-rules
*
* Not using cURL, for quick implementation and no extra modules
* needed with PHP etc. But you should set some sort of UA for
* friendly visibility for Phishtank...
*
* NOTE: PhishTank ONLY includes "ONLINE" and "VALIDATED" in their
* public DB files, so the EXTRA scores will always be applied!
*
* I have simply added this feature in case they someday release
* the full DB with "offline" and "unvalidated" URLs too...
*
****/
// ---------------------------------------------------------------------------
// CONFIG
// ---------------------------------------------------------------------------

// date() format used for the timestamp in front of every console log line.
define('DATE_FORMAT', 'H:i:s d-m-Y');

// Where to fetch the PhishTank DB from. Must be the uncompressed JSON feed,
// because the file is handed straight to json_decode().
define('PHISHTANK_DB_URL', 'http://data.phishtank.com/data/online-valid.json');

// Keep the previous DB file (renamed with a timestamp) when re-downloading?
define('PHISHTANK_DB_ARCHIVE', true);

// Gzip the archived copy and remove the uncompressed one?
define('PHISHTANK_ARCHIVE_GZ', true);

// Re-download once the local DB is at least this many days old.
define('PHISHTANK_DB_DAYCHECK', 5);

// Score of the base URI match rule.
define('SCORE_DEFAULT', '5.0');

// Score of the additional "validated by PhishTank" meta rule.
define('SCORE_EXTRA_VALIDATED', '2.0');

// Score of the additional "site still online" meta rule.
define('SCORE_EXTRA_ACTIVE', '2.0');
// Script start
cLog("==[ ".strtoupper(basename(__FILE__,'.php'))." ]===============================");
cLog("> Checking for phishtank DB locally");

// The DB files are anchored to the script directory (like the rule
// directories and the writability checks) so the script behaves the same no
// matter which working directory it is started from (cron, systemd, ...).
$timestamp = time();
define("ptDB_latest", __DIR__."/phishtank-latest-db.json");
define("ptDB_archive", __DIR__."/phishtank-".$timestamp."-db.json");

// Download the DB when there is no readable local copy, or when the local
// copy is PHISHTANK_DB_DAYCHECK days old or older. downloadPhishTankDB()
// takes care of archiving/removing the previous file.
if ( is_file(ptDB_latest) && is_readable(ptDB_latest) ) {
    cLog("> Found DB locallay");
    $dbAge = $timestamp-filemtime(ptDB_latest);
    $dbMaxAge = 60*60*24*PHISHTANK_DB_DAYCHECK;
    if ($dbAge >= $dbMaxAge) {
        cLog(" - DB is outdated, re-downloading now");
        downloadPhishTankDB();
    }
} else {
    cLog("> Did not find any local DB, downloading now");
    downloadPhishTankDB();
}

// Whichever path was taken above, the latest DB file is what gets loaded.
cLog(" - Loading");
$data = loadPhishTankDB(ptDB_latest);
cLog(" - Done");
// We should have some data to work with now. If the DB could not be loaded
// (download failed, file empty, invalid JSON) stop here, BEFORE the existing
// rule files are deleted, so a bad download never leaves us without rules.
if ( !is_array($data) || count($data) === 0 ) {
    cLog("> ERROR: PhishTank DB is missing, empty or not valid JSON - existing rules left untouched");
    exit(1);
}
$totalEntries = count($data);
cLog("> Found {$totalEntries} PhishTank entries in DB");

// Output directories; create them on first run so file_put_contents() works.
$rulesDirAll    = __DIR__ ."/phishtank-all-rules/";
$rulesDir7days  = __DIR__ ."/phishtank-7days-rules/";
$rulesDir30days = __DIR__ ."/phishtank-30days-rules/";
foreach ( array($rulesDirAll, $rulesDir7days, $rulesDir30days) as $rulesDir ) {
    if ( !is_dir($rulesDir) ) mkdir($rulesDir, 0755, true);
}

// The age-limited directories are rebuilt from scratch on every run.
// glob() can return false on error, hence the "?: array()" fallback.
cLog("> Cleaning up phishtank validated rule directory");
$fullPath = $rulesDir7days;
array_map('unlink', glob($fullPath."*.cf") ?: array());
cLog("> Cleaning up phishtank online rule directory");
$fullPath = $rulesDir30days;
array_map('unlink', glob($fullPath."*.cf") ?: array());

cLog("> Beginning to build Phishtank rules");
// Loop-invariant values: "now" and the two age limits in seconds.
// Note that the "30days" limit is 31 days, as in the original script.
$timestamp = time();
$ruleMaxAge1 = 60*60*24*7;
$ruleMaxAge2 = 60*60*24*31;
foreach ( $data as $ptEntry ) {
    // Skip malformed entries instead of writing a broken rule file.
    if ( !is_array($ptEntry) || !isset($ptEntry["phish_id"], $ptEntry["submission_time"]) ) continue;

    // An unparsable submission time is treated as "too old" for both
    // age-limited directories; the rule is still written to the "all" set.
    $timestampSub = strtotime(trim($ptEntry["submission_time"]));
    $ruleAge = ($timestampSub === false) ? PHP_INT_MAX : $timestamp-$timestampSub;

    $id = trim($ptEntry["phish_id"]);
    $idPad = sprintf("%010s",$id);
    $rule = buildRule($ptEntry);
    $filename_rule = "phishtank-".$idPad.".cf";

    // Save rule to "all" rules
    file_put_contents($rulesDirAll.$filename_rule,$rule);
    // If within 7days old, save under 7days rules
    if ( $ruleAge <= $ruleMaxAge1 ) file_put_contents($rulesDir7days.$filename_rule,$rule);
    // If within 30days old, save under 30days rules
    if ( $ruleAge <= $ruleMaxAge2 ) file_put_contents($rulesDir30days.$filename_rule,$rule);
}
/**
 * Build the SpamAssassin rule text for one PhishTank DB entry.
 *
 * The entry is expected to carry the keys "phish_id", "phish_detail_url",
 * "submission_time", "url", "verified", "online" and "target"; missing keys
 * are treated as empty strings.
 *
 * The generated text contains a uri match rule on host + path of the entry
 * URL, plus two meta rules that add extra score when the entry is flagged
 * as verified / online. Scores come from the SCORE_* constants.
 *
 * When the entry URL has no usable host, no match rule is generated (an empty
 * regex would match every URI); a comment-only stub is returned instead.
 *
 * @param array $entry One decoded PhishTank DB record.
 * @return string Rule file content.
 */
function buildRule($entry) {
    // Values come from an external feed and end up in a config file: collapse
    // line breaks so a value can never start a new config line.
    $oneLine = function ($value) {
        return trim(preg_replace('/[\r\n]+/', ' ', (string) $value));
    };
    $field = function ($key) use ($entry, $oneLine) {
        return (is_array($entry) && isset($entry[$key]) && is_scalar($entry[$key]))
            ? $oneLine($entry[$key])
            : "";
    };

    $ptID = $field("phish_id");
    $id = sprintf("PHISHTANK_%010s", $ptID);
    cLog(" - Building rule: ".$id);
    $ptURL = $field("phish_detail_url");
    $ptDate = $field("submission_time");

    // parse_url() returns false for seriously malformed URLs and omits "host"
    // for scheme-less ones; both cases must not produce an empty regex.
    $parsed = parse_url($field("url"));
    $host = (is_array($parsed) && isset($parsed["host"])) ? $parsed["host"] : "";
    if ( $host === "" ) {
        cLog("   ! No usable host in URL, skipping match rule for ".$id);
        return "## Rule ID.......: {$id}\n## Skipped: entry URL has no usable host name\n##";
    }
    $matchURL = isset($parsed["path"]) ? $host.$parsed["path"] : $host;
    $matchURL_regex = preg_quote($matchURL, '/');

    $bValidated = strtolower($field("verified")) == "yes" ? "1" : "0";
    $bActive = strtolower($field("online")) == "yes" ? "1" : "0";

    // Fall back to "10" when a score constant is configured as an empty string.
    $score = strlen(SCORE_DEFAULT) >= 1 ? SCORE_DEFAULT : "10";
    $score_validated = strlen(SCORE_EXTRA_VALIDATED) >= 1 ? SCORE_EXTRA_VALIDATED : "10";
    $score_online = strlen(SCORE_EXTRA_ACTIVE) >= 1 ? SCORE_EXTRA_ACTIVE : "10";
    $ptTarget = "(".$field("target").")";

    // Heredoc body and closing marker stay at column 0 so no indentation
    // ends up in the generated rule file. "\t" is expanded to a tab.
    $rule = <<<EOD
## ____ _ _ _ _____ _ ____ _
## | _ \| |__ (_)___| |_|_ _|_ _ _ __ | | __ | _ \ _ _| | ___
## | |_) | '_ \| / __| '_ \| |/ _` | '_ \| |/ / | |_) | | | | |/ _ \
## | __/| | | | \__ \ | | | | (_| | | | | < | _ <| |_| | | __/
## |_| |_| |_|_|___/_| |_|_|\__,_|_| |_|_|\_\ |_| \_\\\__,_|_|\___|
##
## Rule ID.......: {$id}
## PhishTank URL.: {$ptURL}
## Submitted.....: {$ptDate}
##
## DEFAULT MATCH RULE
uri\t\t{$id}_MATCH\t/{$matchURL_regex}/
score\t\t{$id}_MATCH\t{$score}
describe\t{$id}_MATCH\tPhishtank ID {$ptID} {$ptTarget}
## VALIDATED RULE
meta\t\t__{$id}_VALCHECK\t(1*{$bValidated}) > 0
meta\t\t{$id}_VALIDATED\t{$id}_MATCH && __{$id}_VALCHECK
score\t\t{$id}_VALIDATED\t{$score_validated}
describe\t{$id}_VALIDATED\tValidated phishing url
## ACTIVE/ONLINE RULE
meta\t\t__{$id}_ONCHECK\t(1*{$bActive}) > 0
meta\t\t{$id}_ONLINE\t{$id}_MATCH && __{$id}_ONCHECK
score\t\t{$id}_ONLINE\t{$score_online}
describe\t{$id}_ONLINE\tPhishing URL still online
##
EOD;
    return $rule;
}
/**
 * Download the PhishTank DB from PHISHTANK_DB_URL into ptDB_latest.
 *
 * The new DB is fetched first. Only after a successful download is the
 * previous file archived (PHISHTANK_DB_ARCHIVE, optionally gzipped when
 * PHISHTANK_ARCHIVE_GZ is set) or removed, so a failed download leaves the
 * existing DB in place instead of replacing it with an empty file.
 *
 * Uses the plain HTTP stream wrapper (no cURL). Returns nothing; problems
 * are reported through cLog().
 */
function downloadPhishTankDB() {
    // Make sure we can write the file before spending time downloading it.
    // The check is done on the directory the DB file actually lives in.
    $targetDir = dirname(ptDB_latest);
    if ( !is_writable($targetDir) ) {
        cLog(" - ERROR: ".$targetDir." is not writable, cannot update PhishTank DB");
        return;
    }

    $url = trim(PHISHTANK_DB_URL);
    $CRLF = "\r\n";
    // All header lines must be ONE string joined with "."; separating them
    // with "," creates extra numeric array entries that the http wrapper
    // ignores, which meant the User-Agent was never sent.
    // "Connection: close" is sent instead of keep-alive because
    // file_get_contents() makes a single request and would otherwise wait
    // for the server to drop the idle connection.
    $options = array(
        'http'=>array(
            'method'=>"GET",
            'header'=> "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".$CRLF.
                       "Accept-Language: en-US,en;q=0.5".$CRLF.
                       "Connection: close".$CRLF.
                       "Upgrade-Insecure-Requests: 1".$CRLF.
                       "User-Agent: phishtank/soclatam".$CRLF // Something they want, picked random user
        )
    );
    $context = stream_context_create($options);

    $content = file_get_contents($url, false, $context);
    if ( $content === false || $content === "" ) {
        cLog(" - ERROR: download of PhishTank DB failed, keeping existing DB (if any)");
        return;
    }

    // Download succeeded: now deal with the previous DB file.
    if ( is_file(ptDB_latest) ) {
        if ( PHISHTANK_DB_ARCHIVE ) {
            // Compress only when the rename really happened, and drop the
            // uncompressed archive only when the .gz was written.
            if ( rename(ptDB_latest, ptDB_archive) && PHISHTANK_ARCHIVE_GZ && function_exists('gzencode') ) {
                $oldData = file_get_contents(ptDB_archive);
                $gzdata = ($oldData === false) ? false : gzencode($oldData, 9);
                if ( $gzdata !== false && file_put_contents(ptDB_archive.".gz", $gzdata) !== false ) {
                    unlink(ptDB_archive);
                }
                unset($oldData, $gzdata);
            }
        } else {
            unlink(ptDB_latest);
        }
    }

    if ( file_put_contents(ptDB_latest, $content) === false ) {
        cLog(" - ERROR: could not write ".ptDB_latest);
    }
}
/**
 * Load a PhishTank JSON DB file and decode it into an array.
 *
 * Callers pass the DB path as an argument, so it is now actually honoured;
 * calling without an argument still loads ptDB_latest.
 *
 * @param string $file Path of the JSON DB file (defaults to ptDB_latest).
 * @return array|null Decoded DB (associative arrays), or null when the file
 *                    is missing, unreadable or does not decode to an array.
 */
function loadPhishTankDB($file = ptDB_latest) {
    // Checking first avoids file_get_contents() warnings for a missing file.
    if ( !is_file($file) || !is_readable($file) ) return null;
    $raw = file_get_contents($file);
    if ( $raw === false ) return null;
    $dbarray = json_decode($raw, true);
    return is_array($dbarray) ? $dbarray : null;
}
/**
 * Print one log line to standard output, prefixed with the current time
 * formatted per DATE_FORMAT. Trailing whitespace of the message is dropped
 * and exactly one newline is appended.
 *
 * Single place for console logging, so another sink (syslog?) could be
 * added here later.
 *
 * @param string $msg Message to print.
 */
function cLog($msg) {
    $stamp = date(DATE_FORMAT);
    $text = rtrim($msg);
    echo "[" . $stamp . "] " . $text . "\n";
}
?>