forked from Easy-Forex/Verify-emails
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathverify-emails.pl
executable file
·276 lines (197 loc) · 6.04 KB
/
verify-emails.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#!/usr/bin/perl -w
=pod
=head1 NAME
verify-emails.pl - Check the list of emails for valid addresses
=head1 USAGE
./verify-emails.pl [OPTIONS]
=head1 OPTIONS
=over
=item --emails (required)
Example: --emails=emails.txt
The file should be a plain text file with one email address per line. It's
OK for the file to contain duplicate addresses - all checks will be performed
only once for each unique email address.
=item --children (optional)
Example: --children=10
This parameter can be used to specify the maximum number of parallel child
processes. If not given, the default of 20 is used.
=item --from_email (optional)
Example: --from_email=user@example.com
This parameter can be used to specify the FROM email address for SMTP checks.
If not given, the default of [email protected] is used.
=item --from_domain (optional)
Example: --from_domain=here.com
This parameter can be used to specify the EHLO domain for SMTP checks.
If not given, the domain of the from_email option is used.
=item --blacklist (optional)
Example: --blacklist=blacklist.txt
The file should be a plain text file with blacklisted strings or regular
expressions for email addresses. Any address matching any of the rules will be
skipped from all the checks. This is useful for filtering out group email
addresses like sales@ or info@, as well as spam words in domains or local parts.
=item --output (optional)
Example: --output="%email% - %status% - %reason%\n"
This parameter defines the script's output. You can use three macros that will be replaced
with content automatically:
* %email% - this will be replaced with the email address that is being checked
* %status% - this will be either OK or FAIL, depending on whether the email address passed the check
* %reason% - for failed addresses this will provide a reason why validation failed
By default, the format is set to a simple CSV
=back
=head1 EXAMPLE
./verify-emails.pl --emails=emails.txt --children=10 --blacklist=blacklist.txt > out.csv
=head1 DESCRIPTION
verify-emails.pl performs the following three checks for each email
in the given list:
1. Check the format of the email address.
2. Check that MX or A record is available for the domain part.
3. Connect to the mail server via SMTP and use MAIL and RCPT commands
to check if the mailbox exists. No actual email is being sent.
In order to speed up the processing of emails, the script will fork
multiple child processes. The maximum can be defined through the
command line option.
=head1 REQUIREMENTS
This script relies on the following perl modules (available from CPAN):
=over
=item Getopt::Long
For reading command line parameters reliably.
=item List::MoreUtils
For removing duplicates in the lists of emails and blacklist rules.
=item Parallel::ForkManager
For easy forking.
=item Mail::CheckUser
For the actual email address checks.
=back
=head1 AUTHOR
Leonid Mamchenkov <[email protected]>
=cut
use strict;
use Getopt::Long;
use List::MoreUtils qw(uniq);
use Parallel::ForkManager;
use Mail::CheckUser qw(check_email last_check);
# Verbose output to STDERR
use constant DEBUG => 1;

# Command line options and their defaults (see the OPTIONS section of the POD).
my $emails_file = '';
my $blacklist_file = '';
my $max_children = 20;
# NOTE(review): this default looks like residue of e-mail obfuscation rather
# than a usable address - restore the intended sender address before relying
# on the default.
my $from_email = '[email protected]';
my $from_domain = '';
my $output_format = "%email%,%status%,%reason%\n"; # simple CSV

# GetOptions() returns false when an option is unknown or has a malformed
# value (e.g. --children=abc). Stop instead of silently running with defaults.
GetOptions(
    'emails=s'      => \$emails_file,
    'children=i'    => \$max_children,
    'from_email=s'  => \$from_email,
    'from_domain=s' => \$from_domain,
    'blacklist=s'   => \$blacklist_file,
    'output=s'      => \$output_format,
) or do {
    print_help();
    die("Invalid command line options");
};

if (!$emails_file) {
    print_help();
    die("No --emails given");
}

# A format given on the command line (--output="...\n") arrives with the
# escape sequences as literal backslash pairs, because the shell does not
# expand them. Translate \n, \t and \\ so the documented example produces one
# record per line. The built-in default contains no backslashes and is
# unaffected.
my %escape_for = ( 'n' => "\n", 't' => "\t", '\\' => '\\' );
$output_format =~ s/\\([nt\\])/$escape_for{$1}/g;

# Load the unique addresses and drop the blacklisted ones, if requested.
my @emails = get_emails($emails_file);
if ($blacklist_file) {
    @emails = clean_emails($blacklist_file, @emails);
}

# If no domain was given, then use the domain from FROM email
if (!$from_domain && $from_email) {
    $from_domain = $from_email;
    $from_domain =~ s/^.*?@//;
}

# Mail::CheckUser configuration
$Mail::CheckUser::Treat_Timeout_As_Fail = 1;
$Mail::CheckUser::Treat_Full_As_Fail = 1;
$Mail::CheckUser::Sender_Addr = $from_email;
$Mail::CheckUser::Helo_Domain = $from_domain;
$Mail::CheckUser::Timeout = 15;
#$Mail::CheckUser::Debug = 1 if DEBUG;
# Fork manager limiting how many children run at the same time.
my $pm = Parallel::ForkManager->new($max_children);

# Trace every child that gets started (DEBUG only).
$pm->run_on_start(
    sub {
        my ($child_pid, $label) = @_;
        return unless DEBUG;
        print STDERR scalar(localtime) . " PID [ $child_pid ] START [$label]\n";
    }
);

# When a child ends, relay the record it produced to STDOUT. A child that
# handed back no data is always reported on STDERR, DEBUG or not.
$pm->run_on_finish(
    sub {
        # Exit code, signal and core-dump flag are passed in but not used here.
        my ($child_pid, undef, $label, undef, undef, $payload) = @_;

        print STDERR scalar(localtime) . " PID [ $child_pid ] FINISH [$label]\n" if DEBUG;

        if (!$payload) {
            print STDERR scalar(localtime) . " PID [ $child_pid ] MISSED [$label]\n";
            return;
        }

        print STDERR scalar(localtime) . " PID [ $child_pid ] HIT [$label]\n" if DEBUG;
        print ${$payload};
    }
);

# Periodic progress trace while waiting on children; the second argument (3)
# is the period handed to run_on_wait.
$pm->run_on_wait(
    sub {
        return unless DEBUG;
        print STDERR scalar(localtime) . " Waiting ...\n";
    },
    3
);
# Check every address in its own child process. Each child renders one output
# record and hands it back to the parent through finish(); the parent prints
# it from the run_on_finish callback.
foreach my $email (@emails) {
    $pm->start($email) and next; # do the fork; only the child continues below

    my $status = 'FAIL';
    my $reason = '';

    chomp($email);
    if ($email eq '') {
        # A blank input line: nothing to check, and last_check() would not
        # describe this address, so report the problem explicitly.
        $reason = 'Empty email address';
    }
    else {
        my $is_valid = eval { check_email($email) };
        if ($@) {
            # The check itself blew up; report the error as the reason.
            $reason = $@;
        }
        elsif ($is_valid) {
            $status = 'OK';
        }
        else {
            my $last = last_check();
            $reason = $last->{reason} if $last && defined $last->{reason};
        }
    }

    # Error texts may end with (or contain) line breaks; keep every record on
    # a single line so the output stays parseable.
    $reason =~ s/\s+\z//;
    $reason =~ s/\s*[\r\n]+\s*/ /g;

    # Expand all macros in a single pass, so that macro-like text inside an
    # already substituted value is never expanded a second time.
    my %value_for = (
        email  => $email,
        status => $status,
        reason => $reason,
    );
    my $out = $output_format;
    $out =~ s/%(email|status|reason)%/$value_for{$1}/g;

    $pm->finish(0, \$out); # do the exit in the child process
}
$pm->wait_all_children;
# Print usage help
#
# Writes a short usage summary to STDOUT, listing every option accepted on the
# command line. Takes no arguments and returns nothing.
sub print_help {
    print "Usage:\n";
    print "\t$0 --emails=FILE [--children=NUMBER] [--blacklist=FILE]\n";
    print "\t\t[--from_email=ADDRESS] [--from_domain=DOMAIN] [--output=FORMAT]\n\n";
    return;
}
# Load the email addresses to check.
#
# Takes the path of the emails file and returns its unique lines, as produced
# by get_uniq_file_lines().
sub get_emails {
    my ($email_file) = @_;
    my @addresses = get_uniq_file_lines($email_file);
    return @addresses;
}
# Clean emails using rules from blacklist file
#
# Arguments: the path of the blacklist file, followed by the list of emails.
# Every non-blank line of the blacklist file is used as a regular expression;
# an email matching any rule is dropped. Returns the remaining emails in their
# original order. Dies if a rule is not a valid regular expression.
sub clean_emails {
    my ($blacklist_file, @emails) = @_;

    my @patterns;
    foreach my $rule (get_uniq_file_lines($blacklist_file)) {
        # Skip blank lines: an empty pattern does not mean "match nothing" in
        # Perl - it re-uses the last successful pattern or matches everything,
        # which would silently discard addresses.
        next unless $rule =~ /\S/;

        # Compile each rule once and name the offending rule if it is broken.
        my $pattern = eval { qr/$rule/ };
        die "Invalid blacklist rule '$rule': $@" if !defined $pattern;
        push @patterns, $pattern;
    }

    my @kept;
    EMAIL:
    foreach my $email (@emails) {
        foreach my $pattern (@patterns) {
            next EMAIL if $email =~ $pattern;
        }
        push @kept, $email;
    }
    return @kept;
}
# Get unique lines from file
#
# Takes the path of a text file and returns its lines with the line endings
# removed and duplicates dropped (uniq() from List::MoreUtils). Dies with the
# operating system's error message if the file cannot be opened.
sub get_uniq_file_lines {
    my ($file) = @_;

    # $! (not $@) carries the reason why open() failed.
    open(my $fh, '<', $file) or die "Failed to read $file: $!";
    my @lines = <$fh>;
    close($fh);

    # Strip LF as well as CRLF endings; a stray "\r" would otherwise become
    # part of every address or rule read from a Windows-style file.
    s/[\r\n]+\z// for @lines;

    @lines = uniq(@lines);
    return @lines;
}