-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractemails.txt
55 lines (45 loc) · 1.66 KB
/
extractemails.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// This script scans all pages of the input document
// and extracts valid email addresses. The intended output is a new
// PDF document placed in the same folder as the input, named:
// Original filename + "_Extracted_Emails"
// NOTE: the code below currently only prints each match to the
// JavaScript console; it does not create the output PDF.
// Visit www.evermap.com for more useful JavaScript samples.
var reEmail = /(([^<>()[\]\\.,;:\s@\"]+(\.[^<>()[\]\\.,;:\s@\"]+)*)|(\".+\"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/g;
var strExt = "_Extracted_Emails.pdf";
var strIntro = "Email addresses extracted from document: ";
var strFinal = "Total number of email addresses extracted: " ;
ExtractFromDocument(reEmail,strExt,strIntro,strFinal);
function ExtractFromDocument(reMatch, strFileExt, strMessage1, strMessage2)
{
var chWord, numWords;
// construct filename for output document
var filename = this.path.replace(/\.pdf$/, strFileExt);
// create a report document
try {
var nTotal = 0;
var nCounter = 0;
var nLinesPerPages = 60;
for (var i = 0; i < this.numPages; i++)
{
numWords = this.getPageNumWords(i);
var PageText = "";
for (var j = 0; j < numWords; j++) {
var word = this.getPageNthWord(i,j,false);
PageText += word;
}
var strMatches = PageText.match(reMatch);
if (strMatches == null) continue;
// now output matches into report document
for (j = 0; j < strMatches.length; j++)
{
console.println(strMatches[j]);
nTotal++;
nCounter++;
}
}
}
catch(e)
{
app.alert("Processing error: "+e)
}
} // end of the function