-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrubber.ps1
74 lines (58 loc) · 2.79 KB
/
scrubber.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
function Remove-PersonalInformationFromEml {
    <#
    .SYNOPSIS
    Writes a redacted copy of an .eml file.

    .DESCRIPTION
    Reads the whole input file as a single string and applies three regex
    passes, in this order:
      1. every e-mail address is replaced with '[removed]';
      2. the rest of the line following any 'From:', 'To:' or 'Cc:' is
         replaced with '[removed]';
      3. the characters < > : " / \ | ? * [ ] { } ( ) are deleted.
    The result is written to $outputFilePath. If the input file does not
    exist, a message is printed and nothing is written.

    .PARAMETER emlFilePath
    Path of the .eml file to read. Interpreted literally, so names that
    contain '[' or ']' are handled.

    .PARAMETER outputFilePath
    Path of the file to write. Interpreted literally.
    #>
    param (
        [string]$emlFilePath,
        [string]$outputFilePath
    )

    # -LiteralPath: with -Path, '[' and ']' in a file name are treated as
    # wildcard syntax and an existing file would be reported as missing.
    if (-not (Test-Path -LiteralPath $emlFilePath)) {
        Write-Host "The specified file does not exist."
        return
    }

    # -Raw returns the file as one string so the patterns can see line ends.
    $content = Get-Content -LiteralPath $emlFilePath -Raw

    # Pass 1: e-mail addresses.
    $emailPattern = '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    $contentWithoutEmails = $content -replace $emailPattern, '[removed]'

    # Pass 2 (naive): everything after 'From:', 'To:' or 'Cc:' up to the end
    # of the line. -replace is case-insensitive and the lookbehind is not
    # anchored to the start of a line, so longer names ending in these
    # tokens match as well. '\r' is excluded so that CRLF line endings
    # survive the replacement.
    $namePattern = '(?<=From:|To:|Cc:)[^\r\n]+'
    $contentWithoutPersonalInfo = $contentWithoutEmails -replace $namePattern, '[removed]'

    # Pass 3: delete characters that are problematic in Windows file names.
    # NOTE(review): this runs over the message text, not over a file name,
    # so it also strips the ':' after header names and the brackets of the
    # '[removed]' markers inserted above. Kept unchanged to preserve the
    # existing output format - confirm that this is intended.
    $problematicCharsPattern = '[<>:"/\\|?*\[\]{}()]'
    $contentWithoutProblematicChars = $contentWithoutPersonalInfo -replace $problematicCharsPattern, ''

    # Write the redacted text to the output file.
    Set-Content -LiteralPath $outputFilePath -Value $contentWithoutProblematicChars
    Write-Host "Personal information and problematic characters removed from: $emlFilePath and saved to $outputFilePath"
}
# Function to process all .eml files in the given directory and save redacted files in a separate folder
function Process-AllEmlFilesInDirectory {
    <#
    .SYNOPSIS
    Redacts every .eml file found directly inside a directory.

    .DESCRIPTION
    Ensures a 'RedactedEmails' sub-folder exists under $directoryPath, then
    calls Remove-PersonalInformationFromEml once per *.eml file in
    $directoryPath (sub-directories are not searched). Each result is saved
    as '<original base name>_modified.eml' inside 'RedactedEmails'.
    Progress messages are printed with Write-Host; nothing is emitted on
    the output stream.

    .PARAMETER directoryPath
    Directory that contains the .eml files. Interpreted literally when it
    is tested and listed.
    #>
    param (
        [string]$directoryPath
    )

    # Folder that receives the redacted copies.
    $redactedFolderPath = "$directoryPath\RedactedEmails"

    if (-not (Test-Path -LiteralPath $redactedFolderPath)) {
        # New-Item emits the created DirectoryInfo; discard it so that the
        # function's output does not depend on whether the folder existed.
        $null = New-Item -Path $redactedFolderPath -ItemType Directory
        Write-Host "Created folder for redacted emails at: $redactedFolderPath"
    }

    # -File skips directories whose name happens to match *.eml.
    # @() guarantees an array, so .Count is reliable for zero or one match.
    $emlFiles = @(Get-ChildItem -LiteralPath $directoryPath -Filter "*.eml" -File)

    if ($emlFiles.Count -eq 0) {
        Write-Host "No .eml files found in the directory: $directoryPath"
        return
    }

    foreach ($file in $emlFiles) {
        $inputFilePath = $file.FullName
        $outputFilePath = "$redactedFolderPath\$($file.BaseName)_modified.eml"

        # Redact the current file into the output folder.
        Remove-PersonalInformationFromEml -emlFilePath $inputFilePath -outputFilePath $outputFilePath
    }
}
# Example usage: redact the .eml files located in the current working directory.
$directoryPath = $PWD.Path
Process-AllEmlFilesInDirectory -directoryPath $directoryPath