-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconvert2word.ps1
58 lines (46 loc) · 2.15 KB
/
convert2word.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Batch-convert every PDF in $folderPath to a Word document (.docx) using
# Word COM automation, then convert each .docx to Markdown using pandoc.
#
# Output layout, relative to the PARENT of $folderPath:
#   word\<name>.docx          Word conversion of <name>.pdf
#   md\<name>.md              Markdown produced by pandoc
#   md\attachments\<name>\    media extracted from the document by pandoc
#
# Requirements: Microsoft Word installed (for the Word.Application COM
# object) and a `pandoc` executable reachable through PATH.

# Folder that holds the source PDF files (edit to match your environment).
$folderPath = "D:\data\pdf"

# Names of the sibling output folders.
$docFolder = "word"
$markdownFolder = "md"
$attachmentFolder = "md\attachments"

# WdSaveFormat value for Word's default document format (.docx).
$wdFormatDocumentDefault = 16

# Build the output folders from the parent directory instead of doing a
# textual Replace('pdf', ...) on the full path: a substring replace would also
# rewrite "pdf" wherever it appears inside a file or folder name.
$rootPath = Split-Path -Path $folderPath -Parent
$docDir = Join-Path $rootPath $docFolder
$markdownDir = Join-Path $rootPath $markdownFolder
$attachmentsDir = Join-Path $rootPath $attachmentFolder

# Neither Word's SaveAs nor `pandoc -o` creates missing parent folders.
# (pandoc creates the --extract-media folder on its own.)
foreach ($dir in @($docDir, $markdownDir)) {
    New-Item -ItemType Directory -Path $dir -Force | Out-Null
}

# Collect the PDF files; -File keeps directories named "*.pdf" out of the list.
$pdfFiles = @(Get-ChildItem -Path $folderPath -Filter *.pdf -File)

# Start Word hidden; it is shut down in the outer finally block below.
$word = New-Object -ComObject Word.Application
$word.Visible = $false

try {
    foreach ($pdfFile in $pdfFiles) {
        Write-Host "Processing file: $($pdfFile.FullName)"

        # Per-file output paths, all derived from the file name without extension.
        $baseName = $pdfFile.BaseName
        $docPath = Join-Path $docDir ($baseName + ".docx")
        $markdownPath = Join-Path $markdownDir ($baseName + ".md")
        $attachmentsPath = Join-Path $attachmentsDir $baseName

        # Step 1: let Word open the PDF and save it as .docx.
        $document = $null
        try {
            $document = $word.Documents.Open($pdfFile.FullName)
            $document.SaveAs([Ref] $docPath, [Ref] $wdFormatDocumentDefault)
            Write-Host "Created Doc file: $docPath"
        }
        catch {
            # Without a .docx there is nothing for pandoc to read; skip this file.
            Write-Warning "Word conversion failed for '$($pdfFile.FullName)': $_"
            continue
        }
        finally {
            # Close and release each document as soon as it is saved so they do
            # not accumulate inside the hidden Word instance.
            if ($null -ne $document) {
                $document.Close()
                [System.Runtime.Interopservices.Marshal]::ReleaseComObject($document) | Out-Null
            }
        }

        # Step 2: convert the .docx to Markdown. Arguments are passed as an
        # array through the call operator rather than Invoke-Expression, so
        # characters in file names are never re-parsed as PowerShell code.
        $pandocArgs = @(
            "-t", "markdown_strict+grid_tables",
            "--wrap=none",
            "--columns=200",
            "--extract-media=$attachmentsPath",
            $docPath,
            "-o", $markdownPath
        )
        Write-Host ("pandoc " + ($pandocArgs -join " "))
        & pandoc @pandocArgs

        if ($LASTEXITCODE -eq 0) {
            Write-Host "Created Markdown file: $markdownPath"
        }
        else {
            Write-Warning "pandoc exited with code $LASTEXITCODE for '$docPath'"
        }
    }
}
finally {
    # Always shut Word down and drop the COM reference, even after an error,
    # so no hidden WINWORD process is left behind.
    $word.Quit()
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($word) | Out-Null
    [System.GC]::Collect()
    [System.GC]::WaitForPendingFinalizers()
}