-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate.py
More file actions
76 lines (62 loc) · 2.63 KB
/
translate.py
File metadata and controls
76 lines (62 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys

import PyPDF2
import requests
from docx import Document
from googletrans import Translator
# function to translate text using Google Translate API
def translate_text(text, src_lang, dest_lang):
    """Translate ``text`` with googletrans and return the translated string.

    Args:
        text: The text to translate.
        src_lang: Source language code, forwarded to the translator as
            ``src``.
        dest_lang: Destination language code, forwarded to the translator
            as ``dest``.

    Returns:
        The ``text`` attribute of the translation result, or ``text`` itself
        unchanged when it is empty or contains only whitespace.
    """
    # Nothing to translate (e.g. the source document had no extractable
    # text): return early instead of sending an empty request.
    if not text or not text.strip():
        return text

    translator = Translator()
    translation = translator.translate(text, src=src_lang, dest=dest_lang)
    return translation.text
# function to convert PDF to DOCX using PyPDF2
def convert_pdf_to_docx(file_path):
    """Extract the text of a PDF into a DOCX file, one paragraph per page.

    The DOCX is written next to the input, using the input path with its
    extension replaced by ``.docx``.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The path of the DOCX file that was written.
    """
    docx_file_path = os.path.splitext(file_path)[0] + ".docx"

    # Build the whole document in memory first; the output file is only
    # created once the PDF has been read successfully, so a failure while
    # parsing does not leave an empty .docx behind.
    document = Document()
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            page_content = page.extract_text()
            # Guard kept from the original: skip pages for which the
            # extractor returns no text object at all.
            if page_content is not None:
                document.add_paragraph(page_content)

    document.save(docx_file_path)
    return docx_file_path
# function to convert DOCX to PDF using LibreOffice
def convert_docx_to_pdf(file_path):
    """Convert a DOCX file to PDF using headless LibreOffice.

    The PDF is written into a ``PDF_Output`` directory located next to the
    input file. It keeps the input's base name with a ``.pdf`` extension.

    Args:
        file_path: Path of the DOCX file to convert.

    Returns:
        The absolute path of the PDF produced by LibreOffice.

    Raises:
        FileNotFoundError: If the ``libreoffice`` executable cannot be found.
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
        RuntimeError: If LibreOffice finishes but the expected PDF is absent.
    """
    source_path = os.path.abspath(file_path)
    outdir = os.path.join(os.path.dirname(source_path), "PDF_Output")
    os.makedirs(outdir, exist_ok=True)

    # Pass the arguments as a list (no shell) so that paths containing
    # spaces or shell metacharacters are handled safely.
    subprocess.run(
        [
            "libreoffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            outdir,
            source_path,
        ],
        check=True,
    )

    # The output is named after the input file and placed in ``outdir``;
    # report that location rather than a path beside the input.
    base_name = os.path.splitext(os.path.basename(source_path))[0]
    pdf_file_path = os.path.join(outdir, base_name + ".pdf")
    if not os.path.isfile(pdf_file_path):
        raise RuntimeError(
            f"LibreOffice did not produce the expected file: {pdf_file_path}"
        )
    return pdf_file_path
# main function to translate PDF to English
def translate_pdf_to_english(file_path):
    """Translate a PDF into English and produce a translated PDF.

    Steps: convert the PDF to a temporary DOCX, join its paragraph texts,
    translate them to English with automatic source-language detection,
    write the result to a second temporary DOCX, and convert that to PDF.
    Both temporary DOCX files are removed afterwards, even when a step
    fails.

    Args:
        file_path: Path of the PDF file to translate.

    Returns:
        The PDF path returned by ``convert_docx_to_pdf``.
    """
    # convert PDF to DOCX
    docx_file_path = convert_pdf_to_docx(file_path)
    translated_docx_file_path = (
        os.path.splitext(file_path)[0] + "_translated.docx"
    )
    try:
        # read DOCX file and translate text
        document = Document(docx_file_path)
        text = "\n".join(paragraph.text for paragraph in document.paragraphs)
        translated_text = translate_text(text, 'auto', 'en')

        # save translated text to DOCX file
        translated_document = Document()
        paragraph = translated_document.add_paragraph()
        paragraph.add_run(translated_text)
        translated_document.save(translated_docx_file_path)

        # convert translated DOCX to PDF
        return convert_docx_to_pdf(translated_docx_file_path)
    finally:
        # delete temporary files, whether or not the steps above succeeded;
        # the second file may not exist yet if an earlier step failed
        for temp_path in (docx_file_path, translated_docx_file_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)
def main(argv=None):
    """Command-line entry point: translate the PDF named on the command line.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when omitted.

    Returns:
        The process exit status: 0 on success, 1 when the arguments are
        wrong or the given file does not exist.
    """
    if argv is None:
        argv = sys.argv

    # check if file path is provided as a command line argument
    if len(argv) != 2:
        # Report the name the script was actually invoked with.
        script_name = os.path.basename(argv[0]) if argv else "translate.py"
        print(f"Usage: python {script_name} <file_path>", file=sys.stderr)
        return 1

    # get file path from command line argument
    file_path = argv[1]

    # check if file exists
    if not os.path.isfile(file_path):
        print(f"Error: {file_path} does not exist.", file=sys.stderr)
        return 1

    translated_pdf_file = translate_pdf_to_english(file_path)
    print(f"Translated PDF file saved to: {translated_pdf_file}")
    return 0


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    sys.exit(main())