#!/usr/bin/python

#########################################################################
# PDF2DJVU version 0.12 -- Converts a PDF file to a DJVU file.
#
# Copyright 2005, 2006 Jonathan Hanke
# Original Author: Jonathan Hanke
# Last updated by Jonathan Hanke on March 4, 2006
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA
#########################################################################
# Version History:
#
#   Version 0.12 -- Padded the page number strings to give correct page orderings
#   Version 0.11 -- Ported from Bash to Python
#   Version 0.10 -- Original Bash script
#
#########################################################################
# To Do:
#   - Add a flag to reinstate the page rotation feature
#
#########################################################################

"""Convert each PDF named on the command line into a multi-page DjVu file.

Every page is rendered to a PPM image with Ghostscript (``gs``), encoded
with ``cpaldjvu``, and the per-page files are bundled with ``djvm``.  The
page count is read from ``pdfinfo``.  All external tools are started
without a shell, so file names containing spaces, quotes, parentheses or
other shell metacharacters are passed through unchanged.

The code avoids version-specific syntax so it runs under Python 2.6+ and
Python 3.
"""

import os
import shutil
import string  # unused by the code below; retained from the original import list
import subprocess
import sys
import tempfile
import time


PDF_SUFFIX = ".pdf"
DJVU_SUFFIX = ".djvu"

# Directory in which the per-document scratch directory is created.
TMP_ROOT = "/tmp"

# Ghostscript options shared by every page extraction (300 dpi raw PPM).
GS_BASIC = ["gs", "-q", "-r300", "-dNOPAUSE", "-dBATCH", "-dSAFER",
            "-sDEVICE=ppmraw"]


def djvu_name_for(pdf_filename):
    """Return *pdf_filename* with its trailing ".pdf" replaced by ".djvu".

    Only the literal suffix is removed.  (``str.rstrip(".pdf")`` strips any
    trailing run of the characters ".", "p", "d", "f", which turned
    "app.pdf" into "a".)
    """
    root = pdf_filename
    if root.endswith(PDF_SUFFIX):
        root = root[:-len(PDF_SUFFIX)]
    return root + DJVU_SUFFIX


def page_stem(num):
    """Return page number *num* zero-padded to 8 digits.

    The padding makes alphabetical order equal numerical order for up to
    99,999,999 pages.
    """
    return "%08d" % num


def parse_page_count(pdfinfo_output):
    """Return the page count found in the text printed by ``pdfinfo``.

    Only a line that *starts* with "Pages:" is accepted, so a title or
    subject that happens to contain that text is not mistaken for it.

    Raises:
        ValueError: if there is no "Pages:" line or its value is not an
            integer.
    """
    label = "Pages:"
    for line in pdfinfo_output.splitlines():
        if line.startswith(label):
            return int(line[len(label):].strip())
    raise ValueError("no 'Pages:' line in the pdfinfo output")


def run_tool(argv):
    """Run the external program *argv* (a list) without a shell.

    Returns the program's exit status.  If the program cannot be started
    at all, a message is written to stderr and 127 is returned, so a
    missing tool is reported without aborting the run.
    """
    try:
        return subprocess.call(argv)
    except OSError as err:
        sys.stderr.write("pdf2djvu: cannot run %s: %s\n" % (argv[0], err))
        return 127


def count_pdf_pages(pdf_filename):
    """Return the number of pages ``pdfinfo`` reports for *pdf_filename*.

    Raises:
        OSError: if ``pdfinfo`` cannot be started.
        ValueError: if its output contains no usable page count.
    """
    proc = subprocess.Popen(["pdfinfo", pdf_filename],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    output = proc.communicate()[0]
    return parse_page_count(output)


def convert_page(pdf_filename, tmp_dir, num):
    """Render page *num* of *pdf_filename* into ``tmp_dir/<stem>.djvu``."""
    stem = page_stem(num)
    djvu_page = os.path.join(tmp_dir, stem + DJVU_SUFFIX)

    # tmp_dir comes fresh from mkdtemp, so this branch is not reached in
    # the current flow; it is retained from the original script.
    if os.path.exists(djvu_page):
        print(" Found page  %d  --  %s" % (num, time.asctime()))
        return

    print(" Generating page  %d  --  %s" % (num, time.asctime()))

    # Extract the current page
    ppm_page = os.path.join(tmp_dir, "newpage_" + stem)
    run_tool(GS_BASIC + ["-dFirstPage=%d" % num,
                         "-dLastPage=%d" % num,
                         "-sOutputFile=" + ppm_page,
                         "--", pdf_filename])

    # ######################################
    # ## ADD THIS FUNCTIONALITY AS A FLAG ##
    # ######################################
    #
    # (Bash code from version 0.10, kept as a reference for the To Do item)
    #
    # # Rotate CCW by the number of degrees in the second argument!
    # if [ $# = 1 ]; then
    #     ## echo " No extra operations!"
    #     sleep 0   #Dummy command
    # else
    #     ## echo " Rotating clockwise by $2 degrees!"
    #     convert -rotate $2 $tmp_dir/newpage_$num $tmp_dir/newpage_$num-rot$2.ppm
    #     rm -f $tmp_dir/newpage_$num   # Needed to ensure that we only convert the rotated image!
    # fi

    # Convert the page to djvu format, then remove the intermediate image
    run_tool(["cpaldjvu", "-dpi", "300", ppm_page, djvu_page])
    try:
        os.remove(ppm_page)
    except OSError:
        # Same as "rm -f": a missing image (failed extraction) is not an error.
        pass


def convert_pdf(pdf_filename):
    """Convert one PDF file into a DjVu file placed next to it."""
    # Count the pages before creating the scratch directory, so a pdfinfo
    # failure leaves nothing behind.
    num_of_pages = count_pdf_pages(pdf_filename)
    print("There are %d pages in the PDF file %s" % (num_of_pages, pdf_filename))

    # Only the base name may go into the prefix: mkdtemp joins it onto
    # TMP_ROOT, so a path separator in it would name a missing directory.
    tmp_dir = tempfile.mkdtemp("", os.path.basename(pdf_filename) + "__", TMP_ROOT)
    try:
        ###############
        ## PAGE LOOP ##
        ###############
        for num in range(1, num_of_pages + 1):
            convert_page(pdf_filename, tmp_dir, num)

        # Assemble the multi-page djvu document.  The zero-padded names
        # sort into page order, matching the shell's "*.djvu" expansion.
        page_files = sorted(name for name in os.listdir(tmp_dir)
                            if name.endswith(DJVU_SUFFIX))
        if page_files:
            run_tool(["djvm", "-c", djvu_name_for(pdf_filename)]
                     + [os.path.join(tmp_dir, name) for name in page_files])
        else:
            sys.stderr.write("pdf2djvu: no pages were converted for %s\n"
                             % pdf_filename)
    finally:
        # Remove the temporary directory (equivalent of "rm -rf")
        shutil.rmtree(tmp_dir, ignore_errors=True)


def main(argv=None):
    """Convert every ``*.pdf`` argument; other arguments are ignored.

    Args:
        argv: argument list to use instead of ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    ## Print the current version
    print("Using pdf2dj v0.12 -- Written by Jonathan Hanke, last modified 3/4/2006")

    ## Filter for only ".pdf" suffixes
    pdf_args = [name for name in argv if name.endswith(PDF_SUFFIX)]
    print(" There are %d  PDF files to convert." % len(pdf_args))

    ###############
    ## FILE LOOP ##
    ###############
    for pdf_filename in pdf_args:
        convert_pdf(pdf_filename)


if __name__ == "__main__":
    main()