Tuesday, June 15, 2010


Abstract: I demonstrate C# code that converts a PDF file into image files directly, without using a third-party conversion library. It relies on an open-source virtual image printer driver: the example code invokes Adobe AcrobatReader to print the PDF to that printer.

How can I convert PDF into image files?

 There are several commercial PDF conversion libraries, but no open-source library. Concrete methods are: 1) an open-source PDF rendering engine such as GhostScript, and 2) a virtual printer. The former method is not ideal, because it forces users to install additional software and fonts, and the rendered image may not be the same as the one produced by Adobe AcrobatReader. For the latter method, I first tried the XPS Document Writer, but it did not work, for the following reason:

 XPS Document Writer can convert any printable files into XPS files. However, it shows a modal dialog to ask a user in which folder XPS files should be saved. There are some discussions about how to specify the folder name from code, in Printing documents to Microsoft XPS Document Writer without user interaction - Feng Yuan (袁峰) - Site Home - MSDN Blogs, and Is There Really No Way To Print XPS From A Generic Application Without Dialog Box?. But it is not easy for me ;-P

 Therefore I use an open-source virtual printer here.

Using an open-source virtual printer

 I use Virtual Image Printer driver | Download Virtual Image Printer driver software for free at SourceForge.net. As I noted above, a user must install this driver software ... what else?
 The following code does:
  1. Setting output folder name to the registry,
  2. Invoking AcrobatReader to print out,
  3. Waiting for job finishing.

Setting AcrobatReader executable file path and output folder 

 The strings named pdf_filepath and output_folder in the Main method specify the PDF file to convert and the image file output folder. The AcrobatReader executable path is set through psInfo.FileName; the example code uses AcrobatReader 8.0:  psInfo.FileName = @"C:\Program Files\Adobe\Reader 8.0\Reader\AcroRd32.exe";.

Output folder is set into registry

 ImagePrinter (virtual printer) holds setting information under the registry "HKEY_LOCAL_MACHINE\Software\ImagePrinter". Output folder name is set to the registry key "path".

A little ingenuity to prevent misspelling

 Misspelling a registry key name results in a bug that is difficult to find. Therefore I create a value object and then reflect its content to the registry using System.Reflection.FieldInfo.

AcrobatReader command options

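 Detailed descriptions of the AcrobatReader command options are here: http://scripting.cocolog-nifty.com/blog/2006/12/pdf_4c95.html. The options "/s", "/h" and "/t" mean, respectively: suppress the splash window, start with the window minimized, and print to the specified printer.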

Print queue monitoring 

 To wait for the print job to finish, the code first waits until an image file appears in the output folder, and then waits until the printer queue is empty.

Example code

   1:  using System;
   2:  using System.Collections.Generic;
   3:  using System.Text;
   5:  using System.Runtime.InteropServices;
   6:  using System.ComponentModel;
   8:  namespace pdf2image
   9:  {
  10:      /// <summary>
  11:      /// Converting a PDF file to image files
  12:      /// 2008/07/12 Akihiro Uehara    
  13:      /// </summary>    
  15:      // Preparation
  16:      // Install printer driver http://sourceforge.net/projects/imageprinter
  18:      public class Program
  19:      {
  20:          #region printer setting value object
  /// <summary>
  /// Value object mirroring the ImagePrinter settings stored under
  /// HKEY_LOCAL_MACHINE\Software\ImagePrinter.
  /// </summary>
  /// <remarks>
  /// Each public string field is named exactly like the registry value it maps to.
  /// <see cref="ReadRegistry"/> and <see cref="WriteRegistry"/> enumerate the public
  /// fields through reflection, so every value name is spelled in exactly one place.
  /// A field that is <c>null</c> is skipped when writing.
  /// </remarks>
  class printer_configration
  {
      public string ext_app;
      public string format;
      public string format_ext;
      public string multipage_tiff;
      public string one_file;
      public string original_name;
      public string path;
      public string q_jpg;
      public string ShowProgress;

      // Sub key, relative to HKEY_LOCAL_MACHINE, that holds the settings.
      const string _registry_prefix = @"Software\ImagePrinter\";

      /// <summary>
      /// Creates the object and immediately loads the current settings from the registry.
      /// </summary>
      public printer_configration()
      {
          ReadRegistry();
      }

      /// <summary>
      /// Writes one string value under the settings key.
      /// Does nothing when <paramref name="value"/> is null or when the key cannot be opened.
      /// </summary>
      void write_registry(string key, string value)
      {
          if (null == value)
          {
              return;
          }

          // "using" closes the key even if SetValue throws (for example on access denied).
          using (Microsoft.Win32.RegistryKey regkey =
              Microsoft.Win32.Registry.LocalMachine.OpenSubKey(_registry_prefix, true))
          {
              if (null != regkey)
              {
                  regkey.SetValue(key, value);
              }
          }
      }

      /// <summary>
      /// Reads one string value from the settings key.
      /// </summary>
      /// <returns>
      /// The stored string; <c>string.Empty</c> when the settings key does not exist;
      /// <c>null</c> when the value is missing or is not stored as a string.
      /// </returns>
      string read_registry(string key)
      {
          string val = string.Empty;

          using (Microsoft.Win32.RegistryKey regkey =
              Microsoft.Win32.Registry.LocalMachine.OpenSubKey(_registry_prefix, false))
          {
              if (null != regkey)
              {
                  // "as" instead of a hard cast: a non-string value yields null (and is then
                  // left untouched by WriteRegistry) instead of an InvalidCastException.
                  val = regkey.GetValue(key) as string;
              }
          }

          return val;
      }

      /// <summary>
      /// Loads every public field from the registry value that has the same name.
      /// </summary>
      public void ReadRegistry()
      {
          // Equivalent to one "field = read_registry("field");" statement per field.
          // Going through reflection prevents a field name and its registry value name
          // from drifting apart through a misspelling.
          foreach (System.Reflection.FieldInfo finfo in typeof(printer_configration).GetFields())
          {
              finfo.SetValue(this, read_registry(finfo.Name));
          }
      }

      /// <summary>
      /// Stores every non-null public field into the registry value that has the same name.
      /// </summary>
      public void WriteRegistry()
      {
          foreach (System.Reflection.FieldInfo finfo in typeof(printer_configration).GetFields())
          {
              write_registry(finfo.Name, (string)finfo.GetValue(this));
          }
      }
  }
  94:          #endregion
  /// <summary>
  /// Reports whether <paramref name="folder"/> contains a file whose name starts with
  /// <paramref name="keyphrase"/> but is not exactly equal to it.
  /// </summary>
  /// <param name="folder">Folder whose files are inspected (sub folders are not searched).</param>
  /// <param name="keyphrase">Required file name prefix; a file named exactly like this is ignored.</param>
  /// <returns>true when at least one matching file exists; otherwise false.</returns>
  static bool imageFileExists(string folder, string keyphrase)
  {
      foreach (string path in System.IO.Directory.GetFiles(folder))
      {
          string name = System.IO.Path.GetFileName(path);

          // Ordinal comparison: file names are identifiers, not linguistic text, so the
          // result must not depend on the current culture's collation rules.
          if (keyphrase != name && name.StartsWith(keyphrase, StringComparison.Ordinal))
          {
              return true;
          }
      }

      return false;
  }
 /// <summary>
 /// Converts a PDF file into image files by printing it with Acrobat Reader to the
 /// "ImagePrinter" virtual printer, then restores the printer settings that were in
 /// effect before.
 /// </summary>
 /// <param name="args">
 /// Optional. args[0] is the PDF file to convert and args[1] is the image output folder.
 /// When omitted, c:\tmp\test.pdf and c:\tmp\ are used.
 /// </param>
 static void Main(string[] args)
 {
     string pdf_filepath = (args != null && args.Length > 0) ? args[0] : @"c:\tmp\test.pdf"; // PDF file to convert
     string output_folder = (args != null && args.Length > 1) ? args[1] : @"c:\tmp\";

     // Preserve the current printer settings so they can be restored afterwards.
     printer_configration prevCfg = new printer_configration();
     printer_configration curCfg = new printer_configration();

     // Printer settings for this run.
     curCfg.path = output_folder;      // image output folder
     curCfg.ext_app = @"";
     curCfg.format = "png";            // png, due to its smaller file size compared to TIFF
     curCfg.one_file = "false";
     curCfg.original_name = "true";    // image file name is the same as the PDF file name
     curCfg.ShowProgress = "false";    // does not show the progress bar

     Console.WriteLine("Setting printer");
     curCfg.WriteRegistry();

     try
     {
         // Print the PDF file.
         Console.WriteLine("Starting Acrobat reader");
         System.Diagnostics.ProcessStartInfo psInfo = new System.Diagnostics.ProcessStartInfo();
         psInfo.FileName = @"C:\Program Files\Adobe\Reader 8.0\Reader\AcroRd32.exe";
         // The path is quoted so that a PDF path containing spaces stays a single argument.
         // Option details: http://scripting.cocolog-nifty.com/blog/2006/12/pdf_4c95.html
         psInfo.Arguments = String.Format(@" /s /h /t ""{0}"" ImagePrinter", pdf_filepath);
         psInfo.CreateNoWindow = true;   // do not open a console window
         psInfo.UseShellExecute = false; // do not go through the shell

         // Disposing the Process object releases its handle only; Acrobat Reader itself
         // is left running, as before (it is not killed).
         using (System.Diagnostics.Process ps = new System.Diagnostics.Process())
         {
             ps.StartInfo = psInfo;
             ps.Start();

             // First, wait until the first image file shows up in the output folder.
             // NOTE(review): this polls without a timeout, and an image left over from an
             // earlier run with the same name prefix ends the wait immediately.
             Console.WriteLine("Waiting for the first image file");
             string fname = System.IO.Path.GetFileName(pdf_filepath);
             while (!imageFileExists(output_folder, fname))
             {
                 System.Threading.Thread.Sleep(1000);
             }

             // Next, wait until the printer's job queue becomes empty.
             // (System.Printing.LocalPrintServer / PrintQueue.NumberOfJobs would be an
             // alternative to the P/Invoke based polling used here.)
             Console.WriteLine("Waiting for Job queue empty");
             PRINTER_INFO_2 pinfo;
             do
             {
                 System.Threading.Thread.Sleep(1000);
                 pinfo = GetPrinterInfo("ImagePrinter"); // poll the printer queue
             } while (pinfo.cJobs > 0);
         }
     }
     finally
     {
         // Restore the previous settings even when printing or polling failed, so the
         // shared printer configuration is not left modified.
         Console.WriteLine("Recoverring printer setting");
         prevCfg.WriteRegistry();
     }
 }
 167:          #region Polling printer port. ref: DOBON.NET http://dobon.net/vb/dotnet/graphics/printerport.html 
 169:          //using System.Runtime.InteropServices;
 170:          //using System.ComponentModel;
 /// <summary>Opens a handle to the named printer (winspool.drv OpenPrinter).</summary>
 [DllImport("winspool.drv", CharSet = CharSet.Auto, SetLastError = true)]
 private static extern bool OpenPrinter(string pPrinterName,
     out IntPtr hPrinter, IntPtr pDefault);

 /// <summary>Closes a handle obtained from <see cref="OpenPrinter"/>.</summary>
 [DllImport("winspool.drv", SetLastError = true)]
 private static extern bool ClosePrinter(IntPtr hPrinter);

 /// <summary>
 /// Retrieves printer information of the requested level into a caller-supplied buffer.
 /// </summary>
 /// <remarks>
 /// CharSet.Auto is required here: without it the import binds to the ANSI entry point,
 /// while <see cref="PRINTER_INFO_2"/> is marshaled with CharSet.Auto, so the string
 /// fields of the returned structure would be decoded with the wrong character set.
 /// </remarks>
 [DllImport("winspool.drv", CharSet = CharSet.Auto, SetLastError = true)]
 private static extern bool GetPrinter(IntPtr hPrinter,
     int dwLevel, IntPtr pPrinter, int cbBuf, out int pcbNeeded);

 /// <summary>
 /// Managed mirror of the native PRINTER_INFO_2 structure returned by GetPrinter level 2.
 /// Field order and types must not be changed: the structure is filled by
 /// Marshal.PtrToStructure from a native buffer using sequential layout.
 /// </summary>
 [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
 public struct PRINTER_INFO_2
 {
     public string pServerName;
     public string pPrinterName;
     public string pShareName;
     public string pPortName;
     public string pDriverName;
     public string pComment;
     public string pLocation;
     public IntPtr pDevMode;
     public string pSepFile;
     public string pPrintProcessor;
     public string pDatatype;
     public string pParameters;
     public IntPtr pSecurityDescriptor;
     public uint Attributes;
     public uint Priority;
     public uint DefaultPriority;
     public uint StartTime;
     public uint UntilTime;
     public uint Status;
     public uint cJobs; // polled by Main to decide when the print queue is empty
     public uint AveragePPM;
 }
 /// <summary>
 /// Retrieves information about a printer as a PRINTER_INFO_2 structure.
 /// </summary>
 /// <param name="printerName">Name of the printer.</param>
 /// <returns>The printer information.</returns>
 /// <exception cref="Win32Exception">
 /// The printer could not be opened, or its information could not be queried.
 /// </exception>
 public static PRINTER_INFO_2 GetPrinterInfo(string printerName)
 {
     // Get a handle to the printer.
     IntPtr hPrinter;
     if (!OpenPrinter(printerName, out hPrinter, IntPtr.Zero))
     {
         throw new Win32Exception(Marshal.GetLastWin32Error());
     }

     IntPtr pPrinterInfo = IntPtr.Zero;
     try
     {
         // Ask for the required buffer size. This call is made with an empty buffer, so
         // its return value is ignored and only the reported size is used.
         int needed;
         GetPrinter(hPrinter, 2, IntPtr.Zero, 0, out needed);
         // Capture the error code right away, before any other call can overwrite it.
         int sizeQueryError = Marshal.GetLastWin32Error();
         if (needed <= 0)
         {
             // No size was reported: surface the OS error instead of a generic Exception.
             throw new Win32Exception(sizeQueryError);
         }

         // Allocate the unmanaged buffer.
         pPrinterInfo = Marshal.AllocHGlobal(needed);

         // Retrieve the printer information.
         int written;
         if (!GetPrinter(hPrinter, 2, pPrinterInfo, needed, out written))
         {
             throw new Win32Exception(Marshal.GetLastWin32Error());
         }

         // Marshal the buffer into a managed PRINTER_INFO_2 (strings are copied, so the
         // result stays valid after the buffer is freed below).
         PRINTER_INFO_2 printerInfo =
             (PRINTER_INFO_2)Marshal.PtrToStructure(pPrinterInfo,
             typeof(PRINTER_INFO_2));

         return printerInfo;
     }
     finally
     {
         // Clean up: close the printer handle and release the unmanaged buffer.
         ClosePrinter(hPrinter);
         if (pPrinterInfo != IntPtr.Zero)
         {
             Marshal.FreeHGlobal(pPrinterInfo);
         }
     }
 }
 257:          #endregion
 258:      }    
 259:  }


