1. Ref: http://naspinski.net/post/ParsingReading-a-PDF-file-with-C-and-AspNet-to-text.aspx
2. Add references to the following PDFBox assemblies:
// PDFBox/bin:
//2-1 FontBox-0.1.0-dev.dll
//2-2 IKVM.GNU.Classpath.dll
//2-3 IKVM.Runtime.dll
//2-4 PDFBox-0.7.3.dll
// 3. using
using org.pdfbox.pdmodel;
using org.pdfbox.util;
// 4. System.IO is needed for StreamWriter
using System.IO;
// 5 Add parsePDF method
/// <summary>
/// Extracts the text of a PDF file with PDFBox and writes it to a text file.
/// </summary>
/// <param name="pdf_in">Path of the PDF file to read.</param>
/// <param name="txt_out">Path of the text file to write; an existing file is overwritten.</param>
/// <remarks>
/// The output starts with an empty line followed by the current local date/time,
/// then the extracted text. Failures while loading or extracting are reported on
/// the console (message only) and are not rethrown, so the output file may contain
/// just the header when the PDF cannot be processed.
/// </remarks>
public static void parsePDF(string pdf_in, string txt_out)
{
    // 'false' = do not append; the using statement flushes and disposes the
    // writer on every exit path.
    using (StreamWriter sw = new StreamWriter(txt_out, false))
    {
        PDDocument doc = null;
        try
        {
            sw.WriteLine();
            sw.WriteLine(DateTime.Now.ToString());
            doc = PDDocument.load(pdf_in);
            PDFTextStripper stripper = new PDFTextStripper();
            sw.Write(stripper.getText(doc));
        }
        catch (Exception ex) { Console.Write(ex.Message); }
        finally
        {
            // Release the loaded PDF; the document holds resources until closed.
            if (doc != null)
            {
                // Keep the best-effort contract: a failure while closing is
                // reported the same way as an extraction failure.
                try { doc.close(); }
                catch (Exception ex) { Console.Write(ex.Message); }
            }
        }
    }
}
// 6. Console mode project
/// <summary>
/// Console entry point: converts one PDF to text via <c>parsePDF</c>.
/// </summary>
/// <param name="args">
/// Optional: <c>args[0]</c> is the input PDF path and <c>args[1]</c> is the output
/// text path. When an argument is omitted the original sample value is used.
/// </param>
static void Main(string[] args)
{
    // Fall back to the sample values so running without arguments behaves as before.
    string pdf_in = args.Length > 0 ? args[0] : @"E:\Working\PDF2Text\TryExe\101050_1401.PDF";
    string txt_out = args.Length > 1 ? args[1] : "Test.txt";
    parsePDF(pdf_in, txt_out);
    Console.WriteLine("Done!");
}
沒有留言:
張貼留言