转:C#读取PDF、TXT内容
阅读原文 时间:2023年07月08日 阅读:1

//读取PDF内容
/// <summary>
/// Click handler: extracts the text of "D:\aa.pdf" via OnCreated and
/// displays the result in label3.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    string extractedText = OnCreated("D:\\aa.pdf");
    label3.Text = extractedText;
}

    /// <summary>
    /// Extracts the text of every page of a PDF file using iTextSharp.
    /// </summary>
    /// <param name="filepath">Path of the PDF file to read.</param>
    /// <returns>
    /// The concatenated text of all pages, or <c>null</c> when reading fails
    /// (the failure is appended to "mylog.log" under the application base directory).
    /// </returns>
    private string OnCreated(string filepath)
    {
        PdfReader pdfReader = null;
        try
        {
            pdfReader = new PdfReader(filepath);
            int numberOfPages = pdfReader.NumberOfPages;

            // StringBuilder avoids re-copying the accumulated text on every page.
            System.Text.StringBuilder text = new System.Text.StringBuilder();

            // Pages are addressed starting at 1.
            for (int i = 1; i <= numberOfPages; ++i)
            {
                // A fresh strategy per page so each call yields only that page's text.
                iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy));
            }

            return text.ToString();
        }
        catch (Exception ex)
        {
            // Path.Combine avoids hand-built separators in the log path.
            string logPath = Path.Combine(System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase, "mylog.log");

            // using guarantees the log file is flushed and closed even if WriteLine throws.
            using (StreamWriter wlog = File.AppendText(logPath))
            {
                // Include the offending file path, which the message label announces.
                wlog.WriteLine("出错文件:" + filepath + " " + "原因:" + ex.ToString());
            }

            return null;
        }
        finally
        {
            // Release the file handle on both the success and the failure path.
            if (pdfReader != null)
            {
                pdfReader.Close();
            }
        }
    }

//Read a TXT file
//path is the file path; the whole file is read, then every \n and \r character is stripped
string text = System.IO.File.ReadAllText(path)
    .Replace("\n", string.Empty)
    .Replace("\r", string.Empty);

实例:

//1. 生成一个PDF,将文本和图片添加到PDF里面。
//2. 从PDF文档中提取所有图片。
//3. 从PDF文档中提取所有文本。

   //生成一个PDF文件 里面包含文本和图片  
    /// <summary>
    /// Click handler: creates a one-page PDF containing a line of text and an
    /// image ("ff.jpg"), and saves it as "sample.pdf".
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
        try
        {
            PdfPageBase page = doc.Pages.Add();

            // Add the text.
            // Author's note: Chinese characters are not rendered correctly with
            // this font; English letters are.
            page.Canvas.DrawString("Hello!Welcome to my house!",
                new Spire.Pdf.Graphics.PdfFont(PdfFontFamily.Helvetica, 20f),
                new PdfSolidBrush(Color.Black), 10, 10);

            // Add the image, scaled to 75% and horizontally centered on the page.
            Spire.Pdf.Graphics.PdfImage image = Spire.Pdf.Graphics.PdfImage.FromFile("ff.jpg");
            float width = image.Width * 0.75f;
            float height = image.Height * 0.75f;
            float x = (page.Canvas.ClientSize.Width - width) / 2;
            page.Canvas.DrawImage(image, x, 60, width, height);

            // Optional second image, left disabled by the author:
            //Spire.Pdf.Graphics.PdfImage image2 = Spire.Pdf.Graphics.PdfImage.FromFile("image.jpg");
            //width = image2.Width * 0.75f;
            //height = image2.Height * 0.75f;
            //page.Canvas.DrawImage(image2, x - 100, 220, width, height);

            doc.SaveToFile("sample.pdf");
        }
        finally
        {
            // Close the document, as the other handlers in this sample do.
            doc.Close();
        }
    }

    //读取图片 获取图片个数 并把图片保存到本地  
    /// <summary>
    /// Click handler: extracts all images from "sample.pdf", shows the number of
    /// images in label3, and saves each one as "Image-{index}.png".
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        IList<Image> images = new List<Image>();

        Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
        try
        {
            doc.LoadFromFile("sample.pdf");
            foreach (PdfPageBase page in doc.Pages)
            {
                // Extract once per page and reuse the result for both the
                // null check and the iteration.
                var extracted = page.ExtractImages();
                if (extracted != null)
                {
                    foreach (Image image in extracted)
                    {
                        images.Add(image);
                    }
                }
            }
        }
        finally
        {
            // Close the document even when loading or extraction fails.
            doc.Close();
        }

        label3.Text = images.Count.ToString();

        int index = 0;
        foreach (Image image in images)
        {
            // Each image is no longer needed once written, so dispose it to
            // release its underlying graphics resources.
            using (image)
            {
                String imageFileName = String.Format("Image-{0}.png", index++);
                image.Save(imageFileName, ImageFormat.Png);
            }
        }
    }

    //读取文本  
    /// <summary>
    /// Click handler: extracts the text of every page of "sample.pdf" and
    /// displays it in label1.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        StringBuilder buffer = new StringBuilder();

        Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
        try
        {
            doc.LoadFromFile("sample.pdf");
            foreach (PdfPageBase page in doc.Pages)
            {
                buffer.Append(page.ExtractText());
            }
        }
        finally
        {
            // Close the document even when loading or extraction fails.
            doc.Close();
        }

        // Show the extracted text in the UI.
        label1.Text = buffer.ToString();

        // Optionally write the extracted text to a TXT file:
        //String fileName = "TextInPdf.txt";
        //File.WriteAllText(fileName, buffer.ToString());
    }

原文:https://blog.csdn.net/wk125570/article/details/73794257?utm_source=copy

参考:http://www.cnblogs.com/Yesi/p/4203686.html