SelectPdf for .NET - Pdf To Text Converter. Extract Text from PDF - C# / ASP.NET MVC Sample

This sample shows how to use SelectPdf Pdf Library for .NET to extract text from a PDF document.

The sample uses the following (existing) test PDF:
Test PDF document

Text Layout:

Start Page:


End Page:

(Leave empty to extract until the last page)

Note: The free trial version of SelectPdf always extracts text from the first 3 pages of the PDF document, regardless of the page settings submitted.


Sample Code C#



using System;
using System.Web.Mvc;

namespace SelectPdf.Samples.Controllers
{
    /// <summary>
    /// MVC controller for the "PDF to text" sample. It renders the settings form and,
    /// on submit, returns the text extracted from the bundled test PDF as a
    /// downloadable UTF-8 text file.
    /// </summary>
    public class PdfToTextConverterController : Controller
    {
        // Start page used when the form field is missing, not an integer, or below 1.
        private const int DefaultStartPage = 1;

        // End page used when the form field is missing, not an integer, or negative.
        // The form tells the user to leave the field empty to extract until the last
        // page, and an empty field results in this value being passed to the converter.
        private const int DefaultEndPage = 0;

        // GET: PdfToTextConverter
        /// <summary>
        /// Renders the default view for this controller.
        /// </summary>
        /// <returns>The default view.</returns>
        public ActionResult Index()
        {
            return View();
        }

        /// <summary>
        /// Extracts text from the test PDF using the layout and page range posted by
        /// the form and returns it as a file download named "output.txt".
        /// </summary>
        /// <param name="collection">
        /// Posted form values. Reads "DdlTextLayout" (name or numeric value of a
        /// <c>TextLayout</c> member), "TxtStartPage" and "TxtEndPage".
        /// </param>
        /// <returns>
        /// A UTF-8 plain-text file result on success, or an HTTP 400 result when the
        /// text layout is missing or is not a defined <c>TextLayout</c> value.
        /// </returns>
        [HttpPost]
        public ActionResult SubmitAction(FormCollection collection)
        {
            // the test file
            string filePdf = Server.MapPath("~/files/selectpdf.pdf");

            // settings
            // Enum.TryParse does not throw on null/unknown input; Enum.IsDefined then
            // rejects numeric strings that do not map to a declared member.
            TextLayout textLayout;
            bool layoutParsed = Enum.TryParse(collection["DdlTextLayout"], true,
                out textLayout);
            if (!layoutParsed || !Enum.IsDefined(typeof(TextLayout), textLayout))
            {
                return new HttpStatusCodeResult(400, "Invalid or missing text layout.");
            }

            int startPage = ParsePageNumber(collection["TxtStartPage"],
                DefaultStartPage, 1);
            int endPage = ParsePageNumber(collection["TxtEndPage"],
                DefaultEndPage, 0);

            // instantiate a pdf to text converter object
            PdfToText pdfToText = new PdfToText();

            // load PDF file
            pdfToText.Load(filePdf);

            // set the properties
            pdfToText.Layout = textLayout;
            pdfToText.StartPageNumber = startPage;
            pdfToText.EndPageNumber = endPage;

            // extract the text
            string text = pdfToText.GetText();

            // convert text to UTF-8 bytes
            byte[] utf8 = System.Text.Encoding.UTF8.GetBytes(text);

            // return the resulting text file
            FileResult fileResult = new FileContentResult(utf8,
                "text/plain; charset=UTF-8");
            fileResult.FileDownloadName = "output.txt";
            return fileResult;
        }

        /// <summary>
        /// Parses a page number from a posted form value without using exceptions.
        /// </summary>
        /// <param name="raw">The raw form value; may be null or empty.</param>
        /// <param name="fallback">Value returned when <paramref name="raw"/> is unusable.</param>
        /// <param name="minimum">Smallest accepted value; anything lower yields the fallback.</param>
        /// <returns>The parsed page number, or <paramref name="fallback"/>.</returns>
        private static int ParsePageNumber(string raw, int fallback, int minimum)
        {
            int parsed;

            // int.TryParse returns false for null, empty, malformed and overflowing
            // input, so a missing field keeps the fallback instead of becoming 0.
            if (!int.TryParse(raw, out parsed) || parsed < minimum)
            {
                return fallback;
            }

            return parsed;
        }
    }
}