Quantcast
Channel: Aspose.Pdf Product Family
Viewing all articles
Browse latest Browse all 3131

Splitting a large PDF file into individual files causes an OutOfMemoryException

$
0
0

Hi There,

 

I have a use case where we receive a large PDF file (700 to 800 MB in size) containing 400k pages, and the task is to split the file into individual pages. I run into an out-of-memory exception after splitting around 100k pages. Please suggest any alternatives or workarounds. My code is below:

 

My code to split the PDF file is below:

 

 public int SplitPdfFileByPageDelimiter(string outputFilesDir, string pageDelimiterString, int extractStringLength)        {            if (string.IsNullOrEmpty(pageDelimiterString))            {                throw new ArgumentNullException("pageDelimiterString""[SplitPdfFileByPageDelimiter()]: delimiterString for spliting pdf file is null or empty.");            }           // open document            var pdfDocument = new Aspose.Pdf.Document(this.InputFileStream);            if (string.IsNullOrEmpty(outputFilesDir))            {                throw new Exception("OutputFileDirectory missing");            }            if (!Directory.Exists(outputFilesDir))            {                Directory.CreateDirectory(outputFilesDir);            }            var outputFileName = Path.GetFileName(this.InputFilePath);            if (string.IsNullOrEmpty(outputFileName))            {                outputFileName = string.Format("{0}.pdf"Path.GetDirectoryName(outputFilesDir));            }            var outputFileFormat = string.Concat(Path.GetFileNameWithoutExtension(outputFileName), "_{0}"Path.GetExtension(outputFileName));                        var docCount = 1;            var resetCount = 0;            var document = new Aspose.Pdf.Document();            // loop through all the pages            foreach (Page pdfPage in pdfDocument.Pages)            {                resetCount++;                document.Pages.Add(pdfPage);                var textFragmentAbsorber = new TextFragmentAbsorber();                pdfPage.Accept(textFragmentAbsorber);                var textFragmentCollection = textFragmentAbsorber.TextFragments;                foreach (TextFragment textFragment in textFragmentCollection)                {                    var pdfFileUniqueIdIndex = textFragment.Text.IndexOf(pageDelimiterString);                    if (pdfFileUniqueIdIndex <= -1)                    {                        continue;                    }                    
pdfFileUniqueIdIndex += pageDelimiterString.Length;                    var pdfFileUniqueId = extractStringLength > 0 ? textFragment.Text.Mid(pdfFileUniqueIdIndex, extractStringLength).Trim() : docCount.ToString();                    if (string.IsNullOrWhiteSpace(pdfFileUniqueId))                    {                        pdfFileUniqueId = string.Format("NoPdfFileUniqueId.{0}", Guid.NewGuid().ToString());                    }                    var pdfFileFullName = Path.Combine(outputFilesDir, string.Format(outputFileFormat, pdfFileUniqueId));                    if (!File.Exists(pdfFileFullName))                    {                        document.Save(pdfFileFullName);                    }                    document.FreeMemory();                    document.Dispose();                    docCount += 1;                    document = new Aspose.Pdf.Document();                    break;                }                pdfPage.FreeMemory();                                // After every 200 pages let the process to sleep for couple of secs.                if (resetCount >= 200)                {                    pdfDocument.FreeMemory();                    Console.WriteLine(string.Format("[SplitPdfFileByPageDelimiter()]: Going to sleep at document count : [{0}]", docCount));                    Thread.Sleep(2000);                    resetCount = 0;                }            }

Viewing all articles
Browse latest Browse all 3131

Trending Articles