Chandrashekar
8/4/2017 - 5:01 PM

Extract a .tbz2 archive to a .tar file, then to individual files

Extract a .tbz2 archive to a .tar file, then to individual files

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using ICSharpCode.SharpZipLib.Core;
using ICSharpCode.SharpZipLib.BZip2;
using ICSharpCode.SharpZipLib.Tar;
using System.IO;

namespace PlayWithSharpZipLib
{
    //https://github.com/icsharpcode/SharpZipLib/wiki/GZip-and-Tar-Samples
    class Program
    {
        /// <summary>
        /// Sample driver: extracts the entries of the sample tar file into the
        /// extraction folder, with ASCII line-ending translation enabled.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Hard-coded sample locations; adjust these to your environment.
            string tbz2ArchiveName = @"C:\Users\cd\documents\visual studio 2013\Projects\PlayWithSharpZipLib\PlayWithSharpZipLib\Data\LogDump\abc.tbz2";
            string destFolder = @"C:\Users\cd\documents\visual studio 2013\Projects\PlayWithSharpZipLib\PlayWithSharpZipLib\Data\ExtractedDump\";

            string tarFileName = @"C:\Users\cd\documents\visual studio 2013\Projects\PlayWithSharpZipLib\PlayWithSharpZipLib\Data\ExtractedDump\abc.tar";
            string targetDir = @"C:\Users\cd\documents\visual studio 2013\Projects\PlayWithSharpZipLib\PlayWithSharpZipLib\Data\ExtractedDump\";

            // Alternative entry points, kept for experimentation; they use
            // tbz2ArchiveName and destFolder above.
            //ExtractTBZ2(tbz2ArchiveName, destFolder);
            //ExtractBZipSample(tbz2ArchiveName, destFolder);
            ExtractTarByEntry(tarFileName, targetDir, true);
        }




        /// <summary>
        /// Extracts a BZip2-compressed tar archive (.tbz2) directly into a folder.
        /// </summary>
        /// <remarks>
        /// example: ExtractTBZ2(@"c:\temp\test.tbz2", @"C:\DestinationFolder")
        /// example: ExtractTBZ2(@"C:\Drive\Project Warehouse\Precision Project\LOG File Samples\InSplore.boc3par401-1635881.20160314.2037.tbz2", @"C:\Drive\Project Warehouse\Precision Project\LOG File Samples\Extract")
        /// Any exception raised while opening or extracting propagates to the caller unchanged.
        /// </remarks>
        /// <param name="tbz2ArchiveName">Path of the .tbz2 archive to read.</param>
        /// <param name="destFolder">Folder the archive contents are extracted into.</param>
        public static void ExtractTBZ2(String tbz2ArchiveName, String destFolder)
        {
            // Stacked using blocks dispose the archive and both streams even when
            // extraction throws, which explicit Close() calls after the work did not.
            using (Stream inStream = File.OpenRead(tbz2ArchiveName))
            using (Stream tbzipStream = new BZip2InputStream(inStream))
            using (TarArchive tarArchive = TarArchive.CreateInputTarArchive(tbzipStream))
            {
                tarArchive.ExtractContents(destFolder);
            }
        }

        /// <summary>
        /// Decompresses a BZip2 file into <paramref name="targetDir"/>. The output file
        /// is named after the input file with its last extension removed.
        /// </summary>
        /// <param name="bzipFileName">Path of the BZip2-compressed file to read.</param>
        /// <param name="targetDir">Folder in which the decompressed file is created.</param>
        public static void ExtractBZipSample(string bzipFileName, string targetDir)
        {
            // Use a 4K buffer. Any larger is a waste.
            byte[] copyBuffer = new byte[4096];

            using (Stream compressedFile = new FileStream(bzipFileName, FileMode.Open, FileAccess.Read))
            using (BZip2InputStream decompressed = new BZip2InputStream(compressedFile))
            {
                // Change this to your needs
                string baseName = Path.GetFileNameWithoutExtension(bzipFileName);
                string outputPath = Path.Combine(targetDir, baseName);

                using (FileStream destination = File.Create(outputPath))
                {
                    StreamUtils.Copy(decompressed, destination, copyBuffer);
                }
            }
        }



        /// <summary>
        /// Iterates through each file entry within the supplied tar,
        /// extracting them to the nominated folder. Directory entries are skipped;
        /// the folders needed by extracted files are created on demand.
        /// </summary>
        /// <param name="tarFileName">Path of the tar file to read.</param>
        /// <param name="targetDir">Folder under which the entries are written.</param>
        /// <param name="asciiTranslate">
        /// When true, entry contents are copied through CopyWithAsciiTranslate;
        /// otherwise they are copied unchanged.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="targetDir"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// An entry name resolves to a location outside <paramref name="targetDir"/>.
        /// </exception>
        public static void ExtractTarByEntry(string tarFileName, string targetDir, bool asciiTranslate)
        {
            if (targetDir == null)
            {
                throw new ArgumentNullException("targetDir");
            }

            // Absolute extraction root with a trailing separator, used below to reject
            // entries whose names (e.g. containing "..") would land outside the target folder.
            // An empty targetDir keeps its previous meaning: the current directory.
            string rootDir = Path.GetFullPath(targetDir.Length == 0 ? "." : targetDir);
            if (!rootDir.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal))
            {
                rootDir += Path.DirectorySeparatorChar;
            }

            using (FileStream fsIn = new FileStream(tarFileName, FileMode.Open, FileAccess.Read))
            using (TarInputStream tarIn = new TarInputStream(fsIn))
            {
                TarEntry tarEntry;
                while ((tarEntry = tarIn.GetNextEntry()) != null)
                {
                    if (tarEntry.IsDirectory)
                    {
                        continue;
                    }

                    // Converts the unix forward slashes in the filenames to windows backslashes,
                    // then replaces ':' with '_'. The second Replace must operate on the
                    // already-converted name, not on tarEntry.Name again.
                    string name = tarEntry.Name.Replace('/', Path.DirectorySeparatorChar);
                    name = name.Replace(':', '_');

                    // Remove any root e.g. '\' because a PathRooted filename defeats Path.Combine
                    if (Path.IsPathRooted(name))
                    {
                        name = name.Substring(Path.GetPathRoot(name).Length);
                    }

                    // Apply further name transformations here as necessary
                    string outName = Path.GetFullPath(Path.Combine(rootDir, name));

                    // Entry names come from the archive; refuse to write outside the target folder.
                    if (!outName.StartsWith(rootDir, StringComparison.Ordinal))
                    {
                        throw new InvalidDataException(
                            "Tar entry '" + tarEntry.Name + "' would be extracted outside of '" + targetDir + "'.");
                    }

                    string directoryName = Path.GetDirectoryName(outName);
                    Directory.CreateDirectory(directoryName);		// Does nothing if directory exists

                    // using guarantees the output file is closed even if the copy throws.
                    using (FileStream outStr = new FileStream(outName, FileMode.Create))
                    {
                        if (asciiTranslate)
                        {
                            CopyWithAsciiTranslate(tarIn, outStr);
                        }
                        else
                        {
                            tarIn.CopyEntryContents(outStr);
                        }
                    }

                    // Set the modification date/time. This approach seems to solve timezone issues.
                    DateTime myDt = DateTime.SpecifyKind(tarEntry.ModTime, DateTimeKind.Utc);
                    File.SetLastWriteTime(outName, myDt);
                }
            }
        }

        /// <summary>
        /// Copies the current tar entry to <paramref name="outStream"/>. If the first bytes
        /// of the entry (up to 200) contain none of the control values checked below, the
        /// entry is treated as ASCII text and every LF not preceded by CR is written as CRLF;
        /// otherwise the bytes are copied unchanged.
        /// </summary>
        /// <param name="tarIn">Tar stream positioned on the entry to copy.</param>
        /// <param name="outStream">Destination stream for the entry contents.</param>
        private static void CopyWithAsciiTranslate(TarInputStream tarIn, Stream outStream)
        {
            const byte lineFeed = 10;
            const byte carriageReturn = 13;

            byte[] chunk = new byte[4096];
            int count = tarIn.Read(chunk, 0, chunk.Length);

            // Sniff the start of the first chunk to decide between text and binary.
            bool looksLikeText = true;
            int sniffLength = Math.Min(200, count);
            int pos = 0;
            while (looksLikeText && pos < sniffLength)
            {
                byte value = chunk[pos++];
                bool isBinaryMarker = value < 8 || (value > 13 && value < 32) || value == 255;
                looksLikeText = !isBinaryMarker;
            }

            // Tracks whether the previously written byte was CR, across chunk boundaries,
            // so a CRLF pair split over two reads is not expanded to CR CR LF.
            bool previousWasCr = false;

            for (; count > 0; count = tarIn.Read(chunk, 0, chunk.Length))
            {
                if (!looksLikeText)
                {
                    outStream.Write(chunk, 0, count);
                    continue;
                }

                // Assumes plain ASCII rather than UTF-16.
                for (int i = 0; i < count; i++)
                {
                    byte value = chunk[i];
                    if (value == lineFeed && !previousWasCr)
                    {
                        // LF without a preceding CR: emit the missing CR first.
                        outStream.WriteByte(carriageReturn);
                    }

                    outStream.WriteByte(value);
                    previousWasCr = value == carriageReturn;
                }
            }
        }

    }
}