onlyforbopi
1/7/2017 - 6:21 PM

PERL_FILE_MANIPULATION

PERL_FILE_MANIPULATION


#!/usr/bin/perl
# Print every input line whose text at a fixed offset equals a given substring.
# Call as : perl perl_srch_single_sub.pl my_input.file
use strict;
use warnings;

my $sub    = "aaa";          # substring to look for
my $start  = 0;              # zero-based offset at which it must appear
my $length = length $sub;    # compare exactly as many characters as $sub has

while ( my $line = <> ) {

        # A line shorter than $start cannot match; substr would return
        # undef (and warn) for it, so skip it up front.
        next if length($line) < $start;

        print $line if substr( $line, $start, $length ) eq $sub;
}

#!/usr/bin/perl
# Report the size of a file, in bytes and in kilobytes.
# Call as : perl perl_size_file.pl my_input.file
use strict;
use warnings;
use File::stat;

my $file = shift;
die "Usage: $0 <file>\n" unless defined $file;

# Method 1: File::stat's object interface. stat() returns undef on failure,
# so check it before calling a method on the result.
my $st = stat($file) or die "Can't stat '$file': $!\n";
my $size = $st->size;

# Method 2: the -s file test. It yields the size in bytes, or a false value
# for an empty file, hence the "|| 0".
my $gsize = ( -s $file ) || 0;

# Both methods return bytes; convert explicitly before labelling as KB.
print $size, " Bytes\n";
printf "%.2f KB\n", $gsize / 1024;
 
 # One-liner to remove duplicate lines (entire line input)
 perl -ne 'print if ++$k{$_}==1' test.rdupl.txt > test2.test2.txt
 
 # One liner to remove duplicate lines based on substring
 perl -ne 'print if not $k{substr $_, 4, 3}++' testpel.txt
 
# Full Script v.1
# Keeps only the first line seen for each distinct 3-character key taken
# from offset 3 of the line; later lines with the same key are dropped.
# Call as : perl filter.pl my_input.file > my_output.file
use strict;
use warnings 'all';

# Maps each key to the number of times it has been encountered so far.
my %seen;

while ( defined( my $line = <> ) ) {
    my $key = substr $line, 3, 3;

    # The post-increment is false only on the first sighting of a key.
    print $line unless $seen{$key}++;
}


# Count the number of words in a file (whitespace-separated tokens).
# Call as : perl perl_count_word.pl my_input.file
use strict;
use warnings;

my $lines = 0;    # number of lines read so far
my $total = 0;    # running total of words over all lines

print "\n";
while ( my $line = <> ) {
    $lines++;

    # split ' ' breaks on runs of whitespace and ignores leading whitespace.
    my @words  = split ' ', $line;
    my $nwords = @words;
    #print "Number of words on the line $lines are : $nwords \n";
    $total += $nwords;
}

print "\nTotal no. of words in file are $total \n";
#print "\nTotal no. of lines in file are $lines \n";
# One liner to count lines in a file
perl -pe '}{$_=$.' testpel.txt

# One liner to count lines in a file v2
perl -lne 'END{print $.}' testpel.txt

# Full Script
# Counts the number of lines read from the input and prints the count.
# Call as : perl count.pl my_input.file > my_output.file
# Call as : perl count.pl my_input.file
use strict;
use warnings 'all';

# Start at 0 so that empty input prints "0" rather than an undefined value
# (which would print an empty line and raise a warning).
my $counta = 0;

while ( <> ) {

    $counta++;
}

print "$counta\n";



# Full Script v.2
# Counts the lines of the input (the word total is also computed; uncomment
# the corresponding print to report it).
# Call as : perl filter.pl my_input.file > my_output.file
use strict;
use warnings;

my $lines = 0;    # number of lines read so far
my $total = 0;    # running total of words over all lines

print "\n";
while ( my $line = <> ) {
    $lines++;

    # split ' ' breaks on runs of whitespace and ignores leading whitespace.
    my @words  = split ' ', $line;
    my $nwords = @words;
    #print "Number of words on the line $lines are : $nwords \n";
    $total += $nwords;
}

#print "\nTotal no. of words in file are $total \n";
print "\nTotal no. of lines in file are $lines \n";
#------------------------------------------------------------------------------
# sub load_file_in_array():
#       Load a text file into an array. Remove all CR/LF.
#
sub load_file_in_array {

  # Function: load_file_in_array
  # Description: Loads a file, line by line, into array
  # Input: name (path) of the file to read
  # Output: @array with lines of file as elements, line endings removed;
  #         empty list when the file name is undefined or empty
  # Usage: my @lines = load_file_in_array($file_name)
  # Notes: Dies with an error message if the file can't be opened.
  #        Reads the variables $DEBUG and $PROGNAME, which are not set here
  #        and must be provided by the surrounding script.

  my ($a_file) = @_;
  my @the_lines = ();

  # Nothing to load for a missing or empty file name.
  return @the_lines unless defined $a_file && $a_file ne "";

  # load the test case file
  if (defined $DEBUG && $DEBUG > 1) { print "# loading the file '$a_file'\n" }

  # Three-argument open with a lexical handle: the name is always treated as
  # a plain path to read, never as a mode prefix or a pipe, and the handle
  # cannot clash with a global FILE handle used elsewhere.
  open( my $fh, '<', $a_file )
    or die  "ERROR: $PROGNAME: Can't open '$a_file': $!";
  @the_lines = <$fh>;
  close $fh;

  # Strip the line terminator whether it is LF or CR/LF; chomp alone would
  # leave the CR behind on DOS-style files.
  s/\r?\n\z// for @the_lines;

  return @the_lines;
}
# Full Script v.1
# Counts everything : Lines / Words / Characters
# Uncomment as necessary
# Call as : perl filter.pl my_input.file > my_output.file
use strict;
use warnings;

my ($lines, $words, $chars) = (0,0,0);

while ( my $line = <> ) {
    $lines++;

    # length includes the line terminator, so $chars counts every character.
    $chars += length $line;

    # split ' ' (unlike split /\s+/) skips leading whitespace, so an indented
    # line does not contribute a spurious empty first "word". Splitting into
    # an array also avoids scalar-context split, which overwrote @_ on Perl
    # versions before 5.12.
    my @fields = split ' ', $line;
    $words += @fields;
}

print("lines=$lines words=$words chars=$chars\n");
1. perl_rdupl.pl              : Remove duplicate lines (Entire line, substring)
2. perl_count_lines.pl        : Count number of lines in a file
3. perl_count_word.pl         : Count number of words in a file
4. perl_count_char.pl         : Count number of characters in a file
5. perl_size_file.pl          : Count size of file in KB / Bytes
6. perl_srch_single_sub.pl    : Search for single substring
7. perl_load_file_array.pl    : Load a text file into array