#!/bin/sh
##
## Nautilus
## SCRIPT: 00_1file_lineLENGTHS_wc-while-read.sh
##                                 
## PURPOSE: Reads a file and shows the length of each line.
##
##          (Could include a prompt to ask for the maximum
##           number of lines to read --- for huge files.)
##
## METHOD:  Uses 'wc'  and a while-read loop.
##          Less efficient than using awk, but the 'length' function 
##          of awk does not seem to count bytes in binary lines properly.
##
##          Puts the results in a temp file and shows it in a GUI
##          text browser/editor of the user's choice.
##
## HOW TO USE: In Nautilus, navigate to a (text) file, select it,
##             right-click and choose this Nautilus script to run.
##
## Created: 2010sep26
## Changed: 2011may02 Added $USER to a temp filename.
## Changed: 2011may11 Get 'nautilus-scripts' directory via an include script.
## Changed: 2012feb29 Changed the script name in the comment above.

## FOR TESTING: (show statements as they execute)
# set -x

#######################################
## Get the filename.
#######################################

## The directory the script was started in is recorded for the
## listing header.
## (Alternative not used: CURDIR="$NAUTILUS_SCRIPT_CURRENT_URI")
CURDIR=$(pwd)

## The file to measure is the first argument of the script.
## (Alternatives not used: "$@", or the Nautilus variables
##  $NAUTILUS_SCRIPT_SELECTED_URIS / $NAUTILUS_SCRIPT_SELECTED_FILE_PATHS.)
FILENAME=$1


#######################################################
## Check that the selected file is a text file.
## COMMENTED, for now.
#######################################################

#  FILECHECK=`file "$FILENAME" | egrep 'text|Mail|ASCII'`
 
#  if test "$FILECHECK" = ""
#  then
#     exit
#  fi


#########################################################
## Initialize the output file.
##
## NOTE: If the file is in a directory for which the user
##       does not have write-permission,
##       we put the output file in /tmp rather than in the
##       current working directory.
## CHANGE: To avoid junking up curdir, we use /tmp.
#########################################################

## The listing always goes to a per-user file in /tmp (never the
## current directory), so the same name is reused from run to run.
## NOTE(review): the name is predictable; consider 'mktemp' if /tmp
## is shared with untrusted users.
OUTFILE="/tmp/${USER}_temp_lineLENGTHS_1file.lis"

## Remove the listing left behind by a previous run, if there is one.
if [ -f "$OUTFILE" ]; then
   rm -f "$OUTFILE"
fi


#######################################################
## Generate a header for the listing.
#######################################################

## Start (truncate) the listing with a dated banner that names the
## file being measured and the directory the script was started in.
##
## A here-document is used instead of 'echo' because some 'sh'
## implementations make 'echo' interpret backslash sequences, which
## would garble a file or directory name containing a backslash.
## The text ends with an empty line, as before.
cat > "$OUTFILE" <<EOF
.................... $(date '+%Y %b %d  %a  %T%p %Z') ..........................

LINE LENGTHS (and summary line length statistics)

for the file

  $FILENAME

in directory

  $CURDIR

.................. START OF 'wc -c' OUTPUT ............................

EOF


###############################################################
## Use a while-read loop to read the file and output the length
## of the lines (using 'wc -c') into the OUTFILE --- and print
## summary stats.
###############################################################

#######################################################################
## measure_line_lengths FILE
##
## Reads FILE one line at a time and appends to "$OUTFILE" the column
## headings followed by one row per line: line number and line length
## in bytes (as counted by 'wc -c').
##
## Sets these variables, which the trailer of the listing reports:
##   NUMLINS - number of lines read
##   TOTCHAR - sum of the lengths of all lines read
##   MAXLEN  - length of the longest line
##   MINLEN  - length of the shortest line (0 when no line was read)
##
## Returns non-zero (after noting the problem in the listing) when
## FILE is not a readable file.
#######################################################################
measure_line_lengths () {

   NUMLINS=0
   TOTCHAR=0
   MAXLEN=0
   MINLEN=64000000

   ## The column headings go into the listing, with the rows they label.
   {
      echo "Line#    Length"
      echo "-------- --------"
   } >> "$OUTFILE"

   if test ! -r "$1" || test -d "$1"
   then
      printf '%s\n' "*** Cannot read file: $1" >> "$OUTFILE"
      MINLEN=0
      return 1
   fi

   ## 'IFS=' and '-r' make 'read' keep leading/trailing blanks and
   ## backslashes, so the whole line is measured. The '|| test -n'
   ## clause picks up a last line that has no terminating newline.
   while IFS= read -r LINE || test -n "$LINE"
   do

      ## printf '%s' writes the line exactly as read: no added newline,
      ## and a line such as '-n' or 'a\tb' is not treated as an option
      ## or an escape sequence. The arithmetic expansion strips any
      ## blank padding that 'wc' puts around the count.
      LINLEN=$(( $(printf '%s' "$LINE" | wc -c) ))

      if test "$LINLEN" -lt "$MINLEN"
      then
         MINLEN=$LINLEN
      fi

      if test "$LINLEN" -gt "$MAXLEN"
      then
         MAXLEN=$LINLEN
      fi

      NUMLINS=$(( NUMLINS + 1 ))
      TOTCHAR=$(( TOTCHAR + LINLEN ))

      printf '%8d %8d \n' "$NUMLINS" "$LINLEN"

   done < "$1" >> "$OUTFILE"
   ## END OF LOOP: while read LINE

   ## With no lines read, MINLEN would still hold its large initial
   ## value; report 0 instead.
   if test "$NUMLINS" -eq 0
   then
      MINLEN=0
   fi

   return 0
}

measure_line_lengths "$1"


###############################
## Add a trailer to the listing.
###############################

## Average line length, to 4 decimal places (bc 'scale = 4').
## When no line was read there is nothing to divide by, so the
## average is reported as 0 instead of handing bc a division by zero.
if test "${NUMLINS:-0}" -gt 0
then
   CHARPERREC=$(echo "scale = 4; $TOTCHAR / $NUMLINS" | bc -l)
else
   CHARPERREC=0
fi

## "$0" is quoted so that a script path containing blanks is passed
## to basename/dirname as a single argument.
SCRIPT_BASENAME=$(basename "$0")
SCRIPT_DIRNAME=$(dirname "$0")

## Append the summary statistics and a note on where the listing came
## from. A here-document is used so that the text is written exactly
## as shown, whatever characters the script path contains.
cat >> "$OUTFILE" <<EOF

.................. END OF 'wc -c' OUTPUT ............................

Max Record Length (bytes) = $MAXLEN 

Min Record Length (bytes) = $MINLEN

Number of Records Read    = $NUMLINS 

Ave. Chars per Recs Read  = $CHARPERREC

.....................................................................

The output above is from script

  $SCRIPT_BASENAME

in directory

  $SCRIPT_DIRNAME

This script uses a while-read loop that is slow compared to
using the 'awk' version of this script. Use the 'awk' version on
text files. On binary files, the 'awk' version gives improper
line lengths for lines containing non-ASCII binary data, if
LC_ALL=C is not used.

.................... $(date '+%Y %b %d  %a  %T%p %Z') ..........................

EOF


############################
## Show the list.
############################

## Older location of the viewer settings, no longer used:
## . $HOME/.gnome2/nautilus-scripts/.set_VIEWERvars.shi

## The first include supplies DIR_NautilusScripts, which locates the
## second include; TXTVIEWER is presumably set by the second one.
## The paths are quoted so that blanks in $HOME or in the scripts
## directory do not split them.
. "$HOME/.freedomenv/feNautilusScripts/set_DIR_NautilusScripts.shi"
. "$DIR_NautilusScripts/.set_VIEWERvars.shi"

## Without a viewer the next command would try to execute the
## listing file itself, so stop with a message instead.
if test -z "$TXTVIEWER"
then
   echo "$0: TXTVIEWER is not set; the listing is in $OUTFILE" >&2
   exit 1
fi

## TXTVIEWER is left unquoted on purpose, so that a setting made of
## a command plus options is split into separate words.
$TXTVIEWER "$OUTFILE" &

exit
########################################################
## EXIT to avoid trying to execute the sample code below.
########################################################

######################################################
## The following code is an example in case we want to
## prompt for a start line and end line (using a
## GUI dialog prompt utility like 'zenity') so that we
## can simply give the counts for PART of a huge file.
#######################################################

#######################################################
## show_line_lengths_in_range STARTLINE ENDLINE FILEIN
##
## SAMPLE ONLY: defined after the 'exit' above, so it is never
## reached when this script runs.
##
## Writes to stdout the length of each line of FILEIN whose line
## number is from STARTLINE to ENDLINE (inclusive), followed by
## max/min/count/average statistics for the lines shown.
##
## Example call:
##    show_line_lengths_in_range "$STARTLINE" "$ENDLINE" "$FILEIN"
##
## LC_ALL=C makes awk's length() count bytes, matching the
## '(bytes)' labels in the statistics.
#######################################################
show_line_lengths_in_range () {

   LC_ALL=C awk -v STARTLINE="$1" -v ENDLINE="$2" '
   BEGIN {
      NUMLINS = 0
      TOTCHAR = 0
      MAXLEN = 0
      MINLEN = 64000000
      printf ("\n\n")
      printf ("Line#    Length\n")
      printf ("-------- --------\n")
   }

   # Skip the lines before the range ("next", since "continue" is
   # only valid inside a loop), and stop reading after the range.
   NR < STARTLINE { next }
   NR > ENDLINE   { exit }

   {
      LINLEN = length($0)

      if ( LINLEN < MINLEN ) {
         MINLEN = LINLEN
      }

      if ( LINLEN > MAXLEN ) {
         MAXLEN = LINLEN
      }

      NUMLINS += 1
      TOTCHAR += LINLEN

      printf ("%8d %8d \n", NR , LINLEN)
   }

   END {
      # With no line in the range there is nothing to average and
      # MINLEN still holds its large initial value: report zeros.
      if ( NUMLINS == 0 ) {
         MINLEN = 0
         CHARPERREC = 0
      } else {
         CHARPERREC = TOTCHAR / NUMLINS
      }

      print "....................................................................."
      # Optional heading, not printed:
      # printf ("MAX/MIN line lengths OF THE RECORDS READ (#%d to #%d)", STARTLINE, ENDLINE);
      # printf ("in File: %s", FILENAME);
      print "\n"
      printf ("Max Record Length (bytes) = %s", MAXLEN)
      print "\n"
      printf ("Min Record Length (bytes) = %s", MINLEN)
      print "\n"
      printf ("Number of Records Read    = %s", NUMLINS)
      print "\n"
      printf ("Ave. Num. of CharsPerRec  = %s", CHARPERREC)
      print "\n"
   }' "$3"
}