#!/bin/sh
##
## SCRIPT: findANDshow_stringsINfile_plusminusNlines.sh
##
## adapted from the 'FE xpg' subsystem of the Freedom Environment system,
## whose home page is at www.freedomenv.com.
##
## This is a 'stand-alone' version for contribution to wiki.tcl.tk
##
#############################################################################
## PURPOSE: Let $1 $2 $3 $4 represent the 4 positional arguments to this
##          script --- <string> <Nlines> <yes|no> <filename>.
##
##          For the filename specified in $4, this script finds the lines in
##          the file which contain a match to the string in $1 ---
##          and it shows N lines above and below the 'match' lines, where
##          N=$2. The search is case-INsensitive when $3 is 'no'; any
##          other value of $3 (normally 'yes') gives a case-sensitive search.
##
##          Like 'egrep', the string argument may be sub-strings separated
##          by vertical-bars (|). Hence, the file can actually be searched
##          for an OR-match, in any record of the file, to the several
##          sub-strings within the string.
##
##          Example string: 'fatal|error|fail|warning'
##
##          NOTE: The sub-strings are matched LITERALLY (awk 'index'), not
##                as regular expressions.
##
##          NOTE: When typing the command at a shell prompt, put the string
##                in single quotes, to keep the calling shell from
##                interpreting characters like < or > or | or [ or ] or
##                parentheses or !. If a caller that does not go through a
##                shell passes the single quotes along literally, ONE
##                surrounding pair of single quotes is stripped here.
##
##          'awk' is used to find the match(es) and save-and-print the N
##          lines above a match line as well as the N lines following.
##
##          This is an 'egrep-like' utility --- except that it is an extended
##          'egrep' (one might say an 'eegrep') utility that shows lines
##          around the match lines.
#############################################################################
## CALL FORMAT:
##
## $DIRxpg/findANDshow_stringsINfile_plusminusNlines.sh \
##                               <string> <Nlines> <yes|no> <filename>
##
#############################################################################
## EXIT STATUS: non-zero when the input is rejected (bad or missing
##              argument, unreadable file, no report-file name available).
#############################################################################
## CALLED BY: the tk-GUI utility script 'shofil.tk'
##
##            to support the unique 'Show All Matches' productivity feature.
##
#############################################################################
## REQUIRES (in directory $DIRxpg, set below):
##    set_localoutlist.sh - sourced; expected to set $OUTLIST, the name of
##                          the report file.
##    xpg                 - the browse utility used to show the report.
#############################################################################
## MAINTENANCE HISTORY for this 'stand-alone' version:
##
## Written: Blaise Montandon 2013aug05 Started work on creating this
##                                     'stand-alone' version from the
##                                     'integrated' 'FE xpg' subsystem at
##                                     www.freedomenv.com.
## Updated: Blaise Montandon 20.......
#############################################################################

THISscript="$0"
THISscriptBASE="${THISscript##*/}"

#############################################################################
## Save input, for err msg below.
#############################################################################

ALLinput="$*"

#############################################################################
## Get input items #1 and #2 and #3 and #4.
##
## The '${n-}' form is used instead of a series of 'shift' commands, so
## that a call with fewer than 4 arguments reaches the explanatory error
## messages below instead of failing in 'shift'.
#############################################################################

STRINGin="${1-}"
Nlines="${2-}"
CaseSense="${3-}"
FILEin="${4-}"

####################################################################
## 1) The <string> parm.
##
## If the string arrives still wrapped in a literal pair of single
## quotes, remove that one pair. This is done with parameter
## expansion rather than 'eval': 'eval' would re-parse the string
## as shell code, so that '|' would build a pipeline, a blank would
## start a command, and back-quotes or '$(...)' would be executed.
####################################################################

case "$STRINGin" in
  \'*\')
    STRINGin="${STRINGin#\'}"
    STRINGin="${STRINGin%\'}"
    ;;
esac

#############################################################################
## Check for input item #1 and #2 and #3 and #4.
#############################################################################

ERRMSG_MAIN="\
*********** INPUT ERROR:
Supply 4 inputs --- a STRING and an INTEGER (Nlines) and yes/no (CaseSense)
and a FILENAME to script
$THISscript.

INPUT FORMAT: <string> <Nlines> <yes|no> <filename>

EXAMPLES:
   $THISscriptBASE 'error' 3 no /var/adm/SYSLOG
   $THISscriptBASE 'error|warning|core|dump' 3 no /var/adm/SYSLOG
   $THISscriptBASE 'file systems' 3 yes /var/adm/SYSLOG

CURRENT INPUT: $ALLinput
"

## input_error <reason-text>
##   Write the general usage message plus the specific reason to stderr,
##   then exit with a non-zero status so that callers can detect the
##   failure.
input_error () {
  printf '%s\n' "
$ERRMSG_MAIN
$1

Exiting ...
" >&2
  exit 1
}

if test "$STRINGin" = ""
then
  input_error "Supply a (search) STRING to script
$THISscript."
fi

if test "$Nlines" = ""
then
  input_error "Supply an INTEGER (plus-minus-Num-Lines) to script
$THISscript."
fi

## A non-numeric N would make the awk loop tests below compare strings
## instead of numbers, so reject anything that is not all digits.
case "$Nlines" in
  *[!0-9]*)
    input_error "The plus-minus-Num-Lines value '$Nlines' is not a non-negative INTEGER."
    ;;
esac

if test "$CaseSense" = ""
then
  input_error "Supply a Case-Sensitivity Indicator (yes/no) to script
$THISscript."
fi

if test "$FILEin" = ""
then
  input_error "Supply the FILENAME (of the file to search) to script
$THISscript."
fi

if test -d "$FILEin" || test ! -r "$FILEin"
then
  input_error "The file '$FILEin' is not a readable file."
fi

#############################################################################
## Set DIRxpg for use in the 'stand-alone' version of the 'xpg' system.
#############################################################################

# DIRxpg="$HOME/apps/xpg"
DIRxpg="."

########################################################################
## PREPARE A REPORT FILE --- and its HEADING.
## (Put the lines in a local file whose name is built in $OUTLIST,
##  by the 'set_localoutlist' utility.)
########################################################################
## The script is sourced with dot (.) because it has to set OUTLIST in
## THIS shell; a child process could not hand the variable back.
########################################################################

. "$DIRxpg/set_localoutlist.sh"

if test "${OUTLIST-}" = ""
then
  input_error "No report-file name (OUTLIST) was set by
$DIRxpg/set_localoutlist.sh."
fi

## Only the exact value 'no' turns case-sensitivity off. The same rule
## is applied inside the awk program, so this message always describes
## the search that is really performed.
CaseSenseMsg="( CASE-SENSITIVE! )"

if test "$CaseSense" = "no"
then
  CaseSenseMsg="(upper or lower case ; that is, CASE-INSENSITIVE)"
fi

case "$STRINGin" in
  *'|'*) StringMsg="STRINGS separated by '|' in " ;;
  *)     StringMsg="STRING" ;;
esac

## A here-document is used (rather than 'echo') so that backslashes in
## the search string or filename are written out unchanged.
cat > "$OUTLIST" <<EOF
********************* $(date '+%Y %b %d %a %T%p %Z') ******************

'MATCH' LINES FROM FILE
  $FILEin

Lines that contain the $StringMsg '$STRINGin'
$CaseSenseMsg
--- INCLUDING $Nlines line(s) ABOVE-AND-BELOW 'match' lines.

All lines are preceded by line numbers.
An asterisk (*) before a line-number indicates a match, to the string,
was found in the line.

LineNumber:Text
----------------------------------------------------------------------------

EOF

##################################################
## FOR TESTING: set TEST to YES to get a summary
## of the request on stdout before the search.
##################################################

# TEST="YES"
TEST="NO"

if test "$TEST" = "YES"
then
  printf '%s\n' "
*..........................................................
* Lines in file $FILEin
* that match the '|'-separated sub-strings in '$STRINGin'
* --- including $Nlines line(s) above-and-below matches
* --- are shown below.
*..........................................................
* All lines are preceded by line numbers.
* An asterisk (*) before a line-number indicates a match.
*..........................................................
"
  # set -x
fi

#############################################################################
## CALL 'awk' -- with an appropriate awk program -- with the file as input.
#############################################################################
## 'cut -c1-3071' is put in front of awk to avoid the
## 'Input record too long' error that stops some awk versions dead.
##
## The search string is handed to awk through the environment (ENVIRON)
## and not through '-v', because '-v' values go through awk escape
## processing: a backslash in the string would be altered, and a lone
## trailing backslash gives 'Newline in string ... at source line 1'.
##
## NOTE: The awk program is inside single quotes, so no single quote
##       may appear in it, not even in its comments.
#############################################################################

cut -c1-3071 < "$FILEin" | \
XPG_SEARCH_STRING="$STRINGin" \
awk -v N="$Nlines" -v CASESENSE="$CaseSense" \
'BEGIN {

   ## Force N to be a number, so every comparison with it is numeric.
   N = N + 0

   ##################################################
   ## After converting STRING to upper-case,
   ## if CASESENSE=no,
   ## split the "STRING" into NS "subSTRING"s -- at
   ## occurrences of a vertical bar (|).
   ##################################################
   STRING = ENVIRON["XPG_SEARCH_STRING"]
   if ( CASESENSE == "no" ) { STRING = toupper(STRING) }
   NS = split(STRING, subSTRING, "|")

   ###################################################
   ## "aftcount" holds the integer N,N-1,...,2,1, or 0
   ## --- representing the number of lines after the
   ## last matched line that still need to be printed.
   ###################################################
   aftcount = 0

   ######################################################
   ## "lastprt" holds the line# of the line last printed.
   ## It is what keeps a line from being printed twice.
   ######################################################
   lastprt = 0
}
#END OF BEGIN
#START OF BODY
{
   ####################################################
   ## "Match" indicates whether at least one of the
   ## subSTRINGs occurs in the current line ($0).
   ##
   ## Only CASESENSE "no" folds the line to upper case;
   ## every other value compares the line as it is.
   ##
   ## "index" is used, not "~", so the sub-strings are
   ## taken literally and special characters are safe.
   ## An empty sub-string (for example from a trailing
   ## bar) is skipped, because the result of searching
   ## for an empty string differs between awk versions.
   ####################################################
   HOLDline = ( CASESENSE == "no" ) ? toupper($0) : $0

   Match = 0
   for ( i = 1 ; i <= NS ; i++ ) {
      if ( subSTRING[i] != "" && index(HOLDline, subSTRING[i]) != 0 ) {
         Match = 1
         break
      }
   }

   if ( Match == 1 ) {
      ######################################################
      ## A MATCH: print those of the N previous lines that
      ## were not printed already (their line numbers are
      ## greater than "lastprt"), then the current line with
      ## an asterisk. Restart "aftcount" at N.
      ######################################################
      for ( recnum = NR - N ; recnum < NR ; recnum++ ) {
         if ( recnum > lastprt ) {
            printf (" %s : %s \n", recnum, prev[recnum])
         }
      }
      printf ("*%s : %s \n", NR, $0)
      aftcount = N
      lastprt = NR
   } else if ( aftcount != 0 ) {
      ######################################################
      ## ONE OF THE N "AFTER-A-MATCH-LINES": print it, and
      ## print a blank separator line after the last one.
      ######################################################
      printf (" %s : %s \n", NR, $0)
      if ( aftcount == 1 ) { print "" }
      aftcount = aftcount - 1
      lastprt = NR
   }

   ########################################################
   ## Remember the current line under its record number and
   ## forget the one that has dropped out of the N-line
   ## window, so at most N previous lines are ever held.
   ########################################################
   prev[NR] = $0
   delete prev[NR - N]
#END OF BODY
}' >> "$OUTLIST"

########################################################################
## ADD A TRAILER TO THE REPORT-FILE.
########################################################################

cat >> "$OUTLIST" <<EOF

----------------------------------------------------------------------------

The report above was created by the utility script

   $THISscriptBASE

The script uses an 'awk' program that essentially extends the capabilities
of the 'egrep' (extended grep) program. ['grep' is a program that can find
lines in a file that contain a given string of characters.]

'egrep' can show the lines of a file that contain matches to *one-or-more*
strings. Example: 'error', 'fail', 'fatal', or 'warning'.

With 'egrep', the multiple-strings argument is formed by separating the
multiple strings by vertical-bars (|). Example: 'fatal|error|fail|warning'

But 'egrep' cannot show nearby lines.

The 'awk' program used here essentially creates an extension of the
'egrep' (extended grep) utility. As the name (above) of the utility script
implies, this utility can show plus-or-minus N lines above and below the
lines that have a match for the search string(s).

You could say this is an 'eegrep' utility --- extended, extended grep.

----------------------------------------------------------------------------

With 'egrep', one can make the search case-insenstive with the '-i' option.

Likewise, this '$THISscriptBASE' utility can be told to make the search
either case-insensitive or case-sensitive.

As indicated above, like 'egrep', this '$THISscriptBASE' utility
accepts the '|' character.

Then, to find all lines containing either 'memory' or 'RAM' or 'disk',
you can use the search string 'memory|ram|disk' --- with case-sensitivity
switch OFF.

If that returns too many lines with the string 'ram' --- lines with words
like 'datagram' or 'telegram' or 'ramble' or 'gram' --- then switch the
case-sensitivity switch to ON --- and use a string like 'memory|RAM|disk'.

----------------------------------------------------------------------------

The script

   $THISscriptBASE

is called from within the FE system 'xpg' text-file browse/extract/print
utility.

[Actually, the 'xpg' script calls the Tcl-Tk GUI script

   $DIRxpg/shofil.tk

 --- and it is this Tcl-Tk script that calls the script
 $THISscriptBASE.]

---------------------------------------------------------------------------

The 'Extract-String-Match-Lines' capability of the 'xpg' utility is very
useful --- especially to people who deal with huge files such as system log
files and huge lists (say of files) and large README-like help files ---
people like system administrators and application developers and engineers.

---------------------------------------------------------------------------

If 'xpg' is 'not found' when you type 'xpg' at a shell command prompt,
you can make an alias for 'xpg' by putting the following alias definition
in your shell 'rc' (run control) file --- such as
$HOME/.bashrc or $HOME/.bash_aliases.

   alias xpg='$HOME/apps/bin/xpg'

Then logoff-logon to make the alias available in every window of a login
session.

---

You could also make a desktop icon for the

   $HOME/apps/bin/xpg

utility, and drop text files on it.

---

If you type 'xpg', without an input filename, at a command prompt, you
will see a usage hint like the following.

   Usage: xpg [-h] [-f] file1 [ file2 ... file8 ]

I.e. xpg is setup to browse up to 8 files at a time. The limit is to help
avoid accidentally opening up more xpg-windows than one wants to deal with.
This can happen if you have an 'xpg' icon on your desktop and you
unintentionally drag a sheet-load of filenames onto the icon.

********************* $(date '+%Y %b %d %a %T%p %Z') ******************

EOF

########################################################################
## SHOW THE REPORT-FILE OF MATCH LINES FROM THE SELECTED FILE, $FILEin.
########################################################################

"$DIRxpg/xpg" "$OUTLIST"