#!/bin/bash
# This program greps for mails matching a given regexp
# and runs mutt on the resulting MBox.
#
# Author: http://the-brown-dragon.com
#
# History:
#   2009-08-28
#       - Added ability to search in compressed (bzip-ed) mailboxes.
#       - Now taking list of mboxes to search from user (clean solution).
#       - BUGFIX: Search expression with spaces
#   2009-08-25
#       - Instead of ignoring /archive/ subdirectories, relegated
#         to the end of the search.
#   2009-08-21
#       - Added ability for users to cancel search (trapping Ctrl+C)
#       - Somewhat improved UI
#       - Ignoring "/archive/" subdirectories
#       - Accepts an environment variable (GREPMAILPROG) for MUA
#         If not provided, defaults to mutt
#       - BUGFIX: Name display incorrect when only email present
#   2009-08-20
#       - Searches all MBox mailboxes under ~/Mail directory
#       - Does a smartsearch
#       - Ignores Base64-Blocks
#       - Runs Mutt on the search results
#       - Displays progress of search in color
##############################################################################

# Check args: a search regexp and at least one mailbox are required.
# The usage text goes to stderr, and the script name is taken with a
# parameter expansion so that a path containing spaces is handled.
if [[ -z "${2:-}" ]]; then
    echo "Usage: ${0##*/} regexp-to-search list-of-mboxes" >&2
    exit 1
fi

# Build the result mailbox name for a search expression.
#   $1 - the search regexp
# Prints "grepmail-<tag>.<pid>.mbox", where <tag> is the regexp up to its
# first space with every character outside [A-Za-z0-9._-] replaced by "_".
# The regexp is arbitrary user text: left as-is, a "/" would point the name
# into a non-existent directory and shell metacharacters would leak into
# the commands that later use the name.
result_mbox_name ()
{
    local tag=${1%% *}
    printf 'grepmail-%s.%s.mbox\n' "${tag//[^A-Za-z0-9._-]/_}" "$$"
}

# Set up variables
REGEXP=$1
SEDFILE1=grepmail1.$$.sed   # temporary sed script (created in the current directory)
SEDFILE2=grepmail2.$$.sed   # temporary sed script (created in the current directory)
RESULTMB=$(result_mbox_name "${REGEXP}")

# Remaining are file names
shift

# Create the sed script that copies every message matching a regexp out of
# an mbox stream.
#   $1 - regexp to search for
#   $2 - file the sed script is written to
# Sets the global IGNORECASE: if the regexp contains no capital letters the
# search ignores case (good default behaviour) by lower-casing each line
# before it is matched; the saved message keeps its original case.
# The generated script relies on GNU sed extensions.
write_search_script ()
{
    local regexp=$1 script=$2
    # Delimiter of the generated regexp address.  A control character (SOH)
    # is used instead of "/" so that expressions containing slashes, such as
    # "text/html", no longer terminate the address early.
    local delim=$'\001'

    case ${regexp} in
        *[ABCDEFGHIJKLMNOPQRSTUVWXYZ]* )
            IGNORECASE="" # Don't ignore case
            ;;
        * )
            IGNORECASE="y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/"
            ;;
    esac

    # The here-document delimiter is deliberately unquoted: the shell
    # variables are expanded into the script, while "\$" and "\\" yield a
    # literal "$" and "\".  Line 1 uses Q rather than q because q would
    # still auto-print the first line of a non-mailbox file into the results.
    cat > "${script}" <<SEDFILE1END
1{/^From /!Q} # Not a mailbox - just quit, printing nothing
/^From /{   # New mail
x           # Get previously stored mail
/^MATCH/{   # Has it been marked as a match?
s/^MATCH//  # Remove marker
s/^\nFrom /From /   # Clean up any trailing newlines
p           # Save it!
}
s/.*//      # Empty previous message
x           # And start afresh
}
H           # Save this message line also
\${         # End of file?
x           # Get previously stored mail
/^MATCH/{   # Has it been marked as a match?
s/^MATCH//  # Remove marker
s/^\nFrom /From /   # Clean up any trailing newlines
p           # Save it!
}
d           # Empty last message
}
# Ignore Base64 encoded
/^[A-Za-z0-9+/=]\+$/d
# Match regular expression
${IGNORECASE}
\\${delim}${regexp}${delim}{
g           # Match found! Get the stored message
/^MATCH/!s/^/MATCH/ # Mark it as matched (if not already marked)
h           # Put it back
}
d           # Just continue
SEDFILE1END
}
write_search_script "${REGEXP}" "${SEDFILE1}"

# Create the sed script that turns a stream of mbox messages into progress
# output: for each message, one line "  (sender) subject" printed in green.
#   $1 - file the sed script is written to
# The sender is the display name of the first "From:" header, or the bare
# e-mail address when no name is present.  Once a message has been shown,
# the rest of its lines are skipped until the next "From " separator.
write_display_script ()
{
    local script=$1
    # The ANSI colour sequences are generated from an explicit escape
    # character rather than from a raw control byte embedded in this file,
    # which is invisible in most editors and easily lost when copying.
    local esc=$'\033'

    # The here-document delimiter is deliberately unquoted so that ${esc}
    # is expanded; nothing else in the body is subject to expansion.
    cat > "${script}" <<SEDFILE2END
/^From /{s/.*//;h;} # New mail - empty hold area
x                   # Get hold area
/DONE/{x;d;}        # Already processed message
x                   # Not yet, get back current line
/^From: /{          # Found a "From:" line
x                   # Save it and get stored text
/^From: /{x;d;}     # "From:" already found, revert!
/^Subject:/H        # (else)Stored text has "Subject:"? Add it
}
/^Subject:/{        # Found a "Subject:" line
x                   # Save it and get stored text
/\n*Subject:/{x;d;} # "Subject:" already found, revert!
/^From:/{G;h;}      # (else)Stored text has "From:"? Add it
}
g                   # Get stored text
/^From:.*\nSubject:.*$/{    # Got both "From:" and "Subject"?
s/^From: *\(.*\) *\nSubject: *\(.*\) *$/\1\n\2/ # Break 'em up
h;s/.*\n//          # Isolate "Subject:" value
x;s/\n.*//          # Isolate "From:" value
s/"//g              # Remove quotes
s/\(..*\)[[<(].*/\1/ # Remove email if name is present
s/[ \n]*$//         # Clean trailing spaces
s/^<//;s/>$//;      # Remove separators if email left
G                   # Append "Subject" value
s/\(.*\)\n\(.*\)/  ${esc}[32m(\1) \2${esc}[m/   # Format...
p                   # ...and SHOW!
s/.*/DONE/          # Mark as done
h                   # Save DONE marker
}
d                   # Just continue...
SEDFILE2END
}
write_display_script "${SEDFILE2}"

# Let the user cut the search short with Ctrl+C: the SIGINT handler does
# nothing but raise the USERINTERRUPT flag (0 = keep going, 1 = stop).
stoploop() { USERINTERRUPT=1; }
USERINTERRUPT=0
trap 'stoploop' INT

# Prepare (create or truncate) the result mailbox.  Stop right away when it
# cannot be written: otherwise nothing could be saved and the run would end
# with a misleading "No results found!".  The temporary sed scripts are
# removed first so that they are not left behind.
if ! true > "${RESULTMB}"; then
    echo "Cannot create result mailbox: ${RESULTMB}" >&2
    rm -f "${SEDFILE1}" "${SEDFILE2}"
    exit 1
fi

# Restrict the internal field separator (IFS) to newline so that unquoted
# expansions of filenames containing spaces are not split into several words
IFS=$'\n'

# Search in specified mailboxes.
#   $@ - mailbox files; names ending in .bz2 are decompressed with bzcat
# Uses the globals SEDFILE1 (message selection script), SEDFILE2 (progress
# display script), RESULTMB (mailbox the matches are appended to) and
# USERINTERRUPT (set to 1 to stop before the next mailbox).
# Every mailbox is taken exactly as given: the arguments are iterated with
# "$@" and all expansions are quoted, so names containing spaces, newlines
# or glob characters are neither split nor expanded.
search_mailboxes ()
{
    local mbox
    for mbox in "$@"
    do
        if [[ ${USERINTERRUPT} == 1 ]]; then
            echo User Interrupted Search...
            break
        fi
        printf 'Searching %s\n' "${mbox}"
        # tee saves the matching messages while the second sed shows progress
        case ${mbox} in
            *.bz2 )
                bzcat -- "${mbox}" | sed -f "${SEDFILE1}" | tee -a "${RESULTMB}" | sed -f "${SEDFILE2}"
                ;;
            * )
                sed -f "${SEDFILE1}" -- "${mbox}" | tee -a "${RESULTMB}" | sed -f "${SEDFILE2}"
                ;;
        esac
    done
}
search_mailboxes "$@"

# Done: clean up and hand the results to the mail program.
# Uses the globals SEDFILE1 and SEDFILE2 (temporary sed scripts, removed
# here) and RESULTMB (result mailbox).  A non-empty result mailbox is opened
# with the command line in the GREPMAILPROG environment variable ("mutt -f"
# when unset or empty); an empty one is deleted.
finish_search ()
{
    # Clean up
    rm -f -- "${SEDFILE1}" "${SEDFILE2}"

    # Run mutt on results
    if [[ -s ${RESULTMB} ]]; then
        printf 'Search results saved in: \033[32;1m%s\033[m\n' "${RESULTMB}"
        # GREPMAILPROG is a shell command line, so it is run through a shell.
        # The mailbox name is handed over as the positional parameter $1
        # instead of being pasted into the command text, so no character in
        # it can be interpreted as shell syntax.  $BASH is the interpreter
        # already running this script, so no fixed path to bash is assumed.
        "${BASH:-bash}" -c "${GREPMAILPROG:-mutt -f} \"\$1\"" grepmail "${RESULTMB}"
    else
        echo No results found!
        rm -f -- "${RESULTMB}"
    fi
}
finish_search

# TODO
#   - Improve ability to specify mailboxes
#   - Allow user to turn off color and/or progress
#   - Ignore HTML tags and other meta information
#   - Allow search by date range


