Article 8966 of comp.lang.perl:
Xref: feenix.metronet.com comp.lang.perl:8966
Newsgroups: comp.lang.perl
Path: feenix.metronet.com!internet.spss.com!uchinews!vixen.cso.uiuc.edu!uwm.edu!spool.mu.edu!torn!nott!bnrgate!corpgate!nmerh207!friedman
From: friedman@bnr.ca (Barry Friedman)
Subject: Mailclip was Re: Newsclip?
Nntp-Posting-Host: nmerh53
Message-ID: <1993Dec17.073552.25096@bnr.ca>
Reply-To: friedman@bnr.ca
Organization: Northern Telecom, Ltd.
Sender: usenet@bnr.ca ( Usenet System )
References: <2elcpt$58f@steffi.demon.co.uk> <1993Dec16.014752.24396@bnr.ca>
Date: Fri, 17 Dec 1993 07:35:52 GMT
Lines: 1025

Rather than mail out multiple copies, I'm posting.


#---------------------------------- cut here ----------------------------------
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Infovue    <infovue@nmerh207> on Thu Dec 16 09:25:23 1993
#
# This archive contains:
#	README		Mailclip	clip		cliprc		
#
# Error checking via wc(1) will be performed.

LANG=""; export LANG
PATH=/bin:/usr/bin:$PATH; export PATH

echo x - README
cat >README <<'@EOF'
o   The clip version here is slightly modified from Larry's original.

o   A sample cliprc is included.

o   You can print the man page by saying:

          nroff -man clip


o   The mailing list file is
	 "address"  [a|b]

	 where a = underlined version
	       b = standard

o   mailclip should be run via cron

o   Please check & modify installation dependant constants in mailclip

Questions & comments to: friedman@bnr.ca
@EOF
set `wc -lwc <README`
if test $1$2$3 != 2067428
then
	echo ERROR: wc results of README are $* should be 20 67 428
fi

chmod 664 README

echo x - Mailclip
sed 's/^@//' >Mailclip <<'@EOF'
#!/usr/local/bin/perl

# $Log:	Mailclip,v $
# Revision 1.1  93/12/16  09:23:24  09:23:24  infovue (  Infovue)
# Initial revision
# 

#$dbg++;
#
#  usage: Mailclip [-r]
#  options:
#     -r   reprocesses last clip output without restarting clip
#  
#  
#
#  Mailclip reads the clip output and produces a formatted compendium
#  of all articles clipped.  Then it reads a list of recipients from
#  the file specified in $MLIST and mails them a copy.  The lines in
#  $MLIST are of the form:
#    destination-address [a|b]
#  where 'a' specifies the underlined version
#   and  'b'     "     the plain text version
#  
#  
#  if $POST is defined to be the full path to inews, a copy of the
#  clipping file will be posted to a suitable newsgroup specified as
#  $NEWSGROUPS  
#

#
#   Modify the constants below to match your environment
#  

$HOME = "/users/friedman";
$N = "$HOME/bin/N";
$MAIL = "/bin/mail";
$CLIPFILE = "$HOME/tmp/n1";
$TMP1 = "/tmp/clip1.$$";
$TMP2 = "/tmp/clip2.$$";
$MLIST= "/disc6/usenet/lib/news/clip.list";
$SPOOLDIR="/disc6/usenet/spool/news";
$NEWSGROUPS = "nt.general";
$POSTINGACCT = "infovue@nmerh207";
$POST = "/utils/bin/inews";  # path to inews for posting

if ($_ = shift(@ARGV) ) {
	/-r/ && $re_proc++;
} 

print STDERR "re-processing $CLIPFILE\n" if $re_proc;
unless($re_proc || $dbg) {
	#  start a new clip file
	system($N);
	# this is going to take a few seconds
	sleep 10;
} 

sub got_clippings_q {
  local($CLIPFILE) = shift;
  local($rc);

	open (CLIP, $CLIPFILE) || die "can't open clipfile";
	while (<CLIP>) {
		if (/^subject:/i) {
			$rc++;
			last;
		} 
	}
	close($CLIPFILE);
	$rc;
} 

unless (&got_clippings_q($CLIPFILE)) {
#  print STDERR "no articles found\n";
	exit; # do nothing unless articles found
} 

open (MLIST) || die "can't open mailing list"; 
while (<MLIST>) {
 ($nm, $opt)=split;
 if ($opt eq "a") {
	 push(@ml1,$nm);
 }else{
	 push(@ml2,$nm);
 } 
}

open (CLIP, $CLIPFILE) || die "can't open clipfile";
$lookflg=0;
while(<CLIP>) {
  $lookflg = 1 if /^$/;
	if ($lookflg) {
		if (/^(\w+\.\w+[^\s]*)\s*#\s*(\d+)\s*From:/) {
			print STDERR "found: $1 $2\n" if $dbg;
			push (@grp,$1);
			push (@art,$2);
			$lookflg++;
		} 
		if ( $lookflg == 2 && /^Subject: (.*)/ ) {
			($subj=$1) =~ s/^Re:\s*//;
			push (@subj,$subj);
			$lookflg=0;
		} 
	}
} 
print STDERR join("|",@grp)."\n" if $dbg;
print STDERR join("|",@subj)."\n" if $dbg;

close(CLIP);
open (CLIP, $CLIPFILE) || die "can't open clipfile";

# copy clipfile to tmp 

open (TMP1, ">$TMP1") || die "can't open tmpfile"; 
open (TMP2, ">$TMP2") || die "can't open tmpfile"; 

$dt = `/bin/date`;
chop ($dt);
($yr) = ($dt =~ /(\d+)$/);
($a,$b,$c,$mday,$mon) = localtime(time);
$clipdt=sprintf("%d-%2.2d-%2.2d",$yr,$mon+1,$mday);

$clip_hd = <<EOI ;
Subject: News clippings from Usenet $clipdt

                          Usenet Clipping Service
                      $dt
================================================================================
                                Contents
================================================================================

Newsgroup            Subject

EOI
print TMP1 $clip_hd;
print TMP2 $clip_hd;


for ($i=0; $i< scalar(@grp) ; $i++ ) {
	if (! $seen{$subj[$i]}++) {
		printf(TMP1 "%-21.21s%s\n",$grp[$i],$subj[$i]);
		printf(TMP2 "%-21.21s%s\n",$grp[$i],$subj[$i]);
	}
} 

$clip_hd1 = <<EOI ;

================================================================================
                               Clippings
================================================================================

EOI

print TMP1 $clip_hd1;
print TMP2 $clip_hd1;

while (<CLIP>) {
	next if /^\d+ starting at \d+% for/;
	next if /^[*]+ NEW HISTORY FILE /;
	next if /^clip: caught up/;
	print TMP1;
	s/.\010//g;
	print TMP2;
} 

$clip_hd2 = <<EOI ;

================================================================================
                       Full Text of Articles Clipped
================================================================================

EOI

print TMP1 $clip_hd2;
print TMP2 $clip_hd2;

for ($i=0; $i< scalar(@grp) ; $i++ ) {
	($grp=$grp[$i]) =~ s@\.@/@g;
  print STDERR "$i  group: ",$grp,"art: ",$art[$i],"\n" if $dbg;
  print STDERR "$SPOOLDIR/$grp/$art[$i]\n" if $dbg;
	if ( open(ART, "$SPOOLDIR/$grp/$art[$i]") ) {
		$inhd=1;
		@hd=();
		@bdy=();
		$cnt=0;
		while(<ART>) {
			next if ($cnt++ > 200);
			$inhd = 0 if /^$/;
			if ($inhd) {
				push (@hd,$_);
			} else {
				push (@bdy,$_);
			} 
		} 
		if ($dbg1) {
			print STDERR "headers --- \n";
			print STDERR @hd;
			print STDERR "body --- \n";
			print STDERR @bdy;
			print STDERR "----------\n";
		} 
		if (@hd1 = grep (/^Newsgroups:/,@hd) ) {
			print TMP1 @hd1 ;
			print TMP2 @hd1 ;
		} 
		if (@hd1 = grep (/^Date:/,@hd) ) {
			print TMP1 @hd1;
			print TMP2 @hd1 ;
		} 
		if (@hd1 = grep (/^From:/,@hd) ) {
			print TMP1 @hd1;
			print TMP2 @hd1 ;
		} 
		if (@hd1 = grep (/^Organization:/,@hd) ) {
			print TMP1 @hd1;
			print TMP2 @hd1 ;
		} 
		if (@hd1 = grep (/^Subject:/,@hd) ) {
			print TMP1 @hd1;
			print TMP2 @hd1 ;
		} 
$clip_hd3 = <<EOI ;

                -----------   End of Article   -----------

EOI
$clip_hd4 = <<EOI ;
                -----------   Article not available   -----------

EOI

		print TMP1 @bdy,$clip_hd3;
		print TMP2 @bdy,$clip_hd3;
	} else {
		print TMP1 "Subject: $subj[$i]\n",$clip_hd4;
		print TMP2 "Subject: $subj[$i]\n",$clip_hd4;
	}
} 


$clip_hd3 = <<EOI ;

================================================================================
EOI

print TMP1 $clip_hd3;
print TMP2 $clip_hd3;

close (TMP1);
close (TMP2);

if ($POST) {
	open(NEWS,"|$POST") || die "can't open pipe to POST"; ;

	print NEWS <<EOI ;
Newsgroups: $NEWSGROUPS
@From: $POSTINGACCT
EOI
	open (TMP2, $TMP2) || die "can't open tmpfile"; 
	while (<TMP2>) {
		print NEWS;	
	} 
	close (NEWS);
} 

exit if $dbg;

if ($#ml1 > -1) {
	open (MAIL, "|$MAIL ".join(" ",@ml1)) || die "can't open mailing list"; 
	open (TMP1, $TMP1) || die "can't open tmpfile"; 
	while (<TMP1>) {
	  print MAIL;	
	} 
	close (MAIL);
} 

if ($#ml2 > -1) {
	open (MAIL, "|$MAIL ".join(" ",@ml2)) || die "can't open mailing list"; 
	open (TMP2, $TMP2) || die "can't open tmpfile"; 
	while (<TMP2>) {
	  print MAIL;	
	} 
	close (MAIL);
} 

unlink($TMP1);
unlink($TMP2);
@EOF
set `wc -lwc <Mailclip`
if test $1$2$3 != 2878206289
then
	echo ERROR: wc results of Mailclip are $* should be 287 820 6289
fi

chmod 555 Mailclip

echo x - clip
sed 's/^@//' >clip <<'@EOF'
#!/utils/bin/perl
'di';
'ig00';
push(@INC,"/utils/lib/perl");
#
# $Header: /tmp_mnt/home/netlabs1/lwall/pl/RCS/clip,v 1.1 92/07/13 12:37:09 lwall Exp Locker: lwall $
#
# $Log:	clip,v $
# Revision 1.1  92/07/13  12:37:09  lwall
# Initial revision
# 

$HOME = $ENV{HOME}
     || $ENV{LOGDIR}
     || (getpwuid($<))[7]
     || die "No home directory!!!\n";

# Configurable parameters, may be overridden in .cliprc

$SPOOLDIR = "/usr/spool/news";		# Where news articles are stored.
$NEWSLIB = "/usr/lib/news";		# Where Cnews keeps history file.
$HOMETMP = "$HOME/tmp";			# Where clip should put output.
$HOMEBIN = "$HOME/bin";			# Where clip should install N script.
$MAXLOAD = 3;				# What load average to suspend at.
$NICE = 16;				# What priority to run at.
$DEBUG = 0;				# Whether to be noisy.

$CLIPRC = "$HOME/.cliprc";
require $CLIPRC;

# Everything from here on should be machine independent.

open(N0, "$HOMETMP/n0");
$oldpid = <N0> + 0;
close N0;
if ($oldpid) {
    die "Already a clip process running ($oldpid)\n" if kill 0, $oldpid;
}

die "You must call both &NGSKIP and &SCANNER in $CLIPRC\n"
    unless defined &ngskip && defined &scanner;

sub fixmsg {
    local($_, $file, $line) = @_;
    $line -= ($Preamble =~ y/\n//);
    s/ file \(eval\) at line (\d+)/" $file at line " .  ($1 + $line)/eg;
    s/ at \(eval\) line (\d+)/" in $file at line " .  ($1 + $line)/eg;
    # $* = 1;s/^/$0: /g; # too noisy to have this
    die;
}

sub NGSKIP {
    local($userstuff) = @_;
    if ($userstuff =~ tr/\n// >= 3 && $userstuff !~ /study/) {
	$study = "\t\tstudy;\n";
    }
    else {
	$study = "";
    }
    $eval = (($Preamble = <<'END1' . $study) . $userstuff . <<'END2');
	    sub ngskip {
		local($_) = $nglist;
		eval {
		    &skip if /^cancelled$/;
END1
		};
		if ($@) {
		    $@ = "" if $@ eq "You should never see this\n";
		    die $@ if $@;
		}
	    }
END2
    print STDERR $eval if $DEBUG & 1;
    eval $eval;
    &fixmsg($@, (caller)[1,2]) if $@;
    1;
}

sub SCANNER {
    local($userstuff) = @_;
    $eval = (($Preamble=<<'END1') . $userstuff . <<'END2');
	    sub scanner {
		while (<ART>) {
		    $totalhits = 0;
		    do {
			study;
			$hits = 0;
END1
			$totalhits += $hits;
		    } while $hits;
		    &printhit if $totalhits;
		}
	    }
END2
    print STDERR $eval if $DEBUG & 1;
    eval $eval;
    &fixmsg($@, (caller)[1,2]) if $@;
    1;
}

require "timelocal.pl";

fork && exit;			# avoid nohup behavior

$pid = $$;

$pmeter = fork;
defined $pmeter || die "can't fork: $!";
if ($pmeter == 0) {
    &pmeter($MAXLOAD, $pid, $DEBUG & 2);
    die "Not reached";
}

if ($HOMEBIN) {
    system "echo kill -HUP $pid >$HOMEBIN/N";
    system "(echo ps $pid; echo ps $pmeter) >$HOMEBIN/P";
    chmod 0755, "$HOMEBIN/N";
    chmod 0755, "$HOMEBIN/P";
}

#setpriority(0, 0, $NICE);		# set very slow priority

$date = shift;
chop($date = `cat $HOME/.lastclip`) if !$date && -f "$HOME/.lastclip";
$date = &lidate($date);

chdir $SPOOLDIR || die "Can't cd: $!\n";

select(STDERR); $| = 1;
select(STDOUT); $| = 1;

&CATCH;
$SIG{HUP} = CATCH;		# send SIGHUP to switch to new history file
$SIG{USR1} = RESTART;		# send SIGUSR1 to re-execute clip
$SIG{ALRM} = IGNORE;		# so we can send ourselves an alarm safely

$r = "\r" if -t STDOUT;

($dev,$ino,$mode,$nlink,$uid) = stat STDOUT;
$origuid = $uid;
for (;;) {
  LOGLINE:
    while (<LOG>) {
	$pos = tell(LOG);
	chop;
	($messid,$date,$nglist) = split(/\t/);
	$0 = "clip at $messid" if $DEBUG & 1;
	$wanted = 1;
	&ngskip;
	next LOGLINE unless $wanted;
	$date = &gidate($date);
	($ng,$art) = split(m![ /]!,$nglist);
	next unless $art;
	$ng_sav = $ng;
	$ng =~ y!.!/!;
	open(ART,"$ng/$art") || next;
	next if -s ART > 100_000;
	$count = 0;
	++$slept;		# to force quick update after big batch
	$/ = '';
	$header = <ART>;
	$_ = '';
	&scanner;
	close ART;
	$sleep = 5;
    }
    continue {
	$/ = "\n";
    }
    print STDERR "clip: caught up$r\n" unless $tailing++;
    sleep $sleep;
    $slept += $sleep;
    if ($slept > 300) {
	$slept = 0;
	if ($date != $lastdate) {
	    ($dev,$ino,$mode,$nlink,$uid,$gid) = stat STDOUT;
	    exit unless $uid == $origuid;
	    open(LASTDATE,">$HOME/.lastclip");
	    print LASTDATE &cdate($date),"\n";
	    close LASTDATE;
	    $lastdate = $date;
	}
	($dev,$ino) = stat("$NEWSLIB/history");
	if ($dev != $logdev || $ino != $logino) {
	    print STDERR "************* NEW HISTORY FILE *************$r\n";
	    open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
	    ($logdev, $logino) = stat LOG;
	    &seekdate($date);
	    $tailing = 0;
	    $slept = 300;		# force immediate update of lastclip.
	}
    }
    $sleep++ if $sleep < 120;
    seek(LOG,$pos,0);
}

sub hit {
    ($before,$match,$after) = ($`,$&,$');
    $match =~ s/(.)/_\b$1/g;
    $_ = "$before$match$after";
    study;
    $hits++;
    unless ($count++) {
	$header =~ /From: (.*)/;
	($from = $1) =~ s/.*\((.*)\).*/$1/;
	#print("$r\n\@$ng/$art  \tFrom: $from$r\n") || exit;
	print("$r\n$ng_sav  # $art  From: $from$r  $messid\n") || exit;
	$header =~ /Subject: (.*)/ && print "Subject: $1$r\n";
	$0 = "clip " . &cdate($date);
    }
}

sub skip {
    $wanted = 0;
    die "You should never see this\n";
}

sub printhit {
    if (length() > 500) {
	$wanted = '';
	for ($line = 0; /.*\n/g; $line++) {
	    if ($& =~ /_\010/) {
		for ($w = $line - 3; $w <= $line + 3; $w++) {
		    vec($wanted,$w,1) = 1;
		}
	    }
	}
	$unlines = 0;
	for ($line = 0; /.*\n/g; $line++) {
	    if (vec($wanted,$line,1)) {
		print($&,$r) || exit;
		$unlines = 0;
	    }
	    else {
		print("...\n$r") || exit unless $unlines++;
	    }
	}
    }
    else {
	s/\n/\n$r/g if $r;
	print() || exit;
    }
}

sub seekdate {
    local($start) = shift;
    if ($start == 9_999_999_999) {
	seek(LOG,0,2);
	$pos = tell(LOG);
	print STDERR "$pid starting at eof...$r\n";
	return;
    }
    ($st_dev,$st_ino,$st_mode,$st_nlink,$st_uid,$st_gid,$st_rdev,$st_size,
	$st_atime,$st_mtime,$st_ctime,$st_blksize,$st_blocks) = stat(LOG);
    for ($offset = $st_size - 100_000; $offset > 0; $offset -= 100_000) {
	if (seek(LOG,$offset,0)) {
	    $_ = <LOG>;            # probably starts in middle of a line
	    $_ = <LOG>;
	    ($messid,$date,$nglist) = split(/\t/);
	    $date = &gidate($date);
	    last if $date < $start;
	}
	else {
	    $offset = -1;
	}
    }
    seek(LOG,0,0) if $offset < 0;
    while (<LOG>) {
	($messid,$date,$nglist) = split(/\t/);
	$date = &gidate($date);
	last if $date >= $start;
    }
    $pos = tell(LOG);
    $pct = int($pos * 100 / $st_size);
    print STDERR "$pid starting at $pct% for $start...$r\n";
    $0 = "clip start at $messid" if $DEBUG & 1;
}

sub CATCH {
    &openout;
    open(LOG,"$NEWSLIB/history") || die "Can't open log: $!\n";
    ($logdev, $logino) = stat LOG;
    $date = 9_999_999_999 unless $date;
    &seekdate($date);
    $tailing = 0;
    $slept = 300;		# force immediate update of lastclip.
    $SIG{HUP} = CATCH;
    kill $$, ALRM;
}

sub RESTART {
    kill 9, $pmeter if $pmeter;
    exec "/usr/lib/newsbin/clip";
    die "Couldn't exec clip: $!\n";
}

sub cdate {
    ($sec,$min,$hr,$mday,$mon,$year) = localtime($date);
    sprintf("%02d/%02d/%02d %02d:%02d",$mon+1,$mday,$year,$hr,$min);
}

sub lidate {
    $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
      ? &timelocal(0, $5, $4, $2, $1-1, $3)
      : $_[0];
}

sub gidate {
    $_[0] =~ m#(\d+)/(\d+)/(\d+) (\d+):(\d+)#
      ? &timegm(0, $5, $4, $2, $1-1, $3)
      : $_[0];
}

sub openout {
    rename("$HOMETMP/n8", "$HOMETMP/n9");
    rename("$HOMETMP/n7", "$HOMETMP/n8");
    rename("$HOMETMP/n6", "$HOMETMP/n7");
    rename("$HOMETMP/n5", "$HOMETMP/n6");
    rename("$HOMETMP/n4", "$HOMETMP/n5");
    rename("$HOMETMP/n3", "$HOMETMP/n4");
    rename("$HOMETMP/n2", "$HOMETMP/n3");
    rename("$HOMETMP/n1", "$HOMETMP/n2");
    rename("$HOMETMP/n0", "$HOMETMP/n1");
    open(STDOUT, ">$HOMETMP/n0");
    open(STDERR, ">>&STDOUT");
#    open(STDERR, ">>$HOMETMP/stderr");
}

sub pmeter {
    # ($loadavg, $pid, $debug) = @ARGV;
    local($loadavg, $pid, $debug) = @_;
    $running = 1;
    $0 = "pmeter @_";

    while (kill 0, $pid) {
	`/usr/bin/uptime` =~ /load average:\s+([\d.]+)/
	    || die "Can't run uptime: $!\n";;

	if ($1 > $loadavg) {
	    kill 'STOP', $pid;
	    if ($running) {
		print STDERR "stopping at $1\r\n" if $debug;
		$0 = "pmeter (stopped $pid at $loadavg)";
		$running = 0;
	    }
	}
	else {
	    kill 'CONT', $pid;
	    if (!$running) {
		print STDERR "starting at $1\r\n" if $debug;
		$0 = "pmeter (started $pid at $loadavg)";
		$running = 1;
	    }
	}
	sleep 120;
    }
}
###############################################################

    # These next few lines are legal in both Perl and nroff.

@.00;                       # finish .ig
 
'di           \" finish diversion--previous line must be blank
@.nr nl 0-1    \" fake up transition to first page again
@.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page #####

@.TH CLIP 1 "July 14, 1992"
@.de M           \" man page reference
\\fI\\$1\\fR\\|(\\$2\)\\$3
@..
@.AT 3
@.SH NAME
clip \- personal news clipping service
@.br
pmeter \- start and stop a process according to load average
@.SH SYNOPSIS
@.B clip
[
@.I date
]
@.SH DESCRIPTION
@.I Newsclip
scans incoming news by following the C news history file as it grows
and examining each article listed there for patterns of interest.
@.PP
If a date argument is supplied on the command line (in the form 
@.IR "``mm/dd/yy hh:mm''" , 
including the embedded space)
@.I clip
will read articles since that date.  With no arguments, it examines
the user's ~/.lastclip file to know when it left off scanning;
if no such file exists, every news article will be scanned.
@.PP
Patterns and options are specified in a .cliprc file situated
in your home directory.  The .cliprc file is simply a gob of Perl
code that will be evaluated by clip after it has set its default
options but before it actually goes out to do anything.  The only
mandatory items are a call to each of two subroutines: &NGSKIP and
&SCANNER.  A typical .cliprc file looks like this:
@.nf

@.ne 6
    # Options I want to override.
    $MAXLOAD = 4;

    # Newsgroups I always read anyway, so don't bother.
    &NGSKIP( <<'END' );
        &skip if /comp\e.lang\e.perl/;
        &skip if /rec\e.humor\e.funny/;
    END

@.ne 8
    # Patterns I'm interested in scanning for.
    &SCANNER( <<'END' );
        &hit if /betty[^\e0]boop/i;
        &hit if /roger[^\e0]rabbit/i;
        &hit if /\ebw.*coyote\eb/i;

        if (/\ebacme\eb/i) {{
            next if $` =~ /roadrunner\e@$/;
            &hit;
        }}

        if (/\eblooney\eb/i) {{
            next if $' =~ /^\es*bin/;
            next if $` =~ /!$/;
            next if $` =~ /fudd\e@/i;
            next if $nglist =~ /alt\e.crazy\e.people/;
            &hit;
        }}
    END

@.fi
The argument to the &NGSKIP routine is a sequence of zero or more Perl
statements that call &skip if the current article is crossposted to a
newsgroup that we don't want to scan (generally because we'll read the
newsgroup anyway).  Before your code is called, the $_ variable is
automatically set to the list of newsgroups from the history file.
You can invert the logic and just pick the newsgroups you want by
saying:
@.nf


@.ne 9
    &NGSKIP(<<'END');
        study;
        {
            last if /comp\e.lang\e.perl/;
            last if /comp\e.org\e.usenix/;
            last if /comp\e.unix\e.bsd/;
            &skip;
        }
    END

@.fi
@.PP
The argument to the &SCANNER routine is a sequence of one or more Perl
statements that call &hit if an interesting pattern has been spotted.
Before your code is called, the $_ variable is set to the current paragraph
of the current article (articles are read and scanned paragraph by paragraph).
Newsclip will snip excerpts from any paragraph containing patterns,
underlining the patterns it found.  Note that some patterns are
unqualified hits, while others are hits only if some other pattern doesn't
match.  (It's important to the underlining algorithm that the value
of $& continue to contain the value of the last 
@.I successful
match \(em all
of the exceptions are expected to fail.)  For the purposes of exception
scanning, the variables $` and $' are automatically set to the preceding
and subsequent text by the successful initial pattern match.  In addition,
@.I clip 
sets the variable $nglist to the current list of newsgroups, and
the variable $header to be the header of the current article.
@.PP
The use of double curlies is merely to allow the ``next'' command to fall
through to the next test, since ``next'' will simply fall out of an ordinary
block.  The ``last'' command could also have been used, but would be less
intuitive.  Other special thingies you might want to use include \eb
to assert a word boundary, [^\e0] to match any character but a null,
including a newline, and any other regular expression goodies you can
think up to detect spelling errors and variants.  Tom Christiansen
searches for his name with:
@.nf

@.ne 4
    if (/\ebtchrist\eb/i || /tom[^\e0]christ(ia|e)ns[eo]n/i) {{ # grr...
        next if /\en\es*Tom Christiansen\es+tchrist\e@convex.com/;
        &hit;
    }}

@.fi
@.PP
@.I Newsclip
forks another copy of itself,
called
@.IR pmeter ,
which runs 
@.M uptime 1
every two minutes to make sure the load hasn't gone too
high.  If it has, 
@.I pmeter
suspends 
@.I clip 
with a SIGSTOP,
waiting until the load average goes back down before
allowing 
@.I clip 
to continue.
Both 
@.I pmeter
and 
@.I clip
continually
muck with their own externally-visible argument list
to keep folks running
@.M ps 1
amused.  This has no effect if your operating system doesn't
support changes to argv being noticed by ps.
@.PP
Output by default goes to ~/tmp/n0.  When 
@.I clip 
is hit by a SIGHUP,
it renames n8 to n9, n7 to n8, ..., n0 to n1, and creates a new n0
file.  For handiness, when 
@.I clip 
starts up, it creates a tiny script
called ``N'' that will send the SIGHUP to the correct process.  Typically
one runs N in the morning to set up a new n0 file, and then reads ~/tmp/n1
to see what the scanner found since the last time you ran N.  A companion
script, named ``P'', runs 
@.I ps
on the 
@.I clip
and 
@.I pmeter
processes.  (This assumes a BSD-style 
@.I ps 
program.)
@.PP
The first line of each article reference begins with an @, in case you
want to cut and paste that line to something that looks up the article
for you.  I use the following rn macro.  Unfortunately I have to paste
the line twice because of how rn eats typeahead, but hey, it beats a
kick in the head.  This macro should all be on one line \(em it's broken in
two so that nroff doesn't get upset:
@.nf

@.ne 12
@@ %(%m=n?:%(%m!=a?q:)q)%(%"^Jng/art: "=\e([^ @]*\e)/\e([^ ]*\e)?
g%`perl -e '($_="%1")=~tr#/#.#;print'`^J.%2^J:%(%m=n?^L:))

@.fi
@.PP
Interesting variables to set in your .cliprc file include the following:
@.TP 15
@.B $SPOOLDIR
Where news articles are stored; defaults to /usr/spool/news.
@.TP 
@.B $NEWSLIB 
Where Cnews keeps its history file; defaults to /usr/lib/news.
@.TP 
@.B $HOMETMP 
Where clip should put its output files;
defaults to ~/tmp.
@.TP 
@.B $HOMEBIN 
Where clip should install N script;
defaults to ~/bin.
@.TP 
@.B 
$MAXLOAD 
What load average to suspend at; defaults to 3.
@.TP 
@.B $NICE 
What priority to run at; defaults to 16.
@.TP 
@.B $DEBUG
Whether to be noisy; defaults to 0.  A value
with the 1 bit set causes 
@.I clip
to be noisy;
a value with the 2 bit set causes
@.I pmeter
to be noisy.
@.SH ENVIRONMENT
HOME or LOGDIR	
@.SH SIGNALS
@.nf 
SIGHUP	Cycle log files.
SIGUSR1	Re-exec oneself in case .cliprc changes.
SIGALRM	Make clip wake up early.
@.fi
@.SH FILES
@.nf
@.ta \w'$HOME/.cliprc   'u
$HOME/.cliprc	Your clipping preferences.
$HOME/.lastclip	Date and time of last article scanned.
$HOMETMP/n[0-9]	Log files.
$HOMEBIN/N	Cycle log files: n8 -> n9, n7 -> n8, etc.
@.fi 
@.SH AUTHOR
Larry Wall, with heckling by Tom Christiansen.
@.SH "SEE ALSO"
@.M perl 1 ,
@.M rn 1 ,
@.M ps 1 ,
@.M uptime 1 ,
@.M newsmaint 8 .
@.SH DIAGNOSTICS
Obscure diagnostics are an obsolete concept rooted in the notion that
programs must be shoehorned into memory.
@.SH BUGS
There ought to be a way for multiple users to share a single clip process.
@.PP
It doesn't work on NNTP-only systems.
@.ex
@EOF
set `wc -lwc <clip`
if test $1$2$3 != 618245916246
then
	echo ERROR: wc results of clip are $* should be 618 2459 16246
fi

chmod 777 clip

echo x - cliprc
cat >cliprc <<'@EOF'
#$DEBUG =1;
$MAXLOAD = 3;
$SPOOLDIR = "/disc4/usenet/spool/news";          # Where news articles are stored.

&NGSKIP( <<'END' );
&skip if /ott\.events/;
next if /comp\.dcom\.telecom/;
next if /comp\.dcom\.lans\.misc/;
next if /comp\.dcom\.isdn/;
next if /comp\.dcom\.cell-relay/;
next if /comp\.risks/;
next if /misc\.invest/;
next if /can\.vlsi/;
&skip;
END

&SCANNER( <<'END' );
	&hit if /\bdms-100\b/i;
	&hit if /\bdms-10\b/i;
	&hit if /\bsl-1\b/i;
	&hit if /\bdms-250\b/i;
	&hit if /\bdms\b/i;
	&hit if /\boc48\b/i;
	&hit if /\boc12\b/i;
	&hit if /\bfibreworld\b/i;
	&hit if /\baccessnode\b/i;
	&hit if /\bradionode\b/i;
	&hit if /\bsupernode\b/i;
	&hit if /\bbcs\b/i;
	next if /^---*\n/;
  &hit if /\bnorthern[^\0]telecom\b/i;
	&hit if /\bbell[^\0]northern\b/i;
	if (/\bbnr\b/i) {{
		next if $' =~ /\.ca/i;
		/\bbnr\b/i;
		&hit;
	}}
END
@EOF
set `wc -lwc <cliprc`
if test $1$2$3 != 38103843
then
	echo ERROR: wc results of cliprc are $* should be 38 103 843
fi

chmod 666 cliprc

exit 0


