?? urifind
字號:
#!/usr/local/bin/perl -w# ----------------------------------------------------------------------# $Id: urifind,v 1.2 2003/07/10 20:54:09 dlc Exp $ # ----------------------------------------------------------------------# urifind - find URIs in a document and dump them to STDOUT.# Copyright (C) 2003 darren chamberlain <darren@cpan.org># ----------------------------------------------------------------------use strict;use vars qw($VERSION $REVISION);$VERSION = 1.00;$REVISION = sprintf "%d.%02d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;use File::Basename qw(basename);use Getopt::Long qw(GetOptions);use IO::File;use URI::Find;# What to do, and howmy $help = 0;my $version = 0;my $sort = 0;my $reverse = 0;my $unique = 0;my $prefix = 0;my $noprefix = 0;my @pats = ();my @schemes = ();my $dump = 0;Getopt::Long::Configure(qw{no_ignore_case bundling});GetOptions('s!' => \$sort, 'u!' => \$unique, 'p!' => \$prefix, 'n!' => \$noprefix, 'r!' => \$reverse, 'h!' => \$help, 'v!' => \$version, 'd!' => sub { $dump = 1 }, 'D!' => sub { $dump = 2 }, 'P=s@' => \@pats, 'S=s@' => \@schemes);if ($help || $version) { my $prog = basename($0); if ($help) { print <<HELP;$prog - find URIs in a document and dump them to STDOUT. $prog [OPTIONS] file1 [file2[, file3[, ...]]]Options: -s Sort results. -r Reverse sort results (implies -s). -u Return unique results only. -n Don't include filename in output. -p Include filename in output (0 by default, but 1 if multiple files are included on the command line). -P \$re Print only lines matching regex '\$re' (may be specified multiple times). -S \$scheme Only this scheme (may be specified multiple times). -h This help screen. -v Display version and exit. -d Dump compiled regexes and continue. -D Dump compiled regexes and exit.HELP } else { printf "$prog v.%.02f\n", $VERSION; } exit(0);}my (@uris, $count);unshift @ARGV, \*STDIN unless @ARGV;if (($prefix + $noprefix) > 1) { my $prog = basename $0; die "Can't specify -p and -n at the same time; try $prog -h\n";}# Print filename with matches? -p / -n# If there is more than one file, then show filenames by# default, unless explicitly asked not to (-n)if (@ARGV > 1) { $prefix = 1 unless $noprefix;}else { $prefix = 0 unless $prefix;}# Add schemes to the list of regexenif (@schemes) { unshift @pats => sprintf '^(\b%s\b):' => join '\b|\b' => @schemes;}# If we are dumping (-d, -D), then dump. Exit if -D.if ($dump) { print STDERR "\$scheme = '" . (defined $pats[0] ? $pats[0] : '') . "'\n"; print STDERR "\@pats = ('" . join("', '", @pats) . "')\n"; exit if $dump == 2;}# Find the URIsfor my $argv (@ARGV) { my ($name, $fh, $data); $argv = \*STDIN if ($argv eq '-'); if (ref $argv eq 'GLOB') { local $/; $data = <$argv>; $name = '<stdin>' } else { local $/; $fh = IO::File->new($argv) or die "Can't open $argv: $!"; $data = <$fh>; $name = $argv; } my $finder = URI::Find->new(sub { push @uris => [ $name, $_[0] ] }); $finder->find(\$data);}# Apply patterns, in @patsfor my $pat (@pats) { @uris = grep { $_->[1] =~ /$pat/ } @uris;}# Remove redundant linksif ($unique) { my %unique; @uris = grep { ++$unique{$_->[1]} == 1 } @uris;}# Sort links, possibly in reverseif ($sort || $reverse) { if ($reverse) { @uris = sort { $b->[1] cmp $a->[1] } @uris; } else { @uris = sort { $a->[1] cmp $b->[1] } @uris; }}# Flatten the arrayrefsif ($prefix) { @uris = map { join ': ' => @$_ } @uris;}else { @uris = map { $_->[1] } @uris;}print map { "$_\n" } @uris;exit 0;__END__=head1 NAMEurifind - find URIs in a document and dump them to STDOUT.=head1 SYNOPSIS $ urifind file=head1 DESCRIPTIONF<urifind> is a simple script that finds URIs in one or more files(using C<URI::Find>), and outputs them to to STDOUT. That's it.To find all the URIs in F<file1>, use: $ urifind file1To find the URIs in multiple files, simply list them as arguments: $ urifind file1 file2 file3F<urifind> will read from C<STDIN> if no files are given or if afilename of C<-> is specified: $ wget http://www.boston.com/ -O - | urifindWhen multiple files are listed, F<urifind> prefixes each found URIwith the file from which it came: $ urifind file1 file2 file1: http://www.boston.com/index.html file2: http://use.perl.org/This can be turned on for single files with the C<-p> ("prefix") switch: $urifind -p file3 file1: http://fsck.com/rt/It can also be turned off for multiple files with the C<-n> ("noprefix") switch: $ urifind file1 file2 http://www.boston.com/index.html http://use.perl.org/By default, URIs will be displayed in the order found; to sort themascii-betically, use the C<-s> ("sort") option. To reverse sort them,use the C<-r> ("reverse") flag (C<-r> implies C<-s>). $ urifind -s file1 file2 http://use.perl.org/ http://www.boston.com/index.html mailto:webmaster@boston.com $ urifind -r file1 file2 mailto:webmaster@boston.com http://www.boston.com/index.html http://use.perl.org/Finally, F<urifind> supports limiting the returned URIs by scheme orby arbitrary pattern, using the C<-S> option (for schemes) and theC<-P> option. Both C<-S> and C<-P> can be specified multiple times: $ urifind -S mailto file1 mailto:webmaster@boston.com $ urifind -S mailto -S http mailto:webmaster@boston.com http://www.boston.com/index.htmlC<-P> takes an arbitrary Perl regex. It might need to be protectedfrom the shell: $ urifind -P 's?html?' file1 http://www.boston.com/index.html $ urifind -P '\.org\b' -S http file4 http://www.gnu.org/software/wget/wget.htmlAdd a C<-d> to have F<urifind> dump the refexen generated from C<-S>and C<-P> to C<STDERR>. C<-D> does the same but exits immediately: $ urifind -P '\.org\b' -S http -D $scheme = '^(\bhttp\b):' @pats = ('^(\bhttp\b):', '\.org\b')To remove duplicates from the results, use the C<-u> ("unique")switch.=head1 OPTION SUMMARY=over 4=item -sSort results.=item -rReverse sort results (implies -s).=item -uReturn unique results only.=item -nDon't include filename in output.=item -pInclude filename in output (0 by default, but 1 if multiple files areincluded on the command line).=item -P $rePrint only lines matching regex '$re' (may be specified multiple times).=item -S $schemeOnly this scheme (may be specified multiple times).=item -hHelp summary.=item -vDisplay version and exit.=item -dDump compiled regexes for C<-S> and C<-P> to C<STDERR>.=item -DSame as C<-d>, but exit after dumping.=back=head1 VERSIONThis is F<urifind>, revision $Revision: 1.2 $.=head1 AUTHORdarren chamberlain E<lt>darren@cpan.orgE<gt>=head1 COPYRIGHT(C) 2003 darren chamberlainThis library is free software; you may distribute it and/or modify itunder the same terms as Perl itself.=head1 SEE ALSOL<Perl>
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -