aboutsummaryrefslogtreecommitdiff
path: root/perl/contrib/getlinks.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'perl/contrib/getlinks.pl.in')
-rw-r--r--perl/contrib/getlinks.pl.in224
1 files changed, 112 insertions, 112 deletions
diff --git a/perl/contrib/getlinks.pl.in b/perl/contrib/getlinks.pl.in
index 0783e2d63..9bdc2bc7b 100644
--- a/perl/contrib/getlinks.pl.in
+++ b/perl/contrib/getlinks.pl.in
@@ -80,32 +80,32 @@ sub SplitURL {
my $inurl = $_[0];
if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = $3;
- $getdocument = $4;
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = $3;
+ $getdocument = $4;
}
elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = $3;
- $getdocument = "";
-
- if($getpath !~ /\//) {
- $getpath ="";
- $getdocument = $3;
- }
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = $3;
+ $getdocument = "";
+
+ if($getpath !~ /\//) {
+ $getpath ="";
+ $getdocument = $3;
+ }
}
elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) {
- $getprotocol = $1;
- $getserver = $2;
- $getpath = "";
- $getdocument = "";
+ $getprotocol = $1;
+ $getserver = $2;
+ $getpath = "";
+ $getdocument = "";
}
else {
- print "Couldn't parse the specified URL, retry please!\n";
- exit;
+ print "Couldn't parse the specified URL, retry please!\n";
+ exit;
}
}
@@ -119,52 +119,52 @@ sub SplitURL {
if(!$usestdin) {
open(HEADGET, "$linkcheck $geturl|") ||
- die "Couldn't get web page for some reason";
+ die "Couldn't get web page for some reason";
headget:
while(<HEADGET>) {
-# print $_;
- if($_ =~ /HTTP\/.*3\d\d /) {
- $pagemoved=1;
- }
- elsif($pagemoved &&
- ($_ =~ /^Location: (.*)/)) {
- $geturl = $1;
-
- &SplitURL($geturl);
-
- $pagemoved++;
- last headget;
- }
+# print $_;
+ if($_ =~ /HTTP\/.*3\d\d /) {
+ $pagemoved=1;
+ }
+ elsif($pagemoved &&
+ ($_ =~ /^Location: (.*)/)) {
+ $geturl = $1;
+
+ &SplitURL($geturl);
+
+ $pagemoved++;
+ last headget;
+ }
}
close(HEADGET);
if($pagemoved == 1) {
- print "Page is moved but we don't know where. Did you forget the ",
- "traling slash?\n";
- exit;
+ print "Page is moved but we don't know where. Did you forget the ",
+ "traling slash?\n";
+ exit;
}
open(WEBGET, "$htmlget $geturl|") ||
- die "Couldn't get web page for some reason";
+ die "Couldn't get web page for some reason";
while(<WEBGET>) {
- $line = $_;
- push @indoc, $line;
- $line=~ s/\n//g;
- $line=~ s/\r//g;
-# print $line."\n";
- $in=$in.$line;
+ $line = $_;
+ push @indoc, $line;
+ $line=~ s/\n//g;
+ $line=~ s/\r//g;
+# print $line."\n";
+ $in=$in.$line;
}
close(WEBGET);
}
else {
while(<STDIN>) {
- $line = $_;
- push @indoc, $line;
- $line=~ s/\n//g;
- $line=~ s/\r//g;
- $in=$in.$line;
+ $line = $_;
+ push @indoc, $line;
+ $line=~ s/\n//g;
+ $line=~ s/\r//g;
+ $in=$in.$line;
}
}
@@ -174,43 +174,43 @@ sub GetLinks {
getlinkloop:
while($in =~ /[^<]*(<[^>]+>)/g ) {
- # we have a tag in $1
- $tag = $1;
-
- if($tag =~ /^<!--/) {
- # this is a comment tag, ignore it
- }
- else {
- if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
- $url=$2;
- if($url =~ /^\"(.*)\"$/) {
- # this was a "string" now $1 has removed the quotes:
- $url=$1;
- }
-
-
- $url =~ s/([^\#]*)\#.*/$1/g;
-
- if($url eq "") {
- # if the link was nothing than a #-link it may now have
- # been emptied completely so then we skip the rest
- next getlinkloop;
- }
-
- if($done{$url}) {
- # if this url already is done, do next
- $done{$url}++;
- next getlinkloop;
- }
-
- $done{$url} = 1; # this is "done"
-
- push @result, $url;
- if($tag =~ /< *([^ ]+)/) {
-# print "TAG: $1\n";
- $tagtype{$url}=$1;
- }
- }
+ # we have a tag in $1
+ $tag = $1;
+
+ if($tag =~ /^<!--/) {
+ # this is a comment tag, ignore it
+ }
+ else {
+ if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
+ $url=$2;
+ if($url =~ /^\"(.*)\"$/) {
+ # this was a "string" now $1 has removed the quotes:
+ $url=$1;
+ }
+
+
+ $url =~ s/([^\#]*)\#.*/$1/g;
+
+ if($url eq "") {
+ # if the link was nothing than a #-link it may now have
+ # been emptied completely so then we skip the rest
+ next getlinkloop;
+ }
+
+ if($done{$url}) {
+ # if this url already is done, do next
+ $done{$url}++;
+ next getlinkloop;
+ }
+
+ $done{$url} = 1; # this is "done"
+
+ push @result, $url;
+ if($tag =~ /< *([^ ]+)/) {
+# print "TAG: $1\n";
+ $tagtype{$url}=$1;
+ }
+ }
}
}
return @result;
@@ -223,38 +223,38 @@ for(@links) {
$url = $_;
if($url =~ /^([^:]+):/) {
- $link = $url;
+ $link = $url;
}
else {
- # this is an absolute link on the same server:
- if($url =~ /^\//) {
- # from root
- $link = "$getprotocol://$getserver$url";
- }
- else {
- # from the scanned page's dir
- $nyurl=$url;
-
- if(length($getpath) &&
- ($getpath !~ /\/$/) &&
- ($nyurl !~ /^\//)) {
- # lacks ending slash, add one to the document part:
- $nyurl = "/".$nyurl;
- }
- $link = "$getprotocol://$getserver/$getpath$nyurl";
- }
+ # this is an absolute link on the same server:
+ if($url =~ /^\//) {
+ # from root
+ $link = "$getprotocol://$getserver$url";
+ }
+ else {
+ # from the scanned page's dir
+ $nyurl=$url;
+
+ if(length($getpath) &&
+ ($getpath !~ /\/$/) &&
+ ($nyurl !~ /^\//)) {
+ # lacks ending slash, add one to the document part:
+ $nyurl = "/".$nyurl;
+ }
+ $link = "$getprotocol://$getserver/$getpath$nyurl";
+ }
}
if($link =~ /$getregex/) {
- if($display) {
- print "$link\n";
- }
- else {
- if($verbose) {
- print "Gets $link\n";
- }
- print `$urlget $link`;
- }
+ if($display) {
+ print "$link\n";
+ }
+ else {
+ if($verbose) {
+ print "Gets $link\n";
+ }
+ print `$urlget $link`;
+ }
}