From 46b112bcd439f4413925a7300d66a3e6f148765e Mon Sep 17 00:00:00 2001 From: Yang Tse Date: Tue, 16 Feb 2010 13:32:45 +0000 Subject: replaced tabs with spaces --- perl/contrib/crawlink.pl | 166 +++++++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 83 deletions(-) (limited to 'perl/contrib/crawlink.pl') diff --git a/perl/contrib/crawlink.pl b/perl/contrib/crawlink.pl index ba10b8b7c..8cb239a62 100755 --- a/perl/contrib/crawlink.pl +++ b/perl/contrib/crawlink.pl @@ -125,32 +125,32 @@ my $badlinks=0; sub SplitURL { my $inurl = $_[0]; if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) { - $getprotocol = $1; - $getserver = $2; - $getpath = $3; - $getdocument = $4; + $getprotocol = $1; + $getserver = $2; + $getpath = $3; + $getdocument = $4; } elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) { - $getprotocol = $1; - $getserver = $2; - $getpath = $3; - $getdocument = ""; - - if($getpath !~ /\//) { - $getpath =""; - $getdocument = $3; - } + $getprotocol = $1; + $getserver = $2; + $getpath = $3; + $getdocument = ""; + + if($getpath !~ /\//) { + $getpath =""; + $getdocument = $3; + } } elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) { - $getprotocol = $1; - $getserver = $2; - $getpath = ""; - $getdocument = ""; + $getprotocol = $1; + $getserver = $2; + $getpath = ""; + $getdocument = ""; } else { - print "Couldn't parse the specified URL, retry please!\n"; - exit; + print "Couldn't parse the specified URL, retry please!\n"; + exit; } } @@ -164,35 +164,35 @@ sub GetRootPage { my $pagemoved=0; open(HEADGET, "$linkcheck $geturl|") || - die "Couldn't get web page for some reason"; + die "Couldn't get web page for some reason"; while() { - #print STDERR $_; - if($_ =~ /HTTP\/1\.[01] (\d\d\d) /) { + #print STDERR $_; + if($_ =~ /HTTP\/1\.[01] (\d\d\d) /) { $code=$1; if($code =~ /^3/) { $pagemoved=1; } - } + } elsif($_ =~ /^Content-Type: ([\/a-zA-Z]+)/) { $type=$1; } - elsif($pagemoved && - ($_ =~ /^Location: (.*)/)) { - $geturl = $1; + elsif($pagemoved && + ($_ =~ /^Location: (.*)/)) { + $geturl = $1; - &SplitURL($geturl); + &SplitURL($geturl); - $pagemoved++; - last; - } + $pagemoved++; + last; + } } close(HEADGET); if($pagemoved == 1) { - print "Page is moved but we don't know where. Did you forget the ", - "traling slash?\n"; - exit; + print "Page is moved but we don't know where. Did you forget the ", + "traling slash?\n"; + exit; } if($type ne "text/html") { @@ -229,21 +229,21 @@ sub LinkWorks { boo: if( $doc[0] =~ /^HTTP[^ ]+ (\d+)/ ) { - my $error = $1; - - if($error < 400 ) { - return "GOOD"; - } - else { - - if($head && ($error >= 500)) { - # This server doesn't like HEAD! - @doc = `$linkcheckfull \"$check\"`; - $head = 0; - goto boo; - } - return "BAD"; - } + my $error = $1; + + if($error < 400 ) { + return "GOOD"; + } + else { + + if($head && ($error >= 500)) { + # This server doesn't like HEAD! + @doc = `$linkcheckfull \"$check\"`; + $head = 0; + goto boo; + } + return "BAD"; + } } return "BAD"; } @@ -254,45 +254,45 @@ sub GetLinks { my @result; while($in =~ /[^<]*(<[^>]+>)/g ) { - # we have a tag in $1 - my $tag = $1; - - if($tag =~ /^