#!@PERL@ # # formfind.pl # # This script gets a HTML page from the specified URL and presents form # information you may need in order to machine-make a respond to the form. # # Written to use 'curl' for URL fetching. # # Author: Daniel Stenberg <Daniel.Stenberg@sth.frontec.se> # Version: 0.1 Nov 12, 1998 # # HISTORY # # 0.1 - Created now! # # TODO # respect file:// URLs for local file fetches! $in=""; $usestdin = 0; if($ARGV[0] eq "" ) { $usestdin = 1; } else { $geturl = $ARGV[0]; } if(($geturl eq "") && !$usestdin) { print "Usage: $0 <full source URL>\n", " Use a traling slash for directory URLs!\n"; exit; } # If you need a proxy for web access, edit your .curlrc file to feature # -x <proxy:port> # linkchecker, URL will be appended to the right of this command line # this is the one using HEAD: $linkcheck = "curl -s -m 20 -I"; # as a second attempt, this will be used. This is not using HEAD but will # get the whole frigging document! $linkcheckfull = "curl -s -m 20 -i"; # htmlget, URL will be appended to the right of this command line $htmlget = "curl -s"; # urlget, URL will be appended to the right of this command line # this stores the file with the remote file name in the current dir $urlget = "curl -O -s"; # Parse the input URL and split it into the relevant parts: sub SplitURL { my $inurl = $_[0]; if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) { $getprotocol = $1; $getserver = $2; $getpath = $3; $getdocument = $4; } elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) { $getprotocol = $1; $getserver = $2; $getpath = $3; $getdocument = ""; if($getpath !~ /\//) { $getpath =""; $getdocument = $3; } } elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) { $getprotocol = $1; $getserver = $2; $getpath = ""; $getdocument = ""; } else { print "Couldn't parse the specified URL, retry please!\n"; exit; } } if(!$usestdin) { &SplitURL($geturl); #print "protocol = $getprotocol\n"; #print "server = $getserver\n"; #print "path = $getpath\n"; #print "document = $getdocument\n"; #exit; open(HEADGET, "$linkcheck $geturl|") || die "Couldn't get web page for some reason"; headget: while(<HEADGET>) { # print $_; if($_ =~ /HTTP\/.*3\d\d /) { $pagemoved=1; } elsif($pagemoved && ($_ =~ /^Location: (.*)/)) { $geturl = $1; &SplitURL($geturl); $pagemoved++; last headget; } } close(HEADGET); if($pagemoved == 1) { print "Page is moved but we don't know where. Did you forget the ", "traling slash?\n"; exit; } open(WEBGET, "$htmlget $geturl|") || die "Couldn't get web page for some reason"; while(<WEBGET>) { $line = $_; push @indoc, $line; $line=~ s/\n//g; $line=~ s/\r//g; # print $line."\n"; $in=$in.$line; } close(WEBGET); } else { while(<STDIN>) { $line = $_; push @indoc, $line; $line=~ s/\n//g; $line=~ s/\r//g; $in=$in.$line; } } getlinkloop: while($in =~ /[^<]*(<[^>]+>)/g ) { # we have a tag in $1 $tag = $1; if($tag =~ /^<!--/) { # this is a comment tag, ignore it } else { if(!$form && ($tag =~ /^< *form/i )) { $method= $tag; if($method =~ /method *=/i) { $method=~ s/.*method *= *(\"|)([^ \">]*).*/$2/gi; } else { $method="get"; # default method } $action= $tag; $action=~ s/.*action *= *(\"|)([^ \">]*).*/$2/gi; $method=uc($method); $enctype=$tag; if ($enctype =~ /enctype *=/) { $enctype=~ s/.*enctype *= *(\'|\"|)([^ \"\'>]*).*/$2/gi; if($enctype eq "multipart/form-data") { $enctype="multipart form upload [use -F]" } $enctype = "\n--- type: $enctype"; } else { $enctype=""; } print "--- FORM report. Uses $method to URL \"$action\"$enctype\n"; # print "TAG: $tag\n"; # print "METHOD: $method\n"; # print "ACTION: $action\n"; $form=1; } elsif($form && ($tag =~ /< *\/form/i )) { # print "TAG: $tag\n"; print "--- end of FORM\n"; $form=0; if( 0 ) { print "*** Fill in all or any of these: (default assigns may be shown)\n"; for(@vars) { $var = $_; $def = $value{$var}; print "$var=$def\n"; } print "*** Pick one of these:\n"; for(@alts) { print "$_\n"; } } undef @vars; undef @alts; } elsif($form && ($tag =~ /^< *(input|select)/i)) { $mtag = $1; # print "TAG: $tag\n"; $name=$tag; if($name =~ /name *=/i) { $name=~ s/.*name *= *(\"|)([^ \">]*).*/$2/gi; } else { # no name given $name=""; } # get value tag $value= $tag; if($value =~ /value *=/i) { $value=~ s/.*value *= *(\"|)([^ \">]*).*/$2/gi; } else { $value=""; } if($mtag =~ /select/i) { print "Select: $name\n"; push @vars, "$name"; $select = 1; } else { $type=$tag; if($type =~ /type *=/i) { $type =~ s/.*type *= *(\"|)([^ \">]*).*/$2/gi; } else { $type="text"; # default type } $type=uc($type); if(lc($type) eq "reset") { # reset types are for UI only, ignore. } elsif($name eq "") { # let's read the value parameter print "Button: \"$value\" ($type)\n"; push @alts, "$value"; } else { $info=""; if($value ne "") { $info="=$value"; } print "Input: $name$info ($type)\n"; push @vars, "$name"; # store default value: $value{$name}=$value; } } } elsif($select && ($tag =~ /^< *\/ *select/i)) { $select = 0; } } }