#!/homes/www078/mbcmp/bin/perl
#
#---HOSTINFO.PL   Display important owner organization info about a host.
#
#   Invoked as:
#      hostinfo.pl hostname
#
#   Arguments:
#      hostname     full internet hostname, or ip number
#
#   Purpose:
#      Display organization information about 'hostname'.
#      Also serves as a manual check on the information
#      generated from the 'clicklog.pl' program.
#
#   How it works:
#      The argument is first resolved so that both the hostname and the
#      IP number are known (reverse lookup for an IP number, 'nslookup'
#      for a name).
#      From there, it parses the domain and subdomain of the
#      host, and attempts to determine the organization information
#      about the owner of the subdomain, and log that information.
#      (By using the domain name to select the proper "whois" host
#      and asking it for the information about the subdomain owner.)
#
#      This organization information is stored in a 'cache' file
#      to speed future lookups on that subdomain, and in general
#      to be a good network citizen.
#
#      Current code knows about the following domains:
#         com, edu, net, org, gov, mil, us, jp
#      any other domain is assumed to be in Europe (e.g., ".fr").
#      This leaves out the asia/pacific domains,
#      for which this author cannot yet find a working 'whois' server.
#
#   Notes: specific path references (e.g. /homes/www078) are
#      tied to installation on a specific shared web-server host, and
#      should be generalized for independent use.
#
#   History of revisions:
#: RS  5/11/98 13:50 Began life as tiny 'clicklog' program by Randall Schwartz
#: CR  5/28/98 14:08 hostname, org info, "arg" code
#: CR  7/21/98 16:25 expand org info lookup and caching.
#: CR  7/30/98 16:25 rearrange entire set of functions as hostinfo.pl
#----------------------------------------------------------------------

# Package global read by org_info() to locate the 'whois' client.
$main::global_whois = "/homes/www078/mbcmp/bin/whois";
#$main::global_whois = "/bin/whois";

# Strictures are scoped to the main program block only; code elsewhere in
# the file opts in on its own.
{
    use strict;
    use warnings;
    use Socket qw(AF_INET);

    my $cachefile = "/homes/www078/mbcmp/logs/cache";

    # $ARGV[0] (scalar element), not the one-element slice @ARGV[0].
    my $target = defined $ARGV[0] ? $ARGV[0] : "";

    # The argument ends up on the command line of external programs, so
    # only accept characters that can appear in a hostname or IP number.
    # The first character must not be '-', which a tool could read as an
    # option.
    if ($target !~ /\A[A-Za-z0-9_][A-Za-z0-9_.\-]*\z/) {
        print "Usage: hostinfo.pl host_name\n";
        print " or hostinfo.pl ip_address\n";
        exit;
    }

    #---From the 1st argument, determine the IP number and the HOSTNAME.
    my $hostname = "";
    my $ip       = "";

    if ($target =~ /\A[0-9.]+\z/) {
        # Only digits and dots: treat it as an IP number.
        $ip = $target;
        my @octets = split /\./, $ip;
        my $well_formed = @octets == 4
            && !grep { $_ eq "" || $_ > 255 } @octets;
        if ($well_formed) {
            my $name = gethostbyaddr(pack('C4', @octets), AF_INET);
            $hostname = $name if defined $name;
        }
    }
    else {
        $hostname = $target;

        # List-form pipe open: the argument never passes through a shell.
        if (open(my $lookup, '-|', 'nslookup', $target)) {
            while (my $line = <$lookup>) {
                # The last "Address:" line wins; earlier ones may
                # describe the name server itself.
                if ($line =~ s/^Address: //) {
                    $ip = $line;
                    $ip =~ s/ //g;
                    chomp($ip);
                }
            }
            close $lookup;
        }
        else {
            warn "hostinfo.pl: cannot run nslookup: $!\n";
        }
    }

    print "Hostname = ", $hostname, "\n";
    print "IP addr = ", $ip, "\n";

    # Nothing more can be done without a hostname.
    exit if $hostname eq "";

    my %cache = ();
    load_cache(\%cache, $cachefile);
    my $cache_size0 = keys %cache;

    my $orginfo = org_info($hostname, $ip, \%cache, 0);
    $orginfo = "" unless defined $orginfo;

    #---If size of cache changed, we got a new entry; write it out.
    my $cache_size1 = keys %cache;
    if ($cache_size1 != $cache_size0) {
        store_cache(\%cache, $cachefile);
    }

    if ($orginfo eq "") {
        print "Unknown\n";
    }
    else {
        print $orginfo, "\n";
    }

    exit;
}

#-------------------------------------------------------
#---org_info (hostname, ip, cache, retry)
#   input: host name (e.g. "gamgee.acad.emich.edu"), cache,
#          number of times to retry if whois host is busy.
#   output: 'whois' information about the sub-domain that owns hostname,
#          as one long line of text; or empty string if no info available.
#
#   Notes on org_info:
#     * ip is the host's IP number; it is the whois query for ".ca" hosts
#       and the last-resort query for otherwise unknown top-level domains.
#     * cache is a hash reference mapping a domain (e.g. "emich.edu") to
#       its org info; new results are added to it in place.
#     * The result may also be the literal string "Too_Busy" when the
#       whois server reported overload; that value is never cached.
#     * The whois client path is read from $main::global_whois.
#
use strict;
use warnings;
use Fcntl qw(:flock);

sub org_info {
    my ($hostn, $ip, $cache, $retry) = @_;
    my $whois = $main::global_whois;

    # DNS names are case-insensitive; lower-casing every label keeps cache
    # keys stable regardless of how the name was typed.
    my @nameparts = grep { $_ ne "" }
                    split /\./, lc(defined $hostn ? $hostn : "");
    return "" unless @nameparts;

    # The table is built inside the sub on purpose: the main program
    # exits before file-level initialisers placed after it would run.
    #   tld => [ whois server, trailing labels forming the cache key,
    #            true when the server must be asked about the IP number ]
    my %route = (
        edu => [ "whois.internic.net", 2, 0 ],
        net => [ "whois.internic.net", 2, 0 ],
        com => [ "whois.internic.net", 2, 0 ],
        org => [ "whois.internic.net", 2, 0 ],
        gov => [ "nic.gov",            2, 0 ],
        us  => [ "whois.isi.edu",      3, 0 ],   # xyz.state.us
        jp  => [ "whois.nic.ad.jp",    3, 0 ],   # xyz.co.jp
        mil => [ "whois.nic.mil",      3, 0 ],   # xyz.service.mil
        ca  => [ "whois.arin.net",     2, 1 ],   # canada: by IP number
    );

    if (my $rule = $route{ $nameparts[-1] }) {
        my ($server, $labels, $by_ip) = @$rule;
        my $domain = _tail_domain(\@nameparts, $labels);

        my $cached = _cache_get($cache, $domain);
        return $cached if $cached ne "";

        my $orginfo = whois_parse($whois, $server,
                                  $by_ip ? $ip : $domain, $retry);
        _cache_put($cache, $domain, $orginfo);
        return $orginfo;
    }

    #---International (well, Europe, really)
    #   Try abc.xyz.fr first, then xyz.fr.  A host with fewer than three
    #   labels (def.de) starts directly at the two-label form.
    my $server  = "whois.ripe.net";
    my $domain  = "";
    my $orginfo = "";
    foreach my $labels (3, 2) {
        next if $labels == 3 && @nameparts < 3;
        $domain = _tail_domain(\@nameparts, $labels);

        my $cached = _cache_get($cache, $domain);
        return $cached if $cached ne "";

        $orginfo = whois_parse($whois, $server, $domain, $retry);
        return $orginfo if _cache_put($cache, $domain, $orginfo);
    }

    #---If xyz.fr fails, try the IP number instead (cached under xyz.fr).
    $orginfo = whois_parse($whois, $server, $ip, $retry);
    _cache_put($cache, $domain, $orginfo);
    return $orginfo;
}

#---_tail_domain (parts, count)
#   Join the last 'count' labels of @$parts with dots, or all of them when
#   fewer are available (avoids negative-index wrap-around).
sub _tail_domain {
    my ($parts, $count) = @_;
    my $first = @$parts - $count;
    $first = 0 if $first < 0;
    return join '.', @{$parts}[ $first .. $#{$parts} ];
}

#---_cache_get (cache, domain)
#   Return the cached org info for 'domain', or "" when there is none.
sub _cache_get {
    my ($cache, $domain) = @_;
    my $hit = $$cache{$domain};
    return defined $hit ? $hit : "";
}

#---_cache_put (cache, domain, orginfo)
#   Store 'orginfo' under 'domain' when it is real data (neither empty nor
#   "Too_Busy").  Returns true when the value was stored.
sub _cache_put {
    my ($cache, $domain, $orginfo) = @_;
    return 0 unless defined $orginfo;
    return 0 if $orginfo eq "" || $orginfo eq "Too_Busy";
    $$cache{$domain} = $orginfo;
    return 1;
}

#---whois_parse ($whois_com, $whois_host, $domain, $retry)
#   input: command used to run 'whois' (program plus optional
#          whitespace-separated arguments), whois server to ask,
#          domain (or IP number) to ask about,
#          number of times to retry if whois host is busy.
#   output: string, empty (if whois failed), or "Too_Busy"
#          (if the whois site was too busy), else stripped-down relevant
#          text with newlines replaced by spaces.
#
sub whois_parse {
    my ($whois_com, $whois_host, $domain, $retry) = @_;

    # Nothing to ask about, or something that whois would read as an option.
    return "" unless defined $domain && $domain ne "";
    return "" if $domain =~ /^-/;
    $retry = 0 unless defined $retry;

    my $query         = $domain;
    my $oinfo         = "";
    my $followed_up   = 0;             # already re-queried with "!handle"?
    my $attempts_left = $retry + 1;

    while ($attempts_left-- > 0) {
        my @whois = _run_whois($whois_com, $whois_host, $query);
        my $handle = "";
        $oinfo = "";

        foreach my $line (@whois) {
            #---Failure: no match
            return "" if $line =~ /No match/;

            #---Lines that should be ignored; if no real lines, then failure.
            next if $line =~ /^%/;
            next if $line =~ /^ *$/;
            next if $line =~ /^\[/;
            next if $line =~ /^tech-c/;
            next if $line =~ /^zone-c/;
            next if $line =~ /^server/;
            next if $line =~ /^mnt-by/;
            next if $line =~ /^source/;
            next if $line =~ /^nic-hdl/;

            #---Lines indicating the end of real data; we're done.
            last if $line =~ /domain server/i;
            last if $line =~ /last update/i;
            last if $line =~ /contains only/i;

            #---Words in ()'s may be a domain reference (e.g., BU-DOM).
            #   Save the first such word, or any that sits on a line
            #   containing "-CA" (canada domain is weird).  The
            #   parentheses are matched literally.
            if ($line =~ /\(([A-Z0-9\-]+)\)/) {
                my $inparen = $1;
                if ($handle eq "" || $line =~ /-CA/) {
                    $handle = $inparen;
                }
            }

            $oinfo .= $line;
        }
        $oinfo =~ s/\n/ /g;

        #---If we got a 'multiple record' entry, try again with
        #   the "!xxx" code.  Only one such follow-up is made, so a server
        #   that keeps answering with a listing cannot loop us forever.
        if ($oinfo =~ /single out one record/) {
            return "" if $followed_up || $handle eq "";
            $query       = "!" . $handle;
            $followed_up = 1;
            $attempts_left += 1;       # the follow-up is not a "retry"
        }

        #---If the server was busy, try again in a while -- but do not
        #   wait when no further attempt will be made.
        elsif ($oinfo =~ /system load is temporarily too heavy/) {
            $oinfo = "Too_Busy";
            sleep 600 if $attempts_left > 0;
        }

        #---Otherwise, we got data!
        else {
            return $oinfo;
        }
    }
    return $oinfo;
}

#---_run_whois (whois_com, whois_host, query)
#   Run the whois client without going through a shell and return its
#   output lines; returns an empty list when the client cannot be started.
sub _run_whois {
    my ($whois_com, $whois_host, $query) = @_;
    return () unless defined $whois_com && defined $whois_host;

    my @cmd = split ' ', $whois_com;
    return () unless @cmd;

    # List-form pipe open: arguments are passed verbatim to the program.
    open(my $out, '-|', @cmd, '-h', $whois_host, $query) or return ();
    my @lines = <$out>;
    close $out;
    return @lines;
}

#---load_cache (cache, fname)
#   input: reference to the cache hash, name of the cache file.
#   Reads "domain<TAB>org info" lines from the file into the hash.
#   A missing or unreadable file is not an error: the cache is left as is.
#
sub load_cache {
    my ($cache, $fname) = @_;

    open(my $cfile, '<', $fname) or return;
    flock($cfile, LOCK_SH);            # readers only need a shared lock

    while (my $line = <$cfile>) {
        chomp $line;
        my ($subdomain, $oinfo) = split /\t/, $line, 2;

        # Skip blank or malformed lines instead of caching undef.
        next unless defined $subdomain && $subdomain ne "";
        next unless defined $oinfo;

        $$cache{$subdomain} = $oinfo;
    }

    close $cfile;
    return;
}

#---store_cache (cache, fname)
#   input: reference to the cache hash, name of the cache file.
#   Rewrites the file with one "domain<TAB>org info" line per entry.
#   Returns quietly when the file cannot be opened.
#
sub store_cache {
    my ($cache, $fname) = @_;

    # Open without truncating, take the lock, and only then empty the
    # file, so a concurrent reader never sees a half-written cache.
    open(my $cfile, '>>', $fname) or return;
    flock($cfile, LOCK_EX);            # advisory; best effort
    truncate($cfile, 0);
    seek($cfile, 0, 0);

    foreach my $key (sort keys %$cache) {
        next unless defined $$cache{$key};
        print {$cfile} $key, "\t", $$cache{$key}, "\n";
    }

    close $cfile or warn "store_cache: cannot write $fname: $!\n";
    return;
}

#------------------------------------------------------------------