# These are the spiders (robots) recognized by Sawmill.  Each entry in
# the "spiders" group below has two values:
#
#   label      the name of the spider
#   substring  a value that must be part of the USER-AGENT field for
#              Sawmill to consider the log entry as a hit from the spider
#
# This list is based on the one found at http://www.robotstxt.org/wc/active.html.
#
# If you know of another well-known spider, and if you create your own
# entry for it below, please send it also to sawmill@flowerfire.com, so
# we can add it to later versions.

spiders = {

  msie_crawler = {
    label = "Internet Explorer Crawler"
    substring = "MSIECrawler"
  }

  msn_robot = {
    label = "MSN Robot"
    substring = "msnbot"
  }

  yahoo_slurp = {
    label = "Yahoo Slurp"
    substring = "Yahoo! Slurp"
  }

  wget = {
    label = "Wget"
    substring = "Wget"
  }

  atomz = {
    label = "Atomz.com Search Robot"
    substring = "Atomz"
  }

  netmechanic = {
    label = "NetMechanic"
    substring = "NetMechanic"
  }

  # NOTE(review): "Slurp" is also contained in "Yahoo! Slurp" user agents.
  # Presumably entries are tried in file order, in which case yahoo_slurp
  # must stay above this entry -- confirm before reordering.
  inktomi_slurp = {
    label = "Inktomi Slurp"
    substring = "Slurp"
  }

  gulliver = {
    label = "Northern Light Gulliver"
    substring = "Gulliver"
  }

  googlebot = {
    label = "Googlebot"
    substring = "Googlebot"
  }

  scooter = {
    label = "Scooter"
    substring = "Scooter"
  }

  linkwalker = {
    label = "LinkWalker"
    substring = "LinkWalker"
  }

  wwwc = {
    label = "WWWC"
    substring = "WWWC"
  }

  lycos = {
    label = "Lycos"
    substring = "Lycos"
  }

  arks = {
    label = "arks"
    substring = "arks"
  }

  diibot = {
    label = "Digital Integrity Robot"
    substring = "DIIbot"
  }

  architext_spider = {
    label = "ArchitextSpider"
    substring = "ArchitextSpider"
  }

  voyager = {
    label = "Voyager"
    substring = "Voyager"
  }

  xyleme = {
    label = "XYLEME Robot"
    substring = "cosmos"
  }

  collective = {
    label = "Collective or e-collector"
    substring = "LWP"
  }

  griffon = {
    label = "Griffon"
    substring = "griffon"
  }

  appie = {
    label = "Walhello appie"
    substring = "appie"
  }

  larbin = {
    label = "larbin"
    substring = "larbin"
  }

  titan = {
    label = "TITAN"
    substring = "TITAN"
  }

  htdig = {
    label = "ht://Dig"
    substring = "htdig"
  }

  iron33 = {
    label = "Iron33"
    substring = "Iron33"
  }

  moget = {
    label = "moget"
    substring = "moget"
  }

  openfind = {
    label = "Openfind data gatherer"
    substring = "Openfind data gatherer"
  }

  spiderman = {
    label = "SpiderMan"
    substring = "SpiderMan"
  }

  marvin = {
    label = "marvin/infoseek"
    substring = "marvin"
  }

  robozilla = {
    label = "Robozilla"
    substring = "Robozilla"
  }

  teoma = {
    label = "TeomaTechnologies"
    substring = "teoma_agent1"
  }

  ask_jeeves_teoma = {
    label = "Ask Jeeves/Teoma"
    substring = "Teoma"
  }

  url_spider_pro = {
    label = "URL Spider Pro"
    substring = "URL Spider Pro"
  }

  checkbot = {
    label = "Checkbot"
    substring = "Checkbot"
  }

  mediapartners_google = {
    label = "Googlebot"
    substring = "Mediapartners-Google"
  }

  ##
  ## POLISH
  ##

#  wp_pl = {
#    label = "WP.PL"
#    substring = "NetSprint"
#  }

#  inktomi_onet_pl = {
#    label = "Inktomi - Onet.pl"
#    substring = "Inktomi"
#  }

#  gooru = {
#    label = "Gooru"
#    substring = "Gooru"
#  }

  # The spiders below this point are rare, and have been
  # disabled to increase Sawmill's log processing performance.
  # You can track these spiders by removing the pound (#) characters
  # in front of each spider line.  You can also add your own.

#  ahoy = {
#    label = "Ahoy! The Homepage Finder"
#    substring = "Ahoy! The Homepage Finder"
#  }

#  alkaline = {
#    label = "Alkaline"
#    substring = "AlkalineBOT"
#  }

#  arachnophilia = {
#    label = "Arachnophilia"
#    substring = "Arachnophilia"
#  }

#  aspider = {
#    label = "ASpider"
#    substring = "ASpider"
#  }

#  atn_worldwide = {
#    label = "ATN Worldwide"
#    substring = "ATN_Worldwide"
#  }

#  auresys = {
#    label = "AURESYS"
#    substring = "AURESYS"
#  }

#  backrub = {
#    label = "BackRub"
#    substring = "BackRub"
#  }

#  bayspider = {
#    label = "BaySpider"
#    substring = "BaySpider"
#  }

#  big_brother = {
#    label = "Big Brother"
#    substring = "Big Brother"
#  }

#  bjaaland = {
#    label = "Bjaaland"
#    substring = "Bjaaland"
#  }

#  blackwidow = {
#    label = "BlackWidow"
#    substring = "BlackWidow"
#  }

#  die_blinde_kuh = {
#    label = "Die Blinde Kuh"
#    substring = "Die Blinde Kuh"
#  }

#  bspider = {
#    label = "BSpider"
#    substring = "BSpider"
#  }

#  cactvs_chemistry_spider = {
#    label = "CACTVS Chemistry Spider"
#    substring = "CACTVS Chemistry Spider"
#  }

#  calif = {
#    label = "Calif"
#    substring = "Calif"
#  }

#  digimarc_cgireader = {
#    label = "Digimarc Marcspider/CGI"
#    substring = "Digimarc CGIReader"
#  }

#  cmc = {
#    label = "CMC/0.01"
#    substring = "CMC"
#  }

#  combine = {
#    label = "Combine System"
#    substring = "combine"
#  }

#  conceptbot = {
#    label = "Conceptbot"
#    substring = "conceptbot"
#  }

#  coolbot = {
#    label = "CoolBot"
#    substring = "CoolBot"
#  }

#  web_core_roots = {
#    label = "Web Core / Roots"
#    substring = "root"
#  }

#  internet_cruiser_robot = {
#    label = "Internet Cruiser Robot"
#    substring = "Internet Cruiser Robot"
#  }

#  cusco = {
#    label = "Cusco"
#    substring = "Cusco"
#  }

#  cyberspyder = {
#    label = "CyberSpyder Link Test"
#    substring = "CyberSpyder"
#  }

#  deweb = {
#    label = "DeWeb(c) Katalog/Index"
#    substring = "Deweb"
#  }

#  dienstspider = {
#    label = "DienstSpider"
#    substring = "dienstspider"
#  }

#  digger = {
#    label = "Digger"
#    substring = "Digger"
#  }

#  direct_hit_grabber = {
#    label = "Direct Hit Grabber"
#    substring = "grabber"
#  }

#  dnabot = {
#    label = "DNAbot"
#    substring = "DNAbot"
#  }

#  dragonbot = {
#    label = "DragonBot"
#    substring = "DragonBot"
#  }

#  dwcp = {
#    label = "DWCP"
#    substring = "DWCP"
#  }

#  ebiness = {
#    label = "EbiNess"
#    substring = "EbiNess"
#  }

#  eit_link_verifier_robot = {
#    label = "EIT Link Verifier Robot"
#    substring = "EIT-Link-Verifier-Robot"
#  }

#  elfinbot = {
#    label = "ELFINBOT"
#    substring = "elfinbot"
#  }

#  emacs_w3 = {
#    label = "Emacs-w3 Search Engine"
#    substring = "Emacs-w3"
#  }

#  ananzi = {
#    label = "ananzi"
#    substring = "EMC Spider"
#  }

#  esther = {
#    label = "Esther"
#    substring = "esther"
#  }

#  evliya_celebi = {
#    label = "Evliya Celebi"
#    substring = "Evliya Celebi"
#  }

#  nzexplorer = {
#    label = "nzexplorer"
#    substring = "explorersearch"
#  }

#  felix_ide = {
#    label = "Felix IDE"
#    substring = "FelixIDE"
#  }

#  wild_ferret = {
#    label = "Wild Ferret Web Hopper 1, 2, 3"
#    substring = "Hazel's Ferret Web hopper,"
#  }

#  fetchrover = {
#    label = "FetchRover"
#    substring = "ESIRover"
#  }

#  fido = {
#    label = "fido"
#    substring = "fido"
#  }

#  kit_fireball = {
#    label = "KIT-Fireball"
#    substring = "KIT-Fireball"
#  }

#  fish_search = {
#    label = "Fish search"
#    substring = "Fish-Search-Robot"
#  }

#  robot_francoroute = {
#    label = "Robot Francoroute"
#    substring = "Robot du CRIM"
#  }

#  freecrawl = {
#    label = "Freecrawl"
#    substring = "Freecrawl"
#  }

#  funnelweb = {
#    label = "FunnelWeb"
#    substring = "FunnelWeb"
#  }

#  gammaspider = {
#    label = "gammaSpider, FocusedCrawler"
#    substring = "gammaSpider"
#  }

#  gazz = {
#    label = "gazz"
#    substring = "gazz"
#  }

#  gcreep = {
#    label = "GCreep"
#    substring = "gcreep"
#  }

#  geturl = {
#    label = "GetURL"
#    substring = "GetURL"
#  }

#  golem = {
#    label = "Golem"
#    substring = "Golem"
#  }

#  gromit = {
#    label = "Gromit"
#    substring = "Gromit"
#  }

#  havindex = {
#    label = "havIndex"
#    substring = "havIndex"
#  }

#  hi_search = {
#    label = "HI (HTML Index) Search"
#    substring = "AITCSRobot"
#  }

#  hometown_spider_pro = {
#    label = "Hometown Spider Pro"
#    substring = "Hometown Spider Pro"
#  }

#  wired_digital = {
#    label = "Wired Digital"
#    substring = "wired-digital-newsbot"
#  }

#  htmlgobble = {
#    label = "HTMLgobble"
#    substring = "HTMLgobble"
#  }

#  iajabot = {
#    label = "iajaBot"
#    substring = "iajaBot"
#  }

#  ibm_planetwide = {
#    label = "IBM_Planetwide"
#    substring = "IBM_Planetwide"
#  }

#  popular_iconoclast = {
#    label = "Popular Iconoclast"
#    substring = "gestaltIconoclast"
#  }

#  ingrid = {
#    label = "Ingrid"
#    substring = "INGRID"
#  }

#  incywincy = {
#    label = "IncyWincy"
#    substring = "IncyWincy"
#  }

#  informant = {
#    label = "Informant"
#    substring = "Informant"
#  }

#  infoseek_robot = {
#    label = "InfoSeek Robot"
#    substring = "InfoSeek Robot"
#  }

#  infoseek_sidewinder = {
#    label = "Infoseek Sidewinder"
#    substring = "Infoseek Sidewinder"
#  }

#  infospiders = {
#    label = "InfoSpiders"
#    substring = "InfoSpiders"
#  }

#  inspector_web = {
#    label = "Inspector Web"
#    substring = "inspectorwww"
#  }

#  intelliagent = {
#    label = "IntelliAgent"
#    substring = "IAGENT"
#  }

#  i_robot = {
#    label = "I, Robot"
#    substring = "I Robot"
#  }

#  israeli_search = {
#    label = "Israeli-search"
#    substring = "IsraeliSearch"
#  }

#  javabee = {
#    label = "JavaBee"
#    substring = "JavaBee"
#  }

#  jbot = {
#    label = "JBot Java Web Robot"
#    substring = "JBot"
#  }

#  jcrawler = {
#    label = "JCrawler"
#    substring = "JCrawler"
#  }

#  jeeves = {
#    label = "Jeeves"
#    substring = "Jeeves"
#  }

#  jobo = {
#    label = "JoBo Java Web Robot"
#    substring = "JoBo"
#  }

#  jobot = {
#    label = "Jobot"
#    substring = "Jobot"
#  }

#  joebot = {
#    label = "JoeBot"
#    substring = "JoeBot"
#  }

#  jubii_robot = {
#    label = "The Jubii Indexing Robot"
#    substring = "JubiiRobot"
#  }

#  jumpstation = {
#    label = "JumpStation"
#    substring = "jumpstation"
#  }

#  katipo = {
#    label = "Katipo"
#    substring = "Katipo"
#  }

#  kdd_explorer = {
#    label = "KDD-Explorer"
#    substring = "KDD-Explorer"
#  }

#  ko_yappo_robot = {
#    label = "KO_Yappo_Robot"
#    substring = "KO_Yappo_Robot"
#  }

#  labelgrabber = {
#    label = "LabelGrabber"
#    substring = "LabelGrab"
#  }

#  legs = {
#    label = "legs"
#    substring = "legs"
#  }

#  link_validator = {
#    label = "Link Validator"
#    substring = "Linkidator"
#  }

#  linkscan = {
#    label = "LinkScan"
#    substring = "LinkScan Server"
#  }

#  lockon = {
#    label = "Lockon"
#    substring = "Lockon"
#  }

#  logo_gif_crawler = {
#    label = "logo.gif Crawler"
#    substring = "logo.gif"
#  }

#  magpie = {
#    label = "Magpie"
#    substring = "Magpie"
#  }

#  mediafox = {
#    label = "MediaFox"
#    substring = "MediaFox"
#  }

#  merzscope = {
#    label = "MerzScope"
#    substring = "MerzScope"
#  }

#  nec_meshexplorer = {
#    label = "NEC-MeshExplorer"
#    substring = "NEC-MeshExplorer"
#  }

#  mindcrawler = {
#    label = "MindCrawler"
#    substring = "MindCrawler"
#  }

#  momspider = {
#    label = "MOMspider"
#    substring = "MOMspider"
#  }

#  monster = {
#    label = "Monster"
#    substring = "Monster"
#  }

#  motor = {
#    label = "Motor"
#    substring = "Motor"
#  }

#  muscat_ferret = {
#    label = "Muscat Ferret"
#    substring = "MuscatFerret"
#  }

#  mwd_search = {
#    label = "Mwd.Search"
#    substring = "MwdSearch"
#  }

#  netcarta = {
#    label = "NetCarta WebMap Engine"
#    substring = "NetCarta CyberPilot Pro"
#  }

#  netscoop = {
#    label = "NetScoop"
#    substring = "NetScoop"
#  }

#  newscan_online = {
#    label = "newscan-online"
#    substring = "newscan-online"
#  }

#  nhse_web_forager = {
#    label = "NHSE Web Forager"
#    substring = "NHSEWalker"
#  }

#  nomad = {
#    label = "Nomad"
#    substring = "Nomad-V2.x"
#  }

#  northstar = {
#    label = "The NorthStar Robot"
#    substring = "NorthStar"
#  }

#  occam = {
#    label = "Occam"
#    substring = "Occam"
#  }

#  hku_www_octopus = {
#    label = "HKU WWW Octopus"
#    substring = "HKU WWW Robot"
#  }

#  orb_search = {
#    label = "Orb Search"
#    substring = "Orbsearch"
#  }

#  pack_rat = {
#    label = "Pack Rat"
#    substring = "PackRat"
#  }

#  pageboy = {
#    label = "PageBoy"
#    substring = "PageBoy"
#  }

#  parasite = {
#    label = "ParaSite"
#    substring = "ParaSite"
#  }

#  patric = {
#    label = "Patric"
#    substring = "Patric"
#  }

#  pegasus = {
#    label = "pegasus"
#    substring = "web robot PEGASUS"
#  }

#  peregrinator = {
#    label = "The Peregrinator"
#    substring = "Peregrinator-Mathematics"
#  }

#  perlcrawler = {
#    label = "PerlCrawler 1.0"
#    substring = "PerlCrawler"
#  }

#  phantom = {
#    label = "Phantom"
#    substring = "Duppies"
#  }

#  piltdownman = {
#    label = "PiltdownMan"
#    substring = "PiltdownMan"
#  }

#  pioneer = {
#    label = "Pioneer"
#    substring = "Pioneer"
#  }

#  portal_juice_spider = {
#    label = "Portal Juice Spider"
#    substring = "PortalJuice.com"
#  }

#  pgp_key_agent = {
#    label = "PGP Key Agent"
#    substring = "PGP-KA"
#  }

#  plumtree_web_accessor = {
#    label = "PlumtreeWebAccessor"
#    substring = "PlumtreeWebAccessor"
#  }

#  poppi = {
#    label = "Poppi"
#    substring = "Poppi"
#  }

#  portalb_spider = {
#    label = "PortalB Spider"
#    substring = "PortalBSpider"
#  }

#  getterroboplus = {
#    label = "GetterroboPlus Puu"
#    substring = "GetterroboPlus"
#  }

#  raven_search = {
#    label = "Raven Search"
#    substring = "Raven"
#  }

#  resume_robot = {
#    label = "Resume Robot"
#    substring = "Resume Robot"
#  }

#  roadhouse = {
#    label = "RoadHouse Crawling System"
#    substring = "RHCS"
#  }

#  road_runner = {
#    label = "Road Runner: The ImageScape Robot"
#    substring = "Road Runner"
#  }

#  robbie = {
#    label = "Robbie the Robot"
#    substring = "Robbie"
#  }

#  computingsite_robi = {
#    label = "ComputingSite Robi/1.0"
#    substring = "ComputingSite Robi"
#  }

#  robofox = {
#    label = "RoboFox"
#    substring = "Robofox"
#  }

#  roverbot = {
#    label = "Roverbot"
#    substring = "Roverbot"
#  }

#  rules = {
#    label = "RuLeS"
#    substring = "RuLeS"
#  }

#  safetynet_robot = {
#    label = "SafetyNet Robot"
#    substring = "SafetyNet Robot"
#  }

#  searchprocess = {
#    label = "SearchProcess"
#    substring = "searchprocess"
#  }

#  senrigan = {
#    label = "Senrigan"
#    substring = "Senrigan"
#  }

#  sg_scout = {
#    label = "SG-Scout"
#    substring = "SG-Scout"
#  }

#  shagseeker = {
#    label = "ShagSeeker"
#    substring = "Shagseeker"
#  }

#  shai_hulud = {
#    label = "Shai'Hulud"
#    substring = "Shai'Hulud"
#  }

#  simmany_robot = {
#    label = "Simmany Robot Ver1.0"
#    substring = "SimBot"
#  }

#  site_valet = {
#    label = "Site Valet"
#    substring = "Site Valet"
#  }

#  open_text_index_robot = {
#    label = "Open Text Index Robot"
#    substring = "Open Text Site Crawler"
#  }

#  sitetech_rover = {
#    label = "SiteTech-Rover"
#    substring = "SiteTech-Rover"
#  }

#  slcrawler = {
#    label = "SLCrawler"
#    substring = "SLCrawler"
#  }

#  smart_spider = {
#    label = "Smart Spider"
#    substring = "ESISmartSpider"
#  }

#  snooper = {
#    label = "Snooper"
#    substring = "Snooper"
#  }

#  solbot = {
#    label = "Solbot"
#    substring = "Solbot"
#  }

#  spanner = {
#    label = "Spanner"
#    substring = "Spanner"
#  }

#  speedy_spider = {
#    label = "Speedy Spider"
#    substring = "Speedy Spider"
#  }

#  spider_monkey = {
#    label = "spider_monkey"
#    substring = "mouse.house"
#  }

#  spiderbot = {
#    label = "SpiderBot"
#    substring = "SpiderBot"
#  }

#  spiderline = {
#    label = "Spiderline Crawler"
#    substring = "spiderline"
#  }

#  site_searcher = {
#    label = "Site Searcher"
#    substring = "ssearcher"
#  }

#  suke = {
#    label = "Suke"
#    substring = "suke"
#  }

#  suntek = {
#    label = "suntek search engine"
#    substring = "suntek"
#  }

#  tarantula = {
#    label = "Tarantula"
#    substring = "Tarantula"
#  }

#  tarspider = {
#    label = "tarspider"
#    substring = "tarspider"
#  }

#  tcl_w3_robot = {
#    label = "Tcl W3 Robot"
#    substring = "dlw3robot"
#  }

#  techbot = {
#    label = "TechBOT"
#    substring = "TechBOT"
#  }

#  templeton = {
#    label = "Templeton"
#    substring = "Templeton"
#  }

#  titin = {
#    label = "TitIn"
#    substring = "TitIn"
#  }

#  tlspider = {
#    label = "TLSpider"
#    substring = "TLSpider"
#  }

#  ucsd_crawl = {
#    label = "UCSD Crawl"
#    substring = "UCSD-Crawler"
#  }

#  udmsearch = {
#    label = "UdmSearch"
#    substring = "UdmSearch"
#  }

#  url_check = {
#    label = "URL Check"
#    substring = "urlck"
#  }

#  valkyrie = {
#    label = "Valkyrie"
#    substring = "Valkyrie"
#  }

#  victoria = {
#    label = "Victoria"
#    substring = "Victoria"
#  }

#  vision_search = {
#    label = "vision-search"
#    substring = "vision-search"
#  }

#  vwbot = {
#    label = "VWbot"
#    substring = "VWbot_K"
#  }

#  nwi_robot = {
#    label = "The NWI Robot"
#    substring = "w3index"
#  }

#  w3m2 = {
#    label = "W3M2"
#    substring = "W3M2"
#  }

#  wallpaper = {
#    label = "WallPaper"
#    substring = "WallPaper"
#  }

#  www_wanderer = {
#    label = "the World Wide Web Wanderer"
#    substring = "WWWWanderer"
#  }

#  wap_spider = {
#    label = "w@pSpider"
#    substring = "w@pSpider"
#  }

#  webbandit = {
#    label = "WebBandit Web Spider"
#    substring = "WebBandit"
#  }

#  webcatcher = {
#    label = "WebCatcher"
#    substring = "WebCatcher"
#  }

#  webcopy = {
#    label = "WebCopy"
#    substring = "WebCopy"
#  }

#  webfetcher = {
#    label = "webfetcher"
#    substring = "WebFetcher"
#  }

#  weblayers = {
#    label = "weblayers"
#    substring = "weblayers"
#  }

#  weblinker = {
#    label = "WebLinker"
#    substring = "WebLinker"
#  }

#  webmoose = {
#    label = "The Web Moose"
#    substring = "WebMoose"
#  }

#  webquest = {
#    label = "WebQuest"
#    substring = "WebQuest"
#  }

#  digimarc_webreader = {
#    label = "Digimarc MarcSpider"
#    substring = "Digimarc WebReader"
#  }

#  webreaper = {
#    label = "WebReaper"
#    substring = "WebReaper"
#  }

#  webs = {
#    label = "webs"
#    substring = "webs@"
#  }

#  webvac = {
#    label = "WebVac"
#    substring = "webvac"
#  }

#  webwalk = {
#    label = "webwalk"
#    substring = "webwalk"
#  }

#  webwalker = {
#    label = "WebWalker"
#    substring = "WebWalker"
#  }

#  webwatch = {
#    label = "WebWatch"
#    substring = "WebWatch"
#  }

#  whatuseek_winona = {
#    label = "whatUseek Winona"
#    substring = "whatUseek_winona"
#  }

#  w3mir = {
#    label = "w3mir"
#    substring = "w3mir"
#  }

#  webstolperer = {
#    label = "WebStolperer"
#    substring = "WOLP"
#  }

#  xget = {
#    label = "XGET"
#    substring = "XGET"
#  }

#  nederland_zoek = {
#    label = "Nederland.zoek"
#    substring = "Nederland.zoek"
#  }

#  ia_archiver = {
#    label = "Alexa Web Search"
#    substring = "ia_archiver"
#  }

#  adsonar = {
#    label = "Quigo AdSonar Bot"
#    substring = "AdSonar Bot"
#  }

} # spiders