# These are the spiders (robots) recognized by Sawmill. Each entry is a
# group with two fields:
#
#   name       the spider's display name
#   substring  a value that must be part of the USER-AGENT field for
#              Sawmill to consider the log entry as a hit from the spider
#
# This list is based on the one found at
# http://www.robotstxt.org/wc/active.html.
#
# If you know of another well-known spider, and if you create your own
# entry for it below, please send it also to sawmill@flowerfire.com, so
# we can add it to later versions.

spiders = {
  {
    name = "Wget"
    substring = "Wget"
  }
  {
    name = "Atomz.com Search Robot"
    substring = "Atomz"
  }
  {
    name = "NetMechanic"
    substring = "NetMechanic"
  }
  {
    name = "Inktomi Slurp"
    substring = "Slurp"
  }
  {
    name = "Northern Light Gulliver"
    substring = "Gulliver"
  }
  {
    name = "Googlebot"
    substring = "Googlebot"
  }
  {
    name = "Scooter"
    substring = "Scooter"
  }
  {
    name = "LinkWalker"
    substring = "LinkWalker"
  }
  {
    name = "WWWC"
    substring = "WWWC"
  }
  {
    name = "Lycos"
    substring = "Lycos"
  }
  {
    name = "arks"
    substring = "arks"
  }
  {
    name = "Digital Integrity Robot"
    substring = "DIIbot"
  }
  {
    name = "ArchitextSpider"
    substring = "ArchitextSpider"
  }
  {
    name = "Voyager"
    substring = "Voyager"
  }
  {
    name = "XYLEME Robot"
    substring = "cosmos"
  }
  {
    name = "Collective or e-collector"
    substring = "LWP"
  }
  {
    name = "Griffon"
    substring = "griffon"
  }
  {
    name = "Walhello appie"
    substring = "appie"
  }
  {
    name = "larbin"
    substring = "larbin"
  }
  {
    name = "TITAN"
    substring = "TITAN"
  }
  {
    name = "ht://Dig"
    substring = "htdig"
  }
  {
    name = "Iron33"
    substring = "Iron33"
  }
  {
    name = "moget"
    substring = "moget"
  }
  {
    name = "Openfind data gatherer"
    substring = "Openfind data gatherer"
  }
  {
    name = "SpiderMan"
    substring = "SpiderMan"
  }
  {
    name = "marvin/infoseek"
    substring = "marvin"
  }
  {
    name = "Robozilla"
    substring = "Robozilla"
  }
  {
    name = "TeomaTechnologies"
    substring = "teoma_agent1"
  }
  {
    name = "URL Spider Pro"
    substring = "URL Spider Pro"
  }
  {
    name = "Checkbot"
    substring = "Checkbot"
  }

# The spiders below this point are rare, and have been
# disabled to increase Sawmill's log processing performance.
# You can track these spiders by removing the pound (#) characters
# in front of each spider line (all four lines of the entry, including
# its braces). You can also add your own.

#  {
#    name = "Ahoy! The Homepage Finder"
#    substring = "Ahoy! The Homepage Finder"
#  }
#  {
#    name = "Alkaline"
#    substring = "AlkalineBOT"
#  }
#  {
#    name = "Arachnophilia"
#    substring = "Arachnophilia"
#  }
#  {
#    name = "ASpider"
#    substring = "ASpider"
#  }
#  {
#    name = "ATN Worldwide"
#    substring = "ATN_Worldwide"
#  }
#  {
#    name = "AURESYS"
#    substring = "AURESYS"
#  }
#  {
#    name = "BackRub"
#    substring = "BackRub"
#  }
#  {
#    name = "BaySpider"
#    substring = "BaySpider"
#  }
#  {
#    name = "Big Brother"
#    substring = "Big Brother"
#  }
#  {
#    name = "Bjaaland"
#    substring = "Bjaaland"
#  }
#  {
#    name = "BlackWidow"
#    substring = "BlackWidow"
#  }
#  {
#    name = "Die Blinde Kuh"
#    substring = "Die Blinde Kuh"
#  }
#  {
#    name = "BSpider"
#    substring = "BSpider"
#  }
#  {
#    name = "CACTVS Chemistry Spider"
#    substring = "CACTVS Chemistry Spider"
#  }
#  {
#    name = "Calif"
#    substring = "Calif"
#  }
#  {
#    name = "Digimarc Marcspider/CGI"
#    substring = "Digimarc CGIReader"
#  }
#  {
#    name = "CMC/0.01"
#    substring = "CMC"
#  }
#  {
#    name = "Combine System"
#    substring = "combine"
#  }
#  {
#    name = "Conceptbot"
#    substring = "conceptbot"
#  }
#  {
#    name = "CoolBot"
#    substring = "CoolBot"
#  }
#  {
#    name = "Web Core / Roots"
#    substring = "root"
#  }
#  {
#    name = "Internet Cruiser Robot"
#    substring = "Internet Cruiser Robot"
#  }
#  {
#    name = "Cusco"
#    substring = "Cusco"
#  }
#  {
#    name = "CyberSpyder Link Test"
#    substring = "CyberSpyder"
#  }
#  {
#    name = "DeWeb(c) Katalog/Index"
#    substring = "Deweb"
#  }
#  {
#    name = "DienstSpider"
#    substring = "dienstspider"
#  }
#  {
#    name = "Digger"
#    substring = "Digger"
#  }
#  {
#    name = "Direct Hit Grabber"
#    substring = "grabber"
#  }
#  {
#    name = "DNAbot"
#    substring = "DNAbot"
#  }
#  {
#    name = "DragonBot"
#    substring = "DragonBot"
#  }
#  {
#    name = "DWCP"
#    substring = "DWCP"
#  }
#  {
#    name = "EbiNess"
#    substring = "EbiNess"
#  }
#  {
#    name = "EIT Link Verifier Robot"
#    substring = "EIT-Link-Verifier-Robot"
#  }
#  {
#    name = "ELFINBOT"
#    substring = "elfinbot"
#  }
#  {
#    name = "Emacs-w3 Search Engine"
#    substring = "Emacs-w3"
#  }
#  {
#    name = "ananzi"
#    substring = "EMC Spider"
#  }
#  {
#    name = "Esther"
#    substring = "esther"
#  }
#  {
#    name = "Evliya Celebi"
#    substring = "Evliya Celebi"
#  }
#  {
#    name = "nzexplorer"
#    substring = "explorersearch"
#  }
#  {
#    name = "Felix IDE"
#    substring = "FelixIDE"
#  }
#  {
#    name = "Wild Ferret Web Hopper 1, 2, 3"
#    substring = "Hazel's Ferret Web hopper,"
#  }
#  {
#    name = "FetchRover"
#    substring = "ESIRover"
#  }
#  {
#    name = "fido"
#    substring = "fido"
#  }
#  {
#    name = "KIT-Fireball"
#    substring = "KIT-Fireball"
#  }
#  {
#    name = "Fish search"
#    substring = "Fish-Search-Robot"
#  }
#  {
#    name = "Robot Francoroute"
#    substring = "Robot du CRIM"
#  }
#  {
#    name = "Freecrawl"
#    substring = "Freecrawl"
#  }
#  {
#    name = "FunnelWeb"
#    substring = "FunnelWeb"
#  }
#  {
#    name = "gammaSpider, FocusedCrawler"
#    substring = "gammaSpider"
#  }
#  {
#    name = "gazz"
#    substring = "gazz"
#  }
#  {
#    name = "GCreep"
#    substring = "gcreep"
#  }
#  {
#    name = "GetURL"
#    substring = "GetURL"
#  }
#  {
#    name = "Golem"
#    substring = "Golem"
#  }
#  {
#    name = "Gromit"
#    substring = "Gromit"
#  }
#  {
#    name = "havIndex"
#    substring = "havIndex"
#  }
#  {
#    name = "HI (HTML Index) Search"
#    substring = "AITCSRobot"
#  }
#  {
#    name = "Hometown Spider Pro"
#    substring = "Hometown Spider Pro"
#  }
#  {
#    name = "Wired Digital"
#    substring = "wired-digital-newsbot"
#  }
#  {
#    name = "HTMLgobble"
#    substring = "HTMLgobble"
#  }
#  {
#    name = "iajaBot"
#    substring = "iajaBot"
#  }
#  {
#    name = "IBM_Planetwide"
#    substring = "IBM_Planetwide"
#  }
#  {
#    name = "Popular Iconoclast"
#    substring = "gestaltIconoclast"
#  }
#  {
#    name = "Ingrid"
#    substring = "INGRID"
#  }
#  {
#    name = "IncyWincy"
#    substring = "IncyWincy"
#  }
#  {
#    name = "Informant"
#    substring = "Informant"
#  }
#  {
#    name = "InfoSeek Robot"
#    substring = "InfoSeek Robot"
#  }
#  {
#    name = "Infoseek Sidewinder"
#    substring = "Infoseek Sidewinder"
#  }
#  {
#    name = "InfoSpiders"
#    substring = "InfoSpiders"
#  }
#  {
#    name = "Inspector Web"
#    substring = "inspectorwww"
#  }
#  {
#    name = "IntelliAgent"
#    substring = "IAGENT"
#  }
#  {
#    name = "I, Robot"
#    substring = "I Robot"
#  }
#  {
#    name = "Israeli-search"
#    substring = "IsraeliSearch"
#  }
#  {
#    name = "JavaBee"
#    substring = "JavaBee"
#  }
#  {
#    name = "JBot Java Web Robot"
#    substring = "JBot"
#  }
#  {
#    name = "JCrawler"
#    substring = "JCrawler"
#  }
#  {
#    name = "Jeeves"
#    substring = "Jeeves"
#  }
#  {
#    name = "JoBo Java Web Robot"
#    substring = "JoBo"
#  }
#  {
#    name = "Jobot"
#    substring = "Jobot"
#  }
#  {
#    name = "JoeBot"
#    substring = "JoeBot"
#  }
#  {
#    name = "The Jubii Indexing Robot"
#    substring = "JubiiRobot"
#  }
#  {
#    name = "JumpStation"
#    substring = "jumpstation"
#  }
#  {
#    name = "Katipo"
#    substring = "Katipo"
#  }
#  {
#    name = "KDD-Explorer"
#    substring = "KDD-Explorer"
#  }
#  {
#    name = "KO_Yappo_Robot"
#    substring = "KO_Yappo_Robot"
#  }
#  {
#    name = "LabelGrabber"
#    substring = "LabelGrab"
#  }
#  {
#    name = "legs"
#    substring = "legs"
#  }
#  {
#    name = "Link Validator"
#    substring = "Linkidator"
#  }
#  {
#    name = "LinkScan"
#    substring = "LinkScan Server"
#  }
#  {
#    name = "Lockon"
#    substring = "Lockon"
#  }
#  {
#    name = "logo.gif Crawler"
#    substring = "logo.gif"
#  }
#  {
#    name = "Magpie"
#    substring = "Magpie"
#  }
#  {
#    name = "MediaFox"
#    substring = "MediaFox"
#  }
#  {
#    name = "MerzScope"
#    substring = "MerzScope"
#  }
#  {
#    name = "NEC-MeshExplorer"
#    substring = "NEC-MeshExplorer"
#  }
#  {
#    name = "MindCrawler"
#    substring = "MindCrawler"
#  }
#  {
#    name = "MOMspider"
#    substring = "MOMspider"
#  }
#  {
#    name = "Monster"
#    substring = "Monster"
#  }
#  {
#    name = "Motor"
#    substring = "Motor"
#  }
#  {
#    name = "Muscat Ferret"
#    substring = "MuscatFerret"
#  }
#  {
#    name = "Mwd.Search"
#    substring = "MwdSearch"
#  }
#  {
#    name = "NetCarta WebMap Engine"
#    substring = "NetCarta CyberPilot Pro"
#  }
#  {
#    name = "NetScoop"
#    substring = "NetScoop"
#  }
#  {
#    name = "newscan-online"
#    substring = "newscan-online"
#  }
#  {
#    name = "NHSE Web Forager"
#    substring = "NHSEWalker"
#  }
#  {
#    name = "Nomad"
#    substring = "Nomad-V2.x"
#  }
#  {
#    name = "The NorthStar Robot"
#    substring = "NorthStar"
#  }
#  {
#    name = "Occam"
#    substring = "Occam"
#  }
#  {
#    name = "HKU WWW Octopus"
#    substring = "HKU WWW Robot"
#  }
#  {
#    name = "Orb Search"
#    substring = "Orbsearch"
#  }
#  {
#    name = "Pack Rat"
#    substring = "PackRat"
#  }
#  {
#    name = "PageBoy"
#    substring = "PageBoy"
#  }
#  {
#    name = "ParaSite"
#    substring = "ParaSite"
#  }
#  {
#    name = "Patric"
#    substring = "Patric"
#  }
#  {
#    name = "pegasus"
#    substring = "web robot PEGASUS"
#  }
#  {
#    name = "The Peregrinator"
#    substring = "Peregrinator-Mathematics"
#  }
#  {
#    name = "PerlCrawler 1.0"
#    substring = "PerlCrawler"
#  }
#  {
#    name = "Phantom"
#    substring = "Duppies"
#  }
#  {
#    name = "PiltdownMan"
#    substring = "PiltdownMan"
#  }
#  {
#    name = "Pioneer"
#    substring = "Pioneer"
#  }
#  {
#    name = "Portal Juice Spider"
#    substring = "PortalJuice.com"
#  }
#  {
#    name = "PGP Key Agent"
#    substring = "PGP-KA"
#  }
#  {
#    name = "PlumtreeWebAccessor"
#    substring = "PlumtreeWebAccessor"
#  }
#  {
#    name = "Poppi"
#    substring = "Poppi"
#  }
#  {
#    name = "PortalB Spider"
#    substring = "PortalBSpider"
#  }
#  {
#    name = "GetterroboPlus Puu"
#    substring = "GetterroboPlus"
#  }
#  {
#    name = "Raven Search"
#    substring = "Raven"
#  }
#  {
#    name = "Resume Robot"
#    substring = "Resume Robot"
#  }
#  {
#    name = "RoadHouse Crawling System"
#    substring = "RHCS"
#  }
#  {
#    name = "Road Runner: The ImageScape Robot"
#    substring = "Road Runner"
#  }
#  {
#    name = "Robbie the Robot"
#    substring = "Robbie"
#  }
#  {
#    name = "ComputingSite Robi/1.0"
#    substring = "ComputingSite Robi"
#  }
#  {
#    name = "RoboFox"
#    substring = "Robofox"
#  }
#  {
#    name = "Roverbot"
#    substring = "Roverbot"
#  }
#  {
#    name = "RuLeS"
#    substring = "RuLeS"
#  }
#  {
#    name = "SafetyNet Robot"
#    substring = "SafetyNet Robot"
#  }
#  {
#    name = "SearchProcess"
#    substring = "searchprocess"
#  }
#  {
#    name = "Senrigan"
#    substring = "Senrigan"
#  }
#  {
#    name = "SG-Scout"
#    substring = "SG-Scout"
#  }
#  {
#    name = "ShagSeeker"
#    substring = "Shagseeker"
#  }
#  {
#    name = "Shai'Hulud"
#    substring = "Shai'Hulud"
#  }
#  {
#    name = "Simmany Robot Ver1.0"
#    substring = "SimBot"
#  }
#  {
#    name = "Site Valet"
#    substring = "Site Valet"
#  }
#  {
#    name = "Open Text Index Robot"
#    substring = "Open Text Site Crawler"
#  }
#  {
#    name = "SiteTech-Rover"
#    substring = "SiteTech-Rover"
#  }
#  {
#    name = "SLCrawler"
#    substring = "SLCrawler"
#  }
#  {
#    name = "Smart Spider"
#    substring = "ESISmartSpider"
#  }
#  {
#    name = "Snooper"
#    substring = "Snooper"
#  }
#  {
#    name = "Solbot"
#    substring = "Solbot"
#  }
#  {
#    name = "Spanner"
#    substring = "Spanner"
#  }
#  {
#    name = "Speedy Spider"
#    substring = "Speedy Spider"
#  }
#  {
#    name = "spider_monkey"
#    substring = "mouse.house"
#  }
#  {
#    name = "SpiderBot"
#    substring = "SpiderBot"
#  }
#  {
#    name = "Spiderline Crawler"
#    substring = "spiderline"
#  }
#  {
#    name = "Site Searcher"
#    substring = "ssearcher"
#  }
#  {
#    name = "Suke"
#    substring = "suke"
#  }
#  {
#    name = "suntek search engine"
#    substring = "suntek"
#  }
#  {
#    name = "Tarantula"
#    substring = "Tarantula"
#  }
#  {
#    name = "tarspider"
#    substring = "tarspider"
#  }
#  {
#    name = "Tcl W3 Robot"
#    substring = "dlw3robot"
#  }
#  {
#    name = "TechBOT"
#    substring = "TechBOT"
#  }
#  {
#    name = "Templeton"
#    substring = "Templeton"
#  }
#  {
#    name = "TitIn"
#    substring = "TitIn"
#  }
#  {
#    name = "TLSpider"
#    substring = "TLSpider"
#  }
#  {
#    name = "UCSD Crawl"
#    substring = "UCSD-Crawler"
#  }
#  {
#    name = "UdmSearch"
#    substring = "UdmSearch"
#  }
#  {
#    name = "URL Check"
#    substring = "urlck"
#  }
#  {
#    name = "Valkyrie"
#    substring = "Valkyrie"
#  }
#  {
#    name = "Victoria"
#    substring = "Victoria"
#  }
#  {
#    name = "vision-search"
#    substring = "vision-search"
#  }
#  {
#    name = "VWbot"
#    substring = "VWbot_K"
#  }
#  {
#    name = "The NWI Robot"
#    substring = "w3index"
#  }
#  {
#    name = "W3M2"
#    substring = "W3M2"
#  }
#  {
#    name = "WallPaper"
#    substring = "WallPaper"
#  }
#  {
#    name = "the World Wide Web Wanderer"
#    substring = "WWWWanderer"
#  }
#  {
#    name = "w@pSpider"
#    substring = "w@pSpider"
#  }
#  {
#    name = "WebBandit Web Spider"
#    substring = "WebBandit"
#  }
#  {
#    name = "WebCatcher"
#    substring = "WebCatcher"
#  }
#  {
#    name = "WebCopy"
#    substring = "WebCopy"
#  }
#  {
#    name = "webfetcher"
#    substring = "WebFetcher"
#  }
#  {
#    name = "weblayers"
#    substring = "weblayers"
#  }
#  {
#    name = "WebLinker"
#    substring = "WebLinker"
#  }
#  {
#    name = "The Web Moose"
#    substring = "WebMoose"
#  }
#  {
#    name = "WebQuest"
#    substring = "WebQuest"
#  }
#  {
#    name = "Digimarc MarcSpider"
#    substring = "Digimarc WebReader"
#  }
#  {
#    name = "WebReaper"
#    substring = "WebReaper"
#  }
#  {
#    name = "webs"
#    substring = "webs@"
#  }
#  {
#    name = "WebVac"
#    substring = "webvac"
#  }
#  {
#    name = "webwalk"
#    substring = "webwalk"
#  }
#  {
#    name = "WebWalker"
#    substring = "WebWalker"
#  }
#  {
#    name = "WebWatch"
#    substring = "WebWatch"
#  }
#  {
#    name = "whatUseek Winona"
#    substring = "whatUseek_winona"
#  }
#  {
#    name = "w3mir"
#    substring = "w3mir"
#  }
#  {
#    name = "WebStolperer"
#    substring = "WOLP"
#  }
#  {
#    name = "XGET"
#    substring = "XGET"
#  }
#  {
#    name = "Nederland.zoek"
#    substring = "Nederland.zoek"
#  }
} # spiders