# Spiders (robots) recognized by Sawmill.
#
# Each node inside "spiders" describes one spider:
#   - the node name is the internal name of the spider;
#   - "label" is the spider's display name;
#   - "substring" is a value that must be part of the USER-AGENT field
#     for Sawmill to consider the log entry as a hit from that spider.
#
# NOTE(review): some substrings overlap (for example "Yahoo! Slurp" and
# "Slurp"). Presumably the first matching entry wins, so the more
# specific entry should stay above the more generic one -- confirm
# against Sawmill's matching rules before reordering entries.
#
# This list is based on the one found at
# http://www.robotstxt.org/wc/active.html.
#
# If you know of another well-known spider and create your own entry
# for it below, please also send it to sawmill@flowerfire.com so it can
# be added to later versions.

spiders = {
  msie_crawler = {
    label = "Internet Explorer Crawler"
    substring = "MSIECrawler"
  }
  msn_robot = {
    label = "MSN Robot"
    substring = "msnbot"
  }
  yahoo_slurp = {
    label = "Yahoo Slurp"
    substring = "Yahoo! Slurp"
  }
  wget = {
    label = "Wget"
    substring = "Wget"
  }
  yandex = {
    label = "Yandex"
    substring = "Yandex"
  }
  atomz = {
    label = "Atomz.com Search Robot"
    substring = "Atomz"
  }
  netmechanic = {
    label = "NetMechanic"
    substring = "NetMechanic"
  }
  baidu = {
    label = "BaiduSpider"
    substring = "Baiduspider"
  }
  sosospider = {
    label = "SosoSpider"
    substring = "Sosospider"
  }
  inktomi_slurp = {
    label = "Inktomi Slurp"
    substring = "Slurp"
  }
  gulliver = {
    label = "Northern Light Gulliver"
    substring = "Gulliver"
  }
  googlebot = {
    label = "Googlebot"
    substring = "Googlebot"
  }
  scooter = {
    label = "Scooter"
    substring = "Scooter"
  }
  linkwalker = {
    label = "LinkWalker"
    substring = "LinkWalker"
  }
  wwwc = {
    label = "WWWC"
    substring = "WWWC"
  }
  lycos = {
    label = "Lycos"
    substring = "Lycos"
  }
  arks = {
    label = "arks"
    substring = "arks"
  }
  diibot = {
    label = "Digital Integrity Robot"
    substring = "DIIbot"
  }
  architext_spider = {
    label = "ArchitextSpider"
    substring = "ArchitextSpider"
  }
  voyager = {
    label = "Voyager"
    substring = "Voyager"
  }
  xyleme = {
    label = "XYLEME Robot"
    substring = "cosmos"
  }
  collective = {
    label = "Collective or e-collector"
    substring = "LWP"
  }
  griffon = {
    label = "Griffon"
    substring = "griffon"
  }
  appie = {
    label = "Walhello appie"
    substring = "appie"
  }
  larbin = {
    label = "larbin"
    substring = "larbin"
  }
  titan = {
    label = "TITAN"
    substring = "TITAN"
  }
  htdig = {
    label = "ht://Dig"
    substring = "htdig"
  }
  iron33 = {
    label = "Iron33"
    substring = "Iron33"
  }
  moget = {
    label = "moget"
    substring = "moget"
  }
  openfind = {
    label = "Openfind data gatherer"
    substring = "Openfind data gatherer"
  }
  spiderman = {
    label = "SpiderMan"
    substring = "SpiderMan"
  }
  marvin = {
    label = "marvin/infoseek"
    substring = "marvin"
  }
  robozilla = {
    label = "Robozilla"
    substring = "Robozilla"
  }
  teoma = {
    label = "TeomaTechnologies"
    substring = "teoma_agent1"
  }
  ask_jeeves_teoma = {
    label = "Ask Jeeves/Teoma"
    substring = "Teoma"
  }
  url_spider_pro = {
    label = "URL Spider Pro"
    substring = "URL Spider Pro"
  }
  checkbot = {
    label = "Checkbot"
    substring = "Checkbot"
  }
  mediapartners_google = {
    label = "Googlebot"
    substring = "Mediapartners-Google"
  }

# The spiders below this point are rare, and have been disabled to
# increase Sawmill's log processing performance. To track one of them,
# remove the pound (#) characters in front of each line of its entry.
# You can also add your own entries.

#  ahoy = {
#    label = "Ahoy! The Homepage Finder"
#    substring = "Ahoy! The Homepage Finder"
#  }
#  alkaline = {
#    label = "Alkaline"
#    substring = "AlkalineBOT"
#  }
#  arachnophilia = {
#    label = "Arachnophilia"
#    substring = "Arachnophilia"
#  }
#  aspider = {
#    label = "ASpider"
#    substring = "ASpider"
#  }
#  atn_worldwide = {
#    label = "ATN Worldwide"
#    substring = "ATN_Worldwide"
#  }
#  auresys = {
#    label = "AURESYS"
#    substring = "AURESYS"
#  }
#  backrub = {
#    label = "BackRub"
#    substring = "BackRub"
#  }
#  backrub_bayspider = {
#    label = "BackRub"
#    substring = "BaySpider"
#  }
#  big_brother = {
#    label = "Big Brother"
#    substring = "Big Brother"
#  }
#  bjaaland = {
#    label = "Bjaaland"
#    substring = "Bjaaland"
#  }
#  blackwidow = {
#    label = "BlackWidow"
#    substring = "BlackWidow"
#  }
#  die_blinde_kuh = {
#    label = "Die Blinde Kuh"
#    substring = "Die Blinde Kuh"
#  }
#  bspider = {
#    label = "BSpider"
#    substring = "BSpider"
#  }
#  cactvs = {
#    label = "CACTVS Chemistry Spider"
#    substring = "CACTVS Chemistry Spider"
#  }
#  calif = {
#    label = "Calif"
#    substring = "Calif"
#  }
#  digimarc_cgi = {
#    label = "Digimarc Marcspider/CGI"
#    substring = "Digimarc CGIReader"
#  }
#  cmc = {
#    label = "CMC/0.01"
#    substring = "CMC"
#  }
#  combine = {
#    label = "Combine System"
#    substring = "combine"
#  }
#  conceptbot = {
#    label = "Conceptbot"
#    substring = "conceptbot"
#  }
#  coolbot = {
#    label = "CoolBot"
#    substring = "CoolBot"
#  }
#  web_core_root = {
#    label = "Web Core / Roots"
#    substring = "root"
#  }
#  internet_cruiser_robot = {
#    label = "Internet Cruiser Robot"
#    substring = "Internet Cruiser Robot"
#  }
#  cusco = {
#    label = "Cusco"
#    substring = "Cusco"
#  }
#  cyberspider = {
#    label = "CyberSpyder Link Test"
#    substring = "CyberSpyder"
#  }
#  deweb = {
#    label = "DeWeb(c) Katalog/Index"
#    substring = "Deweb"
#  }
#  dienstspider = {
#    label = "DienstSpider"
#    substring = "dienstspider"
#  }
#  digger = {
#    label = "Digger"
#    substring = "Digger"
#  }
#  grabber = {
#    label = "Direct Hit Grabber"
#    substring = "grabber"
#  }
#  dnabot = {
#    label = "DNAbot"
#    substring = "DNAbot"
#  }
#  dragonbot = {
#    label = "DragonBot"
#    substring = "DragonBot"
#  }
#  dwcp = {
#    label = "DWCP"
#    substring = "DWCP"
#  }
#  ebiness = {
#    label = "EbiNess"
#    substring = "EbiNess"
#  }
#  eit = {
#    label = "EIT Link Verifier Robot"
#    substring = "EIT-Link-Verifier-Robot"
#  }
#  elfinbot = {
#    label = "ELFINBOT"
#    substring = "elfinbot"
#  }
#  emacs_w3 = {
#    label = "Emacs-w3 Search Engine"
#    substring = "Emacs-w3"
#  }
#  ananzi = {
#    label = "ananzi"
#    substring = "EMC Spider"
#  }
#  esther = {
#    label = "Esther"
#    substring = "esther"
#  }
#  evliya_celebi = {
#    label = "Evliya Celebi"
#    substring = "Evliya Celebi"
#  }
#  nzexplorer = {
#    label = "nzexplorer"
#    substring = "explorersearch"
#  }
#  felix_ide = {
#    label = "Felix IDE"
#    substring = "FelixIDE"
#  }
#  wild_ferret = {
#    label = "Wild Ferret Web Hopper 1, 2, 3"
#    substring = "Hazel's Ferret Web hopper,"
#  }
#  fetchrover = {
#    label = "FetchRover"
#    substring = "ESIRover"
#  }
#  fido = {
#    label = "fido"
#    substring = "fido"
#  }
#  kit_fireball = {
#    label = "KIT-Fireball"
#    substring = "KIT-Fireball"
#  }
#  fish_search = {
#    label = "Fish search"
#    substring = "Fish-Search-Robot"
#  }
#  robot_francoroute = {
#    label = "Robot Francoroute"
#    substring = "Robot du CRIM"
#  }
#  freecrawl = {
#    label = "Freecrawl"
#    substring = "Freecrawl"
#  }
#  funnelweb = {
#    label = "FunnelWeb"
#    substring = "FunnelWeb"
#  }
#  gammaspider = {
#    label = "gammaSpider, FocusedCrawler"
#    substring = "gammaSpider"
#  }
#  gazz = {
#    label = "gazz"
#    substring = "gazz"
#  }
#  gcreep = {
#    label = "GCreep"
#    substring = "gcreep"
#  }
#  geturl = {
#    label = "GetURL"
#    substring = "GetURL"
#  }
#  golem = {
#    label = "Golem"
#    substring = "Golem"
#  }
#  gromit = {
#    label = "Gromit"
#    substring = "Gromit"
#  }
#  havindex = {
#    label = "havIndex"
#    substring = "havIndex"
#  }
#  aitcs = {
#    label = "HI (HTML Index) Search"
#    substring = "AITCSRobot"
#  }
#  hometown_spider_pro = {
#    label = "Hometown Spider Pro"
#    substring = "Hometown Spider Pro"
#  }
#  wired_digital = {
#    label = "Wired Digital"
#    substring = "wired-digital-newsbot"
#  }
#  html_gobble = {
#    label = "HTMLgobble"
#    substring = "HTMLgobble"
#  }
#  iajabot = {
#    label = "iajaBot"
#    substring = "iajaBot"
#  }
#  ibm_planetwide = {
#    label = "IBM_Planetwide"
#    substring = "IBM_Planetwide"
#  }
#  iconoclast = {
#    label = "Popular Iconoclast"
#    substring = "gestaltIconoclast"
#  }
#  ingrid = {
#    label = "Ingrid"
#    substring = "INGRID"
#  }
#  incywincy = {
#    label = "IncyWincy"
#    substring = "IncyWincy"
#  }
#  informat = {
#    label = "Informant"
#    substring = "Informant"
#  }
#  infoseek_robot = {
#    label = "InfoSeek Robot"
#    substring = "InfoSeek Robot"
#  }
#  infoseek_sidewinder = {
#    label = "Infoseek Sidewinder"
#    substring = "Infoseek Sidewinder"
#  }
#  info_spiders = {
#    label = "InfoSpiders"
#    substring = "InfoSpiders"
#  }
#  inspector_web = {
#    label = "Inspector Web"
#    substring = "inspectorwww"
#  }
#  intelliagent = {
#    label = "IntelliAgent"
#    substring = "IAGENT"
#  }
#  i_robot = {
#    label = "I, Robot"
#    substring = "I Robot"
#  }
#  israeli_search = {
#    label = "Israeli-search"
#    substring = "IsraeliSearch"
#  }
#  javabee = {
#    label = "JavaBee"
#    substring = "JavaBee"
#  }
#  jbot_java_web_robot = {
#    label = "JBot Java Web Robot"
#    substring = "JBot"
#  }
#  jcrawler = {
#    label = "JCrawler"
#    substring = "JCrawler"
#  }
#  jeeves = {
#    label = "Jeeves"
#    substring = "Jeeves"
#  }
#  jobo_java_web_robot = {
#    label = "JoBo Java Web Robot"
#    substring = "JoBo"
#  }
#  jobot = {
#    label = "Jobot"
#    substring = "Jobot"
#  }
#  joebot = {
#    label = "JoeBot"
#    substring = "JoeBot"
#  }
#  jubii = {
#    label = "The Jubii Indexing Robot"
#    substring = "JubiiRobot"
#  }
#  jumpstation = {
#    label = "JumpStation"
#    substring = "jumpstation"
#  }
#  katipo = {
#    label = "Katipo"
#    substring = "Katipo"
#  }
#  kdd_explorer = {
#    label = "KDD-Explorer"
#    substring = "KDD-Explorer"
#  }
#  ko_yappo_robot = {
#    label = "KO_Yappo_Robot"
#    substring = "KO_Yappo_Robot"
#  }
#  labelgrabber = {
#    label = "LabelGrabber"
#    substring = "LabelGrab"
#  }
#  legs = {
#    label = "legs"
#    substring = "legs"
#  }
#  link_validator = {
#    label = "Link Validator"
#    substring = "Linkidator"
#  }
#  linkscan = {
#    label = "LinkScan"
#    substring = "LinkScan Server"
#  }
#  lockon = {
#    label = "Lockon"
#    substring = "Lockon"
#  }
#  logo_gif_crawler = {
#    label = "logo.gif Crawler"
#    substring = "logo.gif"
#  }
#  magpie = {
#    label = "Magpie"
#    substring = "Magpie"
#  }
#  mediafox = {
#    label = "MediaFox"
#    substring = "MediaFox"
#  }
#  merzscope = {
#    label = "MerzScope"
#    substring = "MerzScope"
#  }
#  nec_meshexplorer = {
#    label = "NEC-MeshExplorer"
#    substring = "NEC-MeshExplorer"
#  }
#  mindcrawler = {
#    label = "MindCrawler"
#    substring = "MindCrawler"
#  }
#  momspider = {
#    label = "MOMspider"
#    substring = "MOMspider"
#  }
#  monster = {
#    label = "Monster"
#    substring = "Monster"
#  }
#  motor = {
#    label = "Motor"
#    substring = "Motor"
#  }
#  muscat_ferret = {
#    label = "Muscat Ferret"
#    substring = "MuscatFerret"
#  }
#  mwd_search = {
#    label = "Mwd.Search"
#    substring = "MwdSearch"
#  }
#  netcarta_webmap_engine = {
#    label = "NetCarta WebMap Engine"
#    substring = "NetCarta CyberPilot Pro"
#  }
#  netscoop = {
#    label = "NetScoop"
#    substring = "NetScoop"
#  }
#  newscan_online = {
#    label = "newscan-online"
#    substring = "newscan-online"
#  }
#  nhse_web_forager = {
#    label = "NHSE Web Forager"
#    substring = "NHSEWalker"
#  }
#  nomad = {
#    label = "Nomad"
#    substring = "Nomad-V2.x"
#  }
#  northstar = {
#    label = "The NorthStar Robot"
#    substring = "NorthStar"
#  }
#  occam = {
#    label = "Occam"
#    substring = "Occam"
#  }
#  hku_www_octopus = {
#    label = "HKU WWW Octopus"
#    substring = "HKU WWW Robot"
#  }
#  orb_search = {
#    label = "Orb Search"
#    substring = "Orbsearch"
#  }
#  pack_rat = {
#    label = "Pack Rat"
#    substring = "PackRat"
#  }
#  pageboy = {
#    label = "PageBoy"
#    substring = "PageBoy"
#  }
#  parasite = {
#    label = "ParaSite"
#    substring = "ParaSite"
#  }
#  patric = {
#    label = "Patric"
#    substring = "Patric"
#  }
#  pegasus = {
#    label = "pegasus"
#    substring = "web robot PEGASUS"
#  }
#  peregrinator = {
#    label = "The Peregrinator"
#    substring = "Peregrinator-Mathematics"
#  }
#  perlcrawler = {
#    label = "PerlCrawler 1.0"
#    substring = "PerlCrawler"
#  }
#  phantom = {
#    label = "Phantom"
#    substring = "Duppies"
#  }
#  piltdownman = {
#    label = "PiltdownMan"
#    substring = "PiltdownMan"
#  }
#  pioneer = {
#    label = "Pioneer"
#    substring = "Pioneer"
#  }
#  portal_juice_spider = {
#    label = "Portal Juice Spider"
#    substring = "PortalJuice.com"
#  }
#  pgp_key_agent = {
#    label = "PGP Key Agent"
#    substring = "PGP-KA"
#  }
#  plumtree = {
#    label = "PlumtreeWebAccessor"
#    substring = "PlumtreeWebAccessor"
#  }
#  poppi = {
#    label = "Poppi"
#    substring = "Poppi"
#  }
#  portalb_spider = {
#    label = "PortalB Spider"
#    substring = "PortalBSpider"
#  }
#  getterroboplus = {
#    label = "GetterroboPlus Puu"
#    substring = "GetterroboPlus"
#  }
#  raven_search = {
#    label = "Raven Search"
#    substring = "Raven"
#  }
#  resume_robot = {
#    label = "Resume Robot"
#    substring = "Resume Robot"
#  }
#  roadhouse = {
#    label = "RoadHouse Crawling System"
#    substring = "RHCS"
#  }
#  road_runner = {
#    label = "Road Runner: The ImageScape Robot"
#    substring = "Road Runner"
#  }
#  robbie = {
#    label = "Robbie the Robot"
#    substring = "Robbie"
#  }
#  computingsite_robi = {
#    label = "ComputingSite Robi/1.0"
#    substring = "ComputingSite Robi"
#  }
#  robofox = {
#    label = "RoboFox"
#    substring = "Robofox"
#  }
#  roverbot = {
#    label = "Roverbot"
#    substring = "Roverbot"
#  }
#  rules = {
#    label = "RuLeS"
#    substring = "RuLeS"
#  }
#  safetynet_robot = {
#    label = "SafetyNet Robot"
#    substring = "SafetyNet Robot"
#  }
#  searchprocess = {
#    label = "SearchProcess"
#    substring = "searchprocess"
#  }
#  senrigan = {
#    label = "Senrigan"
#    substring = "Senrigan"
#  }
#  sg_scout = {
#    label = "SG-Scout"
#    substring = "SG-Scout"
#  }
#  shagseeker = {
#    label = "ShagSeeker"
#    substring = "Shagseeker"
#  }
#  shai_hulud = {
#    label = "Shai'Hulud"
#    substring = "Shai'Hulud"
#  }
#  simbot = {
#    label = "Simmany Robot Ver1.0"
#    substring = "SimBot"
#  }
#  site_valet = {
#    label = "Site Valet"
#    substring = "Site Valet"
#  }
#  open_text_index = {
#    label = "Open Text Index Robot"
#    substring = "Open Text Site Crawler"
#  }
#  sitetech_rover = {
#    label = "SiteTech-Rover"
#    substring = "SiteTech-Rover"
#  }
#  sl_crawler = {
#    label = "SLCrawler"
#    substring = "SLCrawler"
#  }
#  smart_spider = {
#    label = "Smart Spider"
#    substring = "ESISmartSpider"
#  }
#  snooper = {
#    label = "Snooper"
#    substring = "Snooper"
#  }
#  solbot = {
#    label = "Solbot"
#    substring = "Solbot"
#  }
#  spanner = {
#    label = "Spanner"
#    substring = "Spanner"
#  }
#  speedy = {
#    label = "Speedy Spider"
#    substring = "Speedy Spider"
#  }
#  spider_monkey = {
#    label = "spider_monkey"
#    substring = "mouse.house"
#  }
#  spiderbot = {
#    label = "SpiderBot"
#    substring = "SpiderBot"
#  }
#  spiderline = {
#    label = "Spiderline Crawler"
#    substring = "spiderline"
#  }
#  site_searcher = {
#    label = "Site Searcher"
#    substring = "ssearcher"
#  }
#  suke = {
#    label = "Suke"
#    substring = "suke"
#  }
#  suntek = {
#    label = "suntek search engine"
#    substring = "suntek"
#  }
#  tarantula = {
#    label = "Tarantula"
#    substring = "Tarantula"
#  }
#  tarspider = {
#    label = "tarspider"
#    substring = "tarspider"
#  }
#  dlw_robot = {
#    label = "Tcl W3 Robot"
#    substring = "dlw3robot"
#  }
#  techbot = {
#    label = "TechBOT"
#    substring = "TechBOT"
#  }
#  templeton = {
#    label = "Templeton"
#    substring = "Templeton"
#  }
#  titin = {
#    label = "TitIn"
#    substring = "TitIn"
#  }
#  tlspider = {
#    label = "TLSpider"
#    substring = "TLSpider"
#  }
#  ucsd_crawler = {
#    label = "UCSD Crawl"
#    substring = "UCSD-Crawler"
#  }
#  udmsearch = {
#    label = "UdmSearch"
#    substring = "UdmSearch"
#  }
#  url_check = {
#    label = "URL Check"
#    substring = "urlck"
#  }
#  valkyrie = {
#    label = "Valkyrie"
#    substring = "Valkyrie"
#  }
#  victoria = {
#    label = "Victoria"
#    substring = "Victoria"
#  }
#  vision_search = {
#    label = "vision-search"
#    substring = "vision-search"
#  }
#  vwbot = {
#    label = "VWbot"
#    substring = "VWbot_K"
#  }
#  nwi_robot = {
#    label = "The NWI Robot"
#    substring = "w3index"
#  }
#  wwwm2 = {
#    label = "W3M2"
#    substring = "W3M2"
#  }
#  wallpaper = {
#    label = "WallPaper"
#    substring = "WallPaper"
#  }
#  www_wanderer = {
#    label = "the World Wide Web Wanderer"
#    substring = "WWWWanderer"
#  }
#  w_pspider = {
#    label = "w@pSpider"
#    substring = "w@pSpider"
#  }
#  webbandit = {
#    label = "WebBandit Web Spider"
#    substring = "WebBandit"
#  }
#  webcatcher = {
#    label = "WebCatcher"
#    substring = "WebCatcher"
#  }
#  webcopy = {
#    label = "WebCopy"
#    substring = "WebCopy"
#  }
#  webfetcher = {
#    label = "webfetcher"
#    substring = "WebFetcher"
#  }
#  weblayers = {
#    label = "weblayers"
#    substring = "weblayers"
#  }
#  weblinker = {
#    label = "WebLinker"
#    substring = "WebLinker"
#  }
#  web_moose = {
#    label = "The Web Moose"
#    substring = "WebMoose"
#  }
#  webquest = {
#    label = "WebQuest"
#    substring = "WebQuest"
#  }
#  digimarc_webreader = {
#    label = "Digimarc MarcSpider"
#    substring = "Digimarc WebReader"
#  }
#  webreaper = {
#    label = "WebReaper"
#    substring = "WebReaper"
#  }
#  webs = {
#    label = "webs"
#    substring = "webs@"
#  }
#  webvac = {
#    label = "WebVac"
#    substring = "webvac"
#  }
#  webwalk = {
#    label = "webwalk"
#    substring = "webwalk"
#  }
#  webwalker = {
#    label = "WebWalker"
#    substring = "WebWalker"
#  }
#  webwatch = {
#    label = "WebWatch"
#    substring = "WebWatch"
#  }
#  whatuseek_winona = {
#    label = "whatUseek Winona"
#    substring = "whatUseek_winona"
#  }
#  wwwmir = {
#    label = "w3mir"
#    substring = "w3mir"
#  }
#  wolp = {
#    label = "WebStolperer"
#    substring = "WOLP"
#  }
#  xget = {
#    label = "XGET"
#    substring = "XGET"
#  }
#  nederland_zoek = {
#    label = "Nederland.zoek"
#    substring = "Nederland.zoek"
#  }
#  ia_archiver = {
#    label = "Alexa Web Search"
#    substring = "ia_archiver"
#  }
#  adsonar = {
#    label = "Quigo AdSonar Bot"
#    substring = "AdSonar Bot"
#  }
#  netsprint = {
#    label = "WP.PL"
#    substring = "NetSprint"
#  }
#  inktomi = {
#    label = "Inktomi - Onet.pl"
#    substring = "Inktomi"
#  }
#  gooru = {
#    label = "Gooru"
#    substring = "Gooru"
#  }
} # spiders