#
# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.
#
# Log format plug-in: Apache/NCSA Combined log lines carried inside syslog
# messages. Field extraction is done entirely by the parsing filter below
# (log.format.parse_only_with_filters is "true"); reporting is delegated to
# the web_server_package snapon attached in create_profile_wizard_options.
#

apache_combined_syslog_required = {

  plugin_version = "2.0"
  info.1.manufacturer = "Apache"
  info.1.device = "NCSA Combined with Syslog"
  info.1.version.1 = ""

  # 2006/07/13 - 1.0.0beta - KBB - initial creation - created from
  #   beta_apache_combined.cfg (now apache_combined.cfg)
  # 2007/03/22 - 1.0.1beta - gas - converted to autodetect_expression for a new variant
  # 2007/09/11 - 1.0.2 - KBB - renumbered per new beta policy
  # 2008-02-27 - 1.1 - GMF - Improved performance of autodetect by checking for '[ID ' before running expensive
  #   regexp. Previously, autodetecting a 1MB line took more than 5 minutes for this plug-in;
  #   it now takes < 1 second if '[ID ' is there.
  # 2013-06-06 - 2.0 - GMF - Switched to using web_server_reports snapon (disabled most plug-in functionality, because it's done in the snapon now).

  # (for now, treating "user: [ID 99999 local1.info]" as part of syslog message,
  # though it may be part of the syslog header or
  # caused by syslog logging to syslog))
  #May 5 04:04:35 www.lizard.com root: [ID 702911 local1.info] 199.99.99.99 - - [05/May/2006:04:04:35 +0800] "GET /news/bb/imgs/bull2.gif HTTP/1.1" 200 583 "http://www.lizard.com/news/bb/thu/may4s9.htm" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; InfoPath.1)"

  # 2010-10-11 - 1.0.2 - MSG - Edited info lines.

  # The name of the log format
  log.format.format_label = "Apache/NCSA Combined Log Format with Syslog"
  log.miscellaneous.log_data_type = "syslog_required"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this regular expression
  # 2008-02-27 - 1.1 - GMF
  # The cheap contains() test guards the first (expensive) regular expression;
  # the second regular expression covers the "apache:"-tagged variant.
  log.format.autodetect_expression = `
(contains(volatile.log_data_line, '[ID ') and
 matches_regular_expression(volatile.log_data_line, '([a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\])? *[^ ]+ [^ ]+ .+ \\[../.../....:..:..:...*\\] "[A-Z]* [^ ]*("| HTTP.*") [0-9]* [-0-9]* [^ ]* "[^"]*" *$'))
or
matches_regular_expression(volatile.log_data_line, '^[A-Za-z]+ +[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [^ ]+ apache: [0-9.]+ [0-9.]+ [^ ]+ [^ ]+ [^ ]+ \\[../.../....:..:..:...*\\] \\"[A-Z]* [^ ]* HTTP/[0-9.]+\\" [0-9]* [-0-9]* [^ ]* [^ ]* [^ ]* \\"[^"]+\\" \\"[^"]+\\"$')`

  # The format of dates and times in this log
  log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
  log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"

  # All log field parsing will be done using the parsing filters
  log.format.parse_only_with_filters = "true"

  # Log fields
  # Each field is declared exactly once; server_domain and authenticated_user
  # were previously repeated at the end of this group.
  log.fields = {
#    hit_type = ""
    hostname = ""
    server_domain = ""
    authenticated_user = ""
    date_time = ""
    operation = ""
#    page.type = "page"
    page = ""
    protocol = ""
    server_response = ""
    size = ""
#    referrer.type = "URL"
#    agent.type = "agent"
    referrer = ""
    agent = ""
#    spiders = ""
#    worms = ""
#    errors = ""
#    broken_links = ""
#    screen_info_hits = ""
  } # log.fields

  # Database fields
  database.fields = {
    date_time = ""
#    day_of_week = ""
#    hour_of_day = ""
#    hit_type = ""
    page = ""
#      suppress_bottom = 9
#      display_format_type = "page"
#    } # page
#    file_type = ""
#    worm = ""
#    screen_dimensions = ""
#    screen_depth = ""
    hostname = ""
#    domain_description = ""
#    location = ""
    referrer = ""
#      suppress_top = 1
#      suppress_bottom = 3
#    } # referrer
#    referrer_description = ""
#    search_engine = ""
#    search_phrase = ""
#    web_browser = ""
#    operating_system = ""
#    spider = ""
    server_domain = ""
    authenticated_user = ""
    server_response = ""
  } # database.fields

#  # Get search engine and search phrase information from the referrer field (before it gets simplified).
#  log.parsing_filters.compute_se_sp = `
#if (get_search_engine_info(referrer)) then (
#  search_engine = volatile.search_engine;
#  search_phrase = volatile.search_phrase;
#);
#`
#
#
#  # Get web browser, operating system, web browser, and spider information from the user-agent field.
#  log.parsing_filters.derive_from_user_agent = `
#get_user_agent_info(agent);
#web_browser = volatile.web_browser;
#operating_system = volatile.operating_system;
#spider = volatile.spider;
#`

  # Parsing filter. In order, it:
  #   1. strips a leading "name: [ID nnn facility.level] " prefix from v.syslog_message;
  #   2. otherwise tries the "apache:"-tagged variant and, on a match, collects and accepts an entry;
  #   3. tries the plain combined layout and, on a match, collects and accepts an entry.
  # NOTE(review): in step 3 the second capture is stored as server_domain and the
  # third as authenticated_user - confirm this matches the log layout in use.
  log.parsing_filters.parse = `
# this doesn't work if the optional part at the beginning isn't there -
# regular expression parser limitation??
#if (matches_regular_expression(v.syslog_message, '([a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\])? *([^ ]+) ([^ ]+) (.+) \\[(../.../....:..:..:..).*\\] "([A-Z]*) ([^ ]*) ?(HTTP.*)?" ([0-9]*) ([-0-9]*) "?([^ "]*)"? "([^"]*)"')) then (
if (matches_regular_expression(v.syslog_message, '[a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\] (.*)$')) then (
  v.syslog_message = $1;
);

# added new variant 1.0.1beta
# Feb 3 00:04:19 sys@sys.com apache: 186.14.11.136 186.14.11.136 anothe.co.uk - - [03/Feb/2007:00:04:19 +0000] "GET / HTTP/1.1" 200 31524 0 402 31847 "http://anothe.co.uk/index.cfm/another/homepage/id/5002" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en) AppleWebKit/418.9.1 (KHTML, like Gecko) Safari/419.3"
else if (matches_regular_expression(v.syslog_message, 'apache: [^ ]+ ([^ ]+) ([^ ]+) ([^ ]+) - \\[(../.../....:..:..:..) [-+]....\\] \\"([A-Z]+) ([^ ]+) ([A-Z]+/[0-9.]+)\\" ([^ ]+) ([^ ]+) [^ ]+ [^ ]+ [^ ]+ \\"([^"]+)\\" \\"([^"]+)\\"$')) then (
  set_collected_field('', 'hostname', $1);
  set_collected_field('', 'server_domain', $2);
  set_collected_field('', 'authenticated_user', $3);
  set_collected_field('', 'date_time', $4);
  set_collected_field('', 'operation', $5);
  set_collected_field('', 'page', $6);
  set_collected_field('', 'protocol', $7);
  set_collected_field('', 'server_response', $8);
  set_collected_field('', 'size', $9);
  set_collected_field('', 'referrer', $10);
  set_collected_field('', 'agent', $11);
  accept_collected_entry('', false);
);

if (matches_regular_expression(v.syslog_message, '^ *([^ ]+) ([^ ]+) (.+) \\[(../.../....:..:..:..).*\\] "([A-Z]*) ([^ ]*) ?(HTTP.*)?" ([0-9]*) ([-0-9]*) "?([^ "]*)"? "([^"]*)"')) then (
  set_collected_field('', 'hostname', $1);
  set_collected_field('', 'server_domain', $2);
  set_collected_field('', 'authenticated_user', $3);
  set_collected_field('', 'date_time', $4);
  set_collected_field('', 'operation', $5);
  set_collected_field('', 'page', $6);
  set_collected_field('', 'protocol', $7);
  set_collected_field('', 'server_response', $8);
  set_collected_field('', 'size', $9);
  set_collected_field('', 'referrer', $10);
  set_collected_field('', 'agent', $11);
  accept_collected_entry('', false);
)
`

  # Log Filters
#  log.filters = {
#
#    simplify_referrer = {
#      label = "$lang_admin.log_filters.simplify_referrer_label"
#      comment = "$lang_admin.log_filters.simplify_referrer_comment"
#      value = "if (referrer eq '-') then referrer = '(no referrer)' else if (matches_regular_expression(referrer, '^([^:]+://[^/]+/)')) then referrer = $1 . '(omitted)'"
#    } # simplify_referrer
#
#    internal_referrer = {
#      label = "$lang_admin.log_filters.internal_referrer_label"
#      comment = "$lang_admin.log_filters.internal_referrer_comment"
#      value = "if (contains(referrer, 'mydomain.com/')) then referrer = '(internal referrer)';"
#      disabled = true
#    } # internal_referrer
#
#    not_authenticated = {
#      label = "$lang_admin.log_filters.not_authenticated_label"
#      comment = "$lang_admin.log_filters.not_authenticated_comment"
#      value = "if (authenticated_user eq '-') then authenticated_user = '(not authenticated)';"
#    } # not_authenticated
#
#    remove_query = {
#      label = "$lang_admin.log_filters.remove_query_label"
#      comment = "$lang_admin.log_filters.remove_query_comment"
#      value = "if (contains(page, '?')) then page = substr(page, 0, index(page, '?') + 1) . '(parameters)';"
#    } # remove_query
#
#    categorize = {
#      label = "$lang_admin.log_filters.categorize_hits_label"
#      comment = "$lang_admin.log_filters.categorize_hits_comment"
#      value = `hits = 1;
#if (starts_with(server_response, '4')) then (
#  errors = 1;
#  if (server_response eq '404') then (
#    broken_links = 1;
#    hit_type = "broken link";
#  )
#  else (
#    hit_type = "error";
#  )
#)
#else if (spider ne '(not a spider)') then (
#  hit_type = "spider";
#  spiders = 1;
#)
#else if (worm ne '(not a worm)') then (
#  hit_type = "worm";
#  worms = 1;
#)
#else if (!starts_with(screen_dimensions, '(')) then (
#  hit_type = "screen info";
#  screen_info_hits = 1;
##)
#else if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then (
#  hit_type = "hit";
#)
#else (
#  hit_type = "page view";
#  page_views = 1;
#)`
#    } # categorize
#
#    set_page_for_worm = {
#      label = "$lang_admin.log_filters.set_page_for_worm_label"
#      comment = "$lang_admin.log_filters.set_page_for_worm_comment"
#      value = "if (worms == 1) then page = '(worm)';"
#    } # set_page_for_worm
#
#    strip_non_page_views = {
#      label = '$lang_admin.log_filters.strip_non_page_views_label'
#      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
#      value = "if (page_views == 0) then page = substr(page, 0, last_index(page, '/') + 1) . '(nonpage)';"
#    } # strip_non_page_views
#
#  } # log.filters

#  log.field_options = {
#
#    sessions_page_field = "page"
#    sessions_visitor_id_field = "hostname"
#    sessions_event_field = "page_views"
#
#  } # log.field_options

#  database.numerical_fields = {
#
#    hits = {
#      label = "$lang_stats.field_labels.hits"
#      default = false
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#      entries_field = true
#    } # hits
#
#    page_views = {
#      label = "$lang_stats.field_labels.page_views"
#      default = true
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # page_views
#
#    spiders = {
#      label = "$lang_stats.field_labels.spiders"
#      default = true
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # spiders
#
#    worms = {
#      label = "$lang_stats.field_labels.worms"
#      default = false
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # worms
#
#    errors = {
#      label = "$lang_stats.field_labels.errors"
#      default = false
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # errors
#
#    broken_links = {
#      label = "$lang_stats.field_labels.broken_links"
#      default = true
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # broken_links
#
#    screen_info_hits = {
#      label = "$lang_stats.field_labels.screen_info_hits"
#      default = false
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # screen_info_hits
#
#    visitors = {
#      label = "$lang_stats.field_labels.visitors"
#      default = false
#      requires_log_field = true
#      log_field = "hostname"
#      type = "unique"
#      display_format_type = "integer"
#    } # visitors
#
#    size = {
#      label = "$lang_stats.field_labels.size"
#      default = false
#      requires_log_field = true
#      log_field = "size"
#      type = "int"
#      integer_bits = 64
#      display_format_type = "bandwidth"
#    } # size
#
#  } # database.numerical_fields

  create_profile_wizard_options = {

    date_time_tracking = true
    host_tracking = true

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      hit_type = ""
      content_group = {
        page = true
        file_type = true
#        broken_links = {
#          label = "$lang_stats.menu.reports.broken_links"
#          filter = "server_response within '404'"
#          columns = {
#            0.field_name = "referrer"
#            1.field_name = "page"
#          }
#          subtable = false
#        } # broken_links
      }
      visitor_demographics_group = {
        hostname = true
        domain_description = true
        location = true
        authenticated_user = true
      }
      visitor_systems_group = {
        screen_dimensions = true
        screen_depth = true
        web_browser = true
        operating_system = true
      }
      referrer_group = {
        referrer = true
        referrer_description = true
        search_engine = true
        search_phrase = true
        search_phrase_by_search_engine = true
      }
      other_group = {
        worm = true
        spider = true
        server_domain = true
        server_response = true
      }
    } # report_groups

    snapons = {

      # Attach a web_server_package snapon
      web_server_package = {
        snapon = "web_server_package"
        name = "web_server_package"
        label = "$lang_admin.snapons.web_server_package.label"

        # Maps the snapon's parameters onto the log fields declared above.
        parameters = {
          user_agent_field.parameter_value = "agent"
          page_field.parameter_value = "page"
          client_ip_field.parameter_value = "hostname"
          server_response_field.parameter_value = "server_response"
          referrer_field.parameter_value = "referrer"
          authenticated_user.parameter_value = "authenticated_user"
          hits_field = {
            parameter_value = "$lang_stats.field_labels.hits"
            final_node_name = "hits"
          } # hits
          visitors_field = {
            parameter_value = "$lang_stats.field_labels.visitors"
            final_node_name = "visitors"
          } # visitors_field
          sc_bytes_field = {
            parameter_value = "$lang_stats.field_labels.size"
            final_node_name = "size"
          } # sc_bytes_field
          cs_bytes_field = {
            parameter_value = "none"
            final_node_name = "none"
          } # cs_bytes_field
        } # parameters

      } # web_server_package

    } # snapons

  } # create_profile_wizard_options

} # apache_combined_syslog_required