# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in for Apache/NCSA Combined access log lines that arrive
# wrapped in a syslog line. Two line layouts are recognized (see the
# autodetect expression and the "parse" parsing filter below):
#   1. <syslog header> [tag: [ID n facility.level]] <NCSA combined line>
#   2. <syslog header> apache: ip ip vhost user - [date] "request" status size n n n "referrer" "agent"
apache_combined_syslog_required = {

  plugin_version = "1.1"
  info.1.manufacturer = "Apache"
  info.1.device = "NCSA Combined with Syslog"
  info.1.version.1 = ""

  # 2006/07/13 - 1.0.0beta - KBB - initial creation - created from
  #              beta_apache_combined.cfg (now apache_combined.cfg)
  # 2007/03/22 - 1.0.1beta - gas - converted to autodetect_expression for a new variant
  # 2007/09/11 - 1.0.2 - KBB - renumbered per new beta policy
  # 2008-02-27 - 1.1 - GMF - Improved performance of autodetect by checking for '[ID ' before running expensive
  #              regexp. Previously, autodetecting a 1MB line took more than 5 minutes for this plug-in;
  #              it now takes < 1 second if '[ID ' is there.
  #              (for now, treating "user: [ID 99999 local1.info]" as part of syslog message,
  #              though it may be part of the syslog header or
  #              caused by syslog logging to syslog)
  #May 5 04:04:35 www.lizard.com root: [ID 702911 local1.info] 199.99.99.99 - - [05/May/2006:04:04:35 +0800] "GET /news/bb/imgs/bull2.gif HTTP/1.1" 200 583 "http://www.lizard.com/news/bb/thu/may4s9.htm" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; InfoPath.1)"
  # 2010-10-11 - 1.0.2 - MSG - Edited info lines.

  # The name of the log format
  log.format.format_label = "Apache/NCSA Combined Log Format with Syslog"
  log.miscellaneous.log_data_type = "syslog_required"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this expression.
  # The cheap contains() test guards the first (expensive) regular expression;
  # the second regular expression recognizes the "apache:" variant.
  # 2008-02-27 - 1.1 - GMF
  log.format.autodetect_expression = `
    (contains(volatile.log_data_line, '[ID ') and
     matches_regular_expression(volatile.log_data_line, '([a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\])? *[^ ]+ [^ ]+ .+ \\[../.../....:..:..:...*\\] "[A-Z]* [^ ]*("| HTTP.*") [0-9]* [-0-9]* [^ ]* "[^"]*" *$')) or
    matches_regular_expression(volatile.log_data_line, '^[A-Za-z]+ +[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [^ ]+ apache: [0-9.]+ [0-9.]+ [^ ]+ [^ ]+ [^ ]+ \\[../.../....:..:..:...*\\] \\"[A-Z]* [^ ]* HTTP/[0-9.]+\\" [0-9]* [-0-9]* [^ ]* [^ ]* [^ ]* \\"[^"]+\\" \\"[^"]+\\"$')`

  # The format of dates and times in this log
  log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
  log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"

  # All log field parsing will be done using the parsing filters
  log.format.parse_only_with_filters = "true"

  # Log fields
  # (server_domain and authenticated_user were previously listed a second
  # time at the end of this group; each field is now declared once.)
  log.fields = {
    hit_type = ""
    hostname.type = "host"
    server_domain = ""
    authenticated_user = ""
    date_time = ""
    operation = ""
    page.type = "page"
    protocol = ""
    server_response = ""
    size = ""
    referrer.type = "URL"
    agent.type = "agent"
    spiders = ""
    worms = ""
    errors = ""
    broken_links = ""
    screen_info_hits = ""
  } # log.fields

  # Database fields
  database.fields = {
    date_time = ""
    day_of_week = ""
    hour_of_day = ""
    hit_type = ""
    page = {
      suppress_bottom = 9
      display_format_type = "page"
    } # page
    file_type = ""
    worm = ""
    screen_dimensions = ""
    screen_depth = ""
    hostname = ""
    domain_description = ""
    location = ""
    referrer = {
      suppress_top = 1
      suppress_bottom = 3
    } # referrer
    referrer_description = ""
    search_engine = ""
    search_phrase = ""
    web_browser = ""
    operating_system = ""
    spider = ""
    server_domain = ""
    authenticated_user = ""
    server_response = ""
  } # database.fields

  # Get search engine and search phrase information from the referrer field (before it gets simplified).
  log.parsing_filters.compute_se_sp = `
    if (get_search_engine_info(referrer)) then (
      search_engine = volatile.search_engine;
      search_phrase = volatile.search_phrase;
    );
  `

  # Get web browser, operating system, and spider information from the user-agent field.
  log.parsing_filters.derive_from_user_agent = `
    get_user_agent_info(agent);
    web_browser = volatile.web_browser;
    operating_system = volatile.operating_system;
    spider = volatile.spider;
  `

  # Parse v.syslog_message into the log fields:
  #   - a leading "tag: [ID n facility.level] " prefix, when present, is stripped first;
  #   - otherwise the "apache:" variant is tried and, on a match, accepted as an entry;
  #   - finally the plain NCSA combined layout is tried on whatever message remains.
  log.parsing_filters.parse = `
    # this doesn't work if the optional part at the beginning isn't there -
    # regular expression parser limitation??
    #if (matches_regular_expression(v.syslog_message, '([a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\])? *([^ ]+) ([^ ]+) (.+) \\[(../.../....:..:..:..).*\\] "([A-Z]*) ([^ ]*) ?(HTTP.*)?" ([0-9]*) ([-0-9]*) "?([^ "]*)"? "([^"]*)"')) then (
    if (matches_regular_expression(v.syslog_message, '[a-zA-Z_]+: \\[ID [0-9]+ [^ ]+\\] (.*)$')) then (
      v.syslog_message = $1;
    )
    # added new variant 1.0.1beta
    # Feb 3 00:04:19 sys@sys.com apache: 186.14.11.136 186.14.11.136 anothe.co.uk - - [03/Feb/2007:00:04:19 +0000] "GET / HTTP/1.1" 200 31524 0 402 31847 "http://anothe.co.uk/index.cfm/another/homepage/id/5002" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en) AppleWebKit/418.9.1 (KHTML, like Gecko) Safari/419.3"
    # (no semicolon between the closing parenthesis above and this else: the else must stay attached to its if)
    else if (matches_regular_expression(v.syslog_message, 'apache: [^ ]+ ([^ ]+) ([^ ]+) ([^ ]+) - \\[(../.../....:..:..:..) [-+]....\\] \\"([A-Z]+) ([^ ]+) ([A-Z]+/[0-9.]+)\\" ([^ ]+) ([^ ]+) [^ ]+ [^ ]+ [^ ]+ \\"([^"]+)\\" \\"([^"]+)\\"$')) then (
      set_collected_field('', 'hostname', $1);
      set_collected_field('', 'server_domain', $2);
      set_collected_field('', 'authenticated_user', $3);
      set_collected_field('', 'date_time', $4);
      set_collected_field('', 'operation', $5);
      set_collected_field('', 'page', $6);
      set_collected_field('', 'protocol', $7);
      set_collected_field('', 'server_response', $8);
      set_collected_field('', 'size', $9);
      set_collected_field('', 'referrer', $10);
      set_collected_field('', 'agent', $11);
      accept_collected_entry('', false);
    );

    if (matches_regular_expression(v.syslog_message, '^ *([^ ]+) ([^ ]+) (.+) \\[(../.../....:..:..:..).*\\] "([A-Z]*) ([^ ]*) ?(HTTP.*)?" ([0-9]*) ([-0-9]*) "?([^ "]*)"? "([^"]*)"')) then (
      set_collected_field('', 'hostname', $1);
      set_collected_field('', 'server_domain', $2);
      set_collected_field('', 'authenticated_user', $3);
      set_collected_field('', 'date_time', $4);
      set_collected_field('', 'operation', $5);
      set_collected_field('', 'page', $6);
      set_collected_field('', 'protocol', $7);
      set_collected_field('', 'server_response', $8);
      set_collected_field('', 'size', $9);
      set_collected_field('', 'referrer', $10);
      set_collected_field('', 'agent', $11);
      accept_collected_entry('', false);
    )
  `

  # Log Filters
  log.filters = {

    # Replace '-' with '(no referrer)'; otherwise keep only scheme://host/ of the referrer.
    simplify_referrer = {
      label = "$lang_admin.log_filters.simplify_referrer_label"
      comment = "$lang_admin.log_filters.simplify_referrer_comment"
      value = "if (referrer eq '-') then referrer = '(no referrer)' else if (matches_regular_expression(referrer, '^([^:]+://[^/]+/)')) then referrer = $1 . '(omitted)'"
    } # simplify_referrer

    # Disabled by default; 'mydomain.com/' is a placeholder to be edited per site.
    internal_referrer = {
      label = "$lang_admin.log_filters.internal_referrer_label"
      comment = "$lang_admin.log_filters.internal_referrer_comment"
      value = "if (contains(referrer, 'mydomain.com/')) then referrer = '(internal referrer)';"
      disabled = true
    } # internal_referrer

    not_authenticated = {
      label = "$lang_admin.log_filters.not_authenticated_label"
      comment = "$lang_admin.log_filters.not_authenticated_comment"
      value = "if (authenticated_user eq '-') then authenticated_user = '(not authenticated)';"
    } # not_authenticated

    # Replace everything after the '?' in the page with '(parameters)'.
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(page, '?')) then page = substr(page, 0, index(page, '?') + 1) . '(parameters)';"
    } # remove_query

    # Count every entry as a hit, then assign exactly one hit_type, tested in
    # this order: error / broken link (4xx response), spider, worm, screen
    # info, plain hit (listed image/style/script file types), page view.
    categorize = {
      label = "$lang_admin.log_filters.categorize_hits_label"
      comment = "$lang_admin.log_filters.categorize_hits_comment"
      value = `hits = 1;
if (starts_with(server_response, '4')) then (
  errors = 1;
  if (server_response eq '404') then (
    broken_links = 1;
    hit_type = "broken link";
  )
  else (
    hit_type = "error";
  )
)
else if (spider ne '(not a spider)') then (
  hit_type = "spider";
  spiders = 1;
)
else if (worm ne '(not a worm)') then (
  hit_type = "worm";
  worms = 1;
)
else if (!starts_with(screen_dimensions, '(')) then (
  hit_type = "screen info";
  screen_info_hits = 1;
)
else if ((file_type eq 'JPEG') or
         (file_type eq 'JPG') or
         (file_type eq 'GIF') or
         (file_type eq 'ICO') or
         (file_type eq 'PNG') or
         (file_type eq 'CSS') or
         (file_type eq 'SWF') or
         (file_type eq 'JS')) then (
  hit_type = "hit";
)
else (
  hit_type = "page view";
  page_views = 1;
)`
    } # categorize

    set_page_for_worm = {
      label = "$lang_admin.log_filters.set_page_for_worm_label"
      comment = "$lang_admin.log_filters.set_page_for_worm_comment"
      value = "if (worms == 1) then page = '(worm)';"
    } # set_page_for_worm

    # For entries that are not page views, keep only the directory part of the page.
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then page = substr(page, 0, last_index(page, '/') + 1) . '(nonpage)';"
    } # strip_non_page_views

  } # log.filters

  log.field_options = {
    sessions_page_field = "page"
    sessions_visitor_id_field = "hostname"
    sessions_event_field = "page_views"
  } # log.field_options

  database.numerical_fields = {

    hits = {
      label = "$lang_stats.field_labels.hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
      entries_field = true
    } # hits

    page_views = {
      label = "$lang_stats.field_labels.page_views"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # page_views

    spiders = {
      label = "$lang_stats.field_labels.spiders"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # spiders

    worms = {
      label = "$lang_stats.field_labels.worms"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # worms

    errors = {
      label = "$lang_stats.field_labels.errors"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # errors

    broken_links = {
      label = "$lang_stats.field_labels.broken_links"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # broken_links

    screen_info_hits = {
      label = "$lang_stats.field_labels.screen_info_hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # screen_info_hits

    # Unique count over the hostname log field.
    visitors = {
      label = "$lang_stats.field_labels.visitors"
      default = false
      requires_log_field = true
      log_field = "hostname"
      type = "unique"
      display_format_type = "integer"
    } # visitors

    size = {
      label = "$lang_stats.field_labels.size"
      default = false
      requires_log_field = true
      log_field = "size"
      type = "float"
      display_format_type = "bandwidth"
    } # size

  } # database.numerical_fields

  create_profile_wizard_options = {

    date_time_tracking = true
    host_tracking = true

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      hit_type = ""
      content_group = {
        page = true
        file_type = true
        # Custom report: referrer and page for entries whose response is 404.
        broken_links = {
          label = "$lang_stats.menu.reports.broken_links"
          filter = "server_response within '404'"
          columns = {
            0.field_name = "referrer"
            1.field_name = "page"
          }
          subtable = false
        } # broken_links
      }
      visitor_demographics_group = {
        hostname = true
        domain_description = true
        location = true
        authenticated_user = true
      }
      visitor_systems_group = {
        screen_dimensions = true
        screen_depth = true
        web_browser = true
        operating_system = true
      }
      referrer_group = {
        referrer = true
        referrer_description = true
        search_engine = true
        search_phrase = true
        search_phrase_by_search_engine = true
      }
      other_group = {
        worm = true
        spider = true
        server_domain = true
        server_response = true
      }
    } # report_groups

  } # create_profile_wizard_options

} # apache_combined_syslog_required