# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Sawmill log format plug-in for Squid access logs delivered through syslog.
# All field extraction is done by the parsing filter below (log.parsing_filters.parse),
# which works on v.syslog_message.
squid_syslog_required = {

  plugin_version = "1.1.8"
  info.1.manufacturer = "Squid"
  info.1.device = "Proxy server"
  info.1.version = ""
  info.2.manufacturer = "Squid"
  info.2.device = "Web cache daemon"
  # Version of the second info entry (this key was previously mis-numbered as info.1.version,
  # which redefined entry 1 and left entry 2 without a version).
  info.2.version = ""

  # 2006/06/19: 1.0.1: KBB - added geographic location field
  # 2008/11/29: 1.1: GMF - Improved performance by switching to parsing filters
  # 2009/01/21: 1.1.1: GMF - Added support for space in action field.
  # 2009/02/04: 1.1.2: GMF - Added support for Unix Syslog variants, where leading timestamp should be
  #                          extracted as date and time, and leading IP should be ignored.
  # 2009/04/22: 1.1.3: GMF - Added simplify_url filter
  # 2009/05/29: 1.1.4: MSG - Made the leading space in the first parsing filter optional
  # 2009/09/21: 1.1.5: gas - added support for possibly a new variant:
  #   - Jul 20 06:27:53 10.28.4.28 squid[2418]: 1248064184.883 1586 192.168.26.52 TCP_CLIENT_REFRESH_MISS/200 1618 GET ftp://patch@update.something.com/Updates/1.1.0/Unix/ServicePacks/solaris/ - DIRECT/123.123.123.123 text/html
  # 2009/09/21: 1.1.6: gas - fixed bug in new variant support (elapsed field is space padded up to 6 chars)
  # 2010/09/14: 1.1.7: MSG - added support for another variant: without the 'squid[ ]':
  #   1284479376.447: 75 190.81.57.202 TCP_TUNNELED/200 49 CONNECT tcp://login.icq.com:443/ - DIRECT/- -
  # 2011/02/02: 1.1.8: GMF - Changed "UC San Diego" to "Squid"

  # The name of the log format
  log.format.format_label = "Squid Log Format"
  log.miscellaneous.log_data_type = "syslog_required"
  log.miscellaneous.log_format_type = "proxy_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "[0-9.]* [A-Z_ ]*/[0-9]* [0-9]* [A-Z]* [^ ]* [^ ]* [A-Z_]*/[^ ]* [^ ]* *"

  # All log field parsing will be done using the parsing filters
  log.format.parse_only_with_filters = "true"

  # Log fields
  # Every index/subindex is 0 because the fields are populated by set_collected_field()
  # calls in the parsing filter rather than by positional parsing.
  log.fields = {
    source_ip = {
      label = "$lang_stats.field_labels.source_ip"
      type = "host"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # source_ip
    action = {
      label = "$lang_stats.field_labels.action"
      type = "flat"
      index = 0
      subindex = 0
    } # action
    server_response = {
      label = "$lang_stats.field_labels.server_response"
      type = "response"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # server_response
    size = {
      label = "$lang_stats.field_labels.size"
      type = "size"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # size
    operation = {
      label = "$lang_stats.field_labels.operation"
      type = "flat"
      index = 0
      subindex = 0
    } # operation
    url = {
      label = "$lang_stats.field_labels.url"
      type = "page"
      index = 0
      subindex = 0
      hierarchy_dividers = "/?"
      left_to_right = "true"
      leading_divider = "true"
    } # url
    username = {
      label = "$lang_stats.field_labels.username"
      type = "flat"
      index = 0
      subindex = 0
    } # username
    hierarchy = {
      label = "$lang_stats.field_labels.hierarchy"
      type = "flat"
      index = 0
      subindex = 0
    } # hierarchy
    server_ip = {
      label = "$lang_stats.field_labels.server_ip"
      type = "flat"
      index = 0
      subindex = 0
    } # server_ip
    mime_type = {
      label = "$lang_stats.field_labels.mime_type"
      type = "flat"
      index = 0
      subindex = 0
    } # mime_type
  } # log.fields

  # This plug-in is intended to support Squid 1.1+ format.
  # From the Squid docs:
  #   The native format is different for different major versions of Squid. For Squid-1.0 it is:
  #     time elapsed remotehost code/status/peerstatus bytes method URL
  #   For Squid-1.1, the information from the hierarchy.log was moved into access.log. The format is:
  #     time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost type
  #   For Squid-2 the columns stay the same, though the content within may change a little.

  # Log Parsing Filters
  # Steps, in order:
  #   1. strip a leading "squid[pid]: " tag / timestamp / elapsed prefix, or else a bare leading integer;
  #   2. for the unix-syslog variant, take date/time from the epoch timestamp and drop the extra leading IP;
  #   3. split the remaining message into the ten log fields and accept the entry.
  log.parsing_filters.parse = `
# Chop off the squid[2418]:, timestamp and elapsed sections from the start of v.syslog_message e.g.
#   squid[2418]: 1248064184.883 1586 192.168.26.52 TCP_CLIENT_REFRESH_MISS/200 1618 GET ftp://patch@update.something.com/Updates/1.1.0/Unix/ServicePacks/solaris/ - DIRECT/123.123.123.123 text/html
# 2010/09/14: 1.1.7: MSG - added support for another variant: without the 'squid[ ]' e.g.:
#   1284479376.447: 75 190.81.57.202 TCP_TUNNELED/200 49 CONNECT tcp://login.icq.com:443/ - DIRECT/- -
if (matches_regular_expression(v.syslog_message, '^(squid\\[[0-9]+\\]: |[0-9])[0-9.:]+ +[0-9]+ (.*)$')) then (
  #echo("v.syslog_message = : " . v.syslog_message);
  v.syslog_message = $2;
);

# Chop off leading spaces and integer e.g.
#   922293106.011 2892 157.150.114.102 TCP_MISS/302 501 GET http://somewhere.com/image.html - DIRECT/somewhere.com text/html [Referer: http://elsewhere.com/etc.html ...%0d%0aProxy-Connection: Keep-Alive%0d%0aUser-Agent: Mozilla/4.08 %5ben%5d (Win95%3b I %3bNav)%0d%0aHost: somewhere.com%0d%0aAccept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg image/png%0d%0aAccept-Encoding: gzip%0d%0aAccept-Language: en%0d%0aAccept-Charset: iso-8859-1,*,utf-8%0d%0aCookie: PreferencesID=e4Ru7fKzaDgRl5xHXaUzXq%3b MLCursor=iqK%252E%2525M%253Fh%252Ea%2525%2540cI2j7O%252BaD%2540ti9%252Cjq9%255E%2523R%252C%255EW%252Fea996%2523%255FWovJ%2525rc5Q%252A%255EF%253B%253A%0d%0a] [HTTP/1.1 302 Found%0d%0aDate: Wed, 24 Mar 1999 16:41:09 GMT%0d%0aServer: Apache/1.3.3 (Unix)%0d%0aLocation: http://12.34.56.78/dir/etc.gif%0d%0aConnection: close%0d%0aContent-Type: text/html%0d%0a%0d]
else if (matches_regular_expression(v.syslog_message, "^ *[0-9]+ +([^ ].*)$")) then
  v.syslog_message = $1;

# 2009-02-04 - GMF - Handle lines like this:
#   Dec 31 06:00:59 AN_SQUID_VIP_LOG 1230703259.275 9 12.34.56.78 98.76.54.32 TCP_MISS/200 1356 GET / - DIRECT/23.45.67.89 -
# These make it through unix_syslog as "1230703259.275 9 12.34.56.78 98.76.54.32", so chop off the leading fields
if (matches_regular_expression(v.syslog_message, "^([0-9]+[.][0-9][0-9][0-9]) [0-9]+ [0-9.]+ ([0-9.]+ .*)$")) then (
  set_collected_field('', 'date', normalize_date($1, 'seconds_since_jan1_1970'));
  set_collected_field('', 'time', normalize_time($1, 'seconds_since_jan1_1970'));
  v.syslog_message = $2;
);

# Split what is left into the log fields:
#   source_ip action/server_response size operation url username hierarchy/server_ip mime_type
if (matches_regular_expression(v.syslog_message, '^([^ ]*) ([A-Z_ ]*)/([0-9]*) ([0-9]*) ([A-Z]*) ([^ ]*) ([^ ]*) ([A-Z_]*)/([^ ]*) ([^ ]*)')) then (
  set_collected_field('', 'source_ip', $1);
  set_collected_field('', 'action', $2);
  set_collected_field('', 'server_response', $3);
  set_collected_field('', 'size', $4);
  set_collected_field('', 'operation', $5);
  set_collected_field('', 'url', $6);
  set_collected_field('', 'username', $7);
  set_collected_field('', 'hierarchy', $8);
  set_collected_field('', 'server_ip', $9);
  set_collected_field('', 'mime_type', $10);
  accept_collected_entry('', false);
);
`

  # Database fields
  database.fields = {
    source_ip = {
      label = "$lang_stats.field_labels.source_ip"
      log_field = "source_ip"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # source_ip
    location = ""
    url = {
      label = "$lang_stats.field_labels.url"
      log_field = "url"
      type = "string"
      suppress_top = 1
      suppress_bottom = 3
    } # url
    file_type = {
      label = "$lang_stats.field_labels.file_type"
      log_field = "file_type"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # file_type
    worm = {
      label = "$lang_stats.field_labels.worm"
      log_field = "worm"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # worm
    server_response = {
      label = "$lang_stats.field_labels.server_response"
      log_field = "server_response"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # server_response
    action = {
      label = "$lang_stats.field_labels.action"
      log_field = "action"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # action
    operation = {
      label = "$lang_stats.field_labels.operation"
      log_field = "operation"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # operation
    username = {
      label = "$lang_stats.field_labels.username"
      log_field = "username"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # username
    server_ip = {
      label = "$lang_stats.field_labels.server_ip"
      log_field = "server_ip"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # server_ip
    hierarchy = {
      label = "$lang_stats.field_labels.hierarchy"
      log_field = "hierarchy"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # hierarchy
    mime_type = {
      label = "$lang_stats.field_labels.mime_type"
      log_field = "mime_type"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # mime_type
  } # database.fields

  # Log Filters
  log.filters = {
    # Replaces everything after the first '?' in the URL with the literal "(parameters)".
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(url, '?')) then url = substr(url, 0, index(url, '?') + 1) . '(parameters)';"
    } # remove_query
    # Keeps only the scheme://host/ prefix of the URL and appends the literal "(omitted)".
    simplify_url = {
      label = "$lang_admin.log_filters.simplify_url_label"
      comment = "$lang_admin.log_filters.simplify_url_comment"
      value = "if (matches_regular_expression(url, '^([^:]+://[^/]+/)')) then url = $1 . '(omitted)'"
    } # simplify_url
    # Sets page_views to 0 for the listed image/style/script file types, and to 1 for everything else.
    detect_page_views = {
      label = '$lang_admin.log_filters.detect_page_views_label'
      comment = '$lang_admin.log_filters.detect_page_views_comment'
      value = "if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then page_views = 0; else page_views = 1;"
    } # detect_page_views
    # For entries with page_views == 0, replaces the part of the URL after the last '/' with "(nonpage)".
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then url = substr(url, 0, last_index(url, '/') + 1) . '(nonpage)';"
    } # strip_non_page_views
    # Sets hits to 1 for each entry.
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'hits = 1;'
    } # mark_entry
  } # log.filters

  log.field_options = {
    sessions_page_field = "url"
    sessions_visitor_id_field = "source_ip"
    sessions_event_field = "page_views"
  } # log.field_options

  database.numerical_fields = {
    hits = {
      label = "$lang_stats.field_labels.hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
      entries_field = true
    } # hits
    page_views = {
      label = "$lang_stats.field_labels.page_views"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # page_views
    visitors = {
      label = "$lang_stats.field_labels.visitors"
      default = false
      requires_log_field = true
      log_field = "source_ip"
      type = "unique"
      display_format_type = "integer"
    } # visitors
    size = {
      label = "$lang_stats.field_labels.size"
      default = false
      requires_log_field = true
      log_field = "size"
      type = "float"
      display_format_type = "bandwidth"
    } # size
  } # database.numerical_fields

  create_profile_wizard_options = {

    host_tracking = true

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      content_group = {
        hierarchy = true
        url = true
        file_type = true
        mime_type = true
      } # content_group
      source_group = {
        source_ip = true
        location = true
        username = true
      } # source_group
      server_group = {
        server_response = true
        server_ip = true
      } # server_group
      other_group = {
        logging_device = true
        action = true
        operation = true
        worm = true
      } # other_group
    } # report_groups

  } # create_profile_wizard_options

  not_supported = {
  } # not_supported

} # squid_syslog_required