# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in definition for Squid access logs ("Squid Log Format").
# The log data type is "syslog_optional" and all field extraction is done by the
# parsing filter in log.parsing_filters.parse (parse_only_with_filters is "true").

squid_syslog_required = {

  plugin_version = "1.4.5"
  info.1.manufacturer = "Squid"
  info.1.device = "Proxy server"
  info.1.version = ""
  info.2.manufacturer = "Squid"
  info.2.device = "Web cache daemon"
  info.2.version = ""

  # Change history
  # 2006/06/19: 1.0.1: KBB - added geographic location field
  # 2008/11/29: 1.1: GMF - Improved performance by switching to parsing filters
  # 2009/01/21: 1.1.1: GMF - Added support for space in action field.
  # 2009/02/04: 1.1.2: GMF - Added support for Unix Syslog variants, where leading timestamp should be
  #                          extracted as date and time, and leading IP should be ignored.
  # 2009/04/22: 1.1.3: GMF - Added simplify_url filter
  # 2009/05/29: 1.1.4: MSG - Made the leading space in the first parsing filter optional
  # 2009/09/21: 1.1.5: gas - added support for possibly a new variant:
  #  - Jul 20 06:27:53 10.28.4.28 squid[2418]: 1248064184.883 1586 192.168.26.52 TCP_CLIENT_REFRESH_MISS/200 1618 GET ftp://patch@update.something.com/Updates/1.1.0/Unix/ServicePacks/solaris/ - DIRECT/123.123.123.123 text/html
  # 2009/09/21: 1.1.6: gas - fixed bug in new variant support (elapsed field is space padded up to 6 chars)
  # 2010/09/14: 1.1.7: MSG - added support for another variant: without the 'squid[ ]':
  #  1284479376.447: 75 190.81.57.202 TCP_TUNNELED/200 49 CONNECT tcp://login.icq.com:443/ - DIRECT/- -
  # 2011/02/02: 1.1.8: GMF - Changed "UC San Diego" to "Squid"
  # 2011-05-25: 1.1.8: GMF - Set server_ip to the host field (appropriate for forward proxy)
  # 2012-01-18: 1.1.9: MSG - edited the "Chop off the squid" parsing filter to ignore a leading field "logger:"
  # 2012-01-23: 1.2: GMF - Added top_level_domain and gateway_reports snapons
  # 2012-03-02: 1.2.1: GMF - Added support for "(squid): " variant
  # 2012-06-01: 1.3: GMF - Added support for optional "unix syslog" section at start of message; added support for "squid:" without [N]; added page_views sort for gateway reports.
  # 2012-09-20: 1.4: MSG - Added support for lines with cpu field before the time stamp.
  # 2012-11-21: 1.4.1: GMF - Added support for IPv6 addresses
  # 2013-02-06: 1.4.2: GMF - Removed add_standard_reports, which are now done in gateway_reports
  # 2013-02-15: 1.4.3: GMF - Changed user_field for gateway_report to username (ThreadID:1283588)
  # 2013-07-13: 1.4.4: GMF - Reduced breadth of the "squid:" remover, which was chopping off timestamps in some cases, causing lines to be ignored.
  # 2013-07-13: 1.4.5: GMF - Added support for optional extra field before DIRECT/* [ThreadID:1293647]
  # NOTE(review): the in-line notes for the ThreadID:1293647 change are dated 2013-09-20, not 2013-07-13 - confirm which date is right.

  # The name of the log format
  log.format.format_label = "Squid Log Format"
  log.miscellaneous.log_data_type = "syslog_optional"
  log.miscellaneous.log_format_type = "proxy_server"

  # The log is in this format if any of the first ten lines match this regular expression
  # 2013-09-20 - GMF - (original note was truncated after "A"; presumably: added the second
  # expression, which allows one extra field before DIRECT/, as in the line below)
  # 14/Jul/2013:04:03:55 +0700 102313 192.168.0.51 TCP_MISS/200 16029 CONNECT www.facebook.com:443 - - DIRECT/31.13.79.1 - 192.168.0.51 8080
  log.format.autodetect_expression = `
    matches_regular_expression(volatile.log_data_line, "[0-9.a-f:]* [A-Z_ ]*/[0-9]* [0-9]* [A-Z]* [^ ]* [^ ]* [A-Z_]*/[^ ]* [^ ]* *") or
    matches_regular_expression(volatile.log_data_line, "[0-9.a-f:]* [A-Z_ ]*/[0-9]* [0-9]* [A-Z]* [^ ]* [^ ]* [^ ]+ [A-Z_]*/[^ ]* [^ ]* *")
  `

  # All log field parsing will be done using the parsing filters
  log.format.parse_only_with_filters = "true"

  # Log fields
  # Every field uses index 0 / subindex 0; values are assigned by name with
  # set_collected_field() in the parsing filter below.
  log.fields = {

    source_ip = {
      label = "$lang_stats.field_labels.source_ip"
      # For a forward proxy, it makes more sense to have source_ip be flat, and server_ip be "host". For reverse proxy, the opposite. Forward is more common, so leaving it this way.
      # type = "host"
      type = "flat"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # source_ip

    action = {
      label = "$lang_stats.field_labels.action"
      type = "flat"
      index = 0
      subindex = 0
    } # action

    server_response = {
      label = "$lang_stats.field_labels.server_response"
      type = "response"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # server_response

    size = {
      label = "$lang_stats.field_labels.size"
      type = "size"
      index = 0
      subindex = 0
      hierarchy_dividers = ""
      left_to_right = false
      leading_divider = "false"
    } # size

    operation = {
      label = "$lang_stats.field_labels.operation"
      type = "flat"
      index = 0
      subindex = 0
    } # operation

    url = {
      label = "$lang_stats.field_labels.url"
      type = "page"
      index = 0
      subindex = 0
      hierarchy_dividers = "/?"
      left_to_right = "true"
      leading_divider = "true"
    } # url

    username = {
      label = "$lang_stats.field_labels.username"
      type = "flat"
      index = 0
      subindex = 0
    } # username

    hierarchy = {
      label = "$lang_stats.field_labels.hierarchy"
      type = "flat"
      index = 0
      subindex = 0
    } # hierarchy

    server_ip = {
      label = "$lang_stats.field_labels.server_ip"
      # For a forward proxy, it makes more sense to have source_ip be flat, and server_ip be "host". For reverse proxy, the opposite. Forward is more common, so leaving it this way.
      # type = "flat"
      type = "host"
      index = 0
      subindex = 0
    } # server_ip

    mime_type = {
      label = "$lang_stats.field_labels.mime_type"
      type = "flat"
      index = 0
      subindex = 0
    } # mime_type

    # Filled in only by the "(name-N): " branch of the parsing filter.
    cpu = {
      label = "$lang_stats.field_labels.cpu"
      type = "flat"
      index = 0
      subindex = 0
    } # cpu

  } # log.fields

  # This plug-in is intended to support Squid 1.1+ format.
  # From the Squid docs:
  # The native format is different for different major versions of Squid. For Squid-1.0 it is:
  #   time elapsed remotehost code/status/peerstatus bytes method URL
  # For Squid-1.1, the information from the hierarchy.log was moved into access.log. The format is:
  #   time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost type
  # For Squid-2 the columns stay the same, though the content within may change a little.

  # Log Parsing Filters
  # The filter progressively strips optional prefixes from v.syslog_message and then
  # splits the remainder into the log fields with one of two regular expressions.
  log.parsing_filters.parse = `

    # Chop off extra Unix-Syslog data in message, e.g.:
    #2012-05-29 18:38:23 Local4.Info 12.34.56.78 May 29 18:37:55 AHS-InstaGate-PRO squid: 1338331075.486 14 10.29.31.18 TCP_MISS/200 2891 GET http://safebrowsing-cache.google.com/safebrowsing/rd/ChNnb29nLW1hbHdhcmUtc2hhdmFyEAAYqfEEINDxBCoIsjgBAP___38yBqk4AQD_AQ - DIRECT/173.194.43.1 application/vnd.google.safebrowsing-chunk allow: uncat -
    if (matches_regular_expression(v.syslog_message, '^[A-Z][a-z][a-z] [0-9 ]?[0-9] [0-9:]+ [^ ]+ (.*)$')) then (
      v.syslog_message = $1;
    );

    # Chop off the squid[2418]:, timestamp and elapsed sections from the start of v.syslog_message e.g.
    # squid[2418]: 1248064184.883 1586 192.168.26.52 TCP_CLIENT_REFRESH_MISS/200 1618 GET ftp://patch@update.something.com/Updates/1.1.0/Unix/ServicePacks/solaris/ - DIRECT/123.123.123.123 text/html
    # 2010/09/14: 1.1.7: MSG - added support for another variant: without the 'squid[ ]' e.g.:
    # 1284479376.447: 75 190.81.57.202 TCP_TUNNELED/200 49 CONNECT tcp://login.icq.com:443/ - DIRECT/- -
    # 2012-01-18 - MSG - edited the "Chop off the squid" parsing filter to ignore a leading field "logger:"
    # Oct 30 00:01:06 72.159.148.3 logger: 1319947111.518 74 10.79.9.40 TCP_MISS/302 563 POST http://tools.google.com/service/update2 - DIRECT/tools.google.com text/html
    # Or handle this one (with literal "(squid): " before timestamp)
    # Mar 1 14:06:26 12.34.56.78 (squid): 1330628786.100 0 10.94.7.48 TCP_IMS_HIT/304 320 GET http://somesite.com/page.html - NONE/- text/css [] []
    # 2013-07-13 - GMF - The expression that was here (now commented) was just too strong. It would pull off the initial timestamp field, which matched [0-9][0-9.a-f:]+. Not sure what it was trying to accomplish with that, but it's gotta be looser. Cut it back to just the squid/logger ones.
    #if (matches_regular_expression(v.syslog_message, '^(squid\\[[0-9]+\\]: |squid: |logger: |[(]squid[)]: |[0-9])[0-9.a-f:]+ +[0-9]+ (.*)$')) then (
    if (matches_regular_expression(v.syslog_message, '^(squid\\[[0-9]+\\]: |squid: |logger: |[(]squid[)]: )(.*)$')) then (
      #echo("v.syslog_message = : " . v.syslog_message);
      v.syslog_message = $2;
    );

    # For lines with cpu field ahead of the time stamp
    # Sep 6 07:15:44 webcache (squid-1): 1346930144.485 560 10.1.100.23 TCP_MISS/200 337 POST http://209.18.41.132/idle/GUJmdz02wSLe203M/316 - HIER_DIRECT/etcetera
    # The parenthesised name (e.g. "squid-1") is stored in the cpu field; the timestamp and
    # elapsed columns that follow it are dropped.
    # NOTE(review): the doubled ':' in the [0-9.a-f::] character class is redundant.
    if (matches_regular_expression(v.syslog_message, '^[(]([A-Za-z]+-[0-9]+)[)]: [0-9.a-f::]+ +[0-9]+ (.*)$')) then (
      #echo("v.syslog_message = : " . v.syslog_message);
      set_collected_field('', 'cpu', $1);
      v.syslog_message = $2;
    );

    # Chop off leading spaces and integer e.g.
    # 922293106.011 2892 157.150.114.102 TCP_MISS/302 501 GET http://somewhere.com/image.html - DIRECT/somewhere.com text/html [Referer: http://elsewhere.com/etc.html ...%0d%0aProxy-Connection: Keep-Alive%0d%0aUser-Agent: Mozilla/4.08 %5ben%5d (Win95%3b I %3bNav)%0d%0aHost: somewhere.com%0d%0aAccept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg image/png%0d%0aAccept-Encoding: gzip%0d%0aAccept-Language: en%0d%0aAccept-Charset: iso-8859-1,*,utf-8%0d%0aCookie: PreferencesID=e4Ru7fKzaDgRl5xHXaUzXq%3b MLCursor=iqK%252E%2525M%253Fh%252Ea%2525%2540cI2j7O%252BaD%2540ti9%252Cjq9%255E%2523R%252C%255EW%252Fea996%2523%255FWovJ%2525rc5Q%252A%255EF%253B%253A%0d%0a] [HTTP/1.1 302 Found%0d%0aDate: Wed, 24 Mar 1999 16:41:09 GMT%0d%0aServer: Apache/1.3.3 (Unix)%0d%0aLocation: http://12.34.56.78/dir/etc.gif%0d%0aConnection: close%0d%0aContent-Type: text/html%0d%0a%0d]
    # This is the else-branch of the cpu test above, so it is only tried when that test did not match.
    else if (matches_regular_expression(v.syslog_message, "^ *[0-9]+ +([^ ].*)$")) then
      v.syslog_message = $1;

    # 2009-02-04 - GMF - Handle lines like this:
    # Dec 31 06:00:59 AN_SQUID_VIP_LOG 1230703259.275 9 12.34.56.78 98.76.54.32 TCP_MISS/200 1356 GET / - DIRECT/23.45.67.89 -
    # These make it through unix_syslog as "1230703259.275 9 12.34.56.78 98.76.54.32", so chop off the leading fields
    # Date and time are taken from the leading epoch timestamp; the first address is discarded
    # and the message continues from the second address.
    if (matches_regular_expression(v.syslog_message, "^([0-9]+[.][0-9][0-9][0-9]) [0-9]+ [0-9.a-f:]+ ([0-9.a-f:]+ .*)$")) then (
      set_collected_field('', 'date', normalize_date($1, 'seconds_since_jan1_1970'));
      set_collected_field('', 'time', normalize_time($1, 'seconds_since_jan1_1970'));
      v.syslog_message = $2;
    );

    # 2013-09-20 - GMF - Added second regexp here, to accept (but ignore) the extra field before DIRECT/. (which is - anyway) [ThreadID:1293647]
    # 14/Jul/2013:04:03:55 +0700 102313 192.168.0.51 TCP_MISS/200 16029 CONNECT www.facebook.com:443 - - DIRECT/31.13.79.1 - 192.168.0.51 8080
    # Both expressions have the same ten capture groups, in the order:
    # source_ip action/server_response size operation url username hierarchy/server_ip mime_type
    if (matches_regular_expression(v.syslog_message, '^([^ ]*) ([A-Z_ ]*)/([0-9]*) ([0-9]*) ([A-Z]*) ([^ ]*) ([^ ]*) ([A-Z_]*)/([^ ]*) ([^ ]*)') or
        matches_regular_expression(v.syslog_message, '^([^ ]*) ([A-Z_ ]*)/([0-9]*) ([0-9]*) ([A-Z]*) ([^ ]*) ([^ ]*) [^ ]* ([A-Z_]*)/([^ ]*) ([^ ]*)')) then (
      set_collected_field('', 'source_ip', $1);
      set_collected_field('', 'action', $2);
      set_collected_field('', 'server_response', $3);
      set_collected_field('', 'size', $4);
      set_collected_field('', 'operation', $5);
      set_collected_field('', 'url', $6);
      set_collected_field('', 'username', $7);
      set_collected_field('', 'hierarchy', $8);
      set_collected_field('', 'server_ip', $9);
      set_collected_field('', 'mime_type', $10);
      accept_collected_entry('', false);
    );

  `

  # Database fields
  # NOTE(review): location, file_type and worm have no matching entry in log.fields above;
  # presumably they are derived by the host application - confirm.
  database.fields = {

    source_ip = {
      label = "$lang_stats.field_labels.source_ip"
      log_field = "source_ip"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # source_ip

    location = ""

    url = {
      label = "$lang_stats.field_labels.url"
      log_field = "url"
      type = "string"
      suppress_top = 1
      suppress_bottom = 3
    } # url

    file_type = {
      label = "$lang_stats.field_labels.file_type"
      log_field = "file_type"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # file_type

    worm = {
      label = "$lang_stats.field_labels.worm"
      log_field = "worm"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # worm

    server_response = {
      label = "$lang_stats.field_labels.server_response"
      log_field = "server_response"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # server_response

    action = {
      label = "$lang_stats.field_labels.action"
      log_field = "action"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # action

    operation = {
      label = "$lang_stats.field_labels.operation"
      log_field = "operation"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # operation

    username = {
      label = "$lang_stats.field_labels.username"
      log_field = "username"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # username

    server_ip = {
      label = "$lang_stats.field_labels.server_ip"
      log_field = "server_ip"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # server_ip

    hierarchy = {
      label = "$lang_stats.field_labels.hierarchy"
      log_field = "hierarchy"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # hierarchy

    mime_type = {
      label = "$lang_stats.field_labels.mime_type"
      log_field = "mime_type"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # mime_type

    cpu = {
      label = "$lang_stats.field_labels.cpu"
      log_field = "cpu"
      type = "string"
      suppress_top = 0
      suppress_bottom = 2
    } # cpu

  } # database.fields

  # Log Filters
  log.filters = {

    # Sets page_views to 0 for the listed image/style/script file types and to 1 otherwise.
    detect_page_views = {
      label = '$lang_admin.log_filters.detect_page_views_label'
      comment = '$lang_admin.log_filters.detect_page_views_comment'
      value = "if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then page_views = 0; else page_views = 1;"
    } # detect_page_views

    # Replaces everything after the first '?' in url with the literal text "(parameters)".
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(url, '?')) then url = substr(url, 0, index(url, '?') + 1) . '(parameters)';"
    } # remove_query

    # Keeps only "scheme://host/" of url and appends the literal text "(omitted)".
    # NOTE(review): unlike the sibling filters, this expression has no trailing ';' - confirm that is intentional.
    simplify_url = {
      label = "$lang_admin.log_filters.simplify_url_label"
      comment = "$lang_admin.log_filters.simplify_url_comment"
      value = "if (matches_regular_expression(url, '^([^:]+://[^/]+/)')) then url = $1 . '(omitted)'"
    } # simplify_url

    # For entries with page_views == 0, replaces the part of url after the last '/' with "(nonpage)".
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then url = substr(url, 0, last_index(url, '/') + 1) . '(nonpage)';"
    } # strip_non_page_views

    # Counts each accepted entry as one hit.
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'hits = 1;'
    } # mark_entry

  } # log.filters

  # Fields used for session tracking
  log.field_options = {
    sessions_page_field = "url"
    sessions_visitor_id_field = "source_ip"
    sessions_event_field = "page_views"
  } # log.field_options

  # Numerical (aggregated) database fields
  database.numerical_fields = {

    hits = {
      label = "$lang_stats.field_labels.hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
      entries_field = true
    } # hits

    page_views = {
      label = "$lang_stats.field_labels.page_views"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # page_views

    # Unique count over the source_ip log field.
    visitors = {
      label = "$lang_stats.field_labels.visitors"
      default = false
      requires_log_field = true
      log_field = "source_ip"
      type = "unique"
      display_format_type = "integer"
    } # visitors

    size = {
      label = "$lang_stats.field_labels.size"
      default = false
      requires_log_field = true
      log_field = "size"
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # size

  } # database.numerical_fields

  create_profile_wizard_options = {

    host_tracking = true

    # How the reports should be grouped in the report menu
    report_groups = {

      date_time_group = ""

      content_group = {
        hierarchy = true
        url = true
        file_type = true
        mime_type = true
      } # content_group

      source_group = {
        source_ip = true
        location = true
        username = true
      } # source_group

      server_group = {
        server_response = true
        server_ip = true
      } # server_group

      other_group = {
        logging_device = true
        action = true
        operation = true
        worm = true
      } # other_group

    } # report_groups

    snapons = {

      # Attach proxy_direction, to prompt for whether this is a forward or reverse proxy
      proxy_direction = {
        snapon = "proxy_direction"
        name = "proxy_direction"
        label = "$lang_admin.snapons.proxy_direction.label"
        parameters = {
          proxy_direction.parameter_value = ""
          source_ip_field.parameter_value = "source_ip"
          server_ip_field.parameter_value = "server_ip"
        } # parameters
        parameters_form = {
          group_1 = {
            description = "$lang_admin.snapons.proxy_direction.parameters_form.group_1.description"
            parameters = {
              proxy_direction = true
            } # parameters
          } # group_1
        } # parameters_form
      } # proxy_direction

      # Attach a top_level_domain snapon
      top_level_domain = {
        snapon = "top_level_domain"
        name = "top_level_domain"
        label = "$lang_admin.snapons.top_level_domain.label"
        parameters = {
          url_field.parameter_value = "url"
          field_name = {
            parameter_value = "$lang_admin.field_labels.top_level_domain"
            final_node_name = "top_level_domain"
          }
        } # parameters
      } # top_level_domain

      # Attach a gateway_reports snapon
      # host_field refers to the field named by the top_level_domain snapon above.
      gateway_reports = {
        snapon = "gateway_reports"
        name = "gateway_reports"
        label = "$lang_admin.snapons.gateway_reports.label"
        parameters = {
          user_field.parameter_value = "username"
          # user_field.parameter_value = "source_ip"
          have_category_field.parameter_value = false
          # category_field.parameter_value = "category"
          host_field.parameter_value = "top_level_domain"
          page_views_field.parameter_value = "page_views"
          bytes_in_field.parameter_value = "size"
          sort_by_field.parameter_value = "page_views"
        } # parameters
      } # gateway_reports

      # 2013-02-06 - GMF - Now added in gateway_reports
      # # Add the standard reports
      # add_standard_reports = {
      #   name = "add_standard_reports"
      #   label = "add_standard_reports"
      #   snapon = "add_standard_reports"
      # } # add_standard_reports

    } # snapons

  } # create_profile_wizard_options

  not_supported = {
  } # not_supported

} # squid_syslog_required