# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plugin for SHOUTcast W3C logs.
#
# Overview of what this node defines:
#   - log.filter_preprocessor: reads each "#Fields:" header line, creates one log field per
#     listed field, and builds log.format.parsing_regular_expression to match data lines.
#   - log.parsing_filters.decode_user_agent: URL-decodes cs_user_agent.
#   - log.parsing_filters.add_logout_event: emits an extra "(logout)" entry per log line and
#     computes the login time (logged time minus x_duration) for the normal entry.
#   - log.filters.login: overrides date_time with the computed login time.
#   - database.numerical_fields / log.field_options: numerical fields and session field mapping.
shoutcast_w3_c = {

  # NOTE(review): the change history below lists versions up to 3.0.5, but plugin_version
  # still says 3.0.3 -- confirm which is intended before changing it.
  plugin_version = "3.0.3"
  info.1.manfacturer = "Nullsoft"
  info.1.device = "SHOUTcast Media Server / DNAS (Distributed Network Audio Server) (W3C)"
  info.1.version.1 = "1.8"
  info.1.version.2 = "1.9"

  # 05/Jun/2006 - GMF - 1.1 - Fixed visitors
  # 05/Jun/2006 - GMF - 2.0 - Added session tracking with login/logout, for concurrent session tracking.
  # 2007-09-06 - GMF - 2.1 - Fixed bug which would give incorrect session durations if date_offset
  #                          was not 0.
  # 2009-06-26 - gas - 2.2 - Added log.fields.session_event_type and log.fields.session_id to fix create
  #                          profile error: Unknown configuration group "session_event_type" in node
  #                          "profiles.createradio.log.fields"
  # 2010-01-05 - 3.0 - KBB - Switched to use log.filter_preprocessor for parsing to handle a problem
  #   with logging where a missing user agent field is empty instead of a dash. The dash is w3c standard,
  #   so adjacent spaces are handled as a single separator and the remaining fields are shifted to
  #   the left with the very bad consequence of huge durations. Note the difference below.
  #   A parsing_regular_expression like* the following is built, and it works for both log entries below.
  #^([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)$
  #44.44.44.44 32-32-33-33.there.here.jp 2008-01-19 23:36:15 /stream?title=A%20Great%20Song 200 NSPlayer%2F11%2E0%2E5721%2E5145%20WMFSDK%2F11%2E0 851 1 6808
  #66.66.66.66 22-22-23-23.here.there.com 2008-01-19 23:36:15 /stream?title=A%20Great%20Song 200  851 1 6808
  #   *actual regular expression now includes "[^"]" option as well
  # 2010-01-19 - 3.0.1 - KBB - Restored parsing of user agent by using convert_escapes().
  # 2010-02-17 - 3.0.2 - KBB - Fixed bug where file with multiple headers not handled correctly.
  # 2010-10-05 - 3.0.3 - MSG - Edited info lines.
  # 2010-11-09 - 3.0.4 - KBB - Edited info lines to reflect name change in shoutcast18.cfg and to add v1.9.
  # 2010-11-10 - 3.0.5 - KBB - Reversed 3.0.1. This field contains mostly Players and OS can't be derived.
  #                            Added new filter to decode cs_user_agent itself;

  # The name of the log format
  log.format.format_label = "SHOUTcast W3C Log Format"
  log.miscellaneous.log_data_type = "generic_w3c"
  log.miscellaneous.log_format_type = "media_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^#Software: SHOUTcast"

  # The format of dates and times in this log
  log.format.date_format = "auto"
  log.format.time_format = "auto"

  # This handles #Fields lines, and creates log and database fields from them
  log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (

  # Start from an empty expression on every #Fields line, so a log with several
  # headers rebuilds it instead of appending to the previous one.
  log.format.parsing_regular_expression = ""; # 2010-02-17 - 3.0.2 - KBB

  string fields = $1;
  string fieldname;
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # This subroutine creates a database field in the current profile.
  # top/bottom, when non-zero, are stored as suppress_top/suppress_bottom on the field node.
  # The field node is the value of the subroutine.
  subroutine(create_database_field(string fieldname, int top, int bottom), (
    #echo("create_database_field: " . fieldname); # debug
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    (databasefieldpath . "") = "";
    #@databasefieldpath = "";
    node databasefield = databasefieldpath;
    if (top ne 0) then
      set_subnode_value(databasefield, "suppress_top", top);
    if (bottom ne 0) then
      set_subnode_value(databasefield, "suppress_bottom", bottom);
    databasefield;
  ));

  # This subroutine creates a log field in the current profile.
  # type, when non-empty, is stored as the "type" subnode of the field node.
  # The field node is the value of the subroutine.
  subroutine(create_log_field(string fieldname, string type), (
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # Extract the fields one at a time.
  # The first pattern peels off the leading field and keeps the remainder; the second
  # pattern handles the last field, which has nothing after it.
  # NOTE(review): in the last-field case only one group is captured; this relies on $2
  # being empty then so that the loop ends -- confirm.
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$') or
         matches_regular_expression(fields, '^([^ ]+)$')) (

    string unconverted_fieldname = $1;
    fields = $2;

    # Clean up the field name: lowercase it, and replace every character that is
    # not a-z or 0-9 with an underscore.
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );

    # Strip trailing underscores
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # Get the log field type
    string log_field_type = '';

    # Normalize alternate spellings before deciding the type
    if (fieldname eq 'cs_referrer') then (
      fieldname = 'cs_referer';
    );
    if (fieldname eq 'cs_uri') then (
      fieldname = 'cs_uri_stem';
    );

    if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
    );
    else if (fieldname eq 'cs_user_agent') then (
      log_field_type = 'agent';
    );
    else if (fieldname eq 'c_ip') then (
      log_field_type = 'host';
    );

    # Create the log field
    #echo("fieldname " . fieldname . " log_field_type " . log_field_type); # debug
    create_log_field(fieldname, log_field_type);

    # Append one capture group per field. Each group is prefixed with a single space;
    # the leading space of the whole expression is removed after the loop.
#    log.format.parsing_regular_expression .= " ([^ ]*)"; # Allows field to be empty
    log.format.parsing_regular_expression .= ' ("[^"]*"|[^ ]*)'; # Allows field to be empty

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle date by creating date_time and derived database fields
      if (fieldname eq "date") then (
        create_database_field('date_time', 0, 0);
        create_database_field('day_of_week', 0, 0);
        create_database_field('hour_of_day', 0, 0);
      ); # if date

      else if (fieldname eq "time") then (
        create_database_field('date_time', 0, 0);
        create_database_field('day_of_week', 0, 0);
        create_database_field('hour_of_day', 0, 0);
      ); # if time

      # Create derived fields for agent
#      else if (fieldname eq "cs_user_agent") then (
#        create_database_field('operating_system', 0, 0);
#        create_database_field('web_browser', 0, 0);
#        create_database_field('spider', 0, 0);
#      );

      # Create database field c_ip and derived field for client IP
      else if (fieldname eq "c_ip") then (
        create_database_field('c_ip', 0, 0);
        create_database_field('location', 0, 0);
      );

      # Create database field cs_referer and derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('cs_referer', 1, 9);
        create_database_field('search_engine', 0, 0);
        create_database_field('search_phrase', 0, 0);
      );

      # Create derived file type field
      else if (fieldname eq "cs_uri_stem") then (
        create_database_field('cs_uri_stem', 0, 9);
        create_database_field('file_type', 0, 0);
      );

      # Don't add a database field for numerical fields
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname, 0, 0);

    ); # if creating profile

  ); # while another field

  # Replace the leading space left by the first appended group with "^", and anchor the end.
  if (matches_regular_expression(log.format.parsing_regular_expression, "^ (.*)$")) then
    log.format.parsing_regular_expression = "^" . $1 . "$";
  #echo("X" . log.format.parsing_regular_expression . "X"); # debug

  # Do this here to make these fields last instead of first so
  # parsing_regular_expression works.
  create_log_field('session_event_type', "");
  create_log_field('session_id', "");

  # If we're creating a profile, create the database fields too.
  if (node_exists("volatile.creating_profile")) then (
    create_database_field('session_event_type', 0, 0);
    create_database_field('session_id', 0, 0);
  );

  # Don't parse the #Fields line as a data line
  'reject';

); # if #Fields

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

  # Don't time out session, discard them for being long, or remove reloads
  statistics.miscellaneous = {
    maximum_session_duration = "0"
    session_timeout = "0"
    remove_reloads_from_sessions = "false"
  } # statistics.miscellaneous

  # Get web browser, operating system, and spider information from the user-agent field.
  # (Disabled; see change history entry 3.0.5.)
#  log.parsing_filters.derive_from_user_agent = {
#    value = `get_user_agent_info(replace_all(convert_escapes(cs_user_agent), '+', ' '));
#web_browser = volatile.web_browser;
#operating_system = volatile.operating_system;
#spider = volatile.spider;
#`
#    requires_fields = {
#      cs_user_agent = true
#    }
#  }

  # Convert the user agent field which often is url encoded.
  # The decode happens twice because we have one example with double escaping. It should be harmless.
  log.parsing_filters.decode_user_agent = {
    value = `
cs_user_agent = replace_all(convert_escapes(cs_user_agent), '+', ' ');
cs_user_agent = replace_all(convert_escapes(cs_user_agent), '+', ' ');
`
    requires_fields = {
      cs_user_agent = true
    }
  } # decode_user_agent

  # Setup the log filters: declare the variables shared by the filters below.
  # date_offset_seconds is log.processing.date_offset multiplied by 60*60.
  log.filter_initialization = `
int logout_date_time_epoc;
int login_date_time_epoc;
string login_date_time;
string session_id;
int session_id_counter = 0;
int date_offset_seconds = log.processing.date_offset * (60*60);
`

  # This filter adds a "logout" event at the logged time, so sessions can be calculated based on
  # connection time. The "normal" event will be added normally, but this also subtracts the duration
  # from the date and time, so it logs the event at the time it *connected*,
  # and the logout at the time it *disconnected*.
  log.parsing_filters.add_logout_event = {
    value = `
# debug
#echo("========================================");
#echo("*" . current_log_line() . "*");
#echo("c_ip " . c_ip);
#echo("c_dns " . c_dns);
#echo("date " . date);
#echo("time " . time);
#echo("cs_uri_stem " . cs_uri_stem);
#echo("c_status " . c_status);
#echo("cs_user_agent " . cs_user_agent);
#echo("sc_bytes " . sc_bytes);
#echo("x_duration " . x_duration);
#echo("avgbandwidth " . avgbandwidth);

# Compute the session id: client IP plus a counter, so every log line is its own session
session_id = c_ip . '_' . session_id_counter;
session_id_counter++;

# Add the logout event. This has the same c_ip, cs_uri_stem as the login event, so it can
# be correlated that way.
set_collected_field('', 'date', date);
set_collected_field('', 'time', time);
set_collected_field('', 'session_event_type', '(logout)');
set_collected_field('', 'session_id', session_id);
set_collected_field('', 'cs_uri_stem', cs_uri_stem);
set_collected_field('', 'session_events', 1);
set_collected_field('', 'clips', 0);
accept_collected_entry('', false);

# Compute the date_time of the login event: the logged (logout) time minus the duration.
# Note that we need to add date_offset_seconds to apply the date_offset value.
# The offset must be added AFTER login_date_time_epoc is assigned; adding it before the
# assignment would be overwritten and the offset would never take effect.
logout_date_time_epoc = date_time_to_epoc(normalize_date(date, 'auto') . ' ' . time);
login_date_time_epoc = logout_date_time_epoc - x_duration;
login_date_time_epoc += date_offset_seconds;
login_date_time = epoc_to_date_time(login_date_time_epoc);

# Set up to make the next event a login
session_event_type = 'login';
session_events = 1;
clips = 1;
`
    requires_fields = {
      date = true
      time = true
      c_ip = true
      x_duration = true
    }
  } # add_logout_event

  # Log Filters
  log.filters = {

    # For login events, replace date_time with the login time computed by add_logout_event.
    login = {
      label = "Handle session information"
      comment = "This performs internal bookkeeping required to treat each access as a separate session"
      value = `
if (session_event_type eq 'login') then (
  date_time = login_date_time;
);
`
    } # login

  } # log.filters

  database.numerical_fields = {

    clips = {
      default = true
      requires_log_field = false
      entries_field = true
    } # clips

    visitors = {
      log_field = "c_ip"
      type = "unique"
    } # visitors

    x_duration = {
      default = true
      type = "float"
      display_format_type = "duration_compact"
    } # x_duration

    sc_bytes = {
      type = "float"
      display_format_type = "bandwidth"
    } # sc_bytes

    avgbandwidth = {
      type = "float"
      display_format_type = "bandwidth"
      aggregation_method = "average"
      average_denominator_field = "clips"
    } # avgbandwidth

    session_events = {
      default = true
      requires_log_field = false
      entries_field = true
    } # session_events

  } # database.numerical_fields

  # Map the session fields onto the fields populated by add_logout_event
  log.field_options = {
    sessions_page_field = "session_event_type"
    sessions_visitor_id_field = "session_id"
    sessions_event_field = "session_events"
  } # log.field_options

  # Do this in the preprocessor because the parsing regular
  # expression counts on the position of the log fields,
  # and these already exist if this is done here. (Database
  # fields don't matter, but move them too for consistency.)
#  database.fields.session_event_type = ""
#  database.fields.session_id = ""
#  log.fields.session_event_type = ""
#  log.fields.session_id = ""

  create_profile_wizard_options = {

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      content_group = {
        cs_uri_stem = true
        file_type = true
        worm = true
      }
      visitor_demographics_group = {
        c_ip = true
        c_dns = true
        domain_description = true
        location = true
        cs_user_agent = true
      }
#      visitor_systems_group = {
#        screen_dimensions = true
#        screen_depth = true
#        web_browser = true
#        operating_system = true
#        spider = true
#      }
      other_group = {
        session_event_type = true
        session_id = true
        c_status = true
      }
    } # report_groups

  } # create_profile_wizard_options

} # shoutcast_w3_c