# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in for SHOUTcast servers writing W3C-style logs.
# The field list is not fixed here: it is read from each "#Fields:" header
# line of the log by log.filter_preprocessor below.
shoutcast_w3_c = {

  plugin_version = "3.3"
  info.1.manufacturer = "Nullsoft"
  info.1.device = "SHOUTcast Media Server / DNAS (Distributed Network Audio Server) (W3C)"
  info.1.version.1 = "1.8"
  info.1.version.2 = "1.9"

  # 05/Jun/2006 - GMF - 1.1 - Fixed visitors
  # 05/Jun/2006 - GMF - 2.0 - Added session tracking with login/logout, for concurrent session tracking.
  # 2007-09-06 - GMF - 2.1 - Fixed bug which would give incorrect session durations if date_offset
  #                          was not 0.
  # 2009-06-26 - gas - 2.2 - Added log.fields.session_event_type and log.fields.session_id to fix create
  #                          profile error: Unknown configuration group "session_event_type" in node
  #                          "profiles.createradio.log.fields"
  # 2010-01-05 - 3.0 - KBB - Switched to use log.filter_preprocessor for parsing to handle a problem
  #   with logging where a missing user agent field is empty instead of a dash. The dash is w3c standard,
  #   so adjacent spaces are handled as a single separator and the remaining fields are shifted to
  #   the left with the very bad consequence of huge durations. Note the difference below.
  #   A parsing_regular_expression like* the following is built, and it works for both log entries below.
  #^([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)$
  #44.44.44.44 32-32-33-33.there.here.jp 2008-01-19 23:36:15 /stream?title=A%20Great%20Song 200 NSPlayer%2F11%2E0%2E5721%2E5145%20WMFSDK%2F11%2E0 851 1 6808
  #66.66.66.66 22-22-23-23.here.there.com 2008-01-19 23:36:15 /stream?title=A%20Great%20Song 200  851 1 6808
  #   (in the second entry the empty user agent shows up as two adjacent spaces after the status code)
  #   *actual regular expression now includes "[^"]" option as well
  # 2010-01-19 - 3.0.1 - KBB - Restored parsing of user agent by using convert_escapes().
  # 2010-02-17 - 3.0.2 - KBB - Fixed bug where file with multiple headers not handled correctly.
  # 2010-10-05 - 3.0.3 - MSG - Edited info lines.
  # 2010-11-09 - 3.0.4 - KBB - Edited info lines to reflect name change in shoutcast18.cfg and to add v1.9.
  # 2010-11-10 - 3.0.5 - KBB - Reversed 3.0.1. This field contains mostly Players and OS can't be derived.
  #                            Added new filter to decode cs_user_agent itself;
  # 2011-05-20 - 3.1 - GMF - Removed session information; not needed in nunchaku anymore (we can use concurrent connections snapon if desired).
  # 2013-02-12 - 3.2 - GMF - Added media_reports snapon
  # 2013-02-12 - 3.3 - GMF - Restored session reports, via the sessions snapon; some customers were using them for real session analysis, not just concurrency [ThreadID:1279246]

  # The name of the log format
  log.format.format_label = "SHOUTcast W3C Log Format"
  log.miscellaneous.log_data_type = "generic_w3c"
  log.miscellaneous.log_format_type = "media_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^#Software: SHOUTcast"

  # The format of dates and times in this log
  log.format.date_format = "auto"
  log.format.time_format = "auto"

  # This handles #Fields lines, and creates log and database fields from them.
  #
  # For every "#Fields:" header line the code below:
  #   1. clears log.format.parsing_regular_expression, so a file containing
  #      several headers rebuilds the expression each time (3.0.2 fix);
  #   2. walks the space-separated field names, normalising each one
  #      (lowercase, anything outside a-z/0-9 becomes "_", trailing "_" removed)
  #      and renaming cs_referrer -> cs_referer and cs_uri -> cs_uri_stem;
  #   3. creates a log field per name and appends one capture group per field
  #      to the parsing expression, in header order;
  #   4. while a profile is being created (volatile.creating_profile exists),
  #      also creates the matching and derived database fields;
  #   5. anchors the finished expression with ^...$ and rejects the header line.
  # Every other line beginning with "#" is rejected as well.
  log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (

  # Start from an empty expression; one group per field is appended below.
  log.format.parsing_regular_expression = ""; # 2010-02-17 - 3.0.2 - KBB

  # $1 is the field list captured by the #Fields match above.
  string fields = $1;
  string fieldname;
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # This subroutine creates a database field
  # The node profiles.<profile>.database.fields.<fieldname> is reset to "",
  # and suppress_top / suppress_bottom are set only when the corresponding
  # argument is non-zero. The last expression (the node) is the result.
  subroutine(create_database_field(string fieldname, int top, int bottom), (
    #echo("create_database_field: " . fieldname); # debug
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    (databasefieldpath . "") = "";
    #@databasefieldpath = "";
    node databasefield = databasefieldpath;
    if (top ne 0) then set_subnode_value(databasefield, "suppress_top", top);
    if (bottom ne 0) then set_subnode_value(databasefield, "suppress_bottom", bottom);
    databasefield;
  ));

  # This subroutine creates a log field
  # The node profiles.<profile>.log.fields.<fieldname> is reset to "",
  # and its "type" subnode is set only when a non-empty type is given.
  subroutine(create_log_field(string fieldname, string type), (
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
    if (type ne '') then set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # Extract the fields one at a time
  # The first pattern peels off a name that is followed by more names; the
  # second pattern matches the final name, which has nothing after it.
  # NOTE(review): the loop ends after the final name only if $2 is empty
  # following the single-group match - confirm against Salang semantics.
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$') or matches_regular_expression(fields, '^([^ ]+)$')) (

    string unconverted_fieldname = $1;
    fields = $2;

    # Clean up the field name
    # (lowercase it and turn every character outside a-z / 0-9 into "_")
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    # Drop trailing underscores left over from the conversion above
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # Get the log field type
    # The two renames come first so that the type test and everything after
    # it see only the canonical names cs_referer and cs_uri_stem.
    string log_field_type = '';
    if (fieldname eq 'cs_referrer') then (
      fieldname = 'cs_referer';
    );
    if (fieldname eq 'cs_uri') then (
      fieldname = 'cs_uri_stem';
    );
    if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
    );
    else if (fieldname eq 'cs_user_agent') then (
      log_field_type = 'agent';
    );
    else if (fieldname eq 'c_ip') then (
      log_field_type = 'host';
    );

    # Create the log field
    #echo("fieldname " . fieldname . " log_field_type " . log_field_type); # debug
    create_log_field(fieldname, log_field_type);
    # Append one group per field: either a double-quoted value or a run of
    # non-space characters. Both alternatives may match the empty string.
#    log.format.parsing_regular_expression .= " ([^ ]*)";  # Allows field to be empty
    log.format.parsing_regular_expression .= ' ("[^"]*"|[^ ]*)';  # Allows field to be empty

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle date by creating date_time and derived database fields
      if (fieldname eq "date") then (
        create_database_field('date_time', 0, 0);
        create_database_field('day_of_week', 0, 0);
        create_database_field('hour_of_day', 0, 0);
      ); # if date
      # time creates exactly the same three fields as date
      else if (fieldname eq "time") then (
        create_database_field('date_time', 0, 0);
        create_database_field('day_of_week', 0, 0);
        create_database_field('hour_of_day', 0, 0);
      ); # if time

      # Create derived fields for agent
      # (disabled: see the 3.0.5 entry in the change history of this plug-in)
#      else if (fieldname eq "cs_user_agent") then (
#        create_database_field('operating_system', 0, 0);
#        create_database_field('web_browser', 0, 0);
#        create_database_field('spider', 0, 0);
#      );

      # Create database field c_ip and derived field for client IP
      else if (fieldname eq "c_ip") then (
        create_database_field('c_ip', 0, 0);
        create_database_field('location', 0, 0);
      );

      # Create database field cs_referer and derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('cs_referer', 1, 9);
        create_database_field('search_engine', 0, 0);
        create_database_field('search_phrase', 0, 0);
      );

      # Create derived file type field
      else if (fieldname eq "cs_uri_stem") then (
        create_database_field('cs_uri_stem', 0, 9);
        create_database_field('file_type', 0, 0);
      );

      # Don't add a database field for numerical fields
      # (names that already exist under this profile's database.numerical_fields)
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname, 0, 0);

    ); # if creating profile

  ); # while another field

  # Every group was appended with a leading space; remove the first space
  # and anchor the whole expression to the start and end of the line.
  if (matches_regular_expression(log.format.parsing_regular_expression, "^ (.*)$")) then
    log.format.parsing_regular_expression = "^" . $1 . "$";
  #echo("X" . log.format.parsing_regular_expression . "X"); # debug

  # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
  # Do this here to make these fields last instead of first so
  # parsing_regular_expression works.
#  create_log_field('session_event_type', "");
#  create_log_field('session_id', "");

  # If we're creating a profile, create the database fields too.
#  if (node_exists("volatile.creating_profile")) then (
#    create_database_field('session_event_type', 0, 0);
#    create_database_field('session_id', 0, 0);
#  );

  # Don't parse the #Fields line as a data line
  'reject';

); # if #Fields

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

  # Don't time out session, discard them for being long, or remove reloads
  # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
#  statistics.miscellaneous = {
#    maximum_session_duration = "0"
#    session_timeout = "0"
#    remove_reloads_from_sessions = "false"
#  } # statistics.miscellaneous

  # Get web browser, operating system, and spider information from the user-agent field.
  # (disabled together with the derived agent database fields)
#  log.parsing_filters.derive_from_user_agent = {
#    value = `get_user_agent_info(replace_all(convert_escapes(cs_user_agent), '+', ' '));
#web_browser = volatile.web_browser;
#operating_system = volatile.operating_system;
#spider = volatile.spider;
#`
#    requires_fields = {
#      cs_user_agent = true
#    }
#  }

  # Convert the user agent field which often is url encoded.
  # The decode happens twice because we have one example with double escaping. It should be harmless.
# decode_user_agent: parsing filter that rewrites cs_user_agent in place.
# The value is passed through convert_escapes() and then has every '+'
# replaced by a space, and that whole step is applied a second time to the
# result (inner call = first pass, outer call = second pass), because at
# least one observed log carried a doubly escaped user agent.
# NOTE(review): assuming convert_escapes() decodes %XX sequences, a literal
# plus sign logged as %2B also ends up as a space - confirm that is acceptable.
  log.parsing_filters.decode_user_agent = {

    value = `
cs_user_agent = replace_all(convert_escapes(replace_all(convert_escapes(cs_user_agent), '+', ' ')), '+', ' ');
`

    # The filter is only relevant when the log supplies cs_user_agent
    requires_fields = {
      cs_user_agent = true
    }

  } # decode_user_agent

  # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
  # Setup the log filters
#  log.filter_initialization = `
#int logout_date_time_epoc;
#int login_date_time_epoc;
#string login_date_time;
#string session_id;
#int session_id_counter = 0;
#int date_offset_seconds = log.processing.date_offset * (60*60);
#`

  # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
#  # This filter adds a "logout" event at the logged time, so sessions can be calculated based on
#  # connection time.  The "normal" event will be added normally, but this also subtracts the duration
#  # from the date and time, so it logs the event at the time it *connected*,
#  # and the logout at the time it *disconnected*.
#  log.parsing_filters.add_logout_event = {
#
#    value = `
#
## debug
##echo("========================================");
##echo("*" . current_log_line() . "*");
##echo("c_ip " . c_ip);
##echo("c_dns " . c_dns);
##echo("date " . date);
##echo("time " . time);
##echo("cs_uri_stem " . cs_uri_stem);
##echo("c_status " . c_status);
##echo("cs_user_agent " . cs_user_agent);
##echo("sc_bytes " . sc_bytes);
##echo("x_duration " . x_duration);
##echo("avgbandwidth " . avgbandwidth);
#
## Compute the session id
#session_id = c_ip . '_' . session_id_counter;
#session_id_counter++;
#
## Add the logout event.  This has the same c_ip, cs_uri_stem as the login event, so it can
## be correlated that way.
#set_collected_field('', 'date', date);
#set_collected_field('', 'time', time);
#set_collected_field('', 'session_event_type', '(logout)');
#set_collected_field('', 'session_id', session_id);
#set_collected_field('', 'cs_uri_stem', cs_uri_stem);
#set_collected_field('', 'session_events', 1);
#set_collected_field('', 'clips', 0);
#accept_collected_entry('', false);
#
## Compute the date_time of the login event
## Note that we need to add date_offset_seconds to apply the date_offset value.
#logout_date_time_epoc = date_time_to_epoc(normalize_date(date, 'auto') . ' ' . time);
#login_date_time_epoc += date_offset_seconds;
#login_date_time_epoc = logout_date_time_epoc - x_duration;
#login_date_time = epoc_to_date_time(login_date_time_epoc);
#
## Set up to make the next event a login
#session_event_type = 'login';
#session_events = 1;
#clips = 1;
#
#`
#
#    requires_fields = {
#      date = true
#      time = true
#      c_ip = true
#      x_duration = true
#    }
#
#  } # add_logout_event

  # Log Filters
  log.filters = {

    # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
#    login = {
#      label = "Handle session information"
#      comment = "This performs internal bookkeeping required to treat each access as a separate session"
#      value = `
#if (session_event_type eq 'login') then (
#  date_time = login_date_time;
#);
#`
#    } # login

    # Sets clips to 1 on every entry; clips is the entries field declared
    # under database.numerical_fields, so each accepted log line counts once.
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'clips = 1;'
    } # mark_entry

  } # log.filters

  # Numerical (aggregated) database fields.
  # The preprocessor does not create an ordinary database field for any log
  # field whose name appears here (it checks this node by name).
  database.numerical_fields = {

    # Entry counter; not read from the log (requires_log_field = false),
    # it is filled in by the mark_entry log filter.
    clips = {
      default = true
      requires_log_field = false
      entries_field = true
    } # clips

    # Unique count over the c_ip log field
    visitors = {
      log_field = "c_ip"
      type = "unique"
    } # visitors

    # 64-bit integer shown with the duration_compact display format
    # NOTE(review): presumably the connection length in seconds - confirm against SHOUTcast docs
    x_duration = {
      default = true
      type = "int"
      integer_bits = 64
      display_format_type = "duration_compact"
    } # x_duration

    # 64-bit integer shown with the bandwidth display format
    sc_bytes = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # sc_bytes

    # Aggregated as an average rather than a sum, with clips as the denominator
    avgbandwidth = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
      aggregation_method = "average"
      average_denominator_field = "clips"
    } # avgbandwidth

    # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
#    session_events = {
#      default = true
#      requires_log_field = false
#      entries_field = true
#    } # session_events

  } # database.numerical_fields

  # 2011-05-20 - GMF - commented out as part of removal of session analysis in 3.1
#  log.field_options = {
#
#    sessions_page_field = "session_event_type"
#    sessions_visitor_id_field = "session_id"
#    sessions_event_field = "session_events"
#
#  } # log.field_options

  # Do this in the preprocessor because the parsing regular
  # expression counts on the position of the log fields,
  # and these already exist if this is done here.  (Database
  # fields don't matter, but move them too for consistency.)
#  database.fields.session_event_type = ""
#  database.fields.session_id = ""
#  log.fields.session_event_type = ""
#  log.fields.session_id = ""

  create_profile_wizard_options = {

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      content_group = {
        cs_uri_stem = true
        file_type = true
        worm = true
      }
      visitor_demographics_group = {
        c_ip = true
        c_dns = true
        domain_description = true
        location = true
        cs_user_agent = true
      }
#      visitor_systems_group = {
#        screen_dimensions = true
#        screen_depth = true
#        web_browser = true
#        operating_system = true
#        spider = true
#      }
      # NOTE(review): session_event_type and session_id are no longer created
      # by this plug-in (their creation is commented out as of 3.1); the
      # entries look like leftovers - confirm they are ignored when absent.
      other_group = {
        session_event_type = true
        session_id = true
        c_status = true
      }
    } # report_groups

    snapons = {

      # Attach a media_reports snapon
      # c_ip serves as both the user and the client IP field, x_duration as
      # the duration and cs_uri_stem as the stream name; no stream id field.
      media_reports = {
        snapon = "media_reports"
        name = "media_reports"
        label = "$lang_admin.snapons.media_reports.label"
        parameters = {
          user_field.parameter_value = "c_ip"
          client_ip_field.parameter_value = "c_ip"
          duration_field.parameter_value = "x_duration"
          stream_name_field.parameter_value = "cs_uri_stem"
          have_stream_id_field.parameter_value = false
          stream_id_field.parameter_value = ""
          attach_concurrent_events.parameter_value = true
        } # parameters
        requires_database_fields = {
          c_ip = true
          x_duration = true
          cs_uri_stem = true
        } # requires_database_fields
      } # media_reports

      # Optional sessions snapon (restored in 3.3): offered at profile
      # creation (prompt_to_attach = true) with a default of false.
      # Sessions are keyed on c_ip, with cs_uri_stem as the page field
      # and clips as the events field.
      sessions = {
        snapon = "sessions"
        name = "sessions"
        label = "$lang_admin.snapons.sessions.label"
        prompt_to_attach = true
        prompt_to_attach_default = false
        parameters = {
          session_user_field.parameter_value = "c_ip"
          session_page_field.parameter_value = "cs_uri_stem"
          session_date_time_field.parameter_value = "date_time"
          session_events_field.parameter_value = "clips"
        } # parameters
      } # sessions

    } # snapons

  } # create_profile_wizard_options

} # shoutcast_w3_c