# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in for Amazon Cloudfront Streaming logs (W3C-style, with a #Fields header).
#
# Overview of what this node defines:
#   - autodetection of the format from its "#Fields:" header line
#   - a filter preprocessor that builds the log fields (and, while a profile is being
#     created, the database fields) from the names listed in the "#Fields:" line
#   - parsing filters that populate the connections and plays counters
#   - numerical database fields, report groups, and the snapons attached by the
#     Create Profile Wizard

cloudfront_streaming = {

  plugin_version = "2.3.1"
  info.1.manufacturer = "Amazon"
  info.1.device = "Cloudfront Streaming"
  info.1.version = ""

  # 2010-09-30 - GMF - 1.0 - Initial plug-in based on Flash Media Server
  # 2011-12-04 - GMF - 2.0 - Removed all session information, to align it with 8.5 (concurrency, if required, can be done with a snapon).
  # 2011-12-08 - GMF - 2.1 - Added "total bytes" field as a snapon
  # 2011-12-22 - GMF - 2.2 - Added connections field
  # 2011-12-22 - GMF - 2.3 - Added plays field
  # 2012-01-24 - GMF - 2.3.1 - Turned off log.processing.distributed.method = "1"

  # The name of the log format
  log.format.format_label = "Amazon Cloudfront Streaming Log Format"
  log.miscellaneous.log_data_type = "generic_w3c"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^#Fields: date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url c-user-agent x-sname x-sname-query x-file-ext x-sid$"

  # Literal apostrophes can appear in field values, and should not be treated as quotes
  log.format.treat_apostrophes_as_quotes = false

  # Expire entries after 100,000 lines have gone by (during database filtering)
  log.format.collected_entry_lifespan = "100000"

  # Use single-process builds for profiles based on this plug-in, because the filter code requires in-order log data to get correct results.
  # Not any more--it uses a database filter now, and pre-sorts the data chronologically.
  # log.processing.distributed.method = "1"

  # This handles #Fields lines, and creates log and database fields from them
  log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (
  string fields = $1;
  string fieldname;
  v.logfieldindex = 1;
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # This subroutine creates a database field node under the profile's database.fields,
  # and evaluates to that node.
  subroutine(create_database_field(string fieldname), (
    #echo("create_database_field: " . fieldname);
    debug_message("create_database_field(" . fieldname . ")\n");
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    (databasefieldpath . "") = "";
    node databasefield = databasefieldpath;
#    set_subnode_value(databasefield, "label", fieldname);
    databasefield;
  ));

  # This subroutine creates a log field node under the profile's log.fields.
  #   type      - written to the field's "type" subnode when non-empty
  #   withindex - when true, the field gets the next column index (v.logfieldindex),
  #               which is then incremented
  subroutine(create_log_field(string fieldname, string type, bool withindex), (
    debug_message("create_log_field(" . fieldname . "; type=" . type . ")\n");
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
#    set_subnode_value(logfield, "label", fieldname);
    if (withindex) then (
      set_subnode_value(logfield, "index", v.logfieldindex);
      v.logfieldindex++;
    );
    set_subnode_value(logfield, "subindex", 0);
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # Assume there isn't a localtime field until we see one.
  v.parse_localtime = false;

  # Extract the fields one at a time.
  # The separator after a field name is optional (" ?") so that the final name on the
  # #Fields line, which has no trailing space, is processed too.
  while (matches_regular_expression(fields, '^([^ ]+) ?(.*)$')) (

    string unconverted_fieldname = $1;
    fields = $2;

    # Clean up the field name: lowercase it, replace anything outside [a-z0-9] with '_',
    # and strip trailing underscores.
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # Get the log field type
    # NOTE(review): the names tested here (cs_uri_path, cs_user_agent, cs_referer) do not
    # occur in the #Fields header matched by the autodetect expression above (which lists
    # cs-uri-stem, c-user-agent, c-referrer), so these branches look inherited from another
    # plug-in and appear never to fire for this format -- confirm before relying on them.
    string log_field_type = '';
    if (fieldname eq 'cs_uri_path') then (
      log_field_type = 'page';
      ("profiles." . internal.profile_name . ".log.fields.url.type") = 'flat';
    );
    if (fieldname eq 'cs_user_agent') then
      log_field_type = 'agent';
    if (fieldname eq 'cs_referer') then
      log_field_type = 'url';

    # Create the log field
    create_log_field(fieldname, log_field_type, true);

    if (fieldname eq "localtime") then
      v.parse_localtime = true;

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle localtime by creating date_time and derived database fields
      if (fieldname eq "localtime") then (
        create_log_field('date', '', false);
        create_log_field('time', '', false);
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
#        ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = false;
      ); # if localtime

      # Handle date by creating date_time and derived database fields
      else if (fieldname eq "date") then (
        create_log_field('localtime', '', false); # placeholder - 7/Nov/2006 - KBB
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
#        ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = true;
      ); # if date

      # Handle time by creating date_time and derived database fields
      else if (fieldname eq "time") then (
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
#        ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = true;
      ); # if time

      # Create derived fields for agent
      else if (fieldname eq "cs_user_agent") then (
        create_database_field('operating_system');
        create_database_field('web_browser');
      );

      # Create the client IP field and its derived location field
      else if (fieldname eq "c_ip") then (
        create_database_field('c_ip');
        create_database_field('location');
      );

      # Create derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('search_engine');
        create_database_field('search_phrase');
      );

      # Create derived file type field
      else if (fieldname eq "cs_uri_path") then (
        create_database_field('file_type');
      );

      # Don't add a database field for numerical fields
#      else if (subnode_exists('database.fields', fieldname)) then (
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname);

    ); # if creating profile

  ); # while another field

  # Don't parse the #Fields line as a data line
  'reject';

); # if #Fields

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

  # NOTE(review): the separator below is reproduced as a single space, exactly as it appears
  # in this copy of the file. If the original character was a tab that got normalized by
  # whitespace mangling, restore it -- confirm against real log data.
  log.format.field_separator = " "

  log.fields = {
    saved_cs_uri_stem = ""
    saved_vhost = ""
    location = ""
    session_bytes = ""
    connections = ""
    plays = ""
  }

  # From http://livedocs.adobe.com/fms/2/docs/wwhelp/wwhimpl/common/html/wwhelp.htm?context=LiveDocs_Parts&file=00000181.html
  # cs-bytes - This field shows the number of bytes transferred from the client to the server.
  #            This information can be used to bill customers per session. To calculate the bandwidth
  #            usage per session, subtract the 'cs-bytes' in the 'connect' event from the 'cs-bytes' in
  #            the 'disconnect' event.
  # sc-bytes - This field shows the number of bytes transferred from the server to the client.
  #            This information can be used to bill customers per session. To calculate the bandwidth
  #            usage per session, subtract the 'sc-bytes' in the 'connect' event by the 'sc-bytes' in
  #            the 'disconnect' event
  # cs-stream-bytes - This field shows the number of bytes transferred from the client to the server
  #            per stream. To calculate the bandwidth usage per stream, subtract the 'cs-stream-bytes'
  #            in the 'publish' event by the 'cs-stream-bytes' in the 'unpublish' event.
  # sc-stream-bytes - This field shows the number of bytes transferred from the server to the client per
  #            stream. To calculate the bandwidth usage per stream, subtract the 'sc-stream-bytes'
  #            in the 'play' event by the 'sc-stream-bytes' in the 'stop' event.

  # Declares the variables shared by the parsing filters below.
  log.filter_initialization = `
#v.last_cs_bytes = "";
#v.last_sc_bytes = "";
#v.last_cs_stream_bytes = "";
#v.last_sc_stream_bytes = "";
#float cs_bytes_for_db;
#float sc_bytes_for_db;
#float cs_stream_bytes_for_db;
#float sc_stream_bytes_for_db;
string visitor_id;

# For sessions
int login_date_time_epoc;
int logout_date_time_epoc;
string logout_date_time;
string session_id;
#int session_id_counter = 0;
int date_offset_seconds = log.processing.date_offset * (60*60);
`

  log.parsing_filters = {

    # Logs will have c_ip or c_client_id or both. Set visitor_id, used in other filters
    # based on whichever is available. c_client_id will be used if both exist.
    set_visitor_to_c_ip = {
      value = `visitor_id = replace_all(c_ip, '.', '_');`
      requires_fields = {
        c_ip = true
      }
    } # set_visitor_to_c_ip

    # Keep this filter 2nd. c_client_id is the more precise id.
    # NOTE(review): the #Fields header matched by this plug-in lists x-cf-client-id, not
    # c-client-id, so this filter presumably never activates for this format -- confirm.
    set_visitor_to_c_client_id = {
      value = `visitor_id = c_client_id;`
      requires_fields = {
        c_client_id = true
      }
    } # set_visitor_to_c_client_id

#    # Fix the cs_bytes field by subtracting this value from the previous one (it's a running total,
#    # which otherwise will be aggregated to give too-large numbers).
#    fix_cs_bytes = {
#      value = `
#if (visitor_id ne "(empty)" and visitor_id ne "-") then (
#
#  #session disconnect 2008-06-13 16:42:59 4882 222.22.222.2 -1746602884 73147 - - - - -
#
#  # If there was a previous value, use the difference in the database entry
#  v.last_cs_bytes = get_collected_field(visitor_id, 'last_cs_bytes');
#  #if (subnode_exists('v.last_cs_bytes', visitor_id)) then (
#  if (v.last_cs_bytes ne '') then (
#
#    # If value is negative due to logging bug (above), then set it to the previous
#    # value as if there have been no bytes. The result is the same as setting it
#    # to zero if the x-event is disconnect as it is in all examples seen so far.
#    if (cs_bytes < 0) then cs_bytes = v.last_cs_bytes;
#
#    cs_bytes_for_db = 0.0 + cs_bytes - v.last_cs_bytes;
#    if (cs_bytes_for_db < 0) then (
#      cs_bytes_for_db = cs_bytes;
#    );
#  );
#  else (
#    if (cs_bytes < 0) then cs_bytes = 0; # This compensates for a logging bug.
#    cs_bytes_for_db = cs_bytes;
#  );
#
#  # Remember the current cs_bytes value for a later event for this visitor
#  if (x_event eq "disconnect") then (
#    #set_subnode_value('v.last_cs_bytes', visitor_id, 0);
#    set_collected_field(visitor_id, 'last_cs_bytes', 0);
#  );
#  else (
#    #set_subnode_value('v.last_cs_bytes', visitor_id, cs_bytes);
#    set_collected_field(visitor_id, 'last_cs_bytes', cs_bytes);
#  );
#
#  # In the database, the cs_bytes field should be the difference
#  cs_bytes = cs_bytes_for_db;
#
#); # if visitor_id
#else (
#  cs_bytes = 0;
#);
#`
#      requires_fields = {
#        cs_bytes = true
#        x_event = true
#      }
#    } # fix_cs_bytes

# 2011-12-19 - GMF - Moved this to a snapon, so the input log data doesn't need to be sorted.
#    # Session bytes, intended to emulate Amazon's own byte reporting, is the bytes at disconnect, minus the bytes at connect
#    compute_session_bytes = {
#      value = `
#session_bytes = 0;
#if (visitor_id ne "(empty)" and visitor_id ne "-") then (
#
#  if (x_event eq "connect") then (
#    set_collected_field(visitor_id, 'connect_bytes', sc_bytes);
#  );
#
#  else if (x_event eq "disconnect") then
#    session_bytes = sc_bytes - get_collected_field(visitor_id, 'connect_bytes');
#
#);
##echo("session_bytes=" . session_bytes);
#`
#      requires_fields = {
#        sc_bytes = true
#        x_event = true
#      }
#    } # compute_session_bytes

    # Fix the sc_stream_bytes field by subtracting this value from the previous one (it's a running total,
    # which otherwise will be aggregated to give too-large numbers).
#    fix_sc_stream_bytes = {
#      value = `
#if (visitor_id ne "(empty)" and visitor_id ne "-") then (
#
#  # If there was a previous value, use the difference in the database entry
#  v.last_sc_stream_bytes = get_collected_field(visitor_id, 'last_sc_stream_bytes');
#  #if (subnode_exists('v.last_sc_stream_bytes', visitor_id)) then (
#  if (v.last_sc_stream_bytes ne '') then (
#
#    # Compensates for a logging bug - seen only with cs-bytes so far.
#    if (sc_stream_bytes < 0) then sc_stream_bytes = v.last_sc_stream_bytes;
#
#    #sc_stream_bytes_for_db = 0.0 + sc_stream_bytes - node_value(subnode_by_name('v.last_sc_stream_bytes', visitor_id));
#    sc_stream_bytes_for_db = 0.0 + sc_stream_bytes - v.last_sc_stream_bytes;
#    if (sc_stream_bytes_for_db < 0) then (
#      sc_stream_bytes_for_db = sc_stream_bytes;
#    );
#  );
#  else (
#    # Compensates for a logging bug - seen only with cs-bytes so far.
#    if (sc_stream_bytes < 0) then sc_stream_bytes = 0;
#    sc_stream_bytes_for_db = sc_stream_bytes;
#  );
#
#  # Remember the current sc_stream_bytes value for a later event for this visitor
#  if (x_event eq "stop") then (
#    #set_subnode_value('v.last_sc_stream_bytes', visitor_id, 0);
#    set_collected_field(visitor_id, 'last_sc_stream_bytes', 0);
#  );
#  else (
#    #set_subnode_value('v.last_sc_stream_bytes', visitor_id, sc_stream_bytes);
#    set_collected_field(visitor_id, 'last_sc_stream_bytes', sc_stream_bytes);
#  );
#
#  # In the database, the sc_stream_bytes field should be the difference
#  sc_stream_bytes = sc_stream_bytes_for_db;
#
#); # if visitor_id
#else (
#  sc_stream_bytes = 0;
#);
#`
#      requires_fields = {
#        sc_stream_bytes = true
#        x_event = true
#      }
#    } # fix_sc_stream_bytes

    # Fix the cs_stream_bytes field by subtracting this value from the previous one (it's a running total,
    # which otherwise will be aggregated to give too-large numbers).
#    fix_cs_stream_bytes = {
#      value = `
#if (visitor_id ne "(empty)" and visitor_id ne "-") then (
#
#  # If there was a previous value, use the difference in the database entry
#  v.last_cs_stream_bytes = get_collected_field(visitor_id, 'last_cs_stream_bytes');
#  #if (subnode_exists('v.last_cs_stream_bytes', visitor_id)) then (
#  if (v.last_cs_stream_bytes ne '') then (
#
#    # Compensates for a logging bug - seen only with cs-bytes so far.
#    if (cs_stream_bytes < 0) then cs_stream_bytes = v.last_cs_stream_bytes;
#
#    #cs_stream_bytes_for_db = 0.0 + cs_stream_bytes - node_value(subnode_by_name('v.last_cs_stream_bytes', visitor_id));
#    cs_stream_bytes_for_db = 0.0 + cs_stream_bytes - v.last_cs_stream_bytes;
#    if (cs_stream_bytes_for_db < 0) then (
#      cs_stream_bytes_for_db = cs_stream_bytes;
#    );
#  );
#  else (
#    # Compensates for a logging bug - seen only with cs-bytes so far.
#    if (cs_stream_bytes < 0) then cs_stream_bytes = 0;
#    cs_stream_bytes_for_db = cs_stream_bytes;
#  );
#
#  # Remember the current cs_stream_bytes value for a later event for this visitor
#  if (x_event eq "unpublish") then (
#    #set_subnode_value('v.last_cs_stream_bytes', visitor_id, 0);
#    set_collected_field(visitor_id, 'last_cs_stream_bytes', 0);
#  );
#  else (
#    #set_subnode_value('v.last_cs_stream_bytes', visitor_id, cs_stream_bytes);
#    set_collected_field(visitor_id, 'last_cs_stream_bytes', cs_stream_bytes);
#  );
#
#  # In the database, the cs_stream_bytes field should be the difference
#  cs_stream_bytes = cs_stream_bytes_for_db;
#
#); # if visitor_id
#else (
#  cs_stream_bytes = 0;
#);
#`
#      requires_fields = {
#        cs_stream_bytes = true
#        x_event = true
#      }
#    } # fix_cs_stream_bytes

    # Copy x_duration into stream_duration (stream "stop" events) or session_duration
    # (session "disconnect" events).
    # NOTE(review): x_duration and x_category are not in the #Fields header matched by this
    # plug-in, so this filter is presumably inactive for this format -- confirm.
    set_duration = {
      value = `
if ((x_event eq "stop") and (x_category eq "stream")) then
  stream_duration = x_duration;
else if ((x_event eq "disconnect") and (x_category eq "session")) then
  session_duration = x_duration;
`
      requires_fields = {
        x_duration = true
        x_event = true
        x_category = true
        stream_duration = true
      }
    } # set_duration

    # set x-sname to (empty) when '-' for session connect/disconnect lines
    # so it does not appear in the stream name report as it is only session traffic
    set_stream_name = {
      value = `if (x_sname eq '-') then x_sname = '(empty)';`
      requires_fields = {
        x_sname = true
      } # requires_fields
    } # set_stream_name

    # Count one connection for each "connect" event
    count_connections = {
      value = "if (x_event eq 'connect') then connections = 1"
    } # count_connections

    # Count one play for each "play" event
    count_plays = {
      value = "if (x_event eq 'play') then plays = 1"
    } # count_plays

  } # log.parsing_filters

  log.filters = {

    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'events = 1;'
    } # mark_entry

  } # log.filters

  database.numerical_fields = {

    events = {
      default = true
      requires_log_field = false
    }

    # NOTE(review): this field requires a c_client_id log field, but the #Fields header
    # matched by this plug-in provides x-cf-client-id (which the snapon below already uses).
    # As written, visitors presumably is never created for this format -- confirm whether
    # log_field should be "x_cf_client_id".
    visitors = {
      default = false
      requires_log_field = true
      log_field = "c_client_id"
      type = "unique"
    } # visitors

    connections = {
      default = true
    }

    plays = {
      default = true
    }

    unique_client_ips = {
      default = false
      requires_log_field = true
      log_field = "c_ip"
      type = "unique"
    } # unique_client_ips

    sc_bytes = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    }

    cs_bytes = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    }

    sc_stream_bytes = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    }

    cs_stream_bytes = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    }

    x_file_size = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    }

    x_file_length = {
      type = "int"
      integer_bits = 64
      display_format_type = "duration_compact"
    }

#    session_bytes = {
#      type = "int"
#      integer_bits = 64
#      display_format_type = "bandwidth"
#    }

    x_duration = {
      default = false
      requires_log_field = false
      type = "int"
      integer_bits = 64
      display_format_type = "duration_compact"
    }

    stream_duration = {
      default = false
      requires_log_field = false
      type = "int"
      integer_bits = 64
      display_format_type = "duration_compact"
    }

    session_duration = {
      default = false
      requires_log_field = false
      type = "int"
      integer_bits = 64
      display_format_type = "duration_compact"
    }

    # Average of stream_duration, using events as the denominator
    stream_duration_per_event = {
      default = false
      log_field = "stream_duration"
      requires_log_field = true
      type = "int"
      integer_bits = 64
      aggregation_method = "average"
      average_denominator_field = "events"
      display_format_type = "duration_compact"
    }

    # Average of stream_duration, using visitors as the denominator
    stream_duration_per_visitor = {
      default = false
      log_field = "stream_duration"
      requires_log_field = true
      type = "int"
      integer_bits = 64
      aggregation_method = "average"
      average_denominator_field = "visitors"
      display_format_type = "duration_compact"
    }

  } # database.numerical_fields

  create_profile_wizard_options = {

    # How the reports should be grouped in the report menu
    report_groups = {

      date_time_group = {
        tz = true
      }

      content_group = {
        cs_uri_stem = true
        cs_uri_query = true
        file_type = true
        s_uri = true
        x_ctx = true
        x_sname = true
        x_sname_query = true
        x_suri_query = true
        x_suri_stem = true
        x_suri = true
        x_spos = true
        x_file_ext = true
        x_file_name = true
      } # content_group

      client_group = {
        c_client_id = true
        c_connect_type = true
        c_ip = true
        c_proto = true
        c_referrer = true
        c_user_agent = true
        domain = true
        domain_description = true
        isp = true
        location = true
        organization = true
        screen_depth = true
        screen_dimensions = true
      } # client_group

      other_group = {
        s_ip = true
        x_adaptor = true
        x_app = true
        x_appinst = true
        x_category = true
        x_comment = true
        x_cpu_load = true
        x_event = true
        x_mem_load = true
        x_pid = true
        x_sc_qos_bytes = true
        x_service_name = true
        x_status = true
        x_vhost = true
      } # other_group

    } # report_groups

    snapons = {

      # Attach a concurrent_events snapon to compute concurrent connections
#      concurrent_connections = {
#        snapon = "concurrent_events"
#        name = "concurrent_connections"
#        label = "$lang_admin.snapons.plugins.cloudfront.concurrent_connections"
#        parameters = {
#          date_time_field.parameter_value = "date_time"
#          duration_field.parameter_value = "session_duration"
#          concurrent_events_name = {
#            parameter_value = "$lang_admin.snapons.plugins.cloudfront.concurrent_connections"
#            final_node_name = "concurrent_connections"
#          }
#        } # parameters
#
#        requires_database_fields = {
#          date_time = true
#          x_duration = true
#        }
#
#      } # concurrent_connections

#      # Attach a cumulative_field_sum snapon to compute sc_bytes
#      sc_bytes = {
#        snapon = "cumulative_field_sum"
#        name = "sc_bytes_total"
#        label = "$lang_admin.snapons.plugins.cloudfront.sc_bytes"
#        parameters = {
#          date_time_field.parameter_value = "date_time"
#          cumulative_field.parameter_value = "sc_bytes"
##          session_id_field.parameter_value = "c_client_id"
#          session_id_field.parameter_value = "x_cf_client_id"
#          cumulative_field_sum_name = {
#            parameter_value = "$lang_admin.snapons.plugins.cloudfront.sc_bytes"
#            final_node_name = "sc_bytes_total"
#          }
#        } # parameters
#
#        requires_log_fields = {
#          sc_bytes = true
#          date_time = true
##          c_client_id = true
#          x_cf_client_id = true
#        }
#
#      } # sc_bytes

#      # Attach a cumulative_field_sum snapon to compute cs_bytes
#      cs_bytes = {
#        snapon = "cumulative_field_sum"
#        name = "cs_bytes_total"
#        label = "$lang_admin.snapons.plugins.cloudfront.cs_bytes"
#        parameters = {
#          date_time_field.parameter_value = "loadorder"
#          cumulative_field.parameter_value = "cs_bytes"
##          session_id_field.parameter_value = "c_client_id"
#          session_id_field.parameter_value = "x_cf_client_id"
#          cumulative_field_sum_name = {
#            parameter_value = "$lang_admin.snapons.plugins.cloudfront.cs_bytes"
#            final_node_name = "cs_bytes_total"
#          }
#        } # parameters
#
#        requires_log_fields = {
#          cs_bytes = true
#          date_time = true
##          c_client_id = true
#          x_cf_client_id = true
#        }
#
#      } # cs_bytes

      # Attach a cloudfront_session_bytes field
      cloudfront_session_bytes = {
        snapon = "cumulative_session_bytes"
        name = "cumulative_session_bytes"
        label = "$lang_admin.snapons.plugins.cloudfront.cloudfront_session_bytes"
        parameters = {
          date_time_field.parameter_value = "date_time"
          cumulative_field.parameter_value = "sc_bytes"
#          session_id_field.parameter_value = "c_client_id"
          session_id_field.parameter_value = "x_cf_client_id"
          x_event_field.parameter_value = "x_event"
          cumulative_session_bytes_name = {
            parameter_value = "$lang_admin.snapons.plugins.cloudfront.cloudfront_session_bytes"
            final_node_name = "cloudfront_session_bytes"
          }
        } # parameters

        requires_log_fields = {
          sc_bytes = true
          date_time = true
#          c_client_id = true
          x_cf_client_id = true
        }

      } # cloudfront_session_bytes

      # Add the standard reports
      add_standard_reports = {
        name = "add_standard_reports"
        label = "add_standard_reports"
        snapon = "add_standard_reports"
      } # add_standard_reports

    } # snapons

  } # create_profile_wizard_options

} # cloudfront_streaming