#
# tomcat_parser snapon
#
# This snapon implements tomcat parsing functionality, based on the pattern from an Access Log Valve
# ( http://tomcat.apache.org/tomcat-6.0-doc/config/valve.html#Access_Log_Valve ).
#
# When attached, the configure_parsing operation reads the pattern parameter, walks it one
# character at a time, and builds:
#   - profile.log.format.parsing_regular_expression (anchored with ^ and $)
#   - the matching nodes under profile.log.fields
#   - the matching nodes under profile.database.fields
#

tomcat_parser = {

  label = "$lang_admin.snapons.tomcat_parser.label"
  comment = "$lang_admin.snapons.tomcat_parser.comment"
  config_snapon_category = ""
  version = "1.0.1"
  # 2012-??-?? - 1.0 - GMF - initial creation
  # 2012-07-11 - 1.0.1 - GMF - Fixed field_label.tomcat_parser reference.

  # The single user-supplied parameter: the access log pattern string.
  parameters = {

    pattern = {
      parameter_value = ""
      validation_type = "string"
      form_element_label = "$lang_admin.snapons.tomcat_parser.parameters.pattern.form_element_label"
      form_element_type = "text"
      form_element_width = "500"
      description = ""
    } # pattern

  } # parameters

  # Form layout: one group containing the pattern parameter.
  parameters_form = {

    group_1 = {
      description = "$lang_admin.snapons.tomcat_parser.parameters_form.group_1.description"
      parameters = {
        pattern = true
      } # parameters
    } # group 1

  } # parameters_form

  attach_operations = {

    configure_parsing = {

      type = "execute_expression"

      expression = `

#echo("EXPRESSION");
#echo("profile: " . profile);
#echo("profile: " . node_as_string(profile));

# The pattern string entered for this snapon in the create-profile wizard.
string pattern = @profile{"create_profile_wizard_info"}{"snapons"}{"parser"}{"parameters"}{"pattern"}{"parameter_value"};
#echo("pattern: " . pattern);

# This subroutine creates a database field.
# It only references profile.database.fields.<fieldname>; the attribute assignments
# below are disabled (see the note that follows).
# NOTE(review): presumably referencing the subnode is what creates it - confirm.
subroutine(create_database_field(node profile, string fieldname), (

  #echo("create_database_field: " . fieldname);
  node database_field = profile{"database"}{"fields"}{fieldname};

  # Disabled all this, because it's done automatically now by final_step
  # if (fieldname eq 'day_of_week') then
  #   @database_field{"type"} = 'int';
  # else if (fieldname eq 'hour_of_day') then
  #   @database_field{"type"} = 'int';
  # else
  #   @database_field{"type"} = 'string';
  # @database_field{"label"} = fieldname;
  # @database_field{"derivation_method"} = "log_field";
  # @database_field{"log_field"} = fieldname;
  # if (fieldname eq "date_time") then
  #   @database_field{"category"} = "date_time";
  # else if (fieldname eq "date") then
  #   @database_field{"category"} = "date";
  # else if (fieldname eq "time") then
  #   @database_field{"category"} = "time";
  # else
  #   @database_field{"category"} = "";
  # @database_field{"aggregation_method"} = "none";
  # @database_field{"index"} = true;
  # @database_field{"suppress_top"} = 0;
  # @database_field{"suppress_bottom"} = 2;
  # @database_field{"always_include_bottom_level_items"} = false;
  # @database_field{"integer_bits"} = 0;
  # @database_field{"itemnums_hash_function"} = "rand_sum";

)); # create_database_field()

# This subroutine creates an aggregating database field.
#   type                - stored in the field's "type"
#   display_format_type - stored in the field's "display_format_type"
# The field gets integer_bits 64 and aggregation_method "sum". Its label is taken from
# lang_stats.field_labels_by_log_format.tomcat_pattern.<fieldname> if that node exists,
# else from lang_stats.field_labels.<fieldname> if that exists, else it is the raw name.
subroutine(create_aggregating_database_field(node profile, string fieldname, string type, string display_format_type), (

  #echo("create_aggregating_database_field: " . fieldname);
  node database_field = profile{"database"}{"fields"}{fieldname};
  @database_field{"type"} = type;
  @database_field{"integer_bits"} = 64;

  # The label is split into pieces ("{" . "=...") in the original; kept that way here.
  if ('lang_stats.field_labels_by_log_format.tomcat_pattern'?{fieldname}) then
    @database_field{"label"} = "{" . "=capitalize(expand(lang_stats.field_labels_by_log_format.tomcat_pattern." . fieldname . "))=" . "}";
  else if ('lang_stats.field_labels'?{fieldname}) then
    @database_field{"label"} = "{" . "=capitalize(expand(lang_stats.field_labels." . fieldname . "))=" . "}";
  else
    @database_field{"label"} = fieldname;

  @database_field{"display_format_type"} = display_format_type;
  @database_field{"aggregation_method"} = "sum";

)); # create_aggregating_database_field()

# This subroutine creates profile.log.fields.<fieldname> and sets its "type".
subroutine(create_log_field(node profile, string fieldname, string type), (

  #echo("create_log_field: fieldname=" . fieldname);
  #echo("profile: " . profile);
  node logfield = profile{"log"}{"fields"}{fieldname};
  #echo("logfield: " . logfield);
  @logfield{"type"} = type;
  # @logfield{"label"} = "{" . "=capitalize(expand(lang_stats.field_labels." . fieldname . "))=" . "}";
  # @logfield{"index"} = 0;
  # @logfield{"subindex"} = 0;
  # @logfield{"hierarchy_dividers"} = "";
  # @logfield{"left_to_right"} = false;
  # @logfield{"leading_divider"} = false;
  # @logfield{"case_sensitive"} = false;
  #echo("done");

)); ##=== create_log_field() ===##

# Holding area for log fields which are not captured by the regular expression
# (derived fields). They are created only after every captured field has been created.
node log_fields_at_end = new_node();

# This subroutine schedules a log field for creation after the pattern has been parsed.
subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (

  # Uses the @ value operator, like every other node assignment in this expression
  # and like the later read of @lfae{"type"}.
  @log_fields_at_end{fieldname}{"type"} = type;

));

# Delete any existing log fields
delete_node(profile{"log"}{"fields"});

# Parse the string, building the regular expression from it, and the fields
string regexp = "^";
string field_regexp;
string field_name;
string log_field_type;
string c;
for (int i = 0; i < length(pattern); i++) (

  # echo("i: " . i);
  c = substr(pattern, i, 1);
  # echo("c: " . c);

  # Handle % variables
  if (c eq '%') then (

    # Step onto the directive character following the %.
    i++;
    c = substr(pattern, i, 1);

    # Default log field type; individual directives override it below.
    log_field_type = "flat";

    if (c eq 'a') then (
      field_regexp = '([^ ]+)';
      field_name = "remote_ip_address";
    ); # a

    else if (c eq 'A') then (
      field_regexp = '([^ ]+)';
      field_name = "local_ip_address";
    ); # A

    else if (c eq 'b') then (
      field_regexp = '([^ ]+)';
      field_name = "bytes_sent";
    ); # b

    # NOTE(review): this yields "bytes_sent2", while the aggregating-field check further
    # down tests for "bytes_sent_nodash"; a %B field is therefore created as a normal
    # database field. Confirm whether that is intended.
    else if (c eq 'B') then (
      field_regexp = '([^ ]+)';
      field_name = "bytes_sent2";
    ); # B

    else if (c eq 'h') then (
      field_regexp = '([^ ]+)';
      field_name = "remote_host";
      log_field_type = "host";
    ); # h

    else if (c eq 'H') then (
      field_regexp = '([^ ]+)';
      field_name = "request_protocol";
    ); # H

    else if (c eq 'l') then (
      field_regexp = '([^ ]+)';
      field_name = "remote_logical_username";
    ); # l

    else if (c eq 'm') then (
      field_regexp = '([^ ]+)';
      field_name = "request_method";
    ); # m

    else if (c eq 'p') then (
      field_regexp = '([^ ]+)';
      field_name = "local_port";
    ); # p

    else if (c eq 'q') then (
      field_regexp = '([^ ]+)';
      field_name = "query_string";
    ); # q

    else if (c eq 'F') then (
      field_regexp = '([^ ]+)';
      field_name = "time_taken_response";
    ); # F

    # %r contributes three capture groups: operation, url and protocol.
    # The first two log fields are created here; the third (protocol) is created by the
    # common code after this chain, via field_name.
    else if (c eq 'r') then (
      field_regexp = '([^ ]*) ([^ ]*) ([^ ]*)';
      create_log_field(profile, "operation", "flat");
      create_database_field(profile, "operation");
      create_log_field(profile, "url", "page");
      create_database_field(profile, "url");
      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
      create_database_field(profile, 'file_type');
      field_name = "protocol";
    ); # r

    else if (c eq 's') then (
      field_regexp = '([^ ]+)';
      field_name = "status_code";
    ); # s

    else if (c eq 'S') then (
      field_regexp = '([^ ]+)';
      field_name = "user_session_id";
    ); # S

    # %t contributes two capture groups: the date part and the time part of a
    # bracketed timestamp. The "time" log field is created in the date branch below.
    else if (c eq 't') then (
      field_regexp = '[[]([0-9][0-9]/[A-Z][a-z][a-z]/[0-9][0-9][0-9][0-9]):([0-9][0-9]:[0-9][0-9]:[0-9][0-9]) [^]]+[]]';
      field_name = "date";
      log_field_type = "date";
    ); # t

    else if (c eq 'u') then (
      field_regexp = '([^ ]+)';
      field_name = "remote_user";
    ); # u

    else if (c eq 'U') then (
      field_regexp = '([^ ]+)';
      field_name = "requested_url";
    ); # U

    else if (c eq 'v') then (
      field_regexp = '([^ ]+)';
      field_name = "local_server_name";
    ); # v

    else if (c eq 'D') then (
      field_regexp = '([^ ]+)';
      field_name = "time_taken_milliseconds";
    ); # D

    else if (c eq 'T') then (
      field_regexp = '([^ ]+)';
      field_name = "time_taken";
    ); # T

    else if (c eq 'I') then (
      field_regexp = '([^ ]+)';
      field_name = "request_thread_name";
    ); # I

    # %{name}x form: the field name is taken from the text between the braces.
    else if (c eq '{') then (

      #echo("bracket detected");
      int closing_brace_pos = index(pattern, '}', i);
      #echo("closing_brace_pos: " . closing_brace_pos);
      if (closing_brace_pos == -1) then
        error("Invalid pattern: mismatched braces"); #LM

      field_name = substr(pattern, i+1, closing_brace_pos - i - 1);
      field_name = replace_all(field_name, "-", "_");

      # Leave i on the character after the closing brace; the loop increment then
      # steps past that character as well, so it is not added to the regexp.
      i = closing_brace_pos + 1;
      field_name = lowercase(field_name);

      # Handle things like %{NUMBER:bytes}
      if (matches_regular_expression(field_name, "^([^:]+):(.*)$")) then
        field_name = $2;

      #echo("bracketed field name: " . field_name);
      field_regexp = '([^"]+)';

    ); # {

    else
      error("Invalid pattern; unknown % directive %" . c); # LM

    # echo("found field_name=" . field_name . "; field_regexp=" . field_regexp);

    # Add this field to the regular expression
    regexp .= field_regexp;
    create_log_field(profile, field_name, log_field_type);

    # echo("NEED TO ADD FIELDS HERE (" . field_name . ")");

    # Create the time field (second capture group of %t) and the derived date fields
    if (field_name eq "date") then (
      create_log_field(profile, 'time', 'time');
      create_log_field_at_end(log_fields_at_end, 'date_time', 'date_time');
      create_log_field_at_end(log_fields_at_end, 'day_of_week', 'flat');
      create_log_field_at_end(log_fields_at_end, 'hour_of_day', 'flat');
      #echo("SCHEDULING date_time log field");
      create_database_field(profile, 'date_time');
      create_database_field(profile, 'day_of_week');
      create_database_field(profile, 'hour_of_day');
    );

    # Create derived field for user_agent
    else if (field_name eq "user_agent") then (
      # operating_system has no capture group of its own, so it is deferred to the end
      # like web_browser and every other derived field, instead of being created
      # between the captured fields.
      # NOTE(review): assumes capture groups are bound to log fields in creation
      # order - confirm against the Sawmill log format documentation.
      create_log_field_at_end(log_fields_at_end, 'operating_system', 'flat');
      create_database_field(profile, 'operating_system');
      create_log_field_at_end(log_fields_at_end, 'web_browser', 'flat');
      create_database_field(profile, 'web_browser');
    );

    # Create derived field for remote_host
    else if (field_name eq "remote_host") then (
      create_database_field(profile, 'remote_host');
      create_log_field_at_end(log_fields_at_end, 'location', 'flat');
      create_log_field_at_end(log_fields_at_end, 'country', 'flat');
      create_log_field_at_end(log_fields_at_end, 'region', 'flat');
      create_log_field_at_end(log_fields_at_end, 'city', 'flat');
      create_database_field(profile, 'location');
    );

    # Create derived fields for referrer
    else if (field_name eq "referrer") then (
      create_database_field(profile, 'referrer');
      create_log_field_at_end(log_fields_at_end, 'search_engine', 'flat');
      create_database_field(profile, 'search_engine');
      create_log_field_at_end(log_fields_at_end, 'search_phrase', 'flat');
      create_database_field(profile, 'search_phrase');
    );

    # Create derived file type field
    else if (field_name eq "url") then (
      create_database_field(profile, 'url');
      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
      create_database_field(profile, 'file_type');
    );

    # Handle aggregating fields
    else if (field_name eq "bytes_sent") then
      create_aggregating_database_field(profile, field_name, "int", "bandwidth");
    else if (field_name eq "bytes_sent_nodash") then
      create_aggregating_database_field(profile, field_name, "int", "bandwidth");
    else if (field_name eq "time_taken_milliseconds") then
      create_aggregating_database_field(profile, field_name, "int", "duration_millis");
    else if (field_name eq "time_taken") then
      create_aggregating_database_field(profile, field_name, "int", "duration_compact");

    # Create a normal database field
    else
      create_database_field(profile, field_name);

  ); # if %

  # Handle \[
  # The character after the backslash is wrapped in a one-character bracket expression,
  # so it is matched literally.
  # NOTE(review): c holds a single character; confirm that this literal reduces to a
  # single backslash after all quoting levels, otherwise this branch never matches.
  else if (c eq '\\\\') then (
    i++;
    c = substr(pattern, i, 1);
    regexp .= ('[' . c . ']');
  );

  # Handle e.g. [WORD -]
  # The text following "WORD " is appended to the regular expression as-is.
  else if (c eq '[') then (

    int closing_bracket_pos = index(pattern, ']', i);
    if (closing_bracket_pos == -1) then
      error("Invalid pattern: mismatched brackets"); #LM

    string bracketed_section = substr(pattern, i+1, closing_bracket_pos - i - 1);

    # Leave i on the closing bracket; the loop increment steps past it.
    i += length(bracketed_section) + 1;

    # field_name = 'unknown';
    if (matches_regular_expression(bracketed_section, '^WORD (.*)$')) then
      regexp .= $1;
    else
      error("Unknown Tomcat pattern bracketed section: " . bracketed_section);

  ); # if [

  # If it's not a variable, add it literally to the regular expression.
  else
    regexp .= c;

); # for i

regexp .= "$";

# Install the parsing_regular_expression
#echo("Final regular expression: " . regexp);
@profile{"log"}{"format"}{"parsing_regular_expression"} = regexp;

#echo("numerical fields: " . node_as_string(profile{"database"}{"numerical_fields"}));

# Create any final log fields
node lfae;
foreach lfae log_fields_at_end (
  #echo("Final log field creation: " . node_name(lfae));
  create_log_field(profile, node_name(lfae), @lfae{"type"});
);

# The events log field is always created, after all other log fields.
create_log_field(profile, "events", "flat");

#echo("Final log fields: " . node_as_string(profile{"log"}{"fields"}));
#echo("Final database fields: " . node_as_string(profile{"database"}{"fields"}));

`

    } # configure_parsing

  } # attach_operations

} # tomcat_parser