# Sawmill log format plug-in "openfire_im" (instant-message log in XML form).
# The visible parsing code walks each log line tag by tag, keeps the currently open tags in
# xml_tag_stack (each pushed as "//" . tag_name), and stores attribute values and tag text with
# set_collected_field() under a lowercased field name built from the tag path ("//" becomes "_").
# On a closing tag, when xml_tag_stack equals ignoreTags and the collected message_thread and
# jive_packet_timestamp both differ from '(empty)' and the timestamp matches the expected pattern,
# it sets the date and time fields and calls accept_collected_entry().
# NOTE(review): in this copy the text following 'log.format.autodetect_regular_expression = "' looks
# truncated - the string literal is never closed and xml_tag_stack, ignoreTags, current_value, isCDATA,
# isMultiLineTag, begin_ptr and line_length are used with no visible declaration. The XML sample in the
# header comment and the examples after "e.g.:" also appear to have lost their tags. Presumably the
# angle-bracketed text was stripped during extraction - TODO confirm against the original plug-in.
# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved. openfire_im = { plugin_version = "1.0" # 2009-09-14 - 1.0 - KBB - Initial creation # Note: Actual log has two problems. # 1) There are two entries for each message which are identical except for the Stream ID, # so the count of Message IDs is half the number of entries in the database. # 2) There are no newlines in the log except those in the message bodies and # Sawmill truncates the extremely long lines, which causes tags to be lost # and processing to stop. This plug-in was tested with a log where newlines had # been added between "packet>" and " # # #we're all in the same boat #S9wxH3 # # # log.format.autodetect_regular_expression = "'); ); else ( greater_than_index = index(v.line, '>'); ); if (isMultiLineTag and (greater_than_index == -1)) then ( # Process attributes of multi-line tag, or CDATA contents if (isCDATA) then ( current_value .= v.line . "\n"; ); else ( v.attr = v.line; while (matches_regular_expression(v.attr, '^[ ]*([^ ]+)="([^"]*)"(.*)$')) ( v.key = $1; v.value = $2; v.attr = $3; # Report the proposed field name and value v.field_name = replace_first(xml_tag_stack, ignoreTags, ''); v.field_name = replace_all(v.field_name, '//', '_'); # multi_line_tag_name is on the stack already v.field_name .= '_' . v.key; if (matches_regular_expression(v.field_name, '^_(.*)$')) then ( v.field_name = $1; ); # Put this value in the log entry set_collected_field('', lowercase(v.field_name), v.value); ); ); ); else ( # Finish with a multi line tag if (isMultiLineTag) then ( v.rest_of_tag = substr(v.line, 0, greater_than_index); # Process remaining attributes or CDATA contents if (isCDATA) then ( current_value .= substr(v.rest_of_tag, 0, greater_than_index) . 
"\n"; isCDATA = false; greater_than_index = greater_than_index + 2; ); else ( v.attr = v.rest_of_tag; while (matches_regular_expression(v.attr, '^[ ]*([^ ]+)="([^"]*)"(.*)$')) ( v.key = $1; v.value = $2; v.attr = $3; # Report the proposed field name and value v.field_name = replace_first(xml_tag_stack, ignoreTags, ''); v.field_name = replace_all(v.field_name, '//', '_'); # multi_line_tag_name is on the stack already v.field_name .= '_' . v.key; if (matches_regular_expression(v.field_name, '^_(.*)$')) then ( v.field_name = $1; ); # Put this value in the log entry set_collected_field('', lowercase(v.field_name), v.value); ); # Pop the tag off the stack if it turns out to be single if (ends_with(v.rest_of_tag, '/')) then ( xml_tag_stack = substr(xml_tag_stack, 0, length(xml_tag_stack) - length(multi_line_tag_name) - 2); ); ); isMultiLineTag = false; multi_line_tag_name = ""; # Continue reading the rest of the line begin_ptr = greater_than_index + 1; ); while (begin_ptr < line_length) ( # Find the first tag in this line int less_than_index = index(v.line, '<', begin_ptr); #echo("Setting less_than_index " . less_than_index); # debug # If there are no tags in this line, the whole line is part of the current value # (Note that nested tags with text, such as in html ( text text text text) # will give peculiar results because current value of nested tags will not be in value of # outer tags.) if (less_than_index == -1) then ( current_value .= substr(v.line, begin_ptr) . "\n"; begin_ptr = line_length; #echo("No tags - set begin_ptr " . begin_ptr); # debug #echo("current_value *" . current_value . "*"); # debug ); # Found a tag else ( #echo("Found a tag v.line=*" . substr(v.line, begin_ptr) . "*"); # debug string tag_name = ""; # Everything up to the tag is part of the current value current_value .= substr(v.line, begin_ptr, less_than_index - begin_ptr); #echo("Setting current_value " . 
current_value); # debug # Find the close of the tag greater_than_index = index(v.line, '>', less_than_index); #echo("Setting greater_than_index " . greater_than_index); # debug # Get the tag name - Multi-line tag if (greater_than_index == -1) then ( tag_name = substr(v.line, less_than_index + 1); isMultiLineTag = true; begin_ptr = line_length; # skip rest of line #echo("Multi-line tag - set begin_ptr " . begin_ptr); # debug ); # Regular tag else ( tag_name = substr(v.line, less_than_index + 1, greater_than_index - less_than_index - 1); #echo("Set tag_name " . tag_name); # debug # Continue reading the rest of the line begin_ptr = greater_than_index + 1; #echo("Regular tag - set begin_ptr " . begin_ptr); # debug ); # Check for closing tag if (starts_with(tag_name, '/')) then ( # Get the tag name tag_name = substr(tag_name, 1); #echo("Closing tag: " . tag_name . "; value=" . current_value); # debug # Verify that this is the open tag name if (!ends_with(xml_tag_stack, "//" . tag_name)) then error("XML parsing error while processing log data; closing tag '" . tag_name . "' is out of order; stack=" . xml_tag_stack); # Report the proposed field name and value v.field_name = xml_tag_stack; if (starts_with(xml_tag_stack, ignoreTags) and (xml_tag_stack ne ignoreTags)) then ( v.field_name = replace_first(xml_tag_stack, ignoreTags, ''); ); v.field_name = replace_all(v.field_name, '//', '_'); if (matches_regular_expression(v.field_name, '^_(.*)$')) then ( v.field_name = $1; ); # Put this value in the log entry set_collected_field('', lowercase(v.field_name), current_value); # Accept when a complete message ends. 
if (xml_tag_stack eq ignoreTags) then ( v.timestamp = get_collected_field('', 'jive_packet_timestamp'); v.thread = get_collected_field('', 'message_thread'); if (v.thread ne '(empty)' and v.timestamp ne '(empty)') then ( # if (matches_regular_expression(v.timestamp, '([A-Za-z]{3}) ([0-9]{2}), ([0-9]{4}) ([0-9]{2}:[0-9]{2}:[0-9]{2}):[0-9]+ ([AP]M)')) then ( set_collected_field('', 'date', $2 . "/" . $1 . "/" . $3); set_collected_field('', 'time', $4 . " " . $5); accept_collected_entry('', false); ); ); ); # Pop the closing tag off the stack xml_tag_stack = substr(xml_tag_stack, 0, length(xml_tag_stack) - length(tag_name) - 2); #echo("Pop - now stack=" . xml_tag_stack); # debug # Reset the value for the next tag current_value = ""; ); # if closing tag else ( # single or opening tag # It's a single tag, a CDATA or an xml special tag - don't put it on the stack # Get this info before possibly chopping off '/' below bool isSingleTag = (ends_with(tag_name, '/') or starts_with(tag_name, '!') or # e.g.: starts_with(tag_name, '?')); # e.g.: if (matches_regular_expression(tag_name, '!\\\\[CDATA\\\\[(.*)(\\\\]\\\\])') or matches_regular_expression(tag_name, '!\\\\[CDATA\\\\[(.*)')) then ( current_value .= $1; if (isMultiLineTag) then ( current_value .= "\n"; isCDATA = true; ); ); else ( # Remove and process attributes if (matches_regular_expression(tag_name, '^([^ ]*)( .*)$')) then ( tag_name = $1; v.attr = $2; while (matches_regular_expression(v.attr, '^[ ]*([^ ]+)="([^"]*)"(.*)$')) ( v.key = $1; v.value = $2; v.attr = $3; # Report the proposed field name and value v.field_name = replace_first(xml_tag_stack, ignoreTags, ''); v.field_name = replace_all(v.field_name, '//', '_'); # We know this is an opening tag, but it isn't on the stack yet, (and may never be) v.field_name .= '_' . tag_name . '_' . 
v.key; if (matches_regular_expression(v.field_name, '^_(.*)$')) then ( v.field_name = $1; ); # Put this value in the log entry set_collected_field('', lowercase(v.field_name), v.value); ); #echo("After removing attributes from tag_name *" . tag_name . "*"); # debug ); ); # Save (after mods) in case it is single and needs to be # popped from the stack - doesn't hurt to save if not multi-line multi_line_tag_name = tag_name; # It's a single tag, a CDATA or an xml special tag - don't put it on the stack if (isSingleTag) then ( #echo("Single tag: " . tag_name); # debug ); # if single tag # On open tag, remember the tag in the tag stack else ( #echo("Opening tag: *" . tag_name . "*"); # debug xml_tag_stack .= "//" . tag_name; #echo("Push - now stack=" . xml_tag_stack); # debug ); ); # if not closing tag ); # if found a tag #echo("Now v.line=" . substr(v.line, begin_ptr, 100)); # debug #echo("Now v.line=" . substr(v.line, begin_ptr)); # debug ); # while line length ); # if not in the middle of a multi-line tag ` # Database fields database.fields = { date_time = "" day_of_week = "" hour_of_day = "" message_body = "" message_from = "" message_to = "" jive_packet_streamid = "" message_thread = "" message_id = "" message_type = "" jive_packet_status = "" } # database.fields # Log filters log.filters = { # mark_entry = { # label = '$lang_admin.log_filters.mark_entry_label' # comment = '$lang_admin.log_filters.mark_entry_comment' # value = 'messages = 1;' # } # mark_entry } # log.filters database.numerical_fields = { messages = { default = true requires_log_field = true log_field = message_id type = "unique" } # messages threads = { default = true requires_log_field = true log_field = message_thread type = "unique" } # threads streams = { default = true requires_log_field = true log_field = jive_packet_streamid type = "unique" } # streams } # database.numerical_fields create_profile_wizard_options = { # How the reports should be grouped in the report menu report_groups = { 
date_time_group = "" } # report_groups } # create_profile_wizard_options } # openfire_im