#!/usr/bin/perl
#
# This program is Copyright (c) 2004 by Greg Ferrar.
#
# This script converts from Apache Combined log format to IIS W3C log format.
# It is useful when analyzing a dataset containing both Apache and IIS log data;
# it can be used to convert the Apache to IIS so all data can be analyzed in a
# single configuration.
#
# Usage:   apache_combined_to_iis.pl < infile > outfile
#     or   apache_combined_to_iis.pl infile > outfile
#


use strict;
use warnings;

# Map Apache three-letter month names to the month numbers used in W3C dates.
my %months = (
    "Jan" => 1,
    "Feb" => 2,
    "Mar" => 3,
    "Apr" => 4,
    "May" => 5,
    "Jun" => 6,
    "Jul" => 7,
    "Aug" => 8,
    "Sep" => 9,
    "Oct" => 10,
    "Nov" => 11,
    "Dec" => 12,
);

# convert_line($line)
#
# Convert one Apache log line to a W3C log line matching the #Fields header
# printed below.
#
#   $line    one line of Apache Combined (or Common) log data; a trailing
#            newline is allowed.
#
# Returns the W3C line without a trailing newline, or undef (empty list in
# list context) if the line cannot be parsed or its date is not of the form
# dd/Mon/yyyy with a known month name.
sub convert_line {
    my ($line) = @_;

    # Parse the mandatory leading fields.  The pattern is deliberately not
    # anchored at the end: whatever follows the byte count is captured in
    # $rest and parsed separately, so lines without a referrer/agent part
    # still convert.
    my ($c_ip, $cs_username, $date, $time, $cs_method, $cs_uri, $sc_status, $sc_bytes, $rest) =
        $line =~ /^([^ ]+) - ([^ ]+) \[([^:]*):([^ ]*) [^]]*\] \"([A-Z-]+) ([^ ]*) [^\"]*\" ([0-9]+) ([^ ]+)(.*)/
        or return;

    # Parse the optional quoted referrer and user agent.  A backslash
    # escapes the next character, so an embedded \" does not end the field.
    my ($cs_Referer, $cs_User_Agent) = ("-", "-");
    if ($rest =~ /^ "((?:[^"\\]|\\.)*)" "((?:[^"\\]|\\.)*)"/) {
        ($cs_Referer, $cs_User_Agent) = ($1, $2);
    }

    # Convert \" to ' in the free-text fields
    for my $field ($cs_Referer, $cs_User_Agent, $cs_uri) {
        $field =~ s/\\"/'/g;
    }

    # Break the URI into stem and query
    my ($cs_uri_stem, $cs_uri_query) = ($cs_uri, "");
    if ($cs_uri =~ /^([^\?]*)\?(.*)$/) {
        ($cs_uri_stem, $cs_uri_query) = ($1, $2);
    }

    # W3C fields are space-delimited, so an empty value must be written as
    # "-" or every later column would shift.  Compare against the empty
    # string rather than testing truth, so a value of "0" is preserved.
    for my $field ($cs_uri_stem, $cs_uri_query, $cs_Referer, $cs_User_Agent) {
        $field = "-" if $field eq "";
    }

    # Convert the referrer and agent from Apache to W3C format
    $cs_User_Agent =~ s/ /\+/g;
    $cs_Referer =~ s/ /\+/g;

    # Convert the date from Apache to W3C format; reject the line rather
    # than emit a bogus 0000-00-00 date.
    my ($mday, $month, $year) =
        $date =~ /^([0-9][0-9])\/([A-Z][a-z][a-z])\/([0-9][0-9][0-9][0-9])/
        or return;
    return unless exists $months{$month};
    my $iisdate = sprintf("%04d-%02d-%02d", $year, $months{$month}, $mday);

    return "$iisdate $time $c_ip $cs_username $cs_method $cs_uri_stem $cs_uri_query $sc_status $sc_bytes $cs_User_Agent $cs_Referer";
}

# Add W3C header (the #Date value is a fixed literal, not the current time)
print "#Software: Microsoft Internet Information Services 5.0\n";
print "#Version: 1.0\n";
print "#Date: 2004-01-01 00:00:00\n";
print "#Fields: date time c-ip cs-username cs-method cs-uri-stem cs-uri-query sc-status sc-bytes cs(User-Agent) cs(Referer)\n";

# Parse all lines of input
while (my $line = <>) {

    my $w3c_line = convert_line($line);

    # If this line parsed okay, display the W3C line
    if (defined $w3c_line) {
        print "$w3c_line\n";
    }

    # Otherwise display a warning.  Use print, not printf: the log line is
    # data and may contain '%' characters that printf would treat as format
    # directives.
    else {
        print STDERR "WARNING: line does not match Apache Combined format; ignoring: $line\n";
    }

} # while <>

