{= include("docs.util"); start_docs_page(docs.technical_manual.page_titles.newsletters); =}
![]() |
Sawmill Newsletter September 15, 2008 |
#!/usr/bin/perl
#
# compute_size_data.pl <pathname>
#
# Walks every regular file under <pathname> (using find) and prints one line per file:
#
#   pathname=<path>|size=<bytes on disk>|uncompressedsize=<bytes>|lines=<line count>
#
# For files whose name ends in .gz, uncompressedsize is the size reported by "gunzip -l"
# and lines is counted on the decompressed data; for every other file uncompressedsize
# is the same as size.
#
# External commands are started with list-form pipe opens, so no shell is involved and
# pathnames containing spaces or shell metacharacters are passed through unchanged.
use strict;
use warnings;

my \$usage = "compute_size_data.pl <pathname>";
my \$pathname = \$ARGV[0];
if (!defined(\$pathname) || \$pathname eq "") {
  print "Usage: \$usage\n";
  exit(-1);
}

# Returns the number of newline-terminated lines readable from the given open handle
# (the same figure "wc -l" reports). chomp() returns 1 only when it removed a newline,
# so a trailing unterminated fragment is not counted.
sub count_lines {
  my \$handle = shift;
  my \$count = 0;
  while (my \$line = <\$handle>) {
    \$count += chomp(\$line);
  }
  return \$count;
}

# Returns the line count of a plain file, or of the decompressed contents of a .gz file
# when the second argument is true. Returns 0, after a warning on STDERR, when the data
# cannot be opened.
sub count_file_lines {
  my \$path = shift;
  my \$is_gzip = shift;
  my \$data;
  my \$opened = \$is_gzip ? open(\$data, "-|", "gunzip", "-c", \$path)
                         : open(\$data, "<", \$path);
  if (!\$opened) {
    warn("Can't read \$path: \$!\n");
    return 0;
  }
  my \$count = count_lines(\$data);
  close(\$data);
  return \$count;
}

# Returns the uncompressed size in bytes that "gunzip -l" prints for the given .gz file,
# or undef when gunzip cannot be started or prints no data row.
sub gzip_uncompressed_size {
  my \$gzpath = shift;
  my \$size;
  open(my \$listing, "-|", "gunzip", "-l", \$gzpath) || return;
  while (my \$row = <\$listing>) {
    # The data row starts with two numbers (compressed size, then uncompressed size);
    # the header row does not start with a number, so it never matches.
    if (!defined(\$size) && \$row =~ /^ *[0-9]+ +([0-9]+) /) {
      \$size = \$1;
    }
  }
  close(\$listing);
  return \$size;
}

open(my \$find, "-|", "find", \$pathname, "-type", "f") || die("Can't run find \$pathname -type f: \$!");
while (my \$foundpathname = <\$find>) {
  chomp(\$foundpathname);

  # -s yields a false, non-numeric value for an empty file; report 0 instead so the
  # size field is always a number.
  my \$filesize = (-s \$foundpathname) || 0;
  my \$uncompressedsize = \$filesize;
  my \$is_gzip = (\$foundpathname =~ /[.]gz\$/) ? 1 : 0;

  if (\$is_gzip) {
    my \$reported = gzip_uncompressed_size(\$foundpathname);
    if (defined(\$reported)) {
      \$uncompressedsize = \$reported;
    }
    else {
      # Keep the on-disk size so the output line still has a numeric field.
      warn("Can't determine uncompressed size of \$foundpathname; using size on disk\n");
    }
  }

  my \$lines = count_file_lines(\$foundpathname, \$is_gzip);

  print "pathname=\$foundpathname|size=\$filesize|uncompressedsize=\$uncompressedsize|lines=\$lines\n";
}
close(\$find); |
pathname=/logs/12345/log_12345.200806292100-2200-0.log.gz|size=542192|uncompressedsize=4046692|lines=7883 pathname=/logs/12345/log_12345.200808172000-2100-0.log.gz|size=667984|uncompressedsize=5331102|lines=11740 pathname=/logs/12345/log_12345.200806131300-1400-0.log.gz|size=380606|uncompressedsize=2970825|lines=5608 pathname=/logs/12345/log_12345.200805222000-2100-0.log.gz|size=589198|uncompressedsize=4567431|lines=8284 pathname=/logs/12345/log_12345.200803252100-2200-0.log.gz|size=691357|uncompressedsize=6072894|lines=12695 pathname=/logs/12346/log_12346.200803012200-2300-0.log.gz|size=513444|uncompressedsize=3881224|lines=7514 pathname=/logs/12346/log_12346.200805101400-1500-0.log.gz|size=322774|uncompressedsize=2501874|lines=4937 pathname=/logs/12346/log_12346.200712311800-1900-0.log.gz|size=461202|uncompressedsize=3422076|lines=6165 pathname=/logs/12346/log_12346.200806270700-0800-0.log.gz|size=105324|uncompressedsize=813253|lines=1807 pathname=/logs/12346/log_12346.200803172000-2100-0.log.gz|size=751699|uncompressedsize=5731115|lines=10523 |
compute_size_data = {

  plugin_version = "1.0"

  # Label and type of this log format
  log.format.format_label = "compute_size_data.pl Log Format"
  log.miscellaneous.log_data_type = "other"
  log.miscellaneous.log_format_type = "other"

  # Auto-detection: the log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^pathname=.*uncompressedsize="

  # Log fields; all of them are filled in by the parsing filter below
  log.fields = {
    date = ""
    time = ""
    pathname = {
      type = "page"
      hierarchy_dividers = "/"
      left_to_right = true
      leading_divider = "true"
    } # pathname
    size = ""
    uncompressed_size = ""
    lines = ""
    files = ""
  } # log.fields

  # Non-numerical database fields
  database.fields = {
    date_time = ""
    day_of_week = ""
    hour_of_day = ""
    pathname = {
      suppress_bottom = 99999
    }
  } # database.fields

  # Numerical database fields; the two size fields are floats displayed as bandwidth
  database.numerical_fields = {
    files = {
      default = true
    }
    lines = {
      default = true
    }
    size = {
      type = "float"
      default = true
      display_format_type = "bandwidth"
    } # size
    uncompressed_size = {
      label = "uncompressed size"
      type = "float"
      default = true
      display_format_type = "bandwidth"
    } # uncompressed_size
  } # database.numerical_fields

  log.parsing_filters.parse = `
if (matches_regular_expression(current_log_line(), '^pathname=([^|]+)[|]size=([0-9]+)[|]uncompressedsize=([0-9]+)[|]lines=([0-9]+)')) then (

  # Copy the four captured values into the log fields; files is 1 for every line
  pathname = \$1;
  size = \$2;
  uncompressed_size = \$3;
  lines = \$4;
  files = 1;
  if (matches_regular_expression(pathname, '[.]([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])-')) then (
    date = \$1 . '-' . \$2 . '-' . \$3;
    time = \$4 . ':' . \$5 . ':00';
  );

); # if matches line
`

  create_profile_wizard_options = {

    # The reports menu
    report_groups = {
      date_time_group = ""
    } # report_groups

  } # create_profile_wizard_options

} # compute_size_data |
compute_size_data = {

  plugin_version = "1.0"

  # Label and type of this log format
  log.format.format_label = "compute_size_data.pl Log Format"
  log.miscellaneous.log_data_type = "other"
  log.miscellaneous.log_format_type = "other" |
  # Auto-detection: the log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^pathname=.*uncompressedsize=" |
  # Log fields; all of them are filled in by the parsing filter
  log.fields = {
    date = ""
    time = ""
    pathname = {
      type = "page"
      hierarchy_dividers = "/"
      left_to_right = true
      leading_divider = "true"
    } # pathname
    size = ""
    uncompressed_size = ""
    lines = ""
    files = ""
  } # log.fields |
  # Non-numerical database fields
  database.fields = {
    date_time = ""
    day_of_week = ""
    hour_of_day = ""
    pathname = {
      suppress_bottom = 99999
    }
  } # database.fields |
  database.numerical_fields = {
    files = {
      default = true
    }
    lines = {
      default = true
    }
    size = {
      type = "float"
      default = true
      display_format_type = "bandwidth"
    } # size
    uncompressed_size = {
      label = "uncompressed size"
      type = "float"
      default = true
      display_format_type = "bandwidth"
    } # uncompressed_size
  } # database.numerical_fields |
  log.parsing_filters.parse = `
if (matches_regular_expression(current_log_line(), '^pathname=([^|]+)[|]size=([0-9]+)[|]uncompressedsize=([0-9]+)[|]lines=([0-9]+)')) then (

  # Copy the four captured values into the log fields; files is 1 for every line
  pathname = \$1;
  size = \$2;
  uncompressed_size = \$3;
  lines = \$4;
  files = 1;
  if (matches_regular_expression(pathname, '[.]([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])-')) then (
    date = \$1 . '-' . \$2 . '-' . \$3;
    time = \$4 . ':' . \$5 . ':00';
  );

); # if matches line
` |
  create_profile_wizard_options = {

    # The reports menu
    report_groups = {
      date_time_group = ""
    } # report_groups

  } # create_profile_wizard_options |
} # compute_size_data |