Skip to content

Latest commit

 

History

History
executable file
·
204 lines (169 loc) · 5.32 KB

cleanup_offload_cache.pl

File metadata and controls

executable file
·
204 lines (169 loc) · 5.32 KB
 
1
2
3
4
5
#!/usr/bin/perl -w
use warnings;
use strict;
Jul 21, 2005
Jul 21, 2005
6
7
8
# unbuffered output.
$| = 1;
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
require LWP;
require LWP::UserAgent;
require HTTP::Status;
require HTTP::Date;
my $ua = new LWP::UserAgent; # we create a global UserAgent object
$ua->env_proxy;
sub dumpHttpHeads {
foreach (@_) {
my $url = $_;
my $request = HTTP::Request->new(HEAD => $url);
my $response = $ua->request($request);
if ($response->is_error()) {
print("HEAD request to $url failed.\n");
} else {
print("$url:\n");
my @keys = $response->header_field_names();
foreach (@keys) {
print(" $_: " . $response->header($_) . "\n");
}
print("\n\n");
}
}
}
sub loadMetadata {
my $fname = shift;
return undef if not open(FH, '<', $fname);
my %retval;
while (not eof(FH)) {
my $key = <FH>;
my $val = <FH>;
chomp($key) if (defined $key);
chomp($val) if (defined $val);
#print("'$key' == '$val'\n");
$retval{$key} = $val;
}
close(FH);
if (not defined $retval{'X-Offload-Hostname'}) {
$retval{'X-Offload-Hostname'} = 'icculus.org';
}
return(%retval);
}
Jul 21, 2005
Jul 21, 2005
55
sub usage {
Nov 6, 2009
Nov 6, 2009
56
die("USAGE: $0 <offloaddir> [--outputurls] [--nukeshortfiles] [--youngerthan=X]\n")
Jul 21, 2005
Jul 21, 2005
57
58
}
Mar 25, 2009
Mar 25, 2009
59
my $youngerthan = undef;
Jul 21, 2005
Jul 21, 2005
60
my $nukeshortfiles = 0;
Nov 6, 2009
Nov 6, 2009
61
my $outputurls = 0;
Jul 21, 2005
Jul 21, 2005
62
63
64
my $offloaddir = undef;
foreach (@ARGV) {
$nukeshortfiles = 1, next if ($_ eq '--nukeshortfiles');
Nov 6, 2009
Nov 6, 2009
65
$outputurls = 1, next if ($_ eq '--outputurls');
Mar 25, 2009
Mar 25, 2009
66
$youngerthan = $1, next if (/\A--youngerthan=(\d+)\Z/);
Jul 21, 2005
Jul 21, 2005
67
68
69
$offloaddir = $_, next if not defined $offloaddir;
usage();
}
Jul 21, 2005
Jul 21, 2005
71
usage() if (not defined $offloaddir);
72
73
74
opendir(DIRH, $offloaddir) || die("Couldn't open directory [$offloaddir]: $!");
Jul 21, 2005
Jul 21, 2005
75
my $diskrecovered = 0;
Mar 25, 2009
Mar 25, 2009
76
my $headrequests = 0;
Aug 29, 2008
Aug 29, 2008
77
78
79
my $filesseen = 0;
my $filesdelete = 0;
my $totalfilespace = 0;
Mar 25, 2009
Mar 25, 2009
81
82
83
84
85
86
87
88
89
print("\n");
print("mod_offload cleanup script starting up...\n");
if (defined $youngerthan) {
print("Only checking files younger than $youngerthan days.\n");
$youngerthan *= 24 * 60 * 60; # convert days to seconds.
} else {
print("Checking all files.\n");
}
Aug 29, 2008
Aug 29, 2008
90
while (my $f = readdir(DIRH)) {
Jul 21, 2005
Jul 21, 2005
91
# '7' is the file size info in stat().
Mar 25, 2009
Mar 25, 2009
92
# '9' is the mtime info in stat().
Jul 21, 2005
Jul 21, 2005
93
94
my $filespace = 0;
Aug 29, 2008
Aug 29, 2008
95
96
97
$filesseen++;
if ($f =~ /\Adebug-/) {
Jul 21, 2005
Jul 21, 2005
98
print(" - Deleting debug file '$f'.\n");
Sep 2, 2005
Sep 2, 2005
99
100
101
102
my @statbuf = (stat($f));
my $size = 0;
$size = $statbuf[7] if @statbuf;
$diskrecovered += $size;
Aug 29, 2008
Aug 29, 2008
103
104
$totalfilespace += $size;
$filesdelete++;
Jul 21, 2005
Jul 21, 2005
105
106
107
unlink("$offloaddir/$f");
}
Aug 29, 2008
Aug 29, 2008
108
109
next if (not $f =~ /\A(meta|file)data-/);
my ($filetype, $etag) = ($f =~ /\A(meta|file)data-(.*)\Z/);
110
111
112
my $metadatapath = $offloaddir . '/metadata-' . $etag;
my $filedatapath = $offloaddir . '/filedata-' . $etag;
Jul 21, 2005
Jul 21, 2005
113
my $filecachesize = (stat($filedatapath))[7];
Mar 25, 2009
Mar 25, 2009
114
115
116
117
118
my @metastat = stat($metadatapath);
my $filecachemtime = $metastat[9];
$filespace += $metastat[7] if (-f $metadatapath);
Jul 21, 2005
Jul 21, 2005
119
$filespace += $filecachesize if (-f $filedatapath);
Jul 21, 2005
Jul 21, 2005
120
Aug 29, 2008
Aug 29, 2008
121
122
$totalfilespace += $filespace;
123
124
125
if ((not -f $filedatapath) || (not -f $metadatapath)) {
unlink $metadatapath;
unlink $filedatapath;
Aug 29, 2008
Aug 29, 2008
126
$filesdelete++;
Jul 21, 2005
Jul 21, 2005
127
$diskrecovered += $filespace;
128
129
130
131
next;
}
next if ($filetype eq 'file');
Mar 25, 2009
Mar 25, 2009
132
next if ((defined $youngerthan) && ((time()-$metastat[9]) > $youngerthan));
133
134
135
136
137
138
139
140
141
my %metadata = loadMetadata($metadatapath);
next if (not %metadata);
my $tmp = $metadata{'ETag'};
$tmp = '"BOGUSSTRING"' if (not defined $tmp);
$tmp =~ s/\A\"(.*?)\"\Z/$1/;
if ($tmp ne $etag) {
print("File '$metadatapath' is bogus.\n");
Jul 21, 2005
Jul 21, 2005
142
$diskrecovered += $filespace;
143
144
unlink $metadatapath;
unlink $filedatapath;
Aug 29, 2008
Aug 29, 2008
145
$filesdelete++;
146
147
148
next;
}
Jul 21, 2005
Jul 21, 2005
149
my $len = $metadata{'Content-Length'};
150
151
152
my $hostname = $metadata{'X-Offload-Hostname'};
my $origurl = $metadata{'X-Offload-Orig-URL'};
my $url = 'http://' . $hostname . $origurl;
Nov 6, 2009
Nov 6, 2009
153
154
155
156
157
158
159
if ($outputurls) {
print "$url\n";
next;
}
$headrequests++;
160
161
162
163
164
165
166
167
168
169
170
171
172
my $request = HTTP::Request->new(HEAD => $url);
my $response = $ua->request($request);
print(" - $url ($etag) ... ");
my $dokill = 0;
my $httpcode = $response->code();
if ($httpcode == 404) {
print("is no longer on base server.");
$dokill = 1;
} elsif ($response->is_error()) {
# everything else we ignore for now.
print("status unknown (HTTP error $httpcode).");
Jul 21, 2005
Jul 21, 2005
173
174
175
} elsif (($nukeshortfiles) && ($len != $filecachesize)) {
$dokill = 1;
print("Cached file is wrong size.");
176
177
178
179
180
181
182
183
184
185
} else {
my $hetag = $response->header('ETag');
$hetag = '' if (not defined $hetag);
$dokill = 1 if ($hetag ne "\"$etag\"");
# !!! FIXME: check other attributes...
print("out of date in some way.") if ($dokill);
}
if ($dokill) {
print(" DELETE!\n");
Jul 21, 2005
Jul 21, 2005
186
$diskrecovered += $filespace;
187
188
unlink $metadatapath;
unlink $filedatapath;
Aug 29, 2008
Aug 29, 2008
189
$filesdelete++;
190
191
192
193
194
} else {
print("KEEP!\n");
}
}
Aug 29, 2008
Aug 29, 2008
195
196
closedir(DIRH);
Nov 6, 2009
Nov 6, 2009
197
198
199
200
201
202
if (not $outputurls) {
print("Recovered $diskrecovered bytes of $totalfilespace.\n");
print("$filesseen files seen, $filesdelete deleted.\n");
print("$headrequests HTTP HEAD requests.\n");
}
203
exit 0;