Skip to content

Latest commit

 

History

History
1163 lines (978 loc) · 32.9 KB

nph-offload.c

File metadata and controls

1163 lines (978 loc) · 32.9 KB
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// This is a C program that handles offloading of bandwidth from a web
// server. It's a sort of poor-man's Akamai. It doesn't need anything
// terribly complex (a webserver with cgi-bin support, a writable directory).
//
// It works like this:
// - You have a webserver with dynamic content, and static content that
// may change arbitrarily (i.e. - various users making changes to their
// homepages, etc). This server is under a lot of load, mostly from
// the static content, which tends to be big. There may be multiple virtual
// hosts on this machine. We call this the "base" server.
// - You have at least one other webserver that you can use to offload some
// of the bandwidth. We call this the "offload" server.
// - You set up an Apache module (mod_offload) on the first server.
// mod_offload inserts itself into the request chain, and decides if a
// given file is safe static content (real file, not a script/cgi, no
// password, etc). In those cases, it sends a 302 redirect, pointing the
// client to the offload server.
// - The offload server gets a request from the redirected client. It then
// sends an HTTP HEAD request for the file in question to the base server
// while the client waits. It decides if it has the right file based on
// the HEAD. If it does, it serves the cached file.
// - If the file is out of date, or doesn't exist on the offload server, it
// sends a regular HTTP request for it to the base server and
// begins caching it. While caching it, it also feeds it to the client
// that has been waiting.
// - If another request comes in while the file is being cached, it will
// stream what is already there from disk, and then continue to feed as
// the rest shows up.
// !!! FIXME: issues to work out.
// - Could have a partial file cached if server crashes or power goes out.
// Add a "cacher's process id" to the metadata, and have those feeding
// from the cache decide if this process died...if so, wipe the entry and
// recache it.
// - Need to have a way to clean out old files. If x.zip is on the base,
// gets cached, and then is deleted, it'll stay on the offload server
// forever. Getting a 404 from the HEAD request will clean it out, but
// the offload server needs to know to do that.
//
// Installation:
// You need PHP with --enable-sysvsem support. You should configure PHP to not
// have a time limit on script execution (max_execution_time setting, or
// just don't run this script in safe mode and it'll handle it). PHP for
// Windows currently doesn't support sysvsem, so until someone writes me
// a mutex implementation, we assume you'll use a Unix box for this script.
//
// You need Apache to push every web request to this script, presumably in a
// virtual host, if not the entire server.
//
// Assuming this script was at /www/scripts/index.php, you would want to add
// this to Apache's config:
//
// AliasMatch ^.*$ "/www/scripts/index.php"
//
// If you don't have control over the virtual host's config file, you can't
// use AliasMatch, but if you can put an .htaccess file in the root of the
// virtual host, you can get away with this:
//
// ErrorDocument 404 /index.php
//
// This will make all missing files (everything) run the script, which will
// then cache and distribute the correct content, including overriding the
// 404 status code with the correct one. Be careful about files that DO exist
// in that vhost directory, though. They won't offload.
//
// You can offload multiple base servers with one box: set up one virtual host
// on the offload server for each base server. This lets each base server
// have its own cache and configuration.
//
// Then edit offload_server_config.php to fit your needs.
//
// Restart the server so the AliasMatch configuration tweak is picked up.
//
// This file is written by Ryan C. Gordon (icculus@icculus.org).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdint.h>
#include <time.h>
#include <errno.h>
#include <semaphore.h>
#include <limits.h>
Aug 31, 2008
Aug 31, 2008
89
#include <sys/types.h>
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#define GVERSION "1.0.0"
#define GSERVERSTRING "nph-offload.c/" GVERSION
#include "offload_server_config.h"
#ifdef __GNUC__
#define ISPRINTF(x,y) __attribute__((format (printf, x, y)))
#else
#define ISPRINTF(x,y)
#endif
#ifdef max
#undef max
#endif
typedef int64_t int64;
static char *Guri = NULL;
static char *GFilePath = NULL;
static char *GMetaDataPath = NULL;
static void *GSemaphore = NULL;
static int GSemaphoreOwned = 0;
static FILE *GDebugFilePointer = NULL;
static void failure_location(const char *, const char *, const char *);
// Convenience wrapper around failure_location() for errors that carry no
// "Location" header: forwards both strings and passes NULL as the location.
//  httperr: HTTP status text, e.g. "503 Service Unavailable".
//  errmsg: text sent as the response body.
// failure_location() ends by calling terminate(), which calls exit(0).
static inline void failure(const char *httperr, const char *errmsg)
{
failure_location(httperr, errmsg, NULL);
} // failure
#if ( ((GDEBUG) && (GDEBUGTOFILE)) == 0 )
Aug 31, 2008
Aug 31, 2008
126
#define getDebugFilePointer() (NULL)
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#else
// Returns the stream used for debug logging, opening it on first use.
// The log lives at GOFFLOADDIR "/debug-<pid>" and is opened in append mode;
// the stream is cached in GDebugFilePointer. If fopen() fails, NULL is
// returned and the open is attempted again on the next call.
static FILE *getDebugFilePointer(void)
{
    if (GDebugFilePointer != NULL)
        return GDebugFilePointer;  // already opened earlier.

    char logpath[PATH_MAX];
    snprintf(logpath, sizeof (logpath), GOFFLOADDIR "/debug-%d", (int) getpid());
    GDebugFilePointer = fopen(logpath, "a");
    return GDebugFilePointer;
} // getDebugFilePointer
#endif
#if ((!GDEBUG) && defined(__GNUC__))
#define debugEcho(fmt, ...)
#else
static void debugEcho(const char *fmt, ...) ISPRINTF(1, 2);

// printf-style debug logger. When GDEBUG is enabled, formats the message,
// appends a newline and flushes; output goes to the debug file if
// GDEBUGTOFILE is set, otherwise to stdout. When GDEBUG is disabled this
// function does nothing.
static void debugEcho(const char *fmt, ...)
{
#if GDEBUG
    #if GDEBUGTOFILE
    FILE *out = getDebugFilePointer();
    #else
    FILE *out = stdout;
    #endif

    if (out == NULL)
        return;  // nowhere to log to.

    va_list args;
    va_start(args, fmt);
    vfprintf(out, fmt, args);
    va_end(args);
    fputs("\n", out);
    fflush(out);
#endif
} // debugEcho
#endif
Aug 31, 2008
Aug 31, 2008
167
// Writes an HTTP "Date:" header line for the current time, in GMT, to `out`
// (or to stdout when `out` is NULL). The line is terminated with "\r\n".
// Day and month names come from fixed English tables because strftime()'s
// "%a" and "%b" are locale-dependent.
// If the current time cannot be converted by gmtime(), nothing is written.
static void printf_date_header(FILE *out)
{
    // strftime()'s "%a" gives you locale-dependent strings...
    static const char *weekday[] = {
        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
    };

    // strftime()'s "%b" gives you locale-dependent strings...
    static const char *month[] = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    };

    if (out == NULL)
        out = stdout;

    const time_t now = time(NULL);
    const struct tm *tm = gmtime(&now);
    if (tm == NULL)
        return;  // gmtime() may fail; don't dereference a NULL result.

    fprintf(out, "Date: %s, %02d %s %04d %02d:%02d:%02d GMT\r\n",
            weekday[tm->tm_wday], tm->tm_mday, month[tm->tm_mon],
            tm->tm_year+1900, tm->tm_hour, tm->tm_min, tm->tm_sec);
} // printf_date_header
// Parses a base-10 integer from `str` into a 64-bit value.
//  - Leading spaces (' ' only) are skipped.
//  - A single optional '-' may precede the digits.
//  - Parsing stops at the first non-digit; no digits at all yields 0.
//  - Values outside the int64 range saturate to INT64_MAX / INT64_MIN
//    instead of overflowing (signed overflow is undefined behavior).
static int64 atoi64(const char *str)
{
    while (*str == ' ')
        str++;

    const int negative = (*str == '-');
    if (negative)
        str++;

    // Accumulate as a negative number: the negative range is one larger
    // than the positive one, so INT64_MIN itself can be represented.
    int64 acc = 0;
    for (; (*str >= '0') && (*str <= '9'); str++)
    {
        const int digit = *str - '0';
        // (acc * 10) - digit must stay >= INT64_MIN.
        if (acc < ((INT64_MIN + digit) / 10))
            return negative ? INT64_MIN : INT64_MAX;
        acc = (acc * 10) - digit;
    } // for

    if (negative)
        return acc;

    // -INT64_MIN is not representable; clamp to the largest positive value.
    return (acc == INT64_MIN) ? INT64_MAX : -acc;
} // atoi64
// Formats `num` as decimal text.
// The result lives in a single static buffer, so it is overwritten by the
// next call; copy it if it has to outlive that.
static const char *makeNum(int64 num)
{
    static char text[64];
    const long long wide = (long long) num;
    snprintf(text, sizeof (text), "%lld", wide);
    return text;
} // makeNum
// malloc() wrapper: on allocation failure it reports a
// "500 Internal Server Error" through failure() instead of letting the
// caller see an unchecked NULL.
static void *xmalloc(const size_t len)
{
    void *mem = malloc(len);
    if (mem != NULL)
        return mem;

    failure("500 Internal Server Error", "Out of memory.");
    return NULL;
} // xmalloc
// Returns a freshly allocated copy of the NUL-terminated string `str`.
// Memory comes from xmalloc(); the caller releases it with free().
static char *xstrdup(const char *str)
{
    const size_t bytes = strlen(str) + 1;  // include the terminator.
    char *copy = (char *) xmalloc(bytes);
    memcpy(copy, str, bytes);
    return copy;
} // xstrdup
static char *makeStr(const char *fmt, ...) ISPRINTF(1, 2);

// Builds a printf-style formatted string in freshly allocated memory.
// The format is processed twice: once to measure the output, once to
// produce it. The caller releases the result with free().
// A formatting error (negative vsnprintf() result) is reported through
// failure() as a "500 Internal Server Error".
static char *makeStr(const char *fmt, ...)
{
    va_list ap;

    // First pass: a zero-sized buffer makes vsnprintf() just report the length.
    va_start(ap, fmt);
    const int len = vsnprintf(NULL, 0, fmt, ap);
    va_end(ap);

    if (len < 0)
        failure("500 Internal Server Error", "String formatting failure.");

    char *retval = (char *) xmalloc(((size_t) len) + 1);

    // Second pass: the va_list must be restarted before it can be reused.
    va_start(ap, fmt);
    vsnprintf(retval, ((size_t) len) + 1, fmt, ap);
    va_end(ap);

    return retval;
} // makeStr
// a hashtable would be more sane, but really, we're talking about a handful
// of items, so this is probably the lower memory option, and it's fast
// enough for the simplicity.
// One node of a singly linked list of key/value string pairs.
// listSet() stores heap-allocated copies of both strings and listFree()
// releases them along with the node.
typedef struct list
{
const char *key; // name of the entry; compared with strcmp() on lookup.
const char *value; // current value; replaced when listSet() hits an existing key.
struct list *next; // following node, or NULL at the end of the list.
} list;
// Sets `key` to a private copy of `value` in the list at `*l`.
// An existing entry with the same key has its value replaced; otherwise a
// new node is pushed onto the front of the list.
// Returns the stored copy of the value, which the list owns.
static const char *listSet(list **l, const char *key, const char *value)
{
    // `value` may point into the entry being replaced, so duplicate it
    // before anything gets freed.
    const char *copy = xstrdup(value);

    list *node;
    for (node = *l; node != NULL; node = node->next)
    {
        if (strcmp(node->key, key) == 0)
        {
            free((void *) node->value);
            node->value = copy;
            return copy;
        } // if
    } // for

    // Not found: prepend a new node.
    node = (list *) xmalloc(sizeof (list));
    node->key = xstrdup(key);
    node->next = *l;
    node->value = copy;
    *l = node;
    return copy;
} // listSet
// Looks up `key` (exact, case-sensitive match) in list `l`.
// Returns the stored value, or NULL when the key is absent or the list is
// empty. The returned string is still owned by the list.
static const char *listFind(const list *l, const char *key)
{
    const list *node;
    for (node = l; node != NULL; node = node->next)
    {
        if (strcmp(node->key, key) == 0)
            return node->value;
    } // for
    return NULL;
} // listFind
// Releases every node of the list at `*l`, including each node's key and
// value strings, and leaves `*l` set to NULL.
static void listFree(list **l)
{
    list *node = *l;
    *l = NULL;

    while (node != NULL)
    {
        list *doomed = node;
        node = node->next;  // step forward before the node is released.
        free((void *) doomed->key);
        free((void *) doomed->value);
        free(doomed);
    } // while
} // listFree
// Opens the named POSIX semaphore "MOD-OFFLOAD-<uid>", creating it if it
// does not exist yet.
//  initialVal: note the inversion -- non-zero creates the semaphore with a
//   count of 0, zero creates it with a count of 1. The count is only
//   applied when this call actually creates the semaphore; if it already
//   exists it is simply opened as it is.
// Returns the semaphore handle (a sem_t *, passed around as void *), or
// NULL if it could be neither created nor opened.
static void *createSemaphore(const int initialVal)
{
    char semname[64];
    const int value = initialVal ? 0 : 1;

    snprintf(semname, sizeof (semname), "MOD-OFFLOAD-%d", (int) getuid());

    // Try to create it exclusively first, so we can tell "created" apart
    // from "already there" and fall back to a plain open.
    sem_t *sem = sem_open(semname, O_CREAT | O_EXCL, 0600, value);
    if ((sem == SEM_FAILED) && (errno == EEXIST))
        sem = sem_open(semname, 0);

    if (sem == SEM_FAILED)
        return NULL;

    return (void *) sem;
} // createSemaphore
// Acquires the global cache lock. Grabs nest: GSemaphoreOwned counts how
// many times this process holds the lock, and only the outermost grab
// touches the semaphore.
// The semaphore is created/opened lazily on first use. createSemaphore(0)
// yields a semaphore that is NOT held by us (a new one starts with a count
// of 1, an existing one keeps whatever state it has), so the lock must be
// taken with sem_wait() on the first grab too -- otherwise putSemaphore()
// posts a semaphore that was never acquired.
// On failure the ownership count is rolled back before failure() runs, so
// terminate() doesn't release a lock we never held.
static void getSemaphore(void)
{
    debugEcho("grabbing semaphore...(owned %d time(s).)", GSemaphoreOwned);
    if (GSemaphoreOwned++ > 0)
        return;  // nested grab; we already hold the lock.

    if (GSemaphore == NULL)
    {
        debugEcho("(have to create semaphore...)");
        GSemaphore = createSemaphore(0);
        if (GSemaphore == NULL)
        {
            GSemaphoreOwned--;
            failure("503 Service Unavailable", "Couldn't allocate semaphore.");
        } // if
    } // if

    int rc;
    do
    {
        rc = sem_wait((sem_t *) GSemaphore);
    } while ((rc == -1) && (errno == EINTR));  // retry if a signal interrupted us.

    if (rc == -1)
    {
        GSemaphoreOwned--;
        failure("503 Service Unavailable", "Couldn't lock semaphore.");
    } // if
} // getSemaphore
// Releases one level of ownership of the global cache lock. The semaphore
// itself is only posted when the outermost grab is released. Calling this
// while the lock isn't held is a no-op.
static void putSemaphore(void)
{
    if (GSemaphoreOwned == 0)
        return;  // not holding the lock at all.

    GSemaphoreOwned--;
    if ((GSemaphoreOwned == 0) && (GSemaphore != NULL))
    {
        const int rc = sem_post(GSemaphore);
        if (rc == -1)
            failure("503 Service Unavailable", "Couldn't unlock semaphore.");
    } // if

    debugEcho("released semaphore...(now owned %d time(s).)", GSemaphoreOwned);
} // putSemaphore
// Common exit path: drops every level of lock ownership still held, closes
// the debug log if one was opened, and ends the process with exit(0).
static void terminate(void)
{
    debugEcho("offload script is terminating...");

    // Unwind nested grabs so the semaphore is really released.
    while (GSemaphoreOwned > 0)
    {
        putSemaphore();
    } // while

    if (GDebugFilePointer != NULL)
    {
        fclose(GDebugFilePointer);
    } // if

    exit(0);
} // terminate
// Reads a metadata file into a key/value list.
// The file is a sequence of newline-terminated lines taken in pairs: a key
// line followed by its value line. Pairs with an empty key are skipped, and
// a trailing fragment without its terminating newline is ignored.
// Returns the list (release it with listFree()), or NULL if the file can't
// be opened, stat'd or read in full -- or if it holds no usable pairs.
static list *loadMetadata(const char *fname)
{
    const int fd = open(fname, O_RDONLY);
    if (fd == -1)
        return NULL;

    struct stat info;
    if (fstat(fd, &info) == -1)
    {
        close(fd);
        return NULL;
    } // if

    // Slurp the whole file, leaving room for a terminator.
    char *data = (char *) xmalloc(info.st_size + 1);
    const int complete = (read(fd, data, info.st_size) == info.st_size);
    close(fd);
    if (!complete)
    {
        free(data);
        return NULL;
    } // if
    data[info.st_size] = '\0';

    list *pairs = NULL;
    int count = 0;
    char *cursor = data;
    for (;;)
    {
        // Key runs up to the next newline...
        char *key = cursor;
        char *eol = strchr(key, '\n');
        if (eol == NULL)
            break;
        *eol = '\0';

        // ...and the value is the line right after it.
        char *value = eol + 1;
        eol = strchr(value, '\n');
        if (eol == NULL)
            break;
        *eol = '\0';
        cursor = eol + 1;

        if (*key != '\0')
            listSet(&pairs, key, value);
        debugEcho("Loaded metadata '%s' => '%s'", key, value);
        count++;
    } // for

    free(data);
    debugEcho("Loaded %d metadata pair(s).", count);
    return pairs;
} // loadMetadata
// Reports whether process `pid` is gone.
// Returns non-zero if there is no "/proc/<pid>" directory (the process is
// considered dead), zero if the directory exists (the process is alive).
static int process_dead(int pid)
{
    // !!! FIXME: Linux specific!
    struct stat statbuf;
    char fname[64];
    snprintf(fname, sizeof (fname), "/proc/%d", pid);

    // A live process has a /proc/<pid> directory; "dead" is the absence of one.
    const int alive = ((stat(fname, &statbuf) != -1) && (S_ISDIR(statbuf.st_mode)));
    return !alive;
} // process_dead
// Decides whether the cached copy described by `metadata` is still current
// with respect to `head`, the headers just fetched from the base server.
// Returns 1 if the cache can be used, 0 if the file must be (re)cached.
// The cache is current when:
//  - both lists carry Content-Length, ETag and Last-Modified,
//  - Content-Length and ETag match exactly,
//  - Last-Modified matches, or differs while the cached entry is marked
//    with X-Offload-Is-Weak "0" (i.e. the ETag was not a weak one),
//  - the cached file (GFilePath) exists, and either has the full
//    Content-Length size or is still being written by a live process
//    (X-Offload-Caching-PID).
static int cachedMetadataMostRecent(const list *metadata, const list *head)
{
    const char *contentlength = listFind(metadata, "Content-Length");
    const char *etag = listFind(metadata, "ETag");
    const char *lastmodified = listFind(metadata, "Last-Modified");
    if ((!contentlength) || (!etag) || (!lastmodified))
        return 0;

    // Never hand a missing header to strcmp(); a missing one can't match.
    const char *headlength = listFind(head, "Content-Length");
    const char *headetag = listFind(head, "ETag");
    const char *headmodified = listFind(head, "Last-Modified");
    if ((!headlength) || (!headetag) || (!headmodified))
        return 0;

    if (strcmp(contentlength, headlength) != 0)
        return 0;

    if (strcmp(etag, headetag) != 0)
        return 0;

    if (strcmp(lastmodified, headmodified) != 0)
    {
        // A changed timestamp is only tolerated when the entry is explicitly
        // marked as not weak.
        const char *isweak = listFind(metadata, "X-Offload-Is-Weak");
        if ( (!isweak) || (strcmp(isweak, "0") != 0) )
            return 0;
    } // if

    // See if file size != Content-Length, and if so, see if the process
    // named by X-Offload-Caching-PID still exists. If that process is
    // missing, assume the transfer died and recache.
    struct stat statbuf;
    if (stat(GFilePath, &statbuf) == -1)
        return 0;

    const int64 fsize = statbuf.st_size;
    if (fsize != atoi64(contentlength))
    {
        // whoa, we were supposed to cache this!
        const char *cacher = listFind(metadata, "X-Offload-Caching-PID");
        if (!cacher)
            return 0;

        const int cacherpid = atoi(cacher);
        if (process_dead(cacherpid))
        {
            debugEcho("Caching process ID died!");
            return 0;
        } // if
    } // if

    return 1;
} // cachedMetadataMostRecent
// Removes the current request's cache entry -- the metadata file and then
// the data file, whichever paths are set -- while holding the cache lock.
static void nukeRequestFromCache(void)
{
    debugEcho("Nuking request from cache...");

    getSemaphore();
    if (GMetaDataPath != NULL)
    {
        unlink(GMetaDataPath);
    } // if
    if (GFilePath != NULL)
    {
        unlink(GFilePath);
    } // if
    putSemaphore();
} // nukeRequestFromCache
// Sends a complete plain-text HTTP response describing an error (or any
// other final status) to stdout and then terminates the process.
//  httperr: status text such as "404 Not Found". A full status line like
//   "HTTP/1.1 404 Not Found" is accepted too; everything up to and
//   including the first space is stripped. NULL becomes
//   "500 Internal Server Error".
//  errmsg: response body. NULL falls back to the status text.
//  location: if non-NULL, emitted as a "Location:" header.
// Does not return: terminate() calls exit().
static void failure_location(const char *httperr, const char *errmsg,
                             const char *location)
{
    // Callers forward strings pulled from the base server's response, which
    // may be missing entirely; never hand NULL to the string functions.
    if (httperr == NULL)
        httperr = "500 Internal Server Error";
    if (errmsg == NULL)
        errmsg = httperr;

    if (strncasecmp(httperr, "HTTP", 4) == 0)
    {
        const char *ptr = strchr(httperr, ' ');
        if (ptr != NULL)
            httperr = ptr+1;
    } // if

    debugEcho("failure() called:");
    debugEcho(" %s", httperr);
    debugEcho(" %s", errmsg);

    printf("HTTP/1.1 %s\r\n", httperr);
    printf("Status: %s\r\n", httperr);
    printf("Server: %s\r\n", GSERVERSTRING);
    printf_date_header(NULL);
    if (location != NULL)
        printf("Location: %s\r\n", location);
    printf("Connection: close\r\n");
    printf("Content-type: text/plain; charset=utf-8\r\n");
    printf("\r\n");
    printf("%s\n\n", errmsg);

    terminate();
} // failure_location
// Validates a byte range against a resource of `max` bytes.
// Returns 1 if the range is unusable, 0 if it is fine. A usable range has
// both ends inside [0, max) and does not start after it ends.
static int invalidContentRange(const int64 startRange, const int64 endRange,
                               const int64 max)
{
    const int startOk = (startRange >= 0) && (startRange < max);
    const int endOk = (endRange >= 0) && (endRange < max);
    const int ordered = (startRange <= endRange);
    return (startOk && endOk && ordered) ? 0 : 1;
} // invalidContentRange
#if !GDEBUG
#define debugInit()
#else
static void debugInit()
{
#if !GDEBUGTOFILE
printf("HTTP/1.1 200 OK\r\n");
printf("Status: 200 OK\r\n");
printf("Content-type: text/plain; charset=utf-8\r\n");
Aug 31, 2008
Aug 31, 2008
586
printf_date_header(NULL);
587
588
589
590
591
592
593
594
595
596
printf("Server: " GSERVERSTRING "\r\n");
printf("Connection: close\r\n");
printf("\r\n");
#endif
debugEcho("%s", "");
debugEcho("%s", "");
debugEcho("%s", "");
debugEcho("Offload Debug Run!");
debugEcho("%s", "");
Aug 31, 2008
Aug 31, 2008
597
printf_date_header(getDebugFilePointer());
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
debugEcho("Base server: %s", GBASESERVER);
debugEcho("User wants to get: %s", Guri);
debugEcho("Request from address: %s", getenv("REMOTE_ADDR"));
debugEcho("Client User-Agent: %s", getenv("HTTP_USER_AGENT"));
debugEcho("Referrer string: %s", getenv("HTTP_REFERER"));
debugEcho("Timeout for HTTP HEAD request is %d", GTIMEOUT);
debugEcho("Data cache goes in %s", GOFFLOADDIR);
debugEcho("My PID: %d\n", (int) getpid());
debugEcho("%s", "");
debugEcho("%s", "");
} // debugInit
#endif
// Reads an HTTP response's status line and headers from socket `fd` into
// the list at `*headers`, stopping right after the blank line that ends
// the header block (so the body, if any, is left unread on the socket).
//  - The status line is stored whole under "response"; if it starts with
//    "HTTP/", its status code is stored under "response_code".
//  - Each later "Name: value" line is stored under its name, with leading
//    spaces removed from the value.
// The whole read must finish within GTIMEOUT seconds. A timeout, a read
// error, an over-long line or a malformed line ends the request through
// failure() with a 503.
static void readHeaders(const int fd, list **headers)
{
    const time_t endtime = time(NULL) + GTIMEOUT;
    int br = 0;
    char buf[1024];
    int seenresponse = 0;

    while (1)
    {
        const time_t now = time(NULL);
        int rc = -1;
        fd_set rfds;

        if (endtime >= now)
        {
            struct timeval tv;
            FD_ZERO(&rfds);
            FD_SET(fd, &rfds);
            tv.tv_sec = endtime - now;
            tv.tv_usec = 0;
            rc = select(fd+1, &rfds, NULL, NULL, &tv);
        } // if

        if ((rc <= 0) || (FD_ISSET(fd, &rfds) == 0))
            failure("503 Service Unavailable", "Timeout while talking to offload host.");

        // we can only read one byte at a time, since we don't want to
        // read past end of headers, into actual content, here.
        if (read(fd, buf + br, 1) != 1)
            failure("503 Service Unavailable", "Read error while talking to offload host.");

        if (buf[br] == '\r')
            ; // ignore these.
        else if (buf[br] == '\n')
        {
            // Stays NULL unless the line parses; checked below. It has to
            // start out NULL because a first line that isn't "HTTP/..."
            // never assigns it.
            char *ptr = NULL;

            if (br == 0) // empty line, end of headers.
                return;

            buf[br] = '\0';
            if (seenresponse)
            {
                ptr = strchr(buf, ':');
                if (ptr != NULL)
                {
                    *(ptr++) = '\0';
                    while (*ptr == ' ')
                        ptr++;
                    listSet(headers, buf, ptr);
                } // if
            } // if
            else
            {
                listSet(headers, "response", buf);
                if (strncmp(buf, "HTTP/", 5) == 0)
                {
                    ptr = strchr(buf + 5, ' ');
                    if (ptr != NULL)
                    {
                        // Status code is the token between the first two spaces.
                        char *start = ptr + 1;
                        ptr = strchr(start, ' ');
                        if (ptr != NULL)
                            *ptr = '\0';
                        listSet(headers, "response_code", start);
                        ptr = start;
                    } // if
                } // if
                seenresponse = 1;
            } // else

            if (ptr == NULL)
                failure("503 Service Unavailable", "Bogus response from offload host server.");

            br = 0;
        } // else if
        else
        {
            br++;
            if (br >= sizeof (buf))
                failure("503 Service Unavailable", "Buffer overflow.");
        } // else
    } // while
} // readHeaders
// Writes all of the NUL-terminated string `str` to socket `fd`, waiting
// with select() until the socket is writable before each write().
// The whole string must be sent within GTIMEOUT seconds; a timeout or a
// write error ends the request through failure() with a 503.
static void doWrite(const int fd, const char *str)
{
    const int total = strlen(str);
    const time_t deadline = time(NULL) + GTIMEOUT;
    int sent = 0;

    while (sent < total)
    {
        const time_t now = time(NULL);
        fd_set wfds;
        int ready = -1;  // stays -1 if the deadline has already passed.

        if (now <= deadline)
        {
            struct timeval tv;
            tv.tv_sec = deadline - now;
            tv.tv_usec = 0;
            FD_ZERO(&wfds);
            FD_SET(fd, &wfds);
            ready = select(fd+1, NULL, &wfds, NULL, &tv);
        } // if

        if ((ready <= 0) || (!FD_ISSET(fd, &wfds)))
            failure("503 Service Unavailable", "Timeout while talking to offload base server.");

        const int rc = write(fd, str + sent, total - sent);
        if (rc <= 0) // error? closed connection?
            failure("503 Service Unavailable", "Write error while talking to offload base server.");

        sent += rc;
    } // while
} // doWrite
// Opens a TCP connection to GBASESERVER on port 80, sends a `method`
// request (e.g. "GET" or "HEAD") for the current URI (Guri), and reads the
// response headers into `*headers`.
// Returns the connected socket, positioned just past the response headers;
// the caller close()s it. Lookup, connect, write and read problems all end
// the request through failure() with a 503.
static int doHttp(const char *method, list **headers)
{
    struct addrinfo *dns = NULL;
    if (getaddrinfo(GBASESERVER, "80", NULL, &dns) != 0)
        failure("503 Service Unavailable", "Offload base server hostname lookup failure.");

    // Try each stream-socket address in turn until one connects.
    int fd = -1;
    struct addrinfo *addr;
    for (addr = dns; addr != NULL; addr = addr->ai_next)
    {
        if (addr->ai_socktype != SOCK_STREAM)
            continue;

        fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
        if (fd == -1)
            continue;  // couldn't create a socket for this family; try the next.

        if (connect(fd, addr->ai_addr, addr->ai_addrlen) == 0)
            break;

        close(fd);
        fd = -1;
    } // for
    freeaddrinfo(dns);

    if (fd == -1)
        failure("503 Service Unavailable", "Couldn't connect to offload base server.");

    doWrite(fd, method);
    doWrite(fd, " ");
    doWrite(fd, Guri);
    doWrite(fd, " HTTP/1.1\r\n");
    doWrite(fd, "Host: " GBASESERVER "\r\n");
    doWrite(fd, "User-Agent: " GSERVERSTRING "\r\n");
    doWrite(fd, "Connection: close\r\n");
    doWrite(fd, "X-Mod-Offload-Bypass: true\r\n");
    doWrite(fd, "\r\n");

    readHeaders(fd, headers);
    return fd;
} // doHttp
// Issues an HTTP HEAD request to the base server for the current URI,
// stores the response headers in `*head`, and closes the connection.
static void http_head(list **head)
{
    const int sock = doHttp("HEAD", head);
    if (sock == -1)
        return;
    close(sock);
} // http_head
// Issues an HTTP GET request to the base server for the current URI.
//  head: if non-NULL, receives the response header list (the caller
//   releases it with listFree()); if NULL the headers are discarded.
// Returns the socket from doHttp(); the caller reads the body from it and
// close()s it.
static int http_get(list **head)
{
    list *received = NULL;
    const int sock = doHttp("GET", &received);

    // Nobody wants the headers, or there's no connection: drop them.
    const int discard = (head == NULL) || (sock == -1);
    if (discard)
        listFree(&received);

    if (head != NULL)
        *head = received;

    return sock;
} // http_get
// Turns an ETag header value into a string usable as part of a cache file
// name.
//  - Spaces, tabs, vertical tabs and both kinds of quote characters are
//    trimmed from the beginning and the end.
//  - Every '/' left in the result is replaced with '_': the value comes
//    from the network and is pasted into a path, so it must not be able to
//    name a different directory.
// Returns a newly allocated string; the caller releases it with free().
static char *etagToCacheFname(const char *etag)
{
    static const char trimchars[] = " \t\v\"'";

    // Skip the leading run of trim characters...
    const char *start = etag + strspn(etag, trimchars);

    // ...and find where the trailing run begins.
    size_t len = strlen(start);
    while ((len > 0) && (strchr(trimchars, start[len - 1]) != NULL))
        len--;

    char *retval = xstrdup(start);
    retval[len] = '\0';

    size_t i;
    for (i = 0; i < len; i++)
    {
        if (retval[i] == '/')
            retval[i] = '_';
    } // for

    return retval;
} // etagToCacheFname
int main(int argc, char **argv)
{
Guri = getenv("REQUEST_URI");
debugInit();
if ((Guri == NULL) || (*Guri != '/'))
failure("500 Internal Server Error", "Bad request URI");
// Feed a fake robots.txt to keep webcrawlers out of the offload server.
if (strcmp(Guri, "/robots.txt") == 0)
failure("200 OK", "User-agent: *\nDisallow: /");
const char *reqmethod = getenv("REQUEST_METHOD");
const int isget = (strcasecmp(reqmethod, "GET") == 0);
const int ishead = (strcasecmp(reqmethod, "HEAD") == 0);
if ( (strchr(Guri, '?') != NULL) || ((!isget) && (!ishead)) )
failure("403 Forbidden", "Offload server doesn't do dynamic content.");
list *head = NULL;
http_head(&head);
#if GDEBUG
{
debugEcho("The HTTP HEAD from %s ...", GBASESERVER);
list *item;
for (item = head; item; item = item->next)
debugEcho(" '%s' => '%s'", item->key, item->value);
}
#endif
const char *responsecodestr = listFind(head, "response_code");
const char *response = listFind(head, "response");
const char *etag = listFind(head, "ETag");
const char *contentlength = listFind(head, "Content-Length");
const char *lastmodified = listFind(head, "Last-Modified");
const int iresponse = responsecodestr ? atoi(responsecodestr) : 0;
if ((iresponse == 401) || (listFind(head, "WWW-Authenticate")))
failure("403 Forbidden", "Offload server doesn't do protected content.");
else if (iresponse != 200)
failure_location(response, response, listFind(head, "Location"));
else if ((!etag) || (!contentlength) || (!lastmodified))
failure("403 Forbidden", "Offload server doesn't do dynamic content.");
listSet(&head, "X-Offload-Orig-ETag", etag);
if ((strlen(etag) <= 2) || (strncasecmp(etag, "W/", 2) != 0))
listSet(&head, "X-Offload-Is-Weak", "0");
else // a "weak" ETag?
{
debugEcho("There's a weak ETag on this request.");
listSet(&head, "X-Offload-Is-Weak", "1");
etag = listSet(&head, "ETag", etag + 2);
debugEcho("Chopped ETag to be [%s]", etag);
} // if
// !!! FIXME: Check Cache-Control, Pragma no-cache
FILE *cacheio = NULL; // will be non-NULL if we're WRITING to the cache...
int frombaseserver = 0;
int io = -1;
if (ishead)
debugEcho("This is a HEAD request to the offload server.");
// Partial content:
// Does client want a range (download resume, "web accelerators", etc)?
const int64 max = atoi64(contentlength);
int64 startRange = 0;
int64 endRange = max-1;
int reportRange = 0;
char *responseCode = "200 OK";
const char *httprange = getenv("HTTP_RANGE");
const char *ifrange = getenv("HTTP_IF_RANGE");
if (ifrange != NULL)
{
// !!! FIXME: handle this.
debugEcho("Client set If-Range: [%s]...unsupported!", ifrange);
httprange = NULL;
} // if
if (httprange != NULL)
{
debugEcho("There's a HTTP_RANGE specified: [%s].", httprange);
if (strncasecmp(httprange, "bytes=", 6) != 0)
failure("400 Bad Request", "Only ranges of 'bytes' accepted.");
else if (strchr(httprange, ',') != NULL)
failure("400 Bad Request", "Multiple ranges not currently supported");
else
{
httprange += 6;
char *pos = strchr(httprange, '-');
if (pos != NULL)
{
*(pos++) = '\0';
startRange = *httprange == '\0' ? 0 : atoi64(httprange);
endRange = *pos == '\0' ? max-1 : atoi64(pos);
responseCode = "206 Partial Content";
reportRange = 1;
} // if
} // else
} // if
if (endRange >= max) // apparently, this is legal to request.
endRange = max - 1;
debugEcho("We are feeding the client bytes %lld to %lld of %lld",
(long long) startRange, (long long) endRange, (long long) max);
if (invalidContentRange(startRange, endRange, max))
failure("400 Bad Request", "Bad content range requested.");
char *etagFname = etagToCacheFname(etag);
GFilePath = makeStr("%s/filedata-%s", GOFFLOADDIR, etagFname);
GMetaDataPath = makeStr("%s/metadata-%s", GOFFLOADDIR, etagFname);
free(etagFname);
listSet(&head, "X-Offload-Orig-URL", Guri);
listSet(&head, "X-Offload-Hostname", GBASESERVER);
debugEcho("metadata cache is %s", GMetaDataPath);
debugEcho("file cache is %s", GFilePath);
list *metadata = NULL;
if (ishead)
metadata = head;
else
{
getSemaphore();
metadata = loadMetadata(GMetaDataPath);
if (cachedMetadataMostRecent(metadata, head))
{
listFree(&head);
io = open(GFilePath, O_RDONLY);
if (io == -1)
failure("500 Internal Server Error", "Couldn't access cached data.");
debugEcho("File is cached.");
} // if
else
{
listFree(&metadata);
// we need to pull a new copy from the base server...
//ignore_user_abort(true); // if we're caching, we MUST run to completion!
frombaseserver = 1;
io = http_get(NULL); // !!! FIXME: may block, don't hold semaphore here!
cacheio = fopen(GFilePath, "wb");
if (cacheio == NULL)
{
close(io);
failure("500 Internal Server Error", "Couldn't update cached data.");
} // if
FILE *metaout = fopen(GMetaDataPath, "wb");
if (metaout == NULL)
{
fclose(cacheio);
close(io);
nukeRequestFromCache();
failure("500 Internal Server Error", "Couldn't update metadata.");
} // if
// !!! FIXME: This is a race condition...may change between HEAD
// !!! FIXME: request and actual HTTP grab. We should really
// !!! FIXME: just use this for comparison once, and if we are
// !!! FIXME: recaching, throw this out and use the headers from the
// !!! FIXME: actual HTTP grab when really updating the metadata.
//
// !!! FIXME: Also, write to temp file and rename in case of write failure!
if (!listFind(head, "Content-Type")) // make sure this is sane.
listSet(&head, "Content-Type", "application/octet-stream");