You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
2.0 KiB

  1. <?php
  2. // Laura Kajpust/Falconer's Dailies
  3. // This one requires some weird stuff relative to the other archivers.
  4. // We start with the current day, then click the Previous link until we find
  5. // an image we've already saved before.
  6. // We also save the comic with the date it was posted so that they are
  7. // readable in chronological order.
  8. $html = file_get_contents('http://falcdaily.smackjeeves.com/');
  9. preg_match('@Date Posted:</strong> ([^<>]+)</div>@', $html, $dateMatches);
  10. preg_match('@src="(https?:)?//((www|img[0-9]+).smackjeeves.com/images/uploaded/comics/[^"]+\\.(png|jpg|gif))@', $html, $matches);
  11. if (empty($matches[2])) {
  12. echo "No comic found on home page! :(\n";
  13. return;
  14. }
  15. $date = date('Y-m-d-His', strtotime($dateMatches[1]));
  16. if (!is_dir('laura-kajpust-dailies')) {
  17. mkdir('laura-kajpust-dailies');
  18. }
  19. // Download current page's comic, load previous comic webpage, repeat
  20. while (true) {
  21. $ext = substr($matches[2], -3);
  22. $name = basename($matches[2]);
  23. $path = "laura-kajpust-dailies/$date-$name.$ext";
  24. if (is_file($path)) {
  25. return;
  26. }
  27. echo "Downloading {$matches[2]}\n";
  28. $url = "http://{$matches[2]}";
  29. $data = @file_get_contents($url);
  30. if ($data) {
  31. file_put_contents($path, $data);
  32. }
  33. // Find previous page link
  34. $regex = '@href="(http://falcdaily.smackjeeves.com/comics/[0-9a-zA-Z/-]+)"><i class="fa fa-angle-left"@';
  35. preg_match($regex, $html, $prevMatch);
  36. if (empty($prevMatch[1])) {
  37. echo "No previous URL found!\n";
  38. return;
  39. }
  40. $html = @file_get_contents($prevMatch[1]);
  41. if (!$html) {
  42. echo "Failed to load previous page!\n";
  43. return;
  44. }
  45. preg_match('@Date Posted:</strong> ([^<>]+)</div>@', $html, $dateMatches);
  46. preg_match('@src="(https?:)?//((www|img[0-9]+).smackjeeves.com/images/uploaded/comics/[^"]+\\.(png|jpg|gif))@', $html, $matches);
  47. if (empty($matches[2])) {
  48. echo "No image found on page!\n";
  49. return;
  50. }
  51. $date = date('Y-m-d-His', strtotime($dateMatches[1]));
  52. usleep(5e5);
  53. }