You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

57 lines
1.4 KiB

  1. <?php
  2. // I AM ARG!
  3. // Based on the Dumbing of Age ripper since they have similar DOMs
  4. // This one is nice because every comic is named by it's ISO-8601 date!
  5. // We start with the current day, then click the Previous link until we find
  6. // an image we've already saved before.
  7. $html = file_get_contents('http://iamarg.com/');
  8. preg_match('@/comics/(.+\\.(jpg|png|gif))@', $html, $matches);
  9. if (empty($matches[1])) {
  10. echo "No comic found on home page! :(\n";
  11. exit(1);
  12. }
  13. if (!is_dir('iamarg')) {
  14. mkdir('iamarg');
  15. }
  16. // Download current page's comic, load previous comic webpage, repeat
  17. while (true) {
  18. if (is_file('iamarg/' . $matches[1])) {
  19. return;
  20. }
  21. echo "Downloading {$matches[1]}\n";
  22. $url = "http://iamarg.com/comics/{$matches[1]}";
  23. $data = @file_get_contents($url);
  24. if ($data) {
  25. file_put_contents("iamarg/{$matches[1]}", $data);
  26. }
  27. // Find previous page link
  28. $regex = '@href="(http://iamarg.com/[0-9a-zA-Z/-]+)" class="navi navi-prev"@';
  29. preg_match($regex, $html, $prevMatch);
  30. if (empty($prevMatch[1])) {
  31. echo "No previous URL found!\n";
  32. return;
  33. }
  34. $html = @file_get_contents($prevMatch[1]);
  35. if (!$html) {
  36. echo "Failed to load previous page!\n";
  37. return;
  38. }
  39. preg_match('@/comics/(.+\\.(jpg|png|gif))@', $html, $matches);
  40. if (empty($matches[1])) {
  41. echo "No image found on page!\n";
  42. return;
  43. }
  44. usleep(5e5);
  45. }