You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

35 lines
1.1 KiB

  1. <?php
  2. // CommitStrip
  3. // I chose to use the main comic page URLs instead of the comic filenames here
  4. // since they're actually sequential that way. Makes it much easier to read.
  5. // We'll work from start to end
  6. $start = 'https://www.commitstrip.com/en/2012/02/22/interview/';
  7. if (!is_dir('commitstrip')) {
  8. mkdir('commitstrip');
  9. }
  10. $url = $start;
  11. while ($url) {
  12. $html = file_get_contents($url);
  13. preg_match('@src="https://www.commitstrip.com/wp-content/uploads/([0-9a-zA-Z/-]+\\.[a-z]{3,4})"@', $html, $matches);
  14. if (!empty($matches[1])) {
  15. $name = str_replace('/', '-', trim(substr($url, 31), '/'));
  16. if (!glob("commitstrip/$name*")) {
  17. $data = @file_get_contents('https://www.commitstrip.com/wp-content/uploads/' . $matches[1]);
  18. if ($data) {
  19. $ext = pathinfo(parse_url($matches[1])['path'], PATHINFO_EXTENSION);
  20. file_put_contents("commitstrip/$name.$ext", $data);
  21. }
  22. }
  23. }
  24. preg_match('@href="(https://www.commitstrip.com/20[^"]+)" rel="next"@', $html, $matches);
  25. $url = str_replace('.com/20', '.com/en/20', $matches[1]);
  26. usleep(5e5);
  27. }