|
|
@ -1,57 +1,56 @@ |
|
|
|
<?php |
|
|
|
// It's Walky
|
|
|
|
// This one has directory indexes! Yay!
|
|
|
|
// Based on the Dumbing of Age script
|
|
|
|
|
|
|
|
// We basically just read the directories and filter out the thumbnails
|
|
|
|
// This is actually generalized enough it could easily fit most WP sites
|
|
|
|
// We start with the current day, then click the Previous link until we find
|
|
|
|
// an image we've already saved before.
|
|
|
|
|
|
|
|
// If running multiple times per day, the $years and $months can be changed
|
|
|
|
// to only contain the current year/month
|
|
|
|
// Fetch the home page; the comic image found on it is the starting point for
// the "Previous link" crawl.
$html = file_get_contents('http://www.itswalky.com/');
// Capture the image path below wp-content/uploads/. [^"]+ keeps the match
// inside the src attribute, so a later ".png" on the same line cannot be
// swallowed by a greedy match.
preg_match('@img src="http://www.itswalky.com/wp-content/uploads/([^"]+\\.png)@', (string) $html, $matches);

// Root of the upload directories; year/month subdirectories are appended to it.
$base = 'http://www.itswalky.com/wp-content/uploads/';
// Every year from 2012 up to and including the current one.
$years = range(2012, (int) date('Y'));
// All twelve months. A literal [1, 12] would only visit January and December.
$months = range(1, 12);
if (empty($matches[1])) {
    echo "No comic found on home page! :(\n";
    return;
}

// Make sure the download directory exists before anything is written to it.
// The trailing is_dir() covers the case where it appeared in the meantime.
if (!is_dir('itswalky') && !mkdir('itswalky') && !is_dir('itswalky')) {
    echo "Could not create the itswalky directory!\n";
    return;
}
|
|
|
|
|
|
|
foreach ($years as $y) { |
|
|
|
foreach ($months as $m) { |
|
|
|
// Skip known nonexistent months
if ((int) $y === 2012 && (int) $m < 8) {
    continue;
}

// URL of this year/month upload directory, e.g. <base>2012/08/
$dir = $base . sprintf('%d/%02d/', $y, $m);
$html = @file_get_contents($dir);
if ($html === false) {
    // The index could not be fetched (presumably a month with no uploads,
    // e.g. one still in the future); move on instead of parsing `false`.
    echo "Skipping $dir (no directory index)\n";
    continue;
}

// Collect link targets that contain no slash, i.e. entries directly in this
// directory rather than parent/sub-directory links.
preg_match_all('@<a href="([^/"]+)">@', $html, $matches);
foreach ($matches[1] as $file) {
    // Remove thumbnails (names ending in -<width>x<height>.png/.gif)
    if (preg_match('/-[0-9]+x[0-9]+\\.(png|gif)$/', $file)) {
        continue;
    }

    // Remove non-comics for now: only names starting with four digits pass
    if (!preg_match('/^[0-9]{4}/', $file)) {
        continue;
    }

    // Skip already downloaded images
    if (is_file("itswalky/$file")) {
        continue;
    }

    // Download image
    echo "Downloading $file\n";
    $url = $dir . $file;
    $data = @file_get_contents($url);
    if ($data !== false && $data !== '') {
        file_put_contents("itswalky/$file", $data);
    } else {
        // Report the failure; the file is retried on the next run because
        // nothing was written for it.
        echo "Failed to download $file\n";
    }

    // Pause half a second between requests.
    usleep(500000);
}
|
|
|
// Download current page's comic, load previous comic webpage, repeat.
// On entry $html holds the current page and $matches[1] the image path below
// wp-content/uploads/ that was found on it.
while (true) {
    $filename = basename($matches[1]);
    // An image that is already on disk ends the crawl.
    if (is_file('itswalky/' . $filename)) {
        return;
    }

    echo "Downloading {$filename}\n";
    $url = "http://www.itswalky.com/wp-content/uploads/{$matches[1]}";
    $data = @file_get_contents($url);
    if ($data !== false && $data !== '') {
        file_put_contents("itswalky/{$filename}", $data);
    } else {
        // Report the failure but keep walking back through the archive.
        echo "Failed to download {$filename}\n";
    }

    // Find previous page link
    $regex = '@href="(http://www.itswalky.com/comic/[0-9a-zA-Z/-]+)" class="comic-nav-base comic-nav-previous"@';
    preg_match($regex, $html, $prevMatch);

    if (empty($prevMatch[1])) {
        echo "No previous URL found!\n";
        return;
    }

    $html = @file_get_contents($prevMatch[1]);
    if (!$html) {
        echo "Failed to load previous page!\n";
        return;
    }

    // [^"]+ keeps the capture inside the src attribute; a greedy .+ could run
    // on to a later ".png" on the same line and yield a bogus path.
    preg_match('@img src="http://www.itswalky.com/wp-content/uploads/([^"]+\\.png)@', $html, $matches);
    if (empty($matches[1])) {
        echo "No image found on page!\n";
        return;
    }

    // Pause half a second between page loads.
    usleep(500000);
}