Browse Source

Update It's Walky ripper to work like DoA

Fixes #4
master
Alan Hardman 11 months ago
parent
commit
cd120642d9
1 changed file with 44 additions and 45 deletions
  1. +44
    -45
      itswalky.php

+ 44
- 45
itswalky.php View File

@@ -1,57 +1,56 @@
<?php
// It's Walky
// This one has directory indexes! Yay!
// Based on the Dumbing of Age script

// We basically just read the directories and filter out the thumbnails
// This is actually generalized enough it could easily fit most WP sites
// We start with the current day, then click the Previous link until we find
// an image we've already saved before.

// If running multiple times per day, the $years and $months can be changed
// to only contain the current year/month
// Fetch the HTML of the site's home page.
$html = file_get_contents('http://www.itswalky.com/');
// Capture into $matches[1] the part of the first matching <img> URL that
// follows wp-content/uploads/. The `.+` is greedy, so the capture extends to
// the last `.png` on the same line.
preg_match('@img src="http://www.itswalky.com/wp-content/uploads/(.+\\.png)@', $html, $matches);

// Root URL of the uploads tree; "year/month/" directory paths are appended to it.
$base = 'http://www.itswalky.com/wp-content/uploads/';
// Every year from 2012 through the current year.
$years = range(2012, date('Y'));
// NOTE(review): this is the two-element list [1, 12] (January and December
// only), not range(1, 12) — confirm whether all twelve months were intended.
$months = [1,12];
// Nothing matched on the home page, so report it and end the script.
if (empty($matches[1])) {
echo "No comic found on home page! :(\n";
return;
}

// Make sure the output directory exists before any image is written into it.
if (!is_dir('itswalky')) {
mkdir('itswalky');
}
foreach ($years as $y) {
foreach ($months as $m) {
// Skip known nonexistent months
if ($y == 2012 && $m < 8) {
continue;
}

$dir = $base . sprintf('%d/%02d/', $y, $m);
$html = file_get_contents($dir);

preg_match_all('@<a href="([^/"]+)">@', $html, $matches);
foreach ($matches[1] as $file) {
// Remove thumbnails
if (preg_match('/-[0-9]+x[0-9]+\\.(png|gif)$/', $file)) {
continue;
}

// Remove non-comics for now
if (!preg_match('/^[0-9]{4}/', $file)) {
continue;
}

// Skip already downloaded images
if (is_file("itswalky/$file")) {
continue;
}

// Download image
echo "Downloading $file\n";
$url = $dir . $file;
$data = @file_get_contents($url);
if ($data) {
file_put_contents("itswalky/$file", $data);
}

usleep(5e5);
}
// Download current page's comic, load previous comic webpage, repeat.
//
// Expects $matches[1] to hold the current comic's path below
// wp-content/uploads/ and $html to hold the HTML of the page that comic was
// found on. The script ends (via return) as soon as the loop reaches a comic
// that is already saved, finds no "previous" link, or hits a page it cannot
// load or extract an image from.

// Loop-invariant pattern: matches the "previous comic" navigation link and
// captures its URL.
$regex = '@href="(http://www.itswalky.com/comic/[0-9a-zA-Z/-]+)" class="comic-nav-base comic-nav-previous"@';

while (true) {
    $filename = basename($matches[1]);
    $target = "itswalky/{$filename}";

    // An existing file means this comic was saved earlier, so stop here.
    if (is_file($target)) {
        return;
    }

    echo "Downloading {$filename}\n";
    $url = "http://www.itswalky.com/wp-content/uploads/{$matches[1]}";

    // A failed download is not fatal, so the crawl keeps walking backwards.
    // It must be reported, though: the loop stops at the first file that
    // already exists, so a silently skipped comic would never be retried.
    $data = @file_get_contents($url);
    if ($data === false || $data === '') {
        echo "Failed to download {$filename}!\n";
    } elseif (file_put_contents($target, $data) === false) {
        echo "Failed to save {$filename}!\n";
    }

    // Find previous page link
    preg_match($regex, $html, $prevMatch);
    if (empty($prevMatch[1])) {
        echo "No previous URL found!\n";
        return;
    }

    // Load the previous comic's page; its HTML drives the next iteration.
    $html = @file_get_contents($prevMatch[1]);
    if (!$html) {
        echo "Failed to load previous page!\n";
        return;
    }

    // Locate that page's comic image; $matches[1] becomes the next download.
    preg_match('@img src="http://www.itswalky.com/wp-content/uploads/(.+\\.png)@', $html, $matches);
    if (empty($matches[1])) {
        echo "No image found on page!\n";
        return;
    }

    // Pause for half a second between requests.
    usleep(500000);
}

Loading…
Cancel
Save