Although there are plenty of interesting discussions on this forum, my main reason for coming here is to see what Scawen is up to. When the new forum was initially released I spent a couple of minutes on a userscript to highlight the developers' posts, but I figured I could do a little better.
So I wrote a little PHP scraper that visits the profiles of the devs, collects their most recent posts and then publishes the data in an RSS feed.
The feed is available here NO LONGER AVAILABLE.
... And if you'd like to extend on this here's the scraper:
(Vic is OK with this, I checked ...)
So I wrote a little PHP scraper that visits the profiles of the devs, collects their most recent posts and then publishes the data in an RSS feed.
The feed is available here NO LONGER AVAILABLE.
... And if you'd like to extend on this here's the scraper:
<?php
/**
 * Return the most recent posts by a given user at lfsforum.net.
 * Breakage-prone due to DOM-parsing! :)
 *
 * Posts whose markup does not match the expected structure are skipped
 * instead of aborting the whole run.
 *
 * @author felplacerad
 * @version 0.1
 */
class LfsForumScraper
{
    /**
     * @var array Collected posts keyed by post id. Accumulates across
     *            successive scrapeAuthor() calls on the same instance.
     */
    public $posts = [];

    /** @var DOMDocument Parser instance reused for every fetched page. */
    protected $dom;

    /** @var array Post ids read from the cache file when the object was created. */
    protected $cache;

    /**
     * Create a new DOMDocument object, and
     * read the cache file into an array.
     *
     * Terminates the script if the cache file was modified less than
     * 600 seconds ago.
     */
    public function __construct()
    {
        if (is_file('./scraper.cache') && time() - filemtime('./scraper.cache') < 600) die("No hammering!\n");
        $this->dom = new DOMDocument();
        libxml_use_internal_errors(true); // Buffer libxml errors instead of emitting warnings

        // file() returns false when the file cannot be read; fall back to an empty cache.
        $lines = file_exists('./scraper.cache') ? file('./scraper.cache', FILE_IGNORE_NEW_LINES) : [];
        $this->cache = ($lines === false ? [] : $lines);
    }

    /**
     * Fetch the forum search page for one author, collect the posts that
     * were not already seen and append the newly seen post ids to the
     * cache file.
     *
     * @param string $targetAuthor Exact user name whose posts are wanted.
     * @return array All posts collected by this instance so far, keyed by
     *               post id (empty array when nothing was collected).
     */
    public function scrapeAuthor($targetAuthor = 'Scawen')
    {
        // Encode the name so spaces or other special characters cannot break the request line.
        $url = "https://www.lfs.net/forum/-1/search/user:'" . rawurlencode($targetAuthor) . "'";
        $context = stream_context_create(['http' => ['header' => "User-Agent: fel-notify/0.1"]]);

        $html = file_get_contents($url, false, $context);
        if ($html === false) {
            // Request failed: nothing to parse, keep whatever was collected earlier.
            return $this->posts;
        }

        $newIds = $this->parsePosts($html, $targetAuthor);

        // Note: $this->cache is not refreshed here, so a post id seen during one
        // call can still be picked up by a later call for its actual author.
        if (count($newIds) > 0) {
            file_put_contents('./scraper.cache', "\n" . implode("\n", $newIds), FILE_APPEND);
        }

        return $this->posts;
    }

    /**
     * Load HTML, evaluate the XPath expressions and store every post by the
     * requested author in $this->posts.
     *
     * @param string $html         Markup of a forum search result page.
     * @param string $targetAuthor Exact user name whose posts are wanted.
     * @return array Ids of all successfully parsed posts that were not in
     *               the cache, regardless of their author.
     */
    protected function parsePosts($html, $targetAuthor)
    {
        // loadHTML() rejects empty input (ValueError on PHP 8).
        if (!is_string($html) || trim($html) === '') {
            return [];
        }

        $loaded = $this->dom->loadHTML($html);
        libxml_clear_errors(); // Buffered parse errors are never inspected; do not let them pile up
        if (!$loaded) {
            return [];
        }

        $xpath = new DOMXPath($this->dom);
        $newIds = [];

        // Example: <div class="FPost">
        foreach ($xpath->query('//div[@class="FPost"]') as $tag) {
            $idNode = $this->firstElement($xpath, './div[contains(@id, "Post")]', $tag);
            if ($idNode === null) {
                continue;
            }

            $id = $idNode->getAttribute('id');
            if (in_array($id, $this->cache, true)) {
                continue;
            }

            $post = $this->extractPost($xpath, $tag, $id);
            if ($post === null) {
                // Unexpected markup: skip without caching so the post is retried next run.
                continue;
            }

            if ($post['author'] === $targetAuthor) { // LFS Forum may yield false results due to wildcard matches
                $this->posts[$id] = $post;
            }
            $newIds[] = $id;
        }

        return $newIds;
    }

    /**
     * Build the post record for one <div class="FPost"> element.
     *
     * @param DOMXPath $xpath XPath evaluator bound to the loaded document.
     * @param DOMNode  $tag   The FPost element.
     * @param string   $id    Post id already read from the element.
     * @return array|null Record with the keys id, datetime, author, topic,
     *                    alink, tlink, plink and text, or null when any of
     *                    the expected child elements is missing.
     */
    protected function extractPost(DOMXPath $xpath, DOMNode $tag, $id)
    {
        $topicLink  = $this->firstElement($xpath, './div/a', $tag);
        $postLink   = $this->firstElement($xpath, './div[@class="FPostHeader"]/div/a', $tag);
        // Select the text container itself rather than its first child, so an empty post body still resolves.
        $textNode   = $this->firstElement($xpath, './div/div/div[@class="FPostText"]', $tag);
        $authorLink = $this->firstElement($xpath, './div/div[@class="FUserInfo"]/a[@class="UserLink"]', $tag);
        $timeNode   = $this->firstElement($xpath, './div[@class="FPostHeader"]/div/time', $tag);

        if ($topicLink === null || $postLink === null || $textNode === null
            || $authorLink === null || $timeNode === null) {
            return null;
        }

        // Keep only a handful of tags, then drop every attribute from the opening tags that remain.
        $text = preg_replace(
            "/<([a-z][a-z0-9]*)[^>]*?(\/?)>/i",
            '<$1$2>',
            strip_tags($this->dom->saveXML($textNode), '<div><p><a><fieldset><legend>')
        );

        return [
            'id'       => $id,
            'datetime' => date(DATE_RFC2822, (strtotime($timeNode->getAttribute('datetime')))),
            'author'   => $authorLink->nodeValue,
            'topic'    => htmlspecialchars($topicLink->nodeValue),
            'alink'    => $authorLink->getAttribute('href'),
            'tlink'    => $topicLink->getAttribute('href'),
            'plink'    => $postLink->getAttribute('href'),
            'text'     => $text,
        ];
    }

    /**
     * Return the first element matched by an XPath expression.
     *
     * @param DOMXPath $xpath      XPath evaluator bound to the loaded document.
     * @param string   $expression XPath expression, evaluated relative to $context.
     * @param DOMNode  $context    Node the expression is relative to.
     * @return DOMElement|null The first match, or null when the query fails,
     *                         matches nothing, or the first match is not an element.
     */
    protected function firstElement(DOMXPath $xpath, $expression, DOMNode $context)
    {
        $nodes = $xpath->query($expression, $context);
        if ($nodes === false || $nodes->length === 0) {
            return null;
        }

        $node = $nodes->item(0);

        return ($node instanceof DOMElement ? $node : null);
    }
}
// Scrape each developer in turn. The scraper accumulates results internally,
// so the value returned by the last call holds the posts of all three.
$scraper = new LfsForumScraper();
foreach (['Scawen', 'Victor', 'Eric'] as $developer) {
    $posts = $scraper->scrapeAuthor($developer);
}
print_r($posts);
(Vic is OK with this, I checked ...)