woop's post in Import Disqus content to Comments module? (.xml to comments) was marked as the answer
Hi again! Here's my Disqus import script! Some fields don't work correctly yet, but the basic parsing and import work fine. I'll update this post as my code improves. Feel free to chip in!
EDIT: Updated the code, which now runs fine. I just successfully imported 16,000+ comments.
<?php
# Heavily inspired by: http://www.binarytides.com/disqus-comments-importer-script-in-php/

// Importing a large export can take a while and use a lot of memory, so lift
// the execution time limit and raise the memory ceiling before parsing.
ini_set('max_execution_time', 0);
ini_set('memory_limit', '512M');

// The Disqus XML export to import.
$file = 'disquscomments.xml';

$doc = new DOMDocument();
// DOMDocument::load() returns false when the file cannot be read or parsed.
// Stop here instead of silently "importing" an empty document.
if (!$doc->load($file)) {
    throw new \RuntimeException("Unable to load Disqus export file: {$file}");
}
// Build $thread_list: Disqus thread id => array with 'thread_id', 'url' and,
// when the URL path resolves to an existing page, 'page_id'.
$thread_list = array();
$threads = $doc->getElementsByTagName('thread');
foreach ($threads as $thread) {
    // Only <thread> elements that carry a <link> child describe a real thread.
    $linkNode = $thread->getElementsByTagName('link')->item(0);
    if (!$linkNode) {
        continue;
    }

    $threadInfo = array();
    $threadInfo['thread_id'] = $thread->getAttribute('dsq:id');
    $threadInfo['url'] = $linkNode->textContent;

    // parse_url() yields null (no path) or false (malformed URL); in that case
    // there is nothing to look up and the thread is stored without a page id.
    $path = parse_url($threadInfo['url'], PHP_URL_PATH);
    if (is_string($path)) {
        $path = preg_replace("/(\/){2,}/", "/", $path); // remove multiple slashes
        $path = $sanitizer->url($path);

        // Look the page up once and reuse the result.
        $matchedPage = $pages->get($path);
        if ($matchedPage->id) {
            $threadInfo['page_id'] = $matchedPage->id;
        }
    }

    $thread_list[$threadInfo['thread_id']] = $threadInfo;
}
// Build $post_list: Disqus comment id => comment data, restricted to comments
// whose thread was matched to an existing page.
$post_list = array();
$posts = $doc->getElementsByTagName('post');

// Text of the first <$tag> descendant of $node, or '' when $node or the
// element is missing. Avoids dereferencing null on incomplete exports.
$firstChildText = function ($node, $tag) {
    $found = $node ? $node->getElementsByTagName($tag)->item(0) : null;
    return $found ? $found->nodeValue : '';
};

foreach ($posts as $post) {
    // A post without a thread reference cannot be attached to any page.
    $threadRef = $post->getElementsByTagName('thread')->item(0);
    if (!$threadRef) {
        continue;
    }

    // Fetch the <author> element once; it may be absent.
    $author = $post->getElementsByTagName('author')->item(0);

    $comment = array();
    $comment['comment_id'] = $post->getAttribute('dsq:id');
    $comment['thread_id'] = $threadRef->getAttribute('dsq:id');
    $comment['comment'] = $firstChildText($post, 'message');
    $comment['created_at'] = $firstChildText($post, 'createdAt');
    $comment['email'] = $firstChildText($author, 'email');
    $comment['name'] = $firstChildText($author, 'name');

    // Replies reference their parent comment by Disqus id.
    $parentRef = $post->getElementsByTagName('parent')->item(0);
    if ($parentRef) {
        $comment['d_parent_id'] = $parentRef->getAttribute('dsq:id');
    }

    // Only keep comments whose thread resolved to a page id.
    if (isset($thread_list[$comment['thread_id']]['page_id'])) {
        $comment['page_id'] = $thread_list[$comment['thread_id']]['page_id']; // the corresponding PW page's ID
        $post_list[$comment['comment_id']] = $comment;
    }
}
// Create one "mycomment" page per collected comment and count the new ones.
$postsadded = 0;
foreach ($post_list as $entry) {
    // A page already carrying this Disqus id was imported on an earlier run.
    $alreadyImported = $pages->get("disqus_id={$entry['comment_id']}");
    if ($alreadyImported->id) {
        continue;
    }

    // Decide where the new page goes before building it.
    if (isset($entry['d_parent_id'], $post_list[$entry['d_parent_id']])) {
        // Reply: nest it under the parent comment's page if that page has
        // already been created, otherwise fall back to the current $page.
        $parentEntry = $post_list[$entry['d_parent_id']];
        $savedParent = $pages->get("disqus_id={$parentEntry['comment_id']}");
        $destination = $savedParent->id ? $savedParent : $page;
    } elseif (isset($entry['page_id'])) {
        // Root comment: goes directly under the page matched to its thread.
        $destination = $entry['page_id'];
    } else {
        continue;
    }

    $commentPage = new Page();
    $commentPage->setOutputFormatting(false);
    $commentPage->template = $templates->get("mycomment");
    $commentPage->username = $entry['name'];
    $commentPage->title = "temporary title";
    $commentPage->publish_date = $entry['created_at'];
    $commentPage->disqus_id = $entry['comment_id'];
    $commentPage->body = $entry['comment'];
    $commentPage->parent = $destination;

    // The first save assigns the page id; the second stores that id as the
    // page's name and title in place of the temporary title.
    $commentPage->save();
    $commentPage->name = $commentPage->id;
    $commentPage->title = $commentPage->id;
    $commentPage->save();

    $postsadded++;
}
// Print a short summary of the run.
echo "<br>#######STATS#########<br>";
echo "added +{$postsadded} comments<br>";
// $threads holds every <thread> element in the document, including the thread
// references nested inside each <post>, so its length overstates the number of
// threads. $thread_list holds one entry per thread that has a <link>.
echo "total of threads in disquscomments.xml: ".count($thread_list)."<br>";
echo "total of posts in disquscomments.xml: ".$posts->length."<br>";
// Let the API count the matches instead of loading every comment page into
// memory just to count them.
echo "total of posts imported: ".$pages->count('template=mycomment');