Jump to content

woop

Members
  • Posts

    70
  • Joined

  • Last visited

Community Answers

  1. woop's post in Import Disqus content to Comments module? (.xml to comments) was marked as the answer   
    Hi again! Here's my Disqus import script! Some fields doesn't work correctly, but the basic parsing and import works fine. Will update this post as my code improves. Feel free to chip in!
    EDIT: Updated code, which runs fine now. Just successfully imported +16000 comments
    <?php  # Heavily inspired by: http://www.binarytides.com/disqus-comments-importer-script-in-php/ ini_set('max_execution_time', 0); // unlimited execution time, because of large amount of comments ini_set('memory_limit', '512M'); $file = 'disquscomments.xml'; $doc = new DOMDocument(); $doc->load($file); $thread_list = array(); $threads = $doc->getElementsByTagName('thread'); foreach($threads as $thread) { if (!isset($thread->getElementsByTagName('link')->item(0)->textContent)) continue; $comment = array(); $comment['thread_id'] = $thread->getAttribute('dsq:id'); $comment['url'] = $thread->getElementsByTagName('link')->item(0)->textContent; $path = parse_url($comment['url'], PHP_URL_PATH); $path = preg_replace("/(\/){2,}/", "/", $path); // remove multiple slashes $path = $sanitizer->url($path); if ($pages->get($path)->id){ $comment['page_id'] = $pages->get($path)->id; } $thread_list[$comment['thread_id']] = $comment; } $post_list = array(); $posts = $doc->getElementsByTagName('post'); foreach($posts as $post) { $comment = array();   $comment['comment_id'] = $post->getAttribute('dsq:id');   $comment['thread_id'] = $post->getElementsByTagName('thread')->item(0)->getAttribute('dsq:id');   $comment['comment'] = $post->getElementsByTagName('message')->item(0)->nodeValue;   $comment['created_at'] = $post->getElementsByTagName('createdAt')->item(0)->nodeValue;   $comment['email'] = $post->getElementsByTagName('author')->item(0)->getElementsByTagName('email')->item(0)->nodeValue;   $comment['name'] = $post->getElementsByTagName('author')->item(0)->getElementsByTagName('name')->item(0)->nodeValue;   if ($post->getElementsByTagName('parent')->item(0)) {   $comment['d_parent_id'] = $post->getElementsByTagName('parent')->item(0)->getAttribute('dsq:id'); } if (isset($thread_list[$comment['thread_id']]) && isset($thread_list[$comment['thread_id']]['page_id'])){ $thread = $thread_list[$comment['thread_id']]; $comment['page_id'] = $thread['page_id']; // the corresponding PW page's ID $post_list[$comment['comment_id']] = $comment; // only accept pages with pageids } } $postsadded = 0; foreach($post_list as $post){ if ($pages->get("disqus_id={$post['comment_id']}")->id) continue; //ignore already imported $c = new Page(); $c->setOutputFormatting(false); $c->template = $templates->get("mycomment"); $c->username = $post['name']; $c->title = "temporary title"; $c->publish_date = $post['created_at']; $c->disqus_id = $post['comment_id']; $c->body = $post['comment']; // If there's a parent comment, use this as parent if (isset($post['d_parent_id']) && isset($post_list[$post['d_parent_id']])){ $disqusparentID = $post_list[$post['d_parent_id']]; $savedParent = $pages->get("disqus_id={$disqusparentID['comment_id']}"); // must find already created page if ($savedParent->id){ $c->parent = $savedParent;  } else { $c->parent = $page; // dump it here } } elseif (isset($post['page_id'])){ $c->parent = $post['page_id']; // root comment } else { continue; } $c->save(); $c->name = $c->id; $c->title = $c->id; $c->save(); $postsadded++; } echo "<br>#######STATS#########<br>"; echo "added +{$postsadded} comments<br>"; echo "total of threads in disquscomments.xml: ".$threads->length."<br>"; echo "total of posts in disquscomments.xml: ".$posts->length."<br>"; echo "total of posts imported: ".count($pages->find('template=mycomment'));
×
×
  • Create New...