Google+のHTMLをJekyllのMarkdownに書き出すRubyスクリプト

今更感が凄まじいが、もうタイトル通り。

このスクリプトは引数で渡したHTMLファイルを1つずつ変換するので、複数ファイルはバッチで処理する。


require 'nokogiri'

# Convert one exported Google+ post (HTML) into a Jekyll Markdown post.
#
# Usage: ruby <this script> path/to/post.html
#
# Reads the HTML file named by the first command-line argument and writes
# "<name>.md" into the current directory, where <name> is the post's
# published timestamp with "/" replaced by "-" and ":" replaced by "_".
# The file holds a Jekyll front matter block followed by the post text.

input_path = ARGV[0]
abort "usage: ruby #{$PROGRAM_NAME} <google-plus-post.html>" if input_path.nil?

# File.read rather than Kernel#open: Kernel#open treats a path that starts
# with "|" as a command to execute, which is unsafe for a command-line value.
html = File.read(input_path)

doc = Nokogiri::HTML.parse(html, nil, 'utf-8')

# The front matter is line oriented, so drop every CR/LF from the title.
# to_s covers pages without a <title> element, where doc.title is nil.
title = doc.title.to_s.delete("\r\n")

# When several matching elements exist, the last one wins.
published_node = doc.css('abbr.published').last
published = published_node && published_node['title']

updated_node = doc.css('abbr.updated').last
updated = updated_node && updated_node['title']

# "/" and ":" are not usable in file names on every platform.
file_name = published.to_s.tr('/:', '-_')
if file_name.empty?
  # Without a published timestamp the output would be written to ".md";
  # fall back to the input file's base name instead.
  file_name = File.basename(input_path, '.*')
  warn "#{input_path}: no published date found, writing #{file_name}.md"
end

# Turn <br> elements into newlines before the text content is extracted.
doc.search('br').each { |br| br.replace("\n") }

entry = doc.css('div.entry-content').last
content = entry ? entry.content : ''

# YAML single-quoted scalar: the only escape is a doubled single quote.
# Quoting keeps titles containing ": ", "#", "[" etc. from breaking the
# front matter.
yaml_quote = lambda { |text| "'" + text.to_s.gsub("'", "''") + "'" }

output = <<EOF
---
layout: post
title: #{yaml_quote.call(title)}
date: #{yaml_quote.call(published)}
modified_time: #{yaml_quote.call(updated)}
author: Asada Santohei
---

#{content}
EOF

File.write("#{file_name}.md", output)