#!/usr/bin/perl -w
#
# nnw2vienna
# Migrate NetNewsWire data to Vienna. I use my RSS reader data
# as a repository for unread and important (flagged) articles,
# sort of like bookmarks in a web browser. I realize that this
# is somewhat against the design model of RSS aggregators, but
# so be it. Under these circumstances, it's difficult to switch
# to a new RSS reader, because the new reader wouldn't know about
# all of those archived articles (OPML exports only contain
# subscription lists).
#
# This is a migration script that I wrote to solve that problem,
# for myself. If it's also useful for you, then all the better.
#
# Without options, this script will copy all UNREAD and FLAGGED
# messages in your NetNewsWire/FeedData files to your Vienna
# database.
#
# Version
#   0.1  08Nov2005 - release
#
# Usage
#   nnw2vienna [options]
#     --get-all         get all messages, even if already read
#     --skip-unread     don't get unread messages
#     --skip-marked     don't get marked messages
#     --direct-to-db    insert directly into db instead of writing
#                       to STDOUT (seems to be almost 30% slower
#                       than importing the SQL manually)
#
#     (these two options must be used together or not at all)
#     --file=filename   read articles from filename only
#     --dest-folder=id  copy articles to folder_id id
#
# How to use this script
#   1. From NetNewsWire, export your feeds to an OPML file.
#   2. In Vienna, import the OPML file.
#   3. QUIT BOTH NETNEWSWIRE AND VIENNA
#   4. Run this script in one of two ways:
#      The first option is simpler but takes ~30% more time.
#      Recommended if you don't have many articles to copy.
#      (OPTION 1) run script and update DB (direct-to-db)
#        nnw2vienna [options] --direct-to-db
#      (OPTION 2) run script and update DB (manual import)
#        nnw2vienna [options] > ~/Desktop/nnw2vienna.sql
#        cd ~/Library/Application\ Support/Vienna
#        sqlite messages.db < ~/Desktop/nnw2vienna.sql
#   5. Start Vienna. Hope it worked.
#
#   ** If you already have important data in Vienna, you might
#      want to make a copy of your messages.db file first.
#
# Caveats
#   Quick hack one-time-use coding style, very little error
#     checking, almost certainly has unfixed bugs.
#   Seems to work on my NNW data (126 feeds, 10k messages),
#     but not tested on anyone else's.
#   Works with OSX 10.4.3, NNW 2.0b45, Vienna 2.0.0.2012.
#     Might not work with other versions.
#   Fixes and corrections welcome, but this code might not
#     be actively maintained -- no promises at this point.
#   For the above NNW data, this script took 4 minutes to
#     export the data to a file, but the manual import step
#     took 60 minutes. Using the --direct-to-db option is
#     ~30% slower. See Dependencies, below. I tried to
#     improve performance by doing the insert on a hash of
#     arrays, but the DBI module seems to convert all
#     thusly-inserted values to strings (also, the time
#     reduction was minimal). Alternate solutions welcomed.
#   While the DB inserts are happening, your machine will
#     be IO bound -- trying to get other things done that
#     require disk access will be less than rewarding.
#   If you really, really, really need help with this, you
#     can email me: reynhout@quesera.com. But please
#     exhaust general package support avenues first.
#
# License
#   Do as thou wilt shall be the whole of the law. If this
#   is useful to you in any way, please use it. If you
#   want to rewrite it with fewer dependencies, a less
#   involved installation procedure, more optimization,
#   or fewer bugs, please do. Let me know about your
#   improvements, and I'll post them or link to them.
#   http://www.quesera.com/reynhout/misc/nnw2vienna
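#
# Speed note (an untested suggestion, not from the measurements
#   below): sqlite commits -- and syncs to disk -- after every
#   INSERT unless told otherwise, which is probably why the manual
#   import (OPTION 2) takes so long. Wrapping the dump in a single
#   transaction should help, something like:
#     { echo 'BEGIN TRANSACTION;'; \
#       cat ~/Desktop/nnw2vienna.sql; \
#       echo 'COMMIT;'; } | sqlite messages.db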
#
# Dependencies
#   expat library (from darwinports or expat.sourceforge.net)
#   Perl modules: DBI, DBD::SQLite, and XML::Parser (from CPAN)
#
# Installation Instrux
#   Starting from a default OSX 10.4.3 installation:
#
#   - install XCode Tools from OSX DVD
#
#   - install darwinports
#       curl -O http://darwinports.opendarwin.org/downloads/DarwinPorts-1.1.dmg
#       open DarwinPorts-1.1.dmg
#       (double-click to install)
#       export PATH="$PATH:/opt/local/bin"
#       sudo port -d selfupdate
#
#   - install expat
#       sudo port install expat
#
#   - install XML-Parser
#       sudo perl -MCPAN -e "install XML::Parser"
#       ## (fails, can't find expat libs, so fix LIBPATH or do by hand...)
#       cd ~/.cpan/build/XML-Parser-2.34
#       perl Makefile.PL EXPATLIBPATH=/opt/local/lib \
#                        EXPATINCPATH=/opt/local/include
#       sudo make install
#
#   - install DBI and SQLite
#       sudo perl -MCPAN -e "install DBI"
#       sudo perl -MCPAN -e "install DBD::SQLite"
#
#   - script is now ready to run.
#

use DBI;
use XML::Parser;
use Time::Local;

my $GET_ALL="n";
my $GET_MARKED="y";
my $GET_UNREAD="y";
my $DIRECT_TO_DB="n";
my ($INFILE,$DESTFID);

my $nnwfeeddir="$ENV{'HOME'}/Library/Application Support/NetNewsWire/FeedData";
my $vdbfile="$ENV{'HOME'}/Library/Application Support/Vienna/messages.db";

my ($inkeytag,$currkey,%r);
my ($parser,$dbh,$res,$currvfeedid,%vfeedid,$xmlfile,%vrfeed,%vwfeed);
my $i=0;
my $arraylev=0;
my $dictlev=0;

my @vfields=qw(message_id folder_id parent_id read_flag marked_flag deleted_flag title sender link date text);

{
  my ($arg,$dum);

  while ( $arg=shift ) {
    if ( $arg eq "--skip-marked" ) {
      $GET_MARKED="n";
    } elsif ( $arg eq "--skip-unread" ) {
      $GET_UNREAD="n";
    } elsif ( $arg eq "--get-all" ) {
      $GET_ALL="y";
    } elsif ( $arg eq "--direct-to-db" ) {
      $DIRECT_TO_DB="y";
    } elsif ( $arg =~ /--file=..*/ ) {
      ($dum,$INFILE)=split(/=/,$arg);
    } elsif ( $arg =~ /--dest-folder=..*/ ) {
      ($dum,$DESTFID)=split(/=/,$arg);
    } else {
      print STDERR "$0: ERROR: unrecognized argument: $arg\n";
      exit 1;
    }
  }

  if ( defined $INFILE xor defined $DESTFID ) {
    print STDERR "$0: ERROR: must use --file and --dest-folder together\n";
    exit 1;
  }

  $parser=new XML::Parser(Style=>'Subs',Pkg=>'SubHandlers',ErrorContext=>2);
  $parser->setHandlers(Char => \&char_handler);

  $dbh=DBI->connect("dbi:SQLite:$vdbfile")
    || die "Cannot connect: $DBI::errstr";

  if ( defined $INFILE and defined $DESTFID ) {
    $currvfeedid=$DESTFID;
    $vrfeed{$INFILE}=$DESTFID;
    if ( ! -r $INFILE ) {
      die "can't read $INFILE";
    }
    $parser->parsefile("$INFILE");
  } else {
    $res=$dbh->selectall_arrayref("SELECT folder_id,feed_url FROM rss_folders");
    foreach( @$res ) {
      $_->[1] =~ s/^http:\/\///;
      $_->[1] =~ s/\//_/g;
      $vfeedid{$_->[1]}=$_->[0];
    }
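
    ## Illustrative example of the feed_url -> filename mapping
    ## above (hypothetical URL):
    ##   http://example.com/feed/rss.xml  ->  example.com_feed_rss.xml
    ## i.e. strip the leading http:// and turn slashes into
    ## underscores, which is how NNW appears to name its FeedData
    ## files.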
    for $xmlfile (keys %vfeedid) {
      undef %vrfeed;
      undef %vwfeed;
      $currvfeedid=$vfeedid{$xmlfile};
      print STDERR "--- $xmlfile [id=$currvfeedid]\n";

      ## build hash of vienna articles for this feed -- vrfeed{message_id}=date
      $res=$dbh->selectall_arrayref("SELECT message_id,date,folder_id FROM messages WHERE folder_id=$currvfeedid");
      foreach( @$res ) {
        $vrfeed{$_->[0]}=$_->[1];
      }

      if ( ! -r "$nnwfeeddir/$xmlfile" ) {
        print "-- can't read $xmlfile (folder_id $vfeedid{$xmlfile})\n";
        next;
      } else {
        print "-- from $xmlfile (folder_id $vfeedid{$xmlfile})\n";
      }
      $parser->parsefile("$nnwfeeddir/$xmlfile");
    }
  }

  $dbh->disconnect;
}

sub char_handler {
  my ($p, $data) = @_;

  if ( $arraylev == 0 or $dictlev > 2 ) {
    return;
  }
  if ( defined $inkeytag ) {
    $currkey=$data;
  } elsif ( defined $currkey ) {
    if ( defined $r{$currkey} ) {
      $r{$currkey}.=$data;
    } else {
      $r{$currkey}=$data;
    }
  }
}

sub write_vwfeed_ng {
  my ($k);

  ## this results in all values being quoted..NG for numbers.
  my $sqlstr="INSERT INTO messages (".join(', ',@vfields).") VALUES (";
  for (my $x=0;$x<$#vfields;$x++) {
    $sqlstr .= "?, ";
  }
  $sqlstr .= "? )";

  my $sth = $dbh->prepare($sqlstr) or die $dbh->errstr;
  for $k (keys %vwfeed) {
    $sth->execute(@{$vwfeed{$k}}{@vfields}) or die $dbh->errstr;
  }
}

sub write_vwfeed {
  my ($k,$sqlstr);

  binmode STDOUT, ":utf8";
  binmode STDERR, ":utf8";

  for $k (keys %vwfeed) {
    print STDERR " + $vwfeed{$k}{'title'} ";
    if ($vwfeed{$k}{'read_flag'} != 1) { print STDERR "[UNREAD]"; }
    if ($vwfeed{$k}{'marked_flag'} == 1) { print STDERR "[MARKED]"; }
    print STDERR "\n";

    #$sqlstr = "INSERT INTO messages VALUES (";
    $sqlstr = "INSERT INTO messages (".join(', ',@vfields).") VALUES (";
    $sqlstr .= "\'$vwfeed{$k}{'message_id'}\',";
    $sqlstr .= "$vwfeed{$k}{'folder_id'},";
    $sqlstr .= "$vwfeed{$k}{'parent_id'},";
    $sqlstr .= "$vwfeed{$k}{'read_flag'},";
    $sqlstr .= "$vwfeed{$k}{'marked_flag'},";
    $sqlstr .= "$vwfeed{$k}{'deleted_flag'},";
    $sqlstr .= "\'$vwfeed{$k}{'title'}\',";
    $sqlstr .= "\'$vwfeed{$k}{'sender'}\',";
    $sqlstr .= "\'$vwfeed{$k}{'link'}\',";
    $sqlstr .= "$vwfeed{$k}{'date'},";
    $sqlstr .= "\'$vwfeed{$k}{'text'}\'";
    $sqlstr .= ");";

    if ( $DIRECT_TO_DB eq "y" ) {
      my $sth=$dbh->prepare($sqlstr);
      $sth->execute();
    } else {
      print $sqlstr . "\n";
    }
  }
}
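## Untested alternate direct-to-db writer (a sketch, not called
## anywhere): bound placeholders instead of hand-built SQL, and a
## single transaction per feed batch instead of one commit+sync per
## row, which should address the slow --direct-to-db path. DBI
## binds everything as strings (the problem noted in
## write_vwfeed_ng); sqlite is loosely typed, so that may be
## harmless, but it is unverified against Vienna's schema.
## Note: add_to_vwfeed runs filter_text, which doubles single
## quotes for the hand-built SQL; with bound parameters the
## doubling would be stored literally, so it would need to be
## skipped on this path.
sub write_vwfeed_tx {
  my $sqlstr="INSERT INTO messages (".join(', ',@vfields).
             ") VALUES (".join(', ',('?') x @vfields).")";
  my $sth=$dbh->prepare($sqlstr) or die $dbh->errstr;

  $dbh->begin_work;               # one transaction for the whole batch
  for my $k (keys %vwfeed) {
    $sth->execute(@{$vwfeed{$k}}{@vfields}) or die $dbh->errstr;
  }
  $dbh->commit;                   # single commit/sync, not one per row
}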
sub filter_text {
  my $text=shift;

  $text =~ s/\'/\'\'/g;   # escape single quotes for SQL
  $text =~ s/<//gi;       # strip '<' characters
  ## TODO more filtering?

  return $text;
}

sub zdate_to_secs {
  ## 2005-09-10T19:21:00Z -> 00 21 19 10 8 105
  my $rawdate;
  my ($d,$t)=split(/[TZ]/,shift);
  my @d=split(/-/,$d);
  $d[1]--;
  my @t=split(/:/,$t);
  #print STDERR "-- POST $t[2],$t[1],$t[0],$d[2],$d[1],$d[0]\n";
  $rawdate=timelocal($t[2],$t[1],$t[0],$d[2],$d[1],$d[0]);
  return $rawdate;
}

sub init_r {
  $i++;
  %r = ( 'guid'=>'',
         'read'=>'',
         'flagged'=>'',
         'title'=>'',
         'creator'=>'',
         'link'=>'',
         'datePublished'=>'',
         'dateArrived'=>'',
         'description'=>'');
}

sub add_to_vwfeed {
  my $tdate="2000-01-01T07:37:00Z";
  my $getme=0;

  for my $key (keys %r) {
    $r{$key} =~ s/^\s+//;   # trim leading and trailing whitespace
    $r{$key} =~ s/\s+$//;
  }

  if ( $r{'datePublished'} ne '' ) {
    $tdate=$r{'datePublished'};
  } elsif ( $r{'dateArrived'} ne '' ) {
    $tdate=$r{'dateArrived'};
  }
  $tdate=&zdate_to_secs($tdate);

  if ( $r{'guid'} eq '' ) { $r{'guid'}="nnwxferid$i"; }
  if ( defined $vrfeed{$r{'guid'}} ) { return; }

  if ( $r{'flagged'} eq '' ) { $r{'flagged'}=0; }
  if ( $r{'read'} eq '' ) { $r{'read'}=0; }

  if ( $GET_ALL eq "y" ) {
    $getme++;
  } else {
    if ( $GET_MARKED eq "y" and $r{'flagged'} == 1 ) { $getme++; }
    if ( $GET_UNREAD eq "y" and $r{'read'} == 0 ) { $getme++; }
  }
  return if ( $getme == 0 );

  $vwfeed{$r{'guid'}}{'message_id'}=$r{'guid'};
  $vwfeed{$r{'guid'}}{'folder_id'}=$currvfeedid;
  $vwfeed{$r{'guid'}}{'parent_id'}=0;
  $vwfeed{$r{'guid'}}{'read_flag'}=$r{'read'};
  $vwfeed{$r{'guid'}}{'marked_flag'}=$r{'flagged'};
  $vwfeed{$r{'guid'}}{'deleted_flag'}=0;
  $vwfeed{$r{'guid'}}{'title'}=&filter_text($r{'title'});
  $vwfeed{$r{'guid'}}{'sender'}=&filter_text($r{'creator'});
  $vwfeed{$r{'guid'}}{'link'}=$r{'link'};
  $vwfeed{$r{'guid'}}{'date'}=$tdate;
  $vwfeed{$r{'guid'}}{'text'}=&filter_text($r{'description'});
}

package SubHandlers;

sub array  { $arraylev++; }
sub array_ { $arraylev--; if ( $arraylev == 0 ) { &::write_vwfeed; } }
sub plist  { }
sub plist_ { }
sub key    { if ( $arraylev == 0 ) { return; } $inkeytag="1"; }
sub key_   { if ( $arraylev == 0 ) { return; } undef $inkeytag; }
sub false  { $r{$currkey}.="0"; }
sub true   { $r{$currkey}.="1"; }
sub dict   { $dictlev++; if ( $arraylev == 1 ) { &::init_r; } }
sub dict_  { $dictlev--; if ( $arraylev == 1 ) { &::add_to_vwfeed; } }

## 13.5MB SQL data, 10k articles
#   4min to export
#  60min to import same via < redirect
#  66min to import same via sqlite shell .read
#  73min to import same via --direct-to-db
#  (messages.db goes from 3.7MB to 18.5MB)
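
## Aside on dates: the trailing "Z" in NNW's plist timestamps
## denotes UTC, but zdate_to_secs above converts with timelocal(),
## i.e. interprets them as local time, so the stored epoch seconds
## are shifted by the local UTC offset. Whether Vienna expects UTC
## or local-time epochs is unverified; if UTC turns out to be
## correct, a drop-in variant (hypothetical, not wired in):
package main;

sub zdate_to_secs_utc {
  my ($d,$t)=split(/[TZ]/,shift);
  my @d=split(/-/,$d);
  $d[1]--;                        # timegm months are 0-based
  my @t=split(/:/,$t);
  return timegm($t[2],$t[1],$t[0],$d[2],$d[1],$d[0]);  # UTC, not local
}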