#!/usr/bin/perl # # Programmer: Craig Stuart Sapp # Creation Date: Thu Apr 14 11:30:07 PDT 2016 # Last Modified: Thu May 4 10:42:24 PDT 2017 Added -d option for **deg input # Filename: sdmarkov # Syntax: perl 5 # # Description: Generate scale-degree markov transition tables from # **kern data. The input can be polyphonic, but only the # first note in chords will be considered, and only the # first sub-spine will be followed if spines split. # # Options: # -v == convert context tokens to variables, such as "3 4" to "t3_4". # -h == print histogram of input data, then exit # -c == preserve chromatic alterations (such as "3-" which is flat 3). # -d == starting with deg data already. # -b == print only base of filename (JRPID). # -a == display all possible transitions, including 0 count cases. # -n # == n-gram length to generate with context. # --csv == output CSV data rather than TSV. # --humdrum == output Humdrum data rather than TSV. # --min-count # == exclude files which have few than this number of context # tokens (default is 100). # --count == show data as counts rather than percentages. # --space string == convert spaces to this character with -v option # (default is "_"); # --prefix string == add this to start of token when using -v option # (default is "t"); # --postfix string == add this to end of token when using -v option # (default is ""); use strict; use Getopt::Long; my $Prefix = "t"; my $Postfix = ""; my $Space = "_"; my $Separator = "\t"; my $variableQ = 0; my $countQ = 0; my $context = 2; my $baseQ = 0; my $noZerosQ = 0; my $chromaticQ = 0; my $histogramQ = 0; my $debugQ = 0; my $degQ = 0; my $csvQ = 0; my $humdrumQ = 0; my $MinCount = 1; Getopt::Long::Configure("bundling"); GetOptions ( 'h|histogram' => \$histogramQ, 'v|variables' => \$variableQ, 'z|no-zeros' => \$noZerosQ, 'debug' => \$debugQ, 'd|deg' => \$degQ, 'space=s' => \$Space, 'prefix=s' => \$Prefix, 'postfix=s' => \$Postfix, 'min-count=s' => \$MinCount, 'csv' => \$csvQ, 'humdrum' => \$humdrumQ, 'c|chromatic' => \$chromaticQ, 'b|base|file-base' => \$baseQ, 'n|context=s' => \$context ); $context = 2 if $context < 2; # can't be 1 in input to context $context = 2 if $context > 100; $Separator = "," if $csvQ; $Prefix = "**$Prefix" if $humdrumQ; my @files = @ARGV; my %TOKENS; my %RESULTS; # Allow for all possibilities for transitions: if (!$noZerosQ) { for (my $i=1; $i<=7; $i++) { for (my $j=1; $j<=7; $j++) { my $tag = "$i $j"; $TOKENS{$tag} = 0; } } } getTokens(@files); if ($histogramQ) { my $sum = getHistogramSum(%TOKENS); foreach my $key (sort keys %TOKENS) { my $percent = $TOKENS{$key} / $sum * 100.0; my $token = prepareToken($key); print "$token\t$TOKENS{$key}\n"; } exit(0); } # generate data for each work my @order = sort keys %TOKENS; printColumnHeader(@order); my @files = sort keys %RESULTS; foreach my $file (@files) { printFileRowData($file, $RESULTS{$file}, @order); } printColumnFooter(@order); exit(0); ########################################################################### ############################## ## ## printFileRowData -- Print a row of data for a particular file ## sub printFileRowData { my ($file, $results, @order) = @_; my %data; my @tokens = parseTokensFromResults($results); my $sum = 0; for (my $i=0; $i<@tokens; $i++) { $data{$tokens[$i]}++; $sum++; } return if $sum < $MinCount; print "$file"; for (my $i=0; $i<@order; $i++) { my $key = $order[$i]; my $value = $data{$key}; $value = 0 if !$value; my $percent = int($value / $sum * 100000.0 + 0.5) / 1000.0; # my $token = prepareToken($key); print "$Separator$percent"; } print "\n"; } ############################## ## ## printColumnHeader -- ## sub printColumnHeader { my @tokens = @_; if ($humdrumQ) { print "**file"; } else { print "FILENAME"; } foreach my $token (@tokens) { $token = prepareToken($token); print "$Separator$token"; } print "\n"; } ############################## ## ## printColumnFooter -- ## sub printColumnFooter { my @order = @_; return if !$humdrumQ; print "*-"; for (my $i=0; $i<@order; $i++) { print "$Separator*-"; } print "\n"; } ############################## ## ## prepareToken -- Convert context token into variable if -v option given. ## sub prepareToken { my ($token) = @_; return $token if !$variableQ; $token =~ s/\s/$Space/g; $token =~ s/^/$Prefix/g; $token =~ s/$/$Postfix/g; return $token; } ############################## ## ## getHistogramSum -- Sum all counts in the token histogram. ## sub getHistogramSum { my %data = @_; my $sum = 0; foreach my $token (keys %data) { $sum += $data{$token}; } return $sum; } ############################## ## ## getTokens -- get a list of all context tokens foun in the data files. ## sub getTokens { my @filelist = @_; foreach my $file (@filelist) { processFile($file); } } ############################## ## ## processFile -- Do finalis/scale-degree/context analysis. ## sub processFile { my ($file) = @_; my $base = $file; $base =~s/-.*//; # ignore chromatic alterations of the scale degree: my $removeaccidentals = " | sed s/[+-]//g"; if ($chromaticQ) { $removeaccidentals = ""; } my $command = ""; if ($degQ) { $command .= " cat $file"; $command .= " | grep -v '[=]'"; $command .= " | serialize -L | context -n $context | ridx -H"; $command .= $removeaccidentals; # scale degrees should not be linked if separated by rests: $command .= " | grep -v r "; } else { $command .= " finalis-tonic $file | extractx -i '**kern' | deg -a"; $command .= " | grep -v '[=]'"; $command .= " | serialize -L | context -n $context | ridx -H"; $command .= $removeaccidentals; # scale degrees should not be linked if separated by rests: $command .= " | grep -v r "; } my $result = `$command`; if ($debugQ) { print "Processing file $file\n"; print "Result = $result\n"; } if ($baseQ) { $RESULTS{$base} = $result; } else { $RESULTS{$file} = $result; } my @tokens = parseTokensFromResults($result); for (my $i=0; $i<@tokens; $i++) { $TOKENS{$tokens[$i]}++; } } ############################## ## ## parseTokensFromResults -- Extract the list of tokens for a paticular file. ## sub parseTokensFromResults { my ($result) = @_; my @tokens = split(/\n/, $result); my @output; foreach my $token (@tokens) { $output[@output] = $token; } return @output; }