This repository has been archived by the owner on Dec 20, 2022. It is now read-only.
forked from stanfordnlp/chirpycardinal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind-old-lines.pl
executable file
·187 lines (144 loc) · 4.61 KB
/
find-old-lines.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/perl
# This script parses Git blame's "porcelain" output format and
# ascertains the oldest lines of code seen.
#
# If you want to perform a custom report, just define your own callback
# function and invoke parse_porcelain() with it.
#
# The expected input format is slightly modified from raw `git blame
# -p`. Here is an example script for producing input:
#
# for f in `git ls-tree -r --name-only HEAD`; do \
# echo "BEGIN_RECORD $f"; \
# git blame -l -t -M -C -n -w -p $f; \
# echo "END_RECORD $f"; \
# done
use strict;
use warnings FATAL => "all";
use POSIX qw(strftime);
our @STATES = qw(global header_first header_metadata);
our $RE_BEGIN_RECORD = qr/^BEGIN_RECORD\s(.*)$/msx;
our $RE_END_RECORD = qr/^END_RECORD\s(.*)$/msx;
our $RE_LINE_HEADER = qr/
^
([a-z0-9]{40}) # SHA
\s(\d+) # Original line number
\s(\d+) # Current line number
(?:\s(\d+))? # Number of lines in group (optional)
$/msx;
our $RE_HEADER_METADATA = qr/^([a-z-]+)\s(.*)$/msx;
our $RE_LINE_DATA = qr/^\t(.*)$/msx;
# Parses Git blame's porcelain output.
# Calls the supplied $onBlock callback function when a full block of
# code has been parsed. The function receives a hashref describing the
# block.
sub parse_porcelain {
my ($fh, $onBlock) = @_;
my $state = "global";
my $metadata = {};
my @lines;
my ($commit, $original_line, $current_line);
my $current_file;
my $callOnBlock = sub {
my $data = {};
$data->{'filename'} = $current_file;
$data->{'lines'} = \@lines;
$data->{'metadata'} = $metadata;
$data->{'commit'} = $commit;
&$onBlock($data);
@lines = ();
};
while (my $line = <$fh>) {
chomp $line;
if ($line =~ $RE_BEGIN_RECORD) {
$state eq "global" or die "Parser error. Unexpected BEGIN_RECORD.";
$current_file = $1;
$state = "header_first";
next;
}
elsif ($line =~ $RE_END_RECORD) {
$1 eq $current_file or die "Parser error. END_RECORD mismatch!";
if ($onBlock and scalar(@lines)) {
&$callOnBlock();
}
$state = "global";
next;
}
if ($state eq "header_first") {
$line =~ $RE_LINE_HEADER or die "Invalid initial header line! $line";
my ($new_commit, $new_original_line, $new_current_line, $block_count);
($new_commit, $new_original_line, $new_current_line, $block_count) = ($1, $2, $3, $4);
if ($block_count and $onBlock and scalar(@lines)) {
&$callOnBlock();
}
$commit = $new_commit;
$original_line = $new_original_line;
$current_line = $new_current_line;
$state = "header_metadata";
next;
}
if ($state eq "header_metadata") {
# Lines beginning with a tab denote line content. Subsequent line(s)
# will be metadata for that line.
if ($line =~ $RE_LINE_DATA) {
my $content = $1;
push @lines, [$content, $original_line, $current_line];
$state = "header_first";
next;
}
next if $line eq "boundary";
$line =~ $RE_HEADER_METADATA or die "Could not parse header metadata.";
my ($k, $v) = ($1, $2);
$metadata->{$k} = $v;
next;
}
die "Unknown state!";
}
}
# onBlock callback that collects oldest commit times for blocks.
my $old_lines = {};
sub collect_times {
my ($data) = @_;
# We filter non-relevant lines.
my $have_content = 0;
foreach my $line (@{$data->{'lines'}}) {
my $s = $line->[0];
# Skip empty and whitespace.
next if $s =~ m/^\s*$/;
# Skip things looking like comments.
next if $s =~ m/^\s*(#|\/\/|\/\*|\*\/)/;
if ($s =~ m/[a-z0-9]/) {
$have_content = 1;
last;
}
}
if (!$have_content) {
return;
}
my $time = $data->{'metadata'}->{'committer-time'};
my $metadata = {};
$metadata->{'commit'} = $data->{'commit'};
$metadata->{'author'} = $data->{'metadata'}->{'author'};
$metadata->{'filename'} = $data->{'filename'};
$metadata->{'lines'} = [];
foreach my $line (@{$data->{'lines'}}) {
push @{$metadata->{'lines'}}, $line->[2];
}
push @{$old_lines->{$time}}, $metadata;
}
sub print_oldest_blocks {
my ($times) = @_;
foreach my $time (sort { $a <=> $b } keys %$times) {
my $blocks = $times->{$time};
my $date = strftime("%Y-%m-%d %H:%M:%S", gmtime($time));
print "Time: $time ($date)\n";
foreach my $data (@$blocks) {
print " Commit: " . $data->{'commit'} . "\n";
print " Author: " . $data->{'author'} . "\n";
print " Filename: " . $data->{'filename'} . "\n";
print " Lines: " . join(', ', @{$data->{'lines'}}) . "\n";
}
}
}
parse_porcelain(*STDIN, \&collect_times);
print_oldest_blocks($old_lines);