1 | #!/usr/bin/env perl |
---|
2 | use strict; |
---|
3 | |
---|
4 | # Change by Thomas Esser, Sept. 1998: The above lines allows us to find |
---|
5 | # perl along $PATH rather than guessing a fixed location. The above |
---|
6 | # construction should work with most shells. |
---|
7 | |
---|
8 | # A script to transform an EPS file so that: |
---|
9 | # a) it is guarenteed to start at the 0,0 coordinate |
---|
10 | # b) it sets a page size exactly corresponding to the BoundingBox |
---|
11 | # This means that when Ghostscript renders it, the result needs no |
---|
12 | # cropping, and the PDF MediaBox is correct. |
---|
13 | # c) the result is piped to Ghostscript and a PDF version written |
---|
14 | # |
---|
15 | # It needs a Level 2 PS interpreter. |
---|
16 | # If the bounding box is not right, of course, you have problems... |
---|
17 | # |
---|
18 | # The only thing I have not allowed for is the case of |
---|
19 | # "%%BoundingBox: (atend)", which is more complicated. |
---|
20 | # |
---|
21 | # Sebastian Rahtz, for Elsevier Science |
---|
22 | # |
---|
23 | # now with extra tricks from Hans Hagen's texutil. |
---|
24 | # |
---|
25 | # History |
---|
26 | # 1999/05/06 v2.5 (Heiko Oberdiek) |
---|
27 | # * New options: --hires, --exact, --filter, --help. |
---|
28 | # * Many cosmetics: title, usage, ... |
---|
29 | # * New code for debug, warning, error |
---|
30 | # * Detecting of cygwin perl |
---|
31 | # * Scanning for %%{Hires,Exact,}BoundingBox. |
---|
32 | # * Scanning only the header in order not to get a wrong |
---|
33 | # BoundingBox of an included file. |
---|
34 | # * (atend) supported. |
---|
35 | # * uses strict; (earlier error detecting). |
---|
36 | # * changed first comment from '%!PS' to '%!'; |
---|
37 | # * corrected (atend) pattern: '\s*\(atend\)' |
---|
38 | # * using of $bbxpat in all BoundingBox cases, |
---|
39 | # correct the first white space to '...Box:\s*$bb...' |
---|
40 | # * corrected first line (one line instead of two before 'if 0;'; |
---|
41 | # 2000/11/05 v2.6 (Heiko Oberdiek) |
---|
42 | # * %%HiresBoundingBox corrected to %%HiResBoundingBox |
---|
43 | # 2001/03/05 v2.7 (Heiko Oberdiek) |
---|
44 | # * Newline before grestore for the case that there is no |
---|
45 | # whitespace at the end of the eps file. |
---|
46 | # 2002/02/18 v2.8draft (Gerben Wierda) |
---|
47 | # * Handle different eol styles transparantly |
---|
48 | # * Applied fix from Peder Axensten for Freehand bug |
---|
49 | # 2002/02/21 v2.8draft (Gerben Wierda) |
---|
50 | # * Fixed bug where last line of buffer was not copied out (ugh!) |
---|
51 | # 2003/04/22 v2.9draft (Gerben Wierda) |
---|
52 | # * Fixed bug where with cr-eol files everything up to the first %! |
---|
53 | # * in the first 2048 bytes was gobbled (double ugh!) |
---|
54 | # 2004/03/17 v2.9.1draft (Gerben Wierda) |
---|
55 | # * No autorotate page |
---|
56 | # 2005/09/29 v2.9.2draft (Gerben Wierda) |
---|
57 | # * Quote OutFilename |
---|
58 | # 2005/10/01 v2.9.3draft (Gerben Wierda) |
---|
59 | # * Quote OutFilename |
---|
60 | # 2005/10/06 v2.9.4gw (Gerben Wierda) |
---|
61 | # * This has become the official version for now |
---|
62 | # 2005/10/06 v2.9.5gw (Gerben Wierda) |
---|
63 | # * Fixed a horrendous bug in the (atend) handling code |
---|
64 | # |
---|
65 | |
---|
66 | ### program identification |
---|
67 | my $program = "epstopdf"; |
---|
68 | my $filedate="2006/01/29"; |
---|
69 | my $fileversion="2.9.5gw"; |
---|
70 | my $copyright = "Copyright 1998-2006 by Sebastian Rahtz et al."; |
---|
71 | my $title = "\U$program\E $fileversion, $filedate - $copyright\n"; |
---|
72 | |
---|
73 | ### ghostscript command name |
---|
74 | my $GS = "gs"; |
---|
75 | $GS = "gswin32c" if $^O eq 'MSWin32'; |
---|
76 | $GS = "gswin32c" if $^O =~ /cygwin/; |
---|
77 | |
---|
78 | ### options |
---|
79 | $::opt_help=0; |
---|
80 | $::opt_debug=0; |
---|
81 | $::opt_compress=1; |
---|
82 | $::opt_gs=1; |
---|
83 | $::opt_hires=0; |
---|
84 | $::opt_exact=0; |
---|
85 | $::opt_filter=0; |
---|
86 | $::opt_outfile=""; |
---|
87 | |
---|
88 | ### usage |
---|
89 | my @bool = ("false", "true"); |
---|
90 | my $usage = <<"END_OF_USAGE"; |
---|
91 | ${title}Syntax: $program [options] <eps file> |
---|
92 | Options: |
---|
93 | --help: print usage |
---|
94 | --outfile=<file>: write result to <file> |
---|
95 | --(no)filter: read standard input (default: $bool[$::opt_filter]) |
---|
96 | --(no)gs: run ghostscript (default: $bool[$::opt_gs]) |
---|
97 | --(no)compress: use compression (default: $bool[$::opt_compress]) |
---|
98 | --(no)hires: scan HiResBoundingBox (default: $bool[$::opt_hires]) |
---|
99 | --(no)exact: scan ExactBoundingBox (default: $bool[$::opt_exact]) |
---|
100 | --(no)debug: debug informations (default: $bool[$::opt_debug]) |
---|
101 | Examples for producing 'test.pdf': |
---|
102 | * $program test.eps |
---|
103 | * produce postscript | $program --filter >test.pdf |
---|
104 | * produce postscript | $program -f -d -o=test.pdf |
---|
105 | Example: look for HiResBoundingBox and produce corrected PostScript: |
---|
106 | * $program -d --nogs -hires test.ps>testcorr.ps |
---|
107 | END_OF_USAGE |
---|
108 | |
---|
109 | ### process options |
---|
110 | use Getopt::Long; |
---|
111 | GetOptions ( |
---|
112 | "help!", |
---|
113 | "debug!", |
---|
114 | "filter!", |
---|
115 | "compress!", |
---|
116 | "gs!", |
---|
117 | "hires!", |
---|
118 | "exact!", |
---|
119 | "outfile=s", |
---|
120 | ) or die $usage; |
---|
121 | |
---|
122 | ### help functions |
---|
123 | sub debug { |
---|
124 | print STDERR "* @_\n" if $::opt_debug; |
---|
125 | } |
---|
126 | sub warning { |
---|
127 | print STDERR "==> Warning: @_!\n"; |
---|
128 | } |
---|
129 | sub error { |
---|
130 | die "$title!!! Error: @_!\n"; |
---|
131 | } |
---|
132 | sub errorUsage { |
---|
133 | die "$usage\n!!! Error: @_!\n"; |
---|
134 | } |
---|
135 | |
---|
136 | ### option help |
---|
137 | die $usage if $::opt_help; |
---|
138 | |
---|
139 | ### get input filename |
---|
140 | my $InputFilename = ""; |
---|
141 | if ($::opt_filter) { |
---|
142 | @ARGV == 0 or |
---|
143 | die errorUsage "Input file cannot be used with filter option"; |
---|
144 | $InputFilename = "-"; |
---|
145 | debug "Input file: standard input"; |
---|
146 | } |
---|
147 | else { |
---|
148 | @ARGV > 0 or die errorUsage "Input filename missing"; |
---|
149 | @ARGV < 2 or die errorUsage "Unknown option or too many input files"; |
---|
150 | $InputFilename = $ARGV[0]; |
---|
151 | -f $InputFilename or error "'$InputFilename' does not exist"; |
---|
152 | debug "Input filename:", $InputFilename; |
---|
153 | } |
---|
154 | |
---|
155 | ### option compress |
---|
156 | my $GSOPTS = ""; |
---|
157 | $GSOPTS = "-dUseFlateCompression=false " unless $::opt_compress; |
---|
158 | |
---|
159 | ### option BoundingBox types |
---|
160 | my $BBName = "%%BoundingBox:"; |
---|
161 | !($::opt_hires and $::opt_exact) or |
---|
162 | error "Options --hires and --exact cannot be used together"; |
---|
163 | $BBName = "%%HiResBoundingBox:" if $::opt_hires; |
---|
164 | $BBName = "%%ExactBoundingBox:" if $::opt_exact; |
---|
165 | debug "BoundingBox comment:", $BBName; |
---|
166 | |
---|
167 | ### option outfile |
---|
168 | my $OutputFilename = $::opt_outfile; |
---|
169 | if ($OutputFilename eq "") { |
---|
170 | if ($::opt_gs) { |
---|
171 | $OutputFilename = $InputFilename; |
---|
172 | if (!$::opt_filter) { |
---|
173 | $OutputFilename =~ s/\.[^\.]*$//; |
---|
174 | $OutputFilename .= ".pdf"; |
---|
175 | } |
---|
176 | } |
---|
177 | else { |
---|
178 | $OutputFilename = "-"; # standard output |
---|
179 | } |
---|
180 | } |
---|
181 | if ($::opt_filter) { |
---|
182 | debug "Output file: standard output"; |
---|
183 | } |
---|
184 | else { |
---|
185 | debug "Output filename:", $OutputFilename; |
---|
186 | } |
---|
187 | |
---|
188 | ### option gs |
---|
189 | if ($::opt_gs) { |
---|
190 | debug "Ghostscript command:", $GS; |
---|
191 | debug "Compression:", ($::opt_compress) ? "on" : "off"; |
---|
192 | } |
---|
193 | |
---|
194 | ### open input file |
---|
195 | open(IN,"<$InputFilename") or error "Cannot open", |
---|
196 | ($::opt_filter) ? "standard input" : "'$InputFilename'"; |
---|
197 | binmode IN; |
---|
198 | |
---|
199 | ### open output file |
---|
200 | if ($::opt_gs) { |
---|
201 | my $pipe = "$GS -q -sDEVICE=pdfwrite $GSOPTS -dAutoRotatePages=/None" . |
---|
202 | " -sOutputFile='$OutputFilename' - -c quit"; |
---|
203 | debug "Ghostscript pipe:", $pipe; |
---|
204 | open(OUT,"|$pipe") or error "Cannot open Ghostscript for piped input"; |
---|
205 | } |
---|
206 | else { |
---|
207 | open(OUT,">$OutputFilename") or error "Cannot write '$OutputFilename"; |
---|
208 | } |
---|
209 | |
---|
210 | # reading a cr-eol file on a lf-eol system makes it impossible to parse |
---|
211 | # the header and besides it will read the intire file into yor line by line |
---|
212 | # scalar. this is also true the other way around. |
---|
213 | |
---|
214 | ### scan a block, try to determine eol style |
---|
215 | |
---|
216 | my $buf; |
---|
217 | my $buflen; |
---|
218 | my @bufarray; |
---|
219 | my @parsedbufarray; # for mytell/myseek |
---|
220 | my $bufarraypos; |
---|
221 | |
---|
222 | # We assume 2048 is big enough. |
---|
223 | my $EOLSCANBUFSIZE = 2048; |
---|
224 | |
---|
225 | $buflen = read( IN, $buf, $EOLSCANBUFSIZE); |
---|
226 | if ($buflen > 0) { |
---|
227 | my $crlfpos; |
---|
228 | my $lfpos; |
---|
229 | my $crpos; |
---|
230 | |
---|
231 | # remove binary junk before header |
---|
232 | # if there is no header, we assume the file starts with ascii style and |
---|
233 | # we look for a eol style anyway, to prevent possible loading of the |
---|
234 | # entire file |
---|
235 | if ($buf =~ /%!/) { |
---|
236 | # throw away binary junk before %! |
---|
237 | $buf =~ s/(.*?)%!/%!/o; |
---|
238 | } |
---|
239 | $lfpos = index( $buf, "\n"); |
---|
240 | $crpos = index( $buf, "\r"); |
---|
241 | $crlfpos = index( $buf, "\r\n"); |
---|
242 | |
---|
243 | if ($crpos > 0 and ($lfpos == -1 or $lfpos > $crpos+1)) { |
---|
244 | # The first eol was a cr and it was not immediately followed by a lf |
---|
245 | $/ = "\r"; |
---|
246 | debug "The first eol character was a CR ($crpos) and not immediately followed by a LF ($lfpos)"; |
---|
247 | } |
---|
248 | |
---|
249 | # Now we have set the correct eol-character. Get one more line and add |
---|
250 | # it to our buffer. This will make the buffer contain an entire line |
---|
251 | # at the end. Then split the buffer in an array. We will draw lines from |
---|
252 | # that array until it is empty, then move again back to <IN> |
---|
253 | $buf .= <IN> unless eof( IN); |
---|
254 | $buflen = length( $buf); |
---|
255 | $bufarraypos = 0; |
---|
256 | |
---|
257 | # Some extra magic is needed here: if we set $/ to \r, Perl's re engine |
---|
258 | # still thinks eol is \n in regular expressions (not very nice) so we |
---|
259 | # cannot split on ^, but have to split on \r and reappend those. |
---|
260 | if ($/ eq "\r") { |
---|
261 | @bufarray = split( /\r/ms, $buf); # split on \r |
---|
262 | grep( $_ .= "\r", @bufarray); # re-append \r to each array item |
---|
263 | } |
---|
264 | else { |
---|
265 | @bufarray = split( /^/ms, $buf); |
---|
266 | } |
---|
267 | } |
---|
268 | |
---|
269 | ### getline |
---|
270 | sub getline { |
---|
271 | if ($#bufarray >= 0) { |
---|
272 | $_ = shift( @bufarray); |
---|
273 | unshift( @parsedbufarray, $_); # for myseek and mytell |
---|
274 | $bufarraypos += length( $_); |
---|
275 | # debug "getline from array. bufarraypos = $bufarraypos"; |
---|
276 | # debug "*** READ: $_"; |
---|
277 | } |
---|
278 | else { |
---|
279 | $_ = <IN>; |
---|
280 | } |
---|
281 | return( defined( $_)); |
---|
282 | } |
---|
283 | |
---|
284 | ### mytell and myseek, work on <IN> only |
---|
285 | sub mytell { |
---|
286 | if ($#bufarray) { |
---|
287 | # debug "Telling ARRAY position $bufarraypos"; |
---|
288 | return $bufarraypos; |
---|
289 | } |
---|
290 | else { |
---|
291 | my $pos = tell( IN); |
---|
292 | # debug "Telling FILE position $pos"; |
---|
293 | return $pos; |
---|
294 | } |
---|
295 | } |
---|
296 | |
---|
297 | sub myseek { |
---|
298 | my $pos = shift; |
---|
299 | # debug "Seeking to position $pos in input"; |
---|
300 | if ($pos < $buflen) { |
---|
301 | # debug "myseek position $pos < buffer size $buflen"; |
---|
302 | # We were still parsing the array, reset to the end of buf and |
---|
303 | # move to the right line in the array. |
---|
304 | # Now, move stuff from the @parsedbufarray back until we are back at $pos |
---|
305 | my $tmpline; |
---|
306 | while ($bufarraypos > $pos) { |
---|
307 | # debug "myseek bufarray position $bufarraypos > position $pos"; |
---|
308 | # we test on parsedbufarray to prevent an infinite loop on |
---|
309 | # a programming error (DEVELOP only) |
---|
310 | die "Programming error 1\n" unless $#parsedbufarray; |
---|
311 | $tmpline = shift( @parsedbufarray); |
---|
312 | $bufarraypos -= length( $tmpline); |
---|
313 | unshift( @bufarray, $tmpline); |
---|
314 | debug "*** UNREAD: $tmpline"; |
---|
315 | } |
---|
316 | # debug "Returning to ARRAY size position $buflen (bufarraypos = $bufarraypos)"; |
---|
317 | return seek( IN, $buflen, 0); |
---|
318 | } |
---|
319 | else { |
---|
320 | # debug "Seeking to FILE position $pos"; |
---|
321 | return seek( IN, $pos, 0); |
---|
322 | } |
---|
323 | } |
---|
324 | |
---|
325 | ### scan first line |
---|
326 | my $header = 0; |
---|
327 | getline(); |
---|
328 | if (/%!/) { |
---|
329 | # throw away binary junk before %! |
---|
330 | s/(.*)%!/%!/o; |
---|
331 | } |
---|
332 | $header = 1 if /^%/; |
---|
333 | debug "Scanning header for BoundingBox"; |
---|
334 | print OUT; |
---|
335 | |
---|
336 | ### variables and pattern for BoundingBox search |
---|
337 | my $bbxpatt = '[0-9eE\.\-]'; |
---|
338 | # protect backslashes: "\\" gets '\' |
---|
339 | my $BBValues = "\\s*($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)"; |
---|
340 | my $BBCorrected = 0; |
---|
341 | |
---|
342 | sub CorrectBoundingBox { |
---|
343 | my ($llx, $lly, $urx, $ury) = @_; |
---|
344 | debug "Old BoundingBox:", $llx, $lly, $urx, $ury; |
---|
345 | my ($width, $height) = ($urx - $llx, $ury - $lly); |
---|
346 | my ($xoffset, $yoffset) = (-$llx, -$lly); |
---|
347 | debug "New BoundingBox: 0 0", $width, $height; |
---|
348 | debug "Offset:", $xoffset, $yoffset; |
---|
349 | |
---|
350 | print OUT "%%BoundingBox: 0 0 $width $height\n"; |
---|
351 | print OUT "<< /PageSize [$width $height] >> setpagedevice\n"; |
---|
352 | print OUT "gsave $xoffset $yoffset translate\n"; |
---|
353 | } |
---|
354 | |
---|
355 | ### scan header |
---|
356 | if ($header) { |
---|
357 | HEADER: while (getline()) { |
---|
358 | ### Fix for freehand bug ### by Peder Axensten |
---|
359 | next HEADER if(!/\S/); |
---|
360 | |
---|
361 | ### end of header |
---|
362 | if (!/^%/ or /^%%EndComments/) { |
---|
363 | print OUT; |
---|
364 | last; |
---|
365 | } |
---|
366 | |
---|
367 | ### BoundingBox with values |
---|
368 | if (/^$BBName$BBValues/) { |
---|
369 | CorrectBoundingBox $1, $2, $3, $4; |
---|
370 | $BBCorrected = 1; |
---|
371 | last; |
---|
372 | } |
---|
373 | |
---|
374 | ### BoundingBox with (atend) |
---|
375 | if (/^$BBName\s*\(atend\)/) { |
---|
376 | debug $BBName, "(atend)"; |
---|
377 | if ($::opt_filter) { |
---|
378 | warning "Cannot look for BoundingBox in the trailer", |
---|
379 | "with option --filter"; |
---|
380 | last; |
---|
381 | } |
---|
382 | my $pos = mytell(); |
---|
383 | debug "Current file position:", $pos; |
---|
384 | |
---|
385 | # looking for %%BoundingBox |
---|
386 | while (getline()) { |
---|
387 | # skip over included documents |
---|
388 | if (/^%%BeginDocument/) { |
---|
389 | while (getline()) { |
---|
390 | last if /^%%EndDocument/; |
---|
391 | } |
---|
392 | } |
---|
393 | if (/^$BBName$BBValues/) { |
---|
394 | CorrectBoundingBox $1, $2, $3, $4; |
---|
395 | $BBCorrected = 1; |
---|
396 | last; |
---|
397 | } |
---|
398 | } |
---|
399 | |
---|
400 | # go back |
---|
401 | myseek( $pos) or error "Cannot go back to line '$BBName (atend)'"; |
---|
402 | last; |
---|
403 | } |
---|
404 | |
---|
405 | # print header line |
---|
406 | print OUT; |
---|
407 | } |
---|
408 | } |
---|
409 | |
---|
410 | ### print rest of file |
---|
411 | while (getline()) { |
---|
412 | print OUT; |
---|
413 | } |
---|
414 | |
---|
415 | ### close files |
---|
416 | close(IN); |
---|
417 | print OUT "\ngrestore\n" if $BBCorrected; |
---|
418 | close(OUT); |
---|
419 | warning "BoundingBox not found" unless $BBCorrected; |
---|
420 | debug "Ready."; |
---|
421 | ; |
---|