| 1 | #!/usr/bin/env perl |
|---|
| 2 | use strict; |
|---|
| 3 | |
|---|
| 4 | # Change by Thomas Esser, Sept. 1998: The above lines allows us to find |
|---|
| 5 | # perl along $PATH rather than guessing a fixed location. The above |
|---|
| 6 | # construction should work with most shells. |
|---|
| 7 | |
|---|
| 8 | # A script to transform an EPS file so that: |
|---|
| 9 | # a) it is guarenteed to start at the 0,0 coordinate |
|---|
| 10 | # b) it sets a page size exactly corresponding to the BoundingBox |
|---|
| 11 | # This means that when Ghostscript renders it, the result needs no |
|---|
| 12 | # cropping, and the PDF MediaBox is correct. |
|---|
| 13 | # c) the result is piped to Ghostscript and a PDF version written |
|---|
| 14 | # |
|---|
| 15 | # It needs a Level 2 PS interpreter. |
|---|
| 16 | # If the bounding box is not right, of course, you have problems... |
|---|
| 17 | # |
|---|
| 18 | # The only thing I have not allowed for is the case of |
|---|
| 19 | # "%%BoundingBox: (atend)", which is more complicated. |
|---|
| 20 | # |
|---|
| 21 | # Sebastian Rahtz, for Elsevier Science |
|---|
| 22 | # |
|---|
| 23 | # now with extra tricks from Hans Hagen's texutil. |
|---|
| 24 | # |
|---|
| 25 | # History |
|---|
| 26 | # 1999/05/06 v2.5 (Heiko Oberdiek) |
|---|
| 27 | # * New options: --hires, --exact, --filter, --help. |
|---|
| 28 | # * Many cosmetics: title, usage, ... |
|---|
| 29 | # * New code for debug, warning, error |
|---|
| 30 | # * Detecting of cygwin perl |
|---|
| 31 | # * Scanning for %%{Hires,Exact,}BoundingBox. |
|---|
| 32 | # * Scanning only the header in order not to get a wrong |
|---|
| 33 | # BoundingBox of an included file. |
|---|
| 34 | # * (atend) supported. |
|---|
| 35 | # * uses strict; (earlier error detecting). |
|---|
| 36 | # * changed first comment from '%!PS' to '%!'; |
|---|
| 37 | # * corrected (atend) pattern: '\s*\(atend\)' |
|---|
| 38 | # * using of $bbxpat in all BoundingBox cases, |
|---|
| 39 | # correct the first white space to '...Box:\s*$bb...' |
|---|
| 40 | # * corrected first line (one line instead of two before 'if 0;'; |
|---|
| 41 | # 2000/11/05 v2.6 (Heiko Oberdiek) |
|---|
| 42 | # * %%HiresBoundingBox corrected to %%HiResBoundingBox |
|---|
| 43 | # 2001/03/05 v2.7 (Heiko Oberdiek) |
|---|
| 44 | # * Newline before grestore for the case that there is no |
|---|
| 45 | # whitespace at the end of the eps file. |
|---|
| 46 | # 2002/02/18 v2.8draft (Gerben Wierda) |
|---|
| 47 | # * Handle different eol styles transparantly |
|---|
| 48 | # * Applied fix from Peder Axensten for Freehand bug |
|---|
| 49 | # 2002/02/21 v2.8draft (Gerben Wierda) |
|---|
| 50 | # * Fixed bug where last line of buffer was not copied out (ugh!) |
|---|
| 51 | # 2003/04/22 v2.9draft (Gerben Wierda) |
|---|
| 52 | # * Fixed bug where with cr-eol files everything up to the first %! |
|---|
| 53 | # * in the first 2048 bytes was gobbled (double ugh!) |
|---|
| 54 | # 2004/03/17 v2.9.1draft (Gerben Wierda) |
|---|
| 55 | # * No autorotate page |
|---|
| 56 | # 2005/09/29 v2.9.2draft (Gerben Wierda) |
|---|
| 57 | # * Quote OutFilename |
|---|
| 58 | # 2005/10/01 v2.9.3draft (Gerben Wierda) |
|---|
| 59 | # * Quote OutFilename |
|---|
| 60 | # 2005/10/06 v2.9.4gw (Gerben Wierda) |
|---|
| 61 | # * This has become the official version for now |
|---|
| 62 | # 2005/10/06 v2.9.5gw (Gerben Wierda) |
|---|
| 63 | # * Fixed a horrendous bug in the (atend) handling code |
|---|
| 64 | # |
|---|
| 65 | |
|---|
| 66 | ### program identification |
|---|
| 67 | my $program = "epstopdf"; |
|---|
| 68 | my $filedate="2006/01/29"; |
|---|
| 69 | my $fileversion="2.9.5gw"; |
|---|
| 70 | my $copyright = "Copyright 1998-2006 by Sebastian Rahtz et al."; |
|---|
| 71 | my $title = "\U$program\E $fileversion, $filedate - $copyright\n"; |
|---|
| 72 | |
|---|
| 73 | ### ghostscript command name |
|---|
| 74 | my $GS = "gs"; |
|---|
| 75 | $GS = "gswin32c" if $^O eq 'MSWin32'; |
|---|
| 76 | $GS = "gswin32c" if $^O =~ /cygwin/; |
|---|
| 77 | |
|---|
| 78 | ### options |
|---|
| 79 | $::opt_help=0; |
|---|
| 80 | $::opt_debug=0; |
|---|
| 81 | $::opt_compress=1; |
|---|
| 82 | $::opt_gs=1; |
|---|
| 83 | $::opt_hires=0; |
|---|
| 84 | $::opt_exact=0; |
|---|
| 85 | $::opt_filter=0; |
|---|
| 86 | $::opt_outfile=""; |
|---|
| 87 | |
|---|
| 88 | ### usage |
|---|
| 89 | my @bool = ("false", "true"); |
|---|
| 90 | my $usage = <<"END_OF_USAGE"; |
|---|
| 91 | ${title}Syntax: $program [options] <eps file> |
|---|
| 92 | Options: |
|---|
| 93 | --help: print usage |
|---|
| 94 | --outfile=<file>: write result to <file> |
|---|
| 95 | --(no)filter: read standard input (default: $bool[$::opt_filter]) |
|---|
| 96 | --(no)gs: run ghostscript (default: $bool[$::opt_gs]) |
|---|
| 97 | --(no)compress: use compression (default: $bool[$::opt_compress]) |
|---|
| 98 | --(no)hires: scan HiResBoundingBox (default: $bool[$::opt_hires]) |
|---|
| 99 | --(no)exact: scan ExactBoundingBox (default: $bool[$::opt_exact]) |
|---|
| 100 | --(no)debug: debug informations (default: $bool[$::opt_debug]) |
|---|
| 101 | Examples for producing 'test.pdf': |
|---|
| 102 | * $program test.eps |
|---|
| 103 | * produce postscript | $program --filter >test.pdf |
|---|
| 104 | * produce postscript | $program -f -d -o=test.pdf |
|---|
| 105 | Example: look for HiResBoundingBox and produce corrected PostScript: |
|---|
| 106 | * $program -d --nogs -hires test.ps>testcorr.ps |
|---|
| 107 | END_OF_USAGE |
|---|
| 108 | |
|---|
| 109 | ### process options |
|---|
| 110 | use Getopt::Long; |
|---|
| 111 | GetOptions ( |
|---|
| 112 | "help!", |
|---|
| 113 | "debug!", |
|---|
| 114 | "filter!", |
|---|
| 115 | "compress!", |
|---|
| 116 | "gs!", |
|---|
| 117 | "hires!", |
|---|
| 118 | "exact!", |
|---|
| 119 | "outfile=s", |
|---|
| 120 | ) or die $usage; |
|---|
| 121 | |
|---|
| 122 | ### help functions |
|---|
| 123 | sub debug { |
|---|
| 124 | print STDERR "* @_\n" if $::opt_debug; |
|---|
| 125 | } |
|---|
| 126 | sub warning { |
|---|
| 127 | print STDERR "==> Warning: @_!\n"; |
|---|
| 128 | } |
|---|
| 129 | sub error { |
|---|
| 130 | die "$title!!! Error: @_!\n"; |
|---|
| 131 | } |
|---|
| 132 | sub errorUsage { |
|---|
| 133 | die "$usage\n!!! Error: @_!\n"; |
|---|
| 134 | } |
|---|
| 135 | |
|---|
| 136 | ### option help |
|---|
| 137 | die $usage if $::opt_help; |
|---|
| 138 | |
|---|
| 139 | ### get input filename |
|---|
| 140 | my $InputFilename = ""; |
|---|
| 141 | if ($::opt_filter) { |
|---|
| 142 | @ARGV == 0 or |
|---|
| 143 | die errorUsage "Input file cannot be used with filter option"; |
|---|
| 144 | $InputFilename = "-"; |
|---|
| 145 | debug "Input file: standard input"; |
|---|
| 146 | } |
|---|
| 147 | else { |
|---|
| 148 | @ARGV > 0 or die errorUsage "Input filename missing"; |
|---|
| 149 | @ARGV < 2 or die errorUsage "Unknown option or too many input files"; |
|---|
| 150 | $InputFilename = $ARGV[0]; |
|---|
| 151 | -f $InputFilename or error "'$InputFilename' does not exist"; |
|---|
| 152 | debug "Input filename:", $InputFilename; |
|---|
| 153 | } |
|---|
| 154 | |
|---|
| 155 | ### option compress |
|---|
| 156 | my $GSOPTS = ""; |
|---|
| 157 | $GSOPTS = "-dUseFlateCompression=false " unless $::opt_compress; |
|---|
| 158 | |
|---|
| 159 | ### option BoundingBox types |
|---|
| 160 | my $BBName = "%%BoundingBox:"; |
|---|
| 161 | !($::opt_hires and $::opt_exact) or |
|---|
| 162 | error "Options --hires and --exact cannot be used together"; |
|---|
| 163 | $BBName = "%%HiResBoundingBox:" if $::opt_hires; |
|---|
| 164 | $BBName = "%%ExactBoundingBox:" if $::opt_exact; |
|---|
| 165 | debug "BoundingBox comment:", $BBName; |
|---|
| 166 | |
|---|
| 167 | ### option outfile |
|---|
| 168 | my $OutputFilename = $::opt_outfile; |
|---|
| 169 | if ($OutputFilename eq "") { |
|---|
| 170 | if ($::opt_gs) { |
|---|
| 171 | $OutputFilename = $InputFilename; |
|---|
| 172 | if (!$::opt_filter) { |
|---|
| 173 | $OutputFilename =~ s/\.[^\.]*$//; |
|---|
| 174 | $OutputFilename .= ".pdf"; |
|---|
| 175 | } |
|---|
| 176 | } |
|---|
| 177 | else { |
|---|
| 178 | $OutputFilename = "-"; # standard output |
|---|
| 179 | } |
|---|
| 180 | } |
|---|
| 181 | if ($::opt_filter) { |
|---|
| 182 | debug "Output file: standard output"; |
|---|
| 183 | } |
|---|
| 184 | else { |
|---|
| 185 | debug "Output filename:", $OutputFilename; |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | ### option gs |
|---|
| 189 | if ($::opt_gs) { |
|---|
| 190 | debug "Ghostscript command:", $GS; |
|---|
| 191 | debug "Compression:", ($::opt_compress) ? "on" : "off"; |
|---|
| 192 | } |
|---|
| 193 | |
|---|
| 194 | ### open input file |
|---|
| 195 | open(IN,"<$InputFilename") or error "Cannot open", |
|---|
| 196 | ($::opt_filter) ? "standard input" : "'$InputFilename'"; |
|---|
| 197 | binmode IN; |
|---|
| 198 | |
|---|
| 199 | ### open output file |
|---|
| 200 | if ($::opt_gs) { |
|---|
| 201 | my $pipe = "$GS -q -sDEVICE=pdfwrite $GSOPTS -dAutoRotatePages=/None" . |
|---|
| 202 | " -sOutputFile='$OutputFilename' - -c quit"; |
|---|
| 203 | debug "Ghostscript pipe:", $pipe; |
|---|
| 204 | open(OUT,"|$pipe") or error "Cannot open Ghostscript for piped input"; |
|---|
| 205 | } |
|---|
| 206 | else { |
|---|
| 207 | open(OUT,">$OutputFilename") or error "Cannot write '$OutputFilename"; |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | # reading a cr-eol file on a lf-eol system makes it impossible to parse |
|---|
| 211 | # the header and besides it will read the intire file into yor line by line |
|---|
| 212 | # scalar. this is also true the other way around. |
|---|
| 213 | |
|---|
| 214 | ### scan a block, try to determine eol style |
|---|
| 215 | |
|---|
| 216 | my $buf; |
|---|
| 217 | my $buflen; |
|---|
| 218 | my @bufarray; |
|---|
| 219 | my @parsedbufarray; # for mytell/myseek |
|---|
| 220 | my $bufarraypos; |
|---|
| 221 | |
|---|
| 222 | # We assume 2048 is big enough. |
|---|
| 223 | my $EOLSCANBUFSIZE = 2048; |
|---|
| 224 | |
|---|
| 225 | $buflen = read( IN, $buf, $EOLSCANBUFSIZE); |
|---|
| 226 | if ($buflen > 0) { |
|---|
| 227 | my $crlfpos; |
|---|
| 228 | my $lfpos; |
|---|
| 229 | my $crpos; |
|---|
| 230 | |
|---|
| 231 | # remove binary junk before header |
|---|
| 232 | # if there is no header, we assume the file starts with ascii style and |
|---|
| 233 | # we look for a eol style anyway, to prevent possible loading of the |
|---|
| 234 | # entire file |
|---|
| 235 | if ($buf =~ /%!/) { |
|---|
| 236 | # throw away binary junk before %! |
|---|
| 237 | $buf =~ s/(.*?)%!/%!/o; |
|---|
| 238 | } |
|---|
| 239 | $lfpos = index( $buf, "\n"); |
|---|
| 240 | $crpos = index( $buf, "\r"); |
|---|
| 241 | $crlfpos = index( $buf, "\r\n"); |
|---|
| 242 | |
|---|
| 243 | if ($crpos > 0 and ($lfpos == -1 or $lfpos > $crpos+1)) { |
|---|
| 244 | # The first eol was a cr and it was not immediately followed by a lf |
|---|
| 245 | $/ = "\r"; |
|---|
| 246 | debug "The first eol character was a CR ($crpos) and not immediately followed by a LF ($lfpos)"; |
|---|
| 247 | } |
|---|
| 248 | |
|---|
| 249 | # Now we have set the correct eol-character. Get one more line and add |
|---|
| 250 | # it to our buffer. This will make the buffer contain an entire line |
|---|
| 251 | # at the end. Then split the buffer in an array. We will draw lines from |
|---|
| 252 | # that array until it is empty, then move again back to <IN> |
|---|
| 253 | $buf .= <IN> unless eof( IN); |
|---|
| 254 | $buflen = length( $buf); |
|---|
| 255 | $bufarraypos = 0; |
|---|
| 256 | |
|---|
| 257 | # Some extra magic is needed here: if we set $/ to \r, Perl's re engine |
|---|
| 258 | # still thinks eol is \n in regular expressions (not very nice) so we |
|---|
| 259 | # cannot split on ^, but have to split on \r and reappend those. |
|---|
| 260 | if ($/ eq "\r") { |
|---|
| 261 | @bufarray = split( /\r/ms, $buf); # split on \r |
|---|
| 262 | grep( $_ .= "\r", @bufarray); # re-append \r to each array item |
|---|
| 263 | } |
|---|
| 264 | else { |
|---|
| 265 | @bufarray = split( /^/ms, $buf); |
|---|
| 266 | } |
|---|
| 267 | } |
|---|
| 268 | |
|---|
| 269 | ### getline |
|---|
| 270 | sub getline { |
|---|
| 271 | if ($#bufarray >= 0) { |
|---|
| 272 | $_ = shift( @bufarray); |
|---|
| 273 | unshift( @parsedbufarray, $_); # for myseek and mytell |
|---|
| 274 | $bufarraypos += length( $_); |
|---|
| 275 | # debug "getline from array. bufarraypos = $bufarraypos"; |
|---|
| 276 | # debug "*** READ: $_"; |
|---|
| 277 | } |
|---|
| 278 | else { |
|---|
| 279 | $_ = <IN>; |
|---|
| 280 | } |
|---|
| 281 | return( defined( $_)); |
|---|
| 282 | } |
|---|
| 283 | |
|---|
| 284 | ### mytell and myseek, work on <IN> only |
|---|
| 285 | sub mytell { |
|---|
| 286 | if ($#bufarray) { |
|---|
| 287 | # debug "Telling ARRAY position $bufarraypos"; |
|---|
| 288 | return $bufarraypos; |
|---|
| 289 | } |
|---|
| 290 | else { |
|---|
| 291 | my $pos = tell( IN); |
|---|
| 292 | # debug "Telling FILE position $pos"; |
|---|
| 293 | return $pos; |
|---|
| 294 | } |
|---|
| 295 | } |
|---|
| 296 | |
|---|
| 297 | sub myseek { |
|---|
| 298 | my $pos = shift; |
|---|
| 299 | # debug "Seeking to position $pos in input"; |
|---|
| 300 | if ($pos < $buflen) { |
|---|
| 301 | # debug "myseek position $pos < buffer size $buflen"; |
|---|
| 302 | # We were still parsing the array, reset to the end of buf and |
|---|
| 303 | # move to the right line in the array. |
|---|
| 304 | # Now, move stuff from the @parsedbufarray back until we are back at $pos |
|---|
| 305 | my $tmpline; |
|---|
| 306 | while ($bufarraypos > $pos) { |
|---|
| 307 | # debug "myseek bufarray position $bufarraypos > position $pos"; |
|---|
| 308 | # we test on parsedbufarray to prevent an infinite loop on |
|---|
| 309 | # a programming error (DEVELOP only) |
|---|
| 310 | die "Programming error 1\n" unless $#parsedbufarray; |
|---|
| 311 | $tmpline = shift( @parsedbufarray); |
|---|
| 312 | $bufarraypos -= length( $tmpline); |
|---|
| 313 | unshift( @bufarray, $tmpline); |
|---|
| 314 | debug "*** UNREAD: $tmpline"; |
|---|
| 315 | } |
|---|
| 316 | # debug "Returning to ARRAY size position $buflen (bufarraypos = $bufarraypos)"; |
|---|
| 317 | return seek( IN, $buflen, 0); |
|---|
| 318 | } |
|---|
| 319 | else { |
|---|
| 320 | # debug "Seeking to FILE position $pos"; |
|---|
| 321 | return seek( IN, $pos, 0); |
|---|
| 322 | } |
|---|
| 323 | } |
|---|
| 324 | |
|---|
| 325 | ### scan first line |
|---|
| 326 | my $header = 0; |
|---|
| 327 | getline(); |
|---|
| 328 | if (/%!/) { |
|---|
| 329 | # throw away binary junk before %! |
|---|
| 330 | s/(.*)%!/%!/o; |
|---|
| 331 | } |
|---|
| 332 | $header = 1 if /^%/; |
|---|
| 333 | debug "Scanning header for BoundingBox"; |
|---|
| 334 | print OUT; |
|---|
| 335 | |
|---|
| 336 | ### variables and pattern for BoundingBox search |
|---|
| 337 | my $bbxpatt = '[0-9eE\.\-]'; |
|---|
| 338 | # protect backslashes: "\\" gets '\' |
|---|
| 339 | my $BBValues = "\\s*($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)"; |
|---|
| 340 | my $BBCorrected = 0; |
|---|
| 341 | |
|---|
| 342 | sub CorrectBoundingBox { |
|---|
| 343 | my ($llx, $lly, $urx, $ury) = @_; |
|---|
| 344 | debug "Old BoundingBox:", $llx, $lly, $urx, $ury; |
|---|
| 345 | my ($width, $height) = ($urx - $llx, $ury - $lly); |
|---|
| 346 | my ($xoffset, $yoffset) = (-$llx, -$lly); |
|---|
| 347 | debug "New BoundingBox: 0 0", $width, $height; |
|---|
| 348 | debug "Offset:", $xoffset, $yoffset; |
|---|
| 349 | |
|---|
| 350 | print OUT "%%BoundingBox: 0 0 $width $height\n"; |
|---|
| 351 | print OUT "<< /PageSize [$width $height] >> setpagedevice\n"; |
|---|
| 352 | print OUT "gsave $xoffset $yoffset translate\n"; |
|---|
| 353 | } |
|---|
| 354 | |
|---|
| 355 | ### scan header |
|---|
| 356 | if ($header) { |
|---|
| 357 | HEADER: while (getline()) { |
|---|
| 358 | ### Fix for freehand bug ### by Peder Axensten |
|---|
| 359 | next HEADER if(!/\S/); |
|---|
| 360 | |
|---|
| 361 | ### end of header |
|---|
| 362 | if (!/^%/ or /^%%EndComments/) { |
|---|
| 363 | print OUT; |
|---|
| 364 | last; |
|---|
| 365 | } |
|---|
| 366 | |
|---|
| 367 | ### BoundingBox with values |
|---|
| 368 | if (/^$BBName$BBValues/) { |
|---|
| 369 | CorrectBoundingBox $1, $2, $3, $4; |
|---|
| 370 | $BBCorrected = 1; |
|---|
| 371 | last; |
|---|
| 372 | } |
|---|
| 373 | |
|---|
| 374 | ### BoundingBox with (atend) |
|---|
| 375 | if (/^$BBName\s*\(atend\)/) { |
|---|
| 376 | debug $BBName, "(atend)"; |
|---|
| 377 | if ($::opt_filter) { |
|---|
| 378 | warning "Cannot look for BoundingBox in the trailer", |
|---|
| 379 | "with option --filter"; |
|---|
| 380 | last; |
|---|
| 381 | } |
|---|
| 382 | my $pos = mytell(); |
|---|
| 383 | debug "Current file position:", $pos; |
|---|
| 384 | |
|---|
| 385 | # looking for %%BoundingBox |
|---|
| 386 | while (getline()) { |
|---|
| 387 | # skip over included documents |
|---|
| 388 | if (/^%%BeginDocument/) { |
|---|
| 389 | while (getline()) { |
|---|
| 390 | last if /^%%EndDocument/; |
|---|
| 391 | } |
|---|
| 392 | } |
|---|
| 393 | if (/^$BBName$BBValues/) { |
|---|
| 394 | CorrectBoundingBox $1, $2, $3, $4; |
|---|
| 395 | $BBCorrected = 1; |
|---|
| 396 | last; |
|---|
| 397 | } |
|---|
| 398 | } |
|---|
| 399 | |
|---|
| 400 | # go back |
|---|
| 401 | myseek( $pos) or error "Cannot go back to line '$BBName (atend)'"; |
|---|
| 402 | last; |
|---|
| 403 | } |
|---|
| 404 | |
|---|
| 405 | # print header line |
|---|
| 406 | print OUT; |
|---|
| 407 | } |
|---|
| 408 | } |
|---|
| 409 | |
|---|
| 410 | ### print rest of file |
|---|
| 411 | while (getline()) { |
|---|
| 412 | print OUT; |
|---|
| 413 | } |
|---|
| 414 | |
|---|
| 415 | ### close files |
|---|
| 416 | close(IN); |
|---|
| 417 | print OUT "\ngrestore\n" if $BBCorrected; |
|---|
| 418 | close(OUT); |
|---|
| 419 | warning "BoundingBox not found" unless $BBCorrected; |
|---|
| 420 | debug "Ready."; |
|---|
| 421 | ; |
|---|