pdumper.c 174 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* Copyright (C) 2018-2019 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */

Daniel Colascione's avatar
Daniel Colascione committed
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
#include <config.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "blockinput.h"
#include "buffer.h"
#include "charset.h"
#include "coding.h"
#include "fingerprint.h"
#include "frame.h"
#include "getpagesize.h"
#include "intervals.h"
#include "lisp.h"
#include "pdumper.h"
#include "window.h"
#include "systime.h"
#include "thread.h"
#include "bignum.h"

Paul Eggert's avatar
Paul Eggert committed
49 50 51 52
#ifdef CHECK_STRUCTS
# include "dmpstruct.h"
#endif

Daniel Colascione's avatar
Daniel Colascione committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
/*
  TODO:

  - Two-pass dumping: first assemble object list, then write all.
    This way, we can perform arbitrary reordering or maybe use fancy
    graph algorithms to get better locality.

  - Don't emit relocations that happen to set Emacs memory locations
    to values they will already have.

  - Nullify frame_and_buffer_state.

  - Preferred base address for relocation-free non-PIC startup.

  - Compressed dump support.

*/

#ifdef HAVE_PDUMPER

73
#if GNUC_PREREQ (4, 7, 0)
Daniel Colascione's avatar
Daniel Colascione committed
74
# pragma GCC diagnostic error "-Wconversion"
75
# pragma GCC diagnostic ignored "-Wsign-conversion"
Daniel Colascione's avatar
Daniel Colascione committed
76 77 78 79 80 81 82
# pragma GCC diagnostic error "-Wshadow"
# define ALLOW_IMPLICIT_CONVERSION                       \
  _Pragma ("GCC diagnostic push")                        \
  _Pragma ("GCC diagnostic ignored \"-Wconversion\"")
# define DISALLOW_IMPLICIT_CONVERSION \
  _Pragma ("GCC diagnostic pop")
#else
83 84
# define ALLOW_IMPLICIT_CONVERSION ((void) 0)
# define DISALLOW_IMPLICIT_CONVERSION ((void) 0)
Daniel Colascione's avatar
Daniel Colascione committed
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
#endif

#define VM_POSIX 1
#define VM_MS_WINDOWS 2

#if defined (HAVE_MMAP) && defined (MAP_FIXED)
# define VM_SUPPORTED VM_POSIX
# if !defined (MAP_POPULATE) && defined (MAP_PREFAULT_READ)
#  define MAP_POPULATE MAP_PREFAULT_READ
# elif !defined (MAP_POPULATE)
#  define MAP_POPULATE 0
# endif
#elif defined (WINDOWSNT)
  /* Use a float infinity, to avoid compiler warnings in comparing vs
     candidates' score.  */
# undef INFINITY
# define INFINITY __builtin_inff ()
# include <windows.h>
# define VM_SUPPORTED VM_MS_WINDOWS
#else
# define VM_SUPPORTED 0
#endif

#define DANGEROUS 0

/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
   check, for each hash table it dumps, that the hash table means the
   same thing after rehashing.  */
#ifndef PDUMPER_CHECK_REHASHING
# if ENABLE_CHECKING
#  define PDUMPER_CHECK_REHASHING 1
# else
#  define PDUMPER_CHECK_REHASHING 0
# endif
#endif

/* We require an architecture in which all pointers are the same size
   and have the same layout, where pointers are either 32 or 64 bits
   long, and where bytes have eight bits --- that is, a
   general-purpose computer made after 1990.  */
125
verify (sizeof (ptrdiff_t) == sizeof (void *));
Daniel Colascione's avatar
Daniel Colascione committed
126
verify (sizeof (intptr_t) == sizeof (ptrdiff_t));
127
verify (sizeof (void (*) (void)) == sizeof (void *));
Daniel Colascione's avatar
Daniel Colascione committed
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
verify (sizeof (ptrdiff_t) <= sizeof (Lisp_Object));
verify (sizeof (ptrdiff_t) <= sizeof (EMACS_INT));
verify (CHAR_BIT == 8);

#define DIVIDE_ROUND_UP(x, y) (((x) + (y) - 1) / (y))

static const char dump_magic[16] = {
  'D', 'U', 'M', 'P', 'E', 'D',
  'G', 'N', 'U',
  'E', 'M', 'A', 'C', 'S'
};

static pdumper_hook dump_hooks[24];
static int nr_dump_hooks = 0;

static struct
{
  void *mem;
  int sz;
} remembered_data[32];
static int nr_remembered_data = 0;

150 151 152
typedef int_least32_t dump_off;
#define DUMP_OFF_MIN INT_LEAST32_MIN
#define DUMP_OFF_MAX INT_LEAST32_MAX
Daniel Colascione's avatar
Daniel Colascione committed
153

154
static void ATTRIBUTE_FORMAT ((printf, 1, 2))
Daniel Colascione's avatar
Daniel Colascione committed
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
dump_trace (const char *fmt, ...)
{
  if (0)
    {
      va_list args;
      va_start (args, fmt);
      vfprintf (stderr, fmt, args);
      va_end (args);
    }
}

static ssize_t dump_read_all (int fd, void *buf, size_t bytes_to_read);

static dump_off
ptrdiff_t_to_dump_off (ptrdiff_t value)
{
  eassert (DUMP_OFF_MIN <= value);
  eassert (value <= DUMP_OFF_MAX);
  return (dump_off) value;
}

/* Worst-case allocation granularity on any system that might load
   this dump.  */
static int
dump_get_page_size (void)
{
#if defined (WINDOWSNT) || defined (CYGWIN)
  return 64 * 1024;  /* Worst-case allocation granularity.  */
#else
  return getpagesize ();
#endif
}

#define dump_offsetof(type, member)                             \
  (ptrdiff_t_to_dump_off (offsetof (type, member)))

enum dump_reloc_type
  {
    /* dump_ptr = dump_ptr + emacs_basis()  */
    RELOC_DUMP_TO_EMACS_PTR_RAW,
    /* dump_ptr = dump_ptr + dump_base  */
    RELOC_DUMP_TO_DUMP_PTR_RAW,
    /* dump_mpz = [rebuild bignum]  */
    RELOC_BIGNUM,
199 200
    /* dump_lv = make_lisp_ptr (dump_lv + dump_base,
				type - RELOC_DUMP_TO_DUMP_LV)
Daniel Colascione's avatar
Daniel Colascione committed
201 202 203
       (Special case for symbols: make_lisp_symbol)
       Must be second-last.  */
    RELOC_DUMP_TO_DUMP_LV,
204 205
    /* dump_lv = make_lisp_ptr (dump_lv + emacs_basis(),
				type - RELOC_DUMP_TO_DUMP_LV)
Daniel Colascione's avatar
Daniel Colascione committed
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
       (Special case for symbols: make_lisp_symbol.)
       Must be last.  */
    RELOC_DUMP_TO_EMACS_LV = RELOC_DUMP_TO_DUMP_LV + 8,
  };

enum emacs_reloc_type
  {
    /* Copy raw bytes from the dump into Emacs.  The length field in
       the emacs_reloc is the number of bytes to copy.  */
    RELOC_EMACS_COPY_FROM_DUMP,
    /* Set a piece of memory in Emacs to a value we store directly in
       this relocation.  The length field contains the number of bytes
       we actually copy into Emacs.  */
    RELOC_EMACS_IMMEDIATE,
    /* Set an aligned pointer-sized object in Emacs to a pointer into
       the loaded dump at the given offset.  The length field is
       always the machine word size.  */
    RELOC_EMACS_DUMP_PTR_RAW,
    /* Set an aligned pointer-sized object in Emacs to point to
       something also in Emacs.  The length field is always
       the machine word size.  */
    RELOC_EMACS_EMACS_PTR_RAW,
    /* Set an aligned Lisp_Object in Emacs to point to a value in the
       dump.  The length field is the _tag type_ of the Lisp_Object,
       not a byte count!  */
    RELOC_EMACS_DUMP_LV,
    /* Set an aligned Lisp_Object in Emacs to point to a value in the
       Emacs image.  The length field is the _tag type_ of the
       Lisp_Object, not a byte count!  */
    RELOC_EMACS_EMACS_LV,
  };

#define EMACS_RELOC_TYPE_BITS 3
#define EMACS_RELOC_LENGTH_BITS                         \
  (sizeof (dump_off) * CHAR_BIT - EMACS_RELOC_TYPE_BITS)

struct emacs_reloc
{
  ENUM_BF (emacs_reloc_type) type : EMACS_RELOC_TYPE_BITS;
  dump_off length : EMACS_RELOC_LENGTH_BITS;
  dump_off emacs_offset;
  union
  {
    dump_off dump_offset;
    dump_off emacs_offset2;
    intmax_t immediate;
  } u;
};

/* Set the type of an Emacs relocation.

   Also make sure that the type fits in the bitfield.  */
static void
emacs_reloc_set_type (struct emacs_reloc *reloc,
                      enum emacs_reloc_type type)
{
  reloc->type = type;
  eassert (reloc->type == type);
}

struct dump_table_locator
{
  /* Offset in dump, in bytes, of the first entry in the dump
     table.  */
  dump_off offset;
  /* Number of entries in the dump table.  We need an explicit end
     indicator (as opposed to a special sentinel) so we can efficiently
     binary search over the relocation entries.  */
  dump_off nr_entries;
};

#define DUMP_RELOC_TYPE_BITS 5
verify (RELOC_DUMP_TO_EMACS_LV + 8 < (1 << DUMP_RELOC_TYPE_BITS));

#define DUMP_RELOC_ALIGNMENT_BITS 2
#define DUMP_RELOC_OFFSET_BITS                          \
  (sizeof (dump_off) * CHAR_BIT - DUMP_RELOC_TYPE_BITS)

/* Minimum alignment required by dump file format.  */
#define DUMP_RELOCATION_ALIGNMENT (1<<DUMP_RELOC_ALIGNMENT_BITS)

/* The alignment granularity (in bytes) for objects we store in the
   dump.  Always suitable for heap objects; may be more aligned.  */
#define DUMP_ALIGNMENT (max (GCALIGNMENT, DUMP_RELOCATION_ALIGNMENT))
verify (DUMP_ALIGNMENT >= GCALIGNMENT);

struct dump_reloc
{
294
  unsigned int raw_offset : DUMP_RELOC_OFFSET_BITS;
Daniel Colascione's avatar
Daniel Colascione committed
295 296
  ENUM_BF (dump_reloc_type) type : DUMP_RELOC_TYPE_BITS;
};
297
verify (sizeof (struct dump_reloc) == sizeof (dump_off));
Daniel Colascione's avatar
Daniel Colascione committed
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325

/* Set the type of a dump relocation.

   Also assert that the type fits in the bitfield.  */
static void
dump_reloc_set_type (struct dump_reloc *reloc, enum dump_reloc_type type)
{
  reloc->type = type;
  eassert (reloc->type == type);
}

static dump_off
dump_reloc_get_offset (struct dump_reloc reloc)
{
  return reloc.raw_offset << DUMP_RELOC_ALIGNMENT_BITS;
}

static void
dump_reloc_set_offset (struct dump_reloc *reloc, dump_off offset)
{
  eassert (offset >= 0);
  ALLOW_IMPLICIT_CONVERSION;
  reloc->raw_offset = offset >> DUMP_RELOC_ALIGNMENT_BITS;
  DISALLOW_IMPLICIT_CONVERSION;
  if (dump_reloc_get_offset (*reloc) != offset)
    error ("dump relocation out of range");
}

326
static void
327
dump_fingerprint (const char *label, unsigned char const *xfingerprint)
328
{
Daniel Colascione's avatar
Daniel Colascione committed
329
  fprintf (stderr, "%s: ", label);
330
  for (int i = 0; i < 32; ++i)
Daniel Colascione's avatar
Daniel Colascione committed
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
    fprintf (stderr, "%02x", (unsigned) xfingerprint[i]);
  fprintf (stderr, "\n");
}

/* Format of an Emacs portable dump file.  All offsets are relative to
   the beginning of the file.  An Emacs portable dump file is coupled
   to exactly the Emacs binary that produced it, so details of
   alignment and endianness are unimportant.

   An Emacs dump file contains the contents of the Lisp heap.
   On startup, Emacs can start faster by mapping a dump file into
   memory and using the objects contained inside it instead of
   performing initialization from scratch.

   The dump file can be loaded at arbitrary locations in memory, so it
   includes a table of relocations that let Emacs adjust the pointers
   embedded in the dump file to account for the location where it was
   actually loaded.

   Dump files can contain pointers to other objects in the dump file
   or to parts of the Emacs binary.  */
struct dump_header
{
  /* File type magic.  */
  char magic[sizeof (dump_magic)];

  /* Associated Emacs binary.  */
358
  unsigned char fingerprint[32];
Daniel Colascione's avatar
Daniel Colascione committed
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486

  /* Relocation table for the dump file; each entry is a
     struct dump_reloc.  */
  struct dump_table_locator dump_relocs;

  /* "Relocation" table we abuse to hold information about the
     location and type of each lisp object in the dump.  We need for
     pdumper_object_type and ultimately for conservative GC
     correctness.  */
  struct dump_table_locator object_starts;

  /* Relocation table for Emacs; each entry is a struct
     emacs_reloc.  */
  struct dump_table_locator emacs_relocs;

  /* Start of sub-region of hot region that we can discard after load
     completes.  The discardable region ends at cold_start.

     This region contains objects that we copy into the Emacs image at
     dump-load time.  */
  dump_off discardable_start;

  /* Start of the region that does not require relocations and that we
     expect never to be modified.  This region can be memory-mapped
     directly from the backing dump file with the reasonable
     expectation of taking few copy-on-write faults.

     For correctness, however, this region must be modifible, since in
     rare cases it is possible to see modifications to these bytes.
     For example, this region contains string data, and it's
     technically possible for someone to ASET a string character
     (although nobody tends to do that).

     The start of the cold region is always aligned on a page
     boundary.  */
  dump_off cold_start;
};

/* Double-ended singly linked list.  */
struct dump_tailq
{
  Lisp_Object head;
  Lisp_Object tail;
  intptr_t length;
};

/* Queue of objects to dump.  */
struct dump_queue
{
  /* Objects with no link weights at all.  Kept in dump order.  */
  struct dump_tailq zero_weight_objects;
  /* Objects with simple link weight: just one entry of type
     WEIGHT_NORMAL.  Score in this special case is non-decreasing as
     position increases, so we can avoid the need to rescan a big list
     for each object by storing these objects in order.  */
  struct dump_tailq one_weight_normal_objects;
  /* Likewise, for objects with one WEIGHT_STRONG weight.  */
  struct dump_tailq one_weight_strong_objects;
  /* List of objects with complex link weights --- i.e., not one of
     the above cases.  Order is irrelevant, since we scan the whole
     list every time.  Relatively few objects end up here.  */
  struct dump_tailq fancy_weight_objects;
  /* Hash table of link weights: maps an object to a list of zero or
     more (BASIS . WEIGHT) pairs.  As a special case, an object with
     zero weight is marked by Qt in the hash table --- this way, we
     can distinguish objects we've seen but that have no weight from
     ones that we haven't seen at all.  */
  Lisp_Object link_weights;
  /* Hash table mapping object to a sequence number --- used to
     resolve ties.  */
  Lisp_Object sequence_numbers;
  dump_off next_sequence_number;
};

enum cold_op
  {
    COLD_OP_OBJECT,
    COLD_OP_STRING,
    COLD_OP_CHARSET,
    COLD_OP_BUFFER,
    COLD_OP_BIGNUM,
  };

/* This structure controls what operations we perform inside
   dump_object.  */
struct dump_flags
{
  /* Actually write object contents to the dump.  Without this flag
     set, we still scan objects and enqueue pointed-to objects; making
     this flag false is useful when we want to process an object's
     referents normally, but dump an object itself separately,
     later.  */
  bool_bf dump_object_contents : 1;
  /* Record object starts. We turn this flag off when writing to the
     discardable section so that we don't trick conservative GC into
     thinking we have objects there.  Ignored (we never record object
     starts) if dump_object_contents is false.  */
  bool_bf record_object_starts : 1;
  /* Pack objects tighter than GC memory alignment would normally
     require.  Useful for objects copied into the Emacs image instead
     of used directly from the loaded dump.
  */
  bool_bf pack_objects : 1;
  /* Sometimes we dump objects that we've already scanned for outbound
     references to other objects.  These objects should not cause new
     objects to enter the object dumping queue.  This flag causes Emacs
     to assert that no new objects are enqueued while dumping.  */
  bool_bf assert_already_seen : 1;
  /* Punt on unstable hash tables: defer them to ctx->deferred_hash_tables.  */
  bool_bf defer_hash_tables : 1;
  /* Punt on symbols: defer them to ctx->deferred_symbols.  */
  bool_bf defer_symbols : 1;
  /* Punt on cold objects: defer them to ctx->cold_queue.  */
  bool_bf defer_cold_objects : 1;
  /* Punt on copied objects: defer them to ctx->copied_queue.  */
  bool_bf defer_copied_objects : 1;
};

/* Information we use while we dump.  Note that we're not the garbage
   collector and can operate under looser constraints: specifically,
   we allocate memory during the dumping process.  */
struct dump_context
{
  /* Header we'll write to the dump file when done.  */
  struct dump_header header;

  Lisp_Object old_purify_flag;
  Lisp_Object old_post_gc_hook;
487
  Lisp_Object old_process_environment;
Daniel Colascione's avatar
Daniel Colascione committed
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595

#ifdef REL_ALLOC
  bool blocked_ralloc;
#endif

  /* File descriptor for dumpfile; < 0 if closed.  */
  int fd;
  /* Name of dump file --- used for error reporting.  */
  Lisp_Object dump_filename;
  /* Current offset in dump file.  */
  dump_off offset;

  /* Starting offset of current object.  */
  dump_off obj_offset;

  /* Flags currently in effect for dumping.  */
  struct dump_flags flags;

  dump_off end_heap;

  /* Hash mapping objects we've already dumped to their offsets.  */
  Lisp_Object objects_dumped;

  /* Hash mapping objects to where we got them.  Used for debugging.  */
  Lisp_Object referrers;
  Lisp_Object current_referrer;
  bool have_current_referrer;

  /* Queue of objects to dump.  */
  struct dump_queue dump_queue;

  /* Deferred object lists.  */
  Lisp_Object deferred_hash_tables;
  Lisp_Object deferred_symbols;

  /* Fixups in the dump file.  */
  Lisp_Object fixups;

  /* Hash table of staticpro values: avoids double relocations.  */
  Lisp_Object staticpro_table;

  /* Hash table mapping symbols to their pre-copy-queue fwd or blv
     structures (which we dump immediately before the start of the
     discardable section). */
  Lisp_Object symbol_aux;
  /* Queue of copied objects for special treatment.  */
  Lisp_Object copied_queue;
  /* Queue of cold objects to dump.  */
  Lisp_Object cold_queue;

  /* Relocations in the dump.  */
  Lisp_Object dump_relocs;

  /* Object starts.  */
  Lisp_Object object_starts;

  /* Relocations in Emacs.  */
  Lisp_Object emacs_relocs;

  /* Hash table mapping bignums to their _data_ blobs, which we store
     in the cold section.  The actual Lisp_Bignum objects are normal
     heap objects.  */
  Lisp_Object bignum_data;

  unsigned number_hot_relocations;
  unsigned number_discardable_relocations;
};

/* These special values for use as offsets in dump_remember_object and
   dump_recall_object indicate that the corresponding object isn't in
   the dump yet (and so it has no valid offset), but that it's on one
   of our to-be-dumped-later object queues (or that we haven't seen it
   at all).  All values must be non-positive, since positive values
   are physical dump offsets.  */
enum dump_object_special_offset
  {
   DUMP_OBJECT_IS_RUNTIME_MAGIC = -6,
   DUMP_OBJECT_ON_COPIED_QUEUE = -5,
   DUMP_OBJECT_ON_HASH_TABLE_QUEUE = -4,
   DUMP_OBJECT_ON_SYMBOL_QUEUE = -3,
   DUMP_OBJECT_ON_COLD_QUEUE = -2,
   DUMP_OBJECT_ON_NORMAL_QUEUE = -1,
   DUMP_OBJECT_NOT_SEEN = 0,
  };

/* Weights for score scores for object non-locality.  */
enum link_weight_enum
  {
    WEIGHT_NONE_VALUE = 0,
    WEIGHT_NORMAL_VALUE = 1000,
    WEIGHT_STRONG_VALUE = 1200,
  };

struct link_weight
{
  /* Wrapped in a struct to break unwanted implicit conversion.  */
  enum link_weight_enum value;
};

#define LINK_WEIGHT_LITERAL(x) ((struct link_weight){.value=(x)})
#define WEIGHT_NONE LINK_WEIGHT_LITERAL (WEIGHT_NONE_VALUE)
#define WEIGHT_NORMAL LINK_WEIGHT_LITERAL (WEIGHT_NORMAL_VALUE)
#define WEIGHT_STRONG LINK_WEIGHT_LITERAL (WEIGHT_STRONG_VALUE)


/* Dump file creation */

static dump_off dump_object (struct dump_context *ctx, Lisp_Object object);
596 597
static dump_off dump_object_for_offset (struct dump_context *ctx,
					Lisp_Object object);
Daniel Colascione's avatar
Daniel Colascione committed
598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666

/* Like the Lisp function `push'.  Return NEWELT.  */
static Lisp_Object
dump_push (Lisp_Object *where, Lisp_Object newelt)
{
  *where = Fcons (newelt, *where);
  return newelt;
}

/* Like the Lisp function `pop'.  */
static Lisp_Object
dump_pop (Lisp_Object *where)
{
  Lisp_Object ret = XCAR (*where);
  *where = XCDR (*where);
  return ret;
}

static bool
dump_tracking_referrers_p (struct dump_context *ctx)
{
  return !NILP (ctx->referrers);
}

static void
dump_set_have_current_referrer (struct dump_context *ctx, bool have)
{
#ifdef ENABLE_CHECKING
  ctx->have_current_referrer = have;
#endif
}

/* Remember the reason objects are enqueued.

   Until DUMP_CLEAR_REFERRER is called, any objects enqueued are being
   enqueued because OBJECT refers to them.  It is not legal to enqueue
   objects without a referer set.  We check this constraint
   at runtime.

   It is illegal to call DUMP_SET_REFERRER twice without an
   intervening call to DUMP_CLEAR_REFERRER.

   Define as a macro so we can avoid evaluating OBJECT
   if we dont want referrer tracking.  */
#define DUMP_SET_REFERRER(ctx, object)                   \
  do                                                     \
    {                                                    \
      struct dump_context *_ctx = (ctx);                 \
      eassert (!_ctx->have_current_referrer);            \
      dump_set_have_current_referrer (_ctx, true);       \
      if (dump_tracking_referrers_p (_ctx))              \
        ctx->current_referrer = (object);                \
    }                                                    \
  while (0)

/* Unset the referer that DUMP_SET_REFERRER set.

   Named with upper-case letters for symmetry with
   DUMP_SET_REFERRER.  */
static void
DUMP_CLEAR_REFERRER (struct dump_context *ctx)
{
  eassert (ctx->have_current_referrer);
  dump_set_have_current_referrer (ctx, false);
  if (dump_tracking_referrers_p (ctx))
    ctx->current_referrer = Qnil;
}

static Lisp_Object
667
dump_ptr_referrer (const char *label, void const *address)
Daniel Colascione's avatar
Daniel Colascione committed
668 669 670 671 672 673 674 675 676 677 678 679 680 681
{
  char buf[128];
  buf[0] = '\0';
  sprintf (buf, "%s @ %p", label, address);
  return build_string (buf);
}

static void
print_paths_to_root (struct dump_context *ctx, Lisp_Object object);

static void dump_remember_cold_op (struct dump_context *ctx,
                                   enum cold_op op,
                                   Lisp_Object arg);

682
static AVOID
Daniel Colascione's avatar
Daniel Colascione committed
683 684
error_unsupported_dump_object (struct dump_context *ctx,
                               Lisp_Object object,
685
			       const char *msg)
Daniel Colascione's avatar
Daniel Colascione committed
686 687 688 689 690 691 692 693 694 695 696 697 698
{
  if (dump_tracking_referrers_p (ctx))
    print_paths_to_root (ctx, object);
  error ("unsupported object type in dump: %s", msg);
}

static uintptr_t
emacs_basis (void)
{
  return (uintptr_t) &Vpurify_flag;
}

static void *
699
emacs_ptr_at (const ptrdiff_t offset)
Daniel Colascione's avatar
Daniel Colascione committed
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
{
  /* TODO: assert somehow that the result is actually in the Emacs
     image.  */
  return (void *) (emacs_basis () + offset);
}

static dump_off
emacs_offset (const void *emacs_ptr)
{
  /* TODO: assert that EMACS_PTR is actually in the Emacs image.  */
  eassert (emacs_ptr != NULL);
  intptr_t emacs_ptr_value = (intptr_t) emacs_ptr;
  ptrdiff_t emacs_ptr_relative = emacs_ptr_value - (intptr_t) emacs_basis ();
  return ptrdiff_t_to_dump_off (emacs_ptr_relative);
}

/* Return whether OBJECT is a symbol the storage of which is built
   into Emacs (and so is invariant across ASLR).  */
static bool
dump_builtin_symbol_p (Lisp_Object object)
{
  if (!SYMBOLP (object))
    return false;
723
  char *bp = (char *) lispsym;
Daniel Colascione's avatar
Daniel Colascione committed
724
  struct Lisp_Symbol *s = XSYMBOL (object);
725
  char *sp = (char *) s;
Daniel Colascione's avatar
Daniel Colascione committed
726 727 728 729 730 731 732 733 734 735
  return bp <= sp && sp < bp + sizeof (lispsym);
}

/* Return whether OBJECT has the same bit pattern in all Emacs
   invocations --- i.e., is invariant across a dump.  Note that some
   self-representing objects still need to be dumped!
*/
static bool
dump_object_self_representing_p (Lisp_Object object)
{
736
  return FIXNUMP (object) || dump_builtin_symbol_p (object);
Daniel Colascione's avatar
Daniel Colascione committed
737 738 739 740 741 742 743 744 745 746
}

#define DEFINE_FROMLISP_FUNC(fn, type)          \
  static type                                   \
  fn (Lisp_Object value)                        \
  {                                             \
    ALLOW_IMPLICIT_CONVERSION;                  \
    if (FIXNUMP (value))                        \
      return XFIXNUM (value);                   \
    eassert (BIGNUMP (value));                  \
747 748 749 750 751
    type result;				\
    if (TYPE_SIGNED (type))			\
      result = bignum_to_intmax (value);	\
    else					\
      result = bignum_to_uintmax (value);	\
Daniel Colascione's avatar
Daniel Colascione committed
752
    DISALLOW_IMPLICIT_CONVERSION;               \
753
    return result;				\
Daniel Colascione's avatar
Daniel Colascione committed
754 755 756 757 758 759 760 761 762
  }

#define DEFINE_TOLISP_FUNC(fn, type) \
  static Lisp_Object                 \
  fn (type value)                    \
  {                                  \
    return INT_TO_INTEGER (value);   \
  }

763 764 765 766
DEFINE_FROMLISP_FUNC (intmax_t_from_lisp, intmax_t)
DEFINE_TOLISP_FUNC (intmax_t_to_lisp, intmax_t)
DEFINE_FROMLISP_FUNC (dump_off_from_lisp, dump_off)
DEFINE_TOLISP_FUNC (dump_off_to_lisp, dump_off)
Daniel Colascione's avatar
Daniel Colascione committed
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797

static void
dump_write (struct dump_context *ctx, const void *buf, dump_off nbyte)
{
  eassert (nbyte == 0 || buf != NULL);
  eassert (ctx->obj_offset == 0);
  eassert (ctx->flags.dump_object_contents);
  if (emacs_write (ctx->fd, buf, nbyte) < nbyte)
    report_file_error ("Could not write to dump file", ctx->dump_filename);
  ctx->offset += nbyte;
}

static Lisp_Object
make_eq_hash_table (void)
{
  return CALLN (Fmake_hash_table, QCtest, Qeq);
}

static void
dump_tailq_init (struct dump_tailq *tailq)
{
  tailq->head = tailq->tail = Qnil;
  tailq->length = 0;
}

static intptr_t
dump_tailq_length (const struct dump_tailq *tailq)
{
  return tailq->length;
}

798
static void ATTRIBUTE_UNUSED
Daniel Colascione's avatar
Daniel Colascione committed
799 800 801 802 803 804 805 806 807
dump_tailq_prepend (struct dump_tailq *tailq, Lisp_Object value)
{
  Lisp_Object link = Fcons (value, tailq->head);
  tailq->head = link;
  if (NILP (tailq->tail))
    tailq->tail = link;
  tailq->length += 1;
}

808
static void ATTRIBUTE_UNUSED
Daniel Colascione's avatar
Daniel Colascione committed
809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
dump_tailq_append (struct dump_tailq *tailq, Lisp_Object value)
{
  Lisp_Object link = Fcons (value, Qnil);
  if (NILP (tailq->head))
    {
      eassert (NILP (tailq->tail));
      tailq->head = tailq->tail = link;
    }
  else
    {
      eassert (!NILP (tailq->tail));
      XSETCDR (tailq->tail, link);
      tailq->tail = link;
    }
  tailq->length += 1;
}

static bool
dump_tailq_empty_p (struct dump_tailq *tailq)
{
  return NILP (tailq->head);
}

static Lisp_Object
dump_tailq_peek (struct dump_tailq *tailq)
{
  eassert (!dump_tailq_empty_p (tailq));
  return XCAR (tailq->head);
}

static Lisp_Object
dump_tailq_pop (struct dump_tailq *tailq)
{
  eassert (!dump_tailq_empty_p (tailq));
  eassert (tailq->length > 0);
  tailq->length -= 1;
  Lisp_Object value = XCAR (tailq->head);
  tailq->head = XCDR (tailq->head);
  if (NILP (tailq->head))
    tailq->tail = Qnil;
  return value;
}

static void
dump_seek (struct dump_context *ctx, dump_off offset)
{
  eassert (ctx->obj_offset == 0);
  if (lseek (ctx->fd, offset, SEEK_SET) < 0)
    report_file_error ("Setting file position",
                       ctx->dump_filename);
  ctx->offset = offset;
}

static void
dump_write_zero (struct dump_context *ctx, dump_off nbytes)
{
  while (nbytes > 0)
    {
      uintmax_t zero = 0;
      dump_off to_write = sizeof (zero);
      if (to_write > nbytes)
        to_write = nbytes;
      dump_write (ctx, &zero, to_write);
      nbytes -= to_write;
    }
}

static void
dump_align_output (struct dump_context *ctx, int alignment)
{
  if (ctx->offset % alignment != 0)
    dump_write_zero (ctx, alignment - (ctx->offset % alignment));
}

static dump_off
dump_object_start (struct dump_context *ctx,
                   void *out,
                   dump_off outsz)
{
  /* We dump only one object at a time, so obj_offset should be
     invalid on entry to this function.  */
  eassert (ctx->obj_offset == 0);
  int alignment = ctx->flags.pack_objects ? 1 : DUMP_ALIGNMENT;
  if (ctx->flags.dump_object_contents)
    dump_align_output (ctx, alignment);
  ctx->obj_offset = ctx->offset;
  memset (out, 0, outsz);
  return ctx->offset;
}

static dump_off
dump_object_finish (struct dump_context *ctx,
                    const void *out,
                    dump_off sz)
{
  dump_off offset = ctx->obj_offset;
  eassert (offset > 0);
  eassert (offset == ctx->offset); /* No intervening writes.  */
  ctx->obj_offset = 0;
  if (ctx->flags.dump_object_contents)
    dump_write (ctx, out, sz);
  return offset;
}

/* Return offset at which OBJECT has been dumped, or one of the dump_object_special_offset
   negative values, or DUMP_OBJECT_NOT_SEEN.  */
static dump_off
dump_recall_object (struct dump_context *ctx, Lisp_Object object)
{
  Lisp_Object dumped = ctx->objects_dumped;
  return dump_off_from_lisp (Fgethash (object, dumped,
                                       make_fixnum (DUMP_OBJECT_NOT_SEEN)));
}

static void
dump_remember_object (struct dump_context *ctx,
                      Lisp_Object object,
                      dump_off offset)
{
  Fputhash (object,
            dump_off_to_lisp (offset),
            ctx->objects_dumped);
}

static void
dump_note_reachable (struct dump_context *ctx, Lisp_Object object)
{
  eassert (ctx->have_current_referrer);
  if (!dump_tracking_referrers_p (ctx))
    return;
  Lisp_Object referrer = ctx->current_referrer;
  Lisp_Object obj_referrers = Fgethash (object, ctx->referrers, Qnil);
  if (NILP (Fmemq (referrer, obj_referrers)))
    Fputhash (object, Fcons (referrer, obj_referrers), ctx->referrers);
}

/* If this object lives in the Emacs image and not on the heap, return
   a pointer to the object data.  Otherwise, return NULL.  */
947
static void *
Daniel Colascione's avatar
Daniel Colascione committed
948 949 950 951 952 953
dump_object_emacs_ptr (Lisp_Object lv)
{
  if (SUBRP (lv))
    return XSUBR (lv);
  if (dump_builtin_symbol_p (lv))
    return XSYMBOL (lv);
954 955 956
  if (XTYPE (lv) == Lisp_Vectorlike
      && PSEUDOVECTOR_TYPEP (&XVECTOR (lv)->header, PVEC_THREAD)
      && main_thread_p (XTHREAD (lv)))
Daniel Colascione's avatar
Daniel Colascione committed
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
    return XTHREAD (lv);
  return NULL;
}

static void
dump_queue_init (struct dump_queue *dump_queue)
{
  dump_tailq_init (&dump_queue->zero_weight_objects);
  dump_tailq_init (&dump_queue->one_weight_normal_objects);
  dump_tailq_init (&dump_queue->one_weight_strong_objects);
  dump_tailq_init (&dump_queue->fancy_weight_objects);
  dump_queue->link_weights = make_eq_hash_table ();
  dump_queue->sequence_numbers = make_eq_hash_table ();
  dump_queue->next_sequence_number = 1;
}

static bool
dump_queue_empty_p (struct dump_queue *dump_queue)
{
  bool is_empty =
    EQ (Fhash_table_count (dump_queue->sequence_numbers),
        make_fixnum (0));
  eassert (EQ (Fhash_table_count (dump_queue->sequence_numbers),
               Fhash_table_count (dump_queue->link_weights)));
  if (!is_empty)
    {
983 984 985 986
      eassert (!dump_tailq_empty_p (&dump_queue->zero_weight_objects)
	       || !dump_tailq_empty_p (&dump_queue->one_weight_normal_objects)
	       || !dump_tailq_empty_p (&dump_queue->one_weight_strong_objects)
	       || !dump_tailq_empty_p (&dump_queue->fancy_weight_objects));
Daniel Colascione's avatar
Daniel Colascione committed
987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
    }
  else
    {
      /* If we're empty, we can still have a few stragglers on one of
         the above queues.  */
    }

  return is_empty;
}

static void
dump_queue_push_weight (Lisp_Object *weight_list,
                        dump_off basis,
                        struct link_weight weight)
{
  if (EQ (*weight_list, Qt))
    *weight_list = Qnil;
  dump_push (weight_list, Fcons (dump_off_to_lisp (basis),
                                 dump_off_to_lisp (weight.value)));
}

static void
dump_queue_enqueue (struct dump_queue *dump_queue,
                    Lisp_Object object,
                    dump_off basis,
                    struct link_weight weight)
{
  Lisp_Object weights = Fgethash (object, dump_queue->link_weights, Qnil);
  Lisp_Object orig_weights = weights;
  /* N.B. want to find the last item of a given weight in each queue
     due to prepend use.  */
  bool use_single_queues = true;
  if (NILP (weights))
    {
      /* Object is new.  */
      dump_trace ("new object %016x weight=%u\n",
                  (unsigned) XLI (object),
                  (unsigned) weight.value);

      if (weight.value == WEIGHT_NONE.value)
        {
          eassert (weight.value == 0);
          dump_tailq_prepend (&dump_queue->zero_weight_objects, object);
          weights = Qt;
        }
      else if (!use_single_queues)
        {
          dump_tailq_prepend (&dump_queue->fancy_weight_objects, object);
          dump_queue_push_weight (&weights, basis, weight);
        }
      else if (weight.value == WEIGHT_NORMAL.value)
        {
          dump_tailq_prepend (&dump_queue->one_weight_normal_objects, object);
          dump_queue_push_weight (&weights, basis, weight);
        }
      else if (weight.value == WEIGHT_STRONG.value)
        {
          dump_tailq_prepend (&dump_queue->one_weight_strong_objects, object);
          dump_queue_push_weight (&weights, basis, weight);
        }
      else
        {
          emacs_abort ();
        }

      Fputhash (object,
                dump_off_to_lisp(dump_queue->next_sequence_number++),
                dump_queue->sequence_numbers);
    }
  else
    {
      /* Object was already on the queue.  It's okay for an object to
         be on multiple queues so long as we maintain order
         invariants: attempting to dump an object multiple times is
         harmless, and most of the time, an object is only referenced
         once before being dumped, making this code path uncommon.  */
      if (weight.value != WEIGHT_NONE.value)
        {
          if (EQ (weights, Qt))
            {
              /* Object previously had a zero weight.  Once we
                 incorporate the link weight attached to this call,
                 the object will have a single weight.  Put the object
                 on the appropriate single-weight queue.  */
              weights = Qnil;
1072
	      struct dump_tailq *tailq;
Daniel Colascione's avatar
Daniel Colascione committed
1073
              if (!use_single_queues)
1074
		tailq = &dump_queue->fancy_weight_objects;
Daniel Colascione's avatar
Daniel Colascione committed
1075
              else if (weight.value == WEIGHT_NORMAL.value)
1076
		tailq = &dump_queue->one_weight_normal_objects;
Daniel Colascione's avatar
Daniel Colascione committed
1077
              else if (weight.value == WEIGHT_STRONG.value)
1078
		tailq = &dump_queue->one_weight_strong_objects;
Daniel Colascione's avatar
Daniel Colascione committed
1079 1080
              else
                emacs_abort ();
1081
	      dump_tailq_prepend (tailq, object);
Daniel Colascione's avatar
Daniel Colascione committed
1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
            }
          else if (use_single_queues && NILP (XCDR (weights)))
            dump_tailq_prepend (&dump_queue->fancy_weight_objects, object);
          dump_queue_push_weight (&weights, basis, weight);
        }
    }

  if (!EQ (weights, orig_weights))
    Fputhash (object, weights, dump_queue->link_weights);
}

static float
dump_calc_link_score (dump_off basis,
                      dump_off link_basis,
                      dump_off link_weight)
{
  float distance = (float)(basis - link_basis);
  eassert (distance >= 0);
  float link_score = powf (distance, -0.2f);
  return powf (link_score, (float) link_weight / 1000.0f);
}

/* Compute the score score for a queued object.

   OBJECT is the object to query, which must currently be queued for
   dumping.  BASIS is the offset at which we would be
   dumping the object; score is computed relative to BASIS and the
   various BASIS values supplied to dump_add_link_weight --- the
   further an object is from its referrers, the greater the
   score.  */
static float
dump_queue_compute_score (struct dump_queue *dump_queue,
                          Lisp_Object object,
                          dump_off basis)
{
  float score = 0;
  Lisp_Object object_link_weights =
    Fgethash (object, dump_queue->link_weights, Qnil);
  if (EQ (object_link_weights, Qt))
    object_link_weights = Qnil;
  while (!NILP (object_link_weights))
    {
      Lisp_Object basis_weight_pair = dump_pop (&object_link_weights);
      dump_off link_basis = dump_off_from_lisp (XCAR (basis_weight_pair));
      dump_off link_weight = dump_off_from_lisp (XCDR (basis_weight_pair));
      score += dump_calc_link_score (basis, link_basis, link_weight);
    }
  return score;
}

/* Scan the fancy part of the dump queue.

   BASIS is the position at which to evaluate the score function,
   usually ctx->offset.

   If we have at least one entry in the queue, return the pointer (in
   the singly-linked list) to the cons containing the object via
   *OUT_HIGHEST_SCORE_CONS_PTR and return its score.

   If the queue is empty, set *OUT_HIGHEST_SCORE_CONS_PTR to NULL
   and return negative infinity.  */
static float
dump_queue_scan_fancy (struct dump_queue *dump_queue,
                       dump_off basis,
                       Lisp_Object **out_highest_score_cons_ptr)
{
  Lisp_Object *cons_ptr = &dump_queue->fancy_weight_objects.head;
  Lisp_Object *highest_score_cons_ptr = NULL;
  float highest_score = -INFINITY;
  bool first = true;

  while (!NILP (*cons_ptr))
    {
      Lisp_Object queued_object = XCAR (*cons_ptr);
1156
      float score = dump_queue_compute_score (dump_queue, queued_object, basis);
Daniel Colascione's avatar
Daniel Colascione committed
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
      if (first || score >= highest_score)
        {
          highest_score_cons_ptr = cons_ptr;
          highest_score = score;
          if (first)
            first = false;
        }
      cons_ptr = &XCONS (*cons_ptr)->u.s.u.cdr;
    }

  *out_highest_score_cons_ptr = highest_score_cons_ptr;
  return highest_score;
}

/* Return the sequence number of OBJECT.

   Return -1 if object doesn't have a sequence number.  This situation
   can occur when we've double-queued an object.  If this happens, we
   discard the errant object and try again.  */
static dump_off
dump_queue_sequence (struct dump_queue *dump_queue,
                     Lisp_Object object)
{
  Lisp_Object n = Fgethash (object, dump_queue->sequence_numbers, Qnil);
  return NILP (n) ? -1 : dump_off_from_lisp (n);
}

/* Find score and sequence at head of a one-weight object queue.

   Transparently discard stale objects from head of queue.  BASIS
   is the baseness for score computation.

   We organize these queues so that score is strictly decreasing, so
   examining the head is sufficient.  */
static void
1192 1193 1194 1195 1196
dump_queue_find_score_of_one_weight_queue (struct dump_queue *dump_queue,
					   dump_off basis,
					   struct dump_tailq *one_weight_queue,
					   float *out_score,
					   int *out_sequence)
Daniel Colascione's avatar
Daniel Colascione committed
1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
{
  /* Transparently discard stale objects from the head of this queue.  */
  do
    {
      if (dump_tailq_empty_p (one_weight_queue))
        {
          *out_score = -INFINITY;
          *out_sequence = 0;
        }
      else
        {
          Lisp_Object head = dump_tailq_peek (one_weight_queue);
          *out_sequence = dump_queue_sequence (dump_queue, head);
          if (*out_sequence < 0)
            dump_tailq_pop (one_weight_queue);
          else
            *out_score =
              dump_queue_compute_score (dump_queue, head, basis);
        }
    }
  while (*out_sequence < 0);
}

/* Pop the next object to dump from the dump queue.

   BASIS is the dump offset at which to evaluate score.

   The object returned is the queued object with the greatest score;
   by side effect, the object is removed from the dump queue.
   The dump queue must not be empty.  */
static Lisp_Object
dump_queue_dequeue (struct dump_queue *dump_queue, dump_off basis)
{
  eassert (EQ (Fhash_table_count (dump_queue->sequence_numbers),
               Fhash_table_count (dump_queue->link_weights)));

1233 1234 1235 1236 1237
  eassert (XFIXNUM (Fhash_table_count (dump_queue->sequence_numbers))
	   <= (dump_tailq_length (&dump_queue->fancy_weight_objects)
	       + dump_tailq_length (&dump_queue->zero_weight_objects)
	       + dump_tailq_length (&dump_queue->one_weight_normal_objects)
	       + dump_tailq_length (&dump_queue->one_weight_strong_objects)));
Daniel Colascione's avatar
Daniel Colascione committed
1238 1239 1240

  bool dump_object_counts = true;
  if (dump_object_counts)
1241 1242 1243 1244 1245 1246 1247 1248 1249
    dump_trace
      ("dump_queue_dequeue basis=%d fancy=%u zero=%u "
       "normal=%u strong=%u hash=%u\n",
       basis,
       (unsigned) dump_tailq_length (&dump_queue->fancy_weight_objects),
       (unsigned) dump_tailq_length (&dump_queue->zero_weight_objects),
       (unsigned) dump_tailq_length (&dump_queue->one_weight_normal_objects),
       (unsigned) dump_tailq_length (&dump_queue->one_weight_strong_objects),
       (unsigned) XFIXNUM (Fhash_table_count (dump_queue->link_weights)));
Daniel Colascione's avatar
Daniel Colascione committed
1250 1251

  static const int nr_candidates = 3;
1252 1253
  struct candidate
  {
Daniel Colascione's avatar
Daniel Colascione committed
1254 1255 1256 1257 1258 1259 1260 1261 1262 1263
    float score;
    dump_off sequence;
  } candidates[nr_candidates];

  Lisp_Object *fancy_cons = NULL;
  candidates[0].sequence = 0;
  do
    {
      if (candidates[0].sequence < 0)
        *fancy_cons = XCDR (*fancy_cons);  /* Discard stale object.  */
1264 1265
      candidates[0].score = dump_queue_scan_fancy (dump_queue, basis,
						   &fancy_cons);
Daniel Colascione's avatar
Daniel Colascione committed
1266 1267 1268 1269 1270 1271 1272
      candidates[0].sequence =
        candidates[0].score > -INFINITY
        ? dump_queue_sequence (dump_queue, XCAR (*fancy_cons))
        : 0;
    }
  while (candidates[0].sequence < 0);

1273 1274 1275 1276 1277
  dump_queue_find_score_of_one_weight_queue
    (dump_queue, basis,
     &dump_queue->one_weight_normal_objects,
     &candidates[1].score,
     &candidates[1].sequence);
Daniel Colascione's avatar
Daniel Colascione committed
1278

1279 1280 1281 1282 1283
  dump_queue_find_score_of_one_weight_queue
    (dump_queue, basis,
     &dump_queue->one_weight_strong_objects,
     &candidates[2].score,
     &candidates[2].sequence);
Daniel Colascione's avatar
Daniel Colascione committed
1284 1285 1286 1287 1288

  int best = -1;
  for (int i = 0; i < nr_candidates; ++i)
    {
      eassert (candidates[i].sequence >= 0);
1289 1290 1291 1292 1293
      if (candidates[i].score > -INFINITY
	  && (best < 0
	      || candidates[i].score > candidates[best].score
	      || (candidates[i].score == candidates[best].score
		  && candidates[i].sequence < candidates[best].sequence)))
Daniel Colascione's avatar
Daniel Colascione committed
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335
        best = i;
    }

  Lisp_Object result;
  const char *src;
  if (best < 0)
    {
      src = "zero";
      result = dump_tailq_pop (&dump_queue->zero_weight_objects);
    }
  else if (best == 0)
    {
      src = "fancy";
      result = dump_tailq_pop (&dump_queue->fancy_weight_objects);
    }
  else if (best == 1)
    {
      src = "normal";
      result = dump_tailq_pop (&dump_queue->one_weight_normal_objects);
    }
  else if (best == 2)
    {
      src = "strong";
      result = dump_tailq_pop (&dump_queue->one_weight_strong_objects);
    }
  else
    emacs_abort ();

  dump_trace ("  result score=%f src=%s object=%016x\n",
              best < 0 ? -1.0 : (double) candidates[best].score,
              src,
              (unsigned) XLI (result));

  {
    Lisp_Object weights = Fgethash (result, dump_queue->link_weights, Qnil);
    while (!NILP (weights) && CONSP (weights))
      {
        Lisp_Object basis_weight_pair = dump_pop (&weights);
        dump_off link_basis =
          dump_off_from_lisp (XCAR (basis_weight_pair));
        dump_off link_weight =
          dump_off_from_lisp (XCDR (basis_weight_pair));
1336 1337 1338 1339 1340 1341
	dump_trace
	  ("    link_basis=%d distance=%d weight=%d contrib=%f\n",
	   link_basis,
	   basis - link_basis,
	   link_weight,
	   (double) dump_calc_link_score (basis, link_basis, link_weight));
Daniel Colascione's avatar
Daniel Colascione committed
1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358
      }
  }

  Fremhash (result, dump_queue->link_weights);
  Fremhash (result, dump_queue->sequence_numbers);
  return result;
}

/* Return whether we need to write OBJECT to the dump file.  */
static bool
dump_object_needs_dumping_p (Lisp_Object object)
{
  /* Some objects, like symbols, are self-representing because they
     have invariant bit patterns, but sometimes these objects have
     associated data too, and these data-carrying objects need to be
     included in the dump despite all references to them being
     bitwise-invariant.  */
1359 1360
  return (!dump_object_self_representing_p (object)
	  || dump_object_emacs_ptr (object));
Daniel Colascione's avatar
Daniel Colascione committed
1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534
}

static void
dump_enqueue_object (struct dump_context *ctx,
                     Lisp_Object object,
                     struct link_weight weight)
{
  if (dump_object_needs_dumping_p (object))
    {
      dump_off state = dump_recall_object (ctx, object);
      bool already_dumped_object = state > DUMP_OBJECT_NOT_SEEN;
      if (ctx->flags.assert_already_seen)
        eassert (already_dumped_object);
      if (!already_dumped_object)
        {
          if (state == DUMP_OBJECT_NOT_SEEN)
            {
              state = DUMP_OBJECT_ON_NORMAL_QUEUE;
              dump_remember_object (ctx, object, state);
            }
          /* Note that we call dump_queue_enqueue even if the object
             is already on the normal queue: multiple enqueue calls
             can increase the object's weight.  */
          if (state == DUMP_OBJECT_ON_NORMAL_QUEUE)
            dump_queue_enqueue (&ctx->dump_queue,
                                object,
                                ctx->offset,
                                weight);
        }
    }
  /* Always remember the path to this object.  */
  dump_note_reachable (ctx, object);
}

static void
print_paths_to_root_1 (struct dump_context *ctx,
                       Lisp_Object object,
                       int level)
{
  Lisp_Object referrers = Fgethash (object, ctx->referrers, Qnil);
  while (!NILP (referrers))
    {
      Lisp_Object referrer = XCAR (referrers);
      referrers = XCDR (referrers);
      Lisp_Object repr = Fprin1_to_string (referrer, Qnil);
      for (int i = 0; i < level; ++i)
        fputc (' ', stderr);
      fprintf (stderr, "%s\n", SDATA (repr));
      print_paths_to_root_1 (ctx, referrer, level + 1);
    }
}

static void
print_paths_to_root (struct dump_context *ctx, Lisp_Object object)
{
  print_paths_to_root_1 (ctx, object, 0);
}

static void
dump_remember_cold_op (struct dump_context *ctx,
                       enum cold_op op,
                       Lisp_Object arg)
{
  if (ctx->flags.dump_object_contents)
    dump_push (&ctx->cold_queue, Fcons (make_fixnum (op), arg));
}

/* Add a dump relocation that points into Emacs.

   Add a relocation that updates the pointer stored at DUMP_OFFSET to
   point into the Emacs binary upon dump load.  The pointer-sized
   value at DUMP_OFFSET in the dump file should contain a number
   relative to emacs_basis().  */
static void
dump_reloc_dump_to_emacs_ptr_raw (struct dump_context *ctx,
                                  dump_off dump_offset)
{
  if (ctx->flags.dump_object_contents)
    dump_push (&ctx->dump_relocs,
               list2 (make_fixnum (RELOC_DUMP_TO_EMACS_PTR_RAW),
                      dump_off_to_lisp (dump_offset)));
}

/* Add a dump relocation that points a Lisp_Object back at the dump.

   Add a relocation that updates the Lisp_Object at DUMP_OFFSET in the
   dump to point to another object in the dump.  The Lisp_Object-sized
   value at DUMP_OFFSET in the dump file should contain the offset of
   the target object relative to the start of the dump.  */
static void
dump_reloc_dump_to_dump_lv (struct dump_context *ctx,
                            dump_off dump_offset,
                            enum Lisp_Type type)
{
  if (!ctx->flags.dump_object_contents)
    return;

  int reloc_type;
  switch (type)
    {
    case Lisp_Symbol:
    case Lisp_String:
    case Lisp_Vectorlike:
    case Lisp_Cons:
    case Lisp_Float:
      reloc_type = RELOC_DUMP_TO_DUMP_LV + type;
      break;
    default:
      emacs_abort ();
    }

  dump_push (&ctx->dump_relocs,
             list2 (make_fixnum (reloc_type),
                    dump_off_to_lisp (dump_offset)));
}

/* Add a dump relocation that points a raw pointer back at the dump.

   Add a relocation that updates the raw pointer at DUMP_OFFSET in the
   dump to point to another object in the dump.  The pointer-sized
   value at DUMP_OFFSET in the dump file should contain the offset of
   the target object relative to the start of the dump.  */
static void
dump_reloc_dump_to_dump_ptr_raw (struct dump_context *ctx,
                                 dump_off dump_offset)
{
  if (ctx->flags.dump_object_contents)
    dump_push (&ctx->dump_relocs,
               list2 (make_fixnum (RELOC_DUMP_TO_DUMP_PTR_RAW),
                      dump_off_to_lisp (dump_offset)));
}

/* Add a dump relocation that points to a Lisp object in Emacs.

   Add a relocation that updates the Lisp_Object at DUMP_OFFSET in the
   dump to point to a lisp object in Emacs.  The Lisp_Object-sized
   value at DUMP_OFFSET in the dump file should contain the offset of
   the target object relative to emacs_basis().  TYPE is the type of
   Lisp value.  */
static void
dump_reloc_dump_to_emacs_lv (struct dump_context *ctx,
                             dump_off dump_offset,
                             enum Lisp_Type type)
{
  if (!ctx->flags.dump_object_contents)
    return;

  int reloc_type;
  switch (type)
    {
    case Lisp_String:
    case Lisp_Vectorlike:
    case Lisp_Cons:
    case Lisp_Float:
      reloc_type = RELOC_DUMP_TO_EMACS_LV + type;
      break;
    default:
      emacs_abort ();
    }

  dump_push (&ctx->dump_relocs,
             list2 (make_fixnum (reloc_type),
                    dump_off_to_lisp (dump_offset)));
}

/* Add an Emacs relocation that copies arbitrary bytes from the dump.

   When the dump is loaded, Emacs copies SIZE bytes from OFFSET in
   dump to LOCATION in the Emacs data section.  This copying happens
   after other relocations, so it's all right to, say, copy a
   Lisp_Object (since by the time we copy the Lisp_Object, it'll have
   been adjusted to account for the location of the running Emacs and
   dump file).  */
static void
1535 1536
dump_emacs_reloc_copy_from_dump (struct dump_context *ctx, dump_off dump_offset,
				 void *emacs_ptr, dump_off size)
Daniel Colascione's avatar
Daniel Colascione committed
1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591
{
  eassert (size >= 0);
  eassert (size < (1 << EMACS_RELOC_LENGTH_BITS));

  if (!ctx->flags.dump_object_contents)
    return;

  if (size == 0)
    return;

  eassert (dump_offset >= 0);
  dump_push (&ctx->emacs_relocs,
             list4 (make_fixnum (RELOC_EMACS_COPY_FROM_DUMP),
                    dump_off_to_lisp (emacs_offset (emacs_ptr)),
                    dump_off_to_lisp (dump_offset),
                    dump_off_to_lisp (size)));
}

/* Add an Emacs relocation that sets values to arbitrary bytes.

   When the dump is loaded, Emacs copies SIZE bytes from the
   relocation itself to the adjusted location inside Emacs EMACS_PTR.
   SIZE is the number of bytes to copy.  See struct emacs_reloc for
   the maximum size that this mechanism can support.  The value comes
   from VALUE_PTR.
 */
static void
dump_emacs_reloc_immediate (struct dump_context *ctx,
                            const void *emacs_ptr,
                            const void *value_ptr,
                            dump_off size)
{
  if (!ctx->flags.dump_object_contents)
    return;

  intmax_t value = 0;
  eassert (size <= sizeof (value));
  memcpy (&value, value_ptr, size);
  dump_push (&ctx->emacs_relocs,
             list4 (make_fixnum (RELOC_EMACS_IMMEDIATE),
                    dump_off_to_lisp (emacs_offset (emacs_ptr)),
                    intmax_t_to_lisp (value),
                    dump_off_to_lisp (size)));
}

#define DEFINE_EMACS_IMMEDIATE_FN(fnname, type)                         \
  static void                                                           \
  fnname (struct dump_context *ctx,                                     \
          const type *emacs_ptr,                                        \
          type value)                                                   \
  {                                                                     \
    dump_emacs_reloc_immediate (                                        \
      ctx, emacs_ptr, &value, sizeof (value));                          \
  }

1592 1593 1594 1595 1596
DEFINE_EMACS_IMMEDIATE_FN (dump_emacs_reloc_immediate_lv, Lisp_Object)
DEFINE_EMACS_IMMEDIATE_FN (dump_emacs_reloc_immediate_ptrdiff_t, ptrdiff_t)
DEFINE_EMACS_IMMEDIATE_FN (dump_emacs_reloc_immediate_intmax_t, intmax_t)
DEFINE_EMACS_IMMEDIATE_FN (dump_emacs_reloc_immediate_int, int)
DEFINE_EMACS_IMMEDIATE_FN (dump_emacs_reloc_immediate_bool, bool)
Daniel Colascione's avatar
Daniel Colascione committed
1597 1598 1599 1600 1601

/* Add an emacs relocation that makes a raw pointer in Emacs point
   into the dump.  */
static void
dump_emacs_reloc_to_dump_ptr_raw (struct dump_context *ctx,
1602
				  const void *emacs_ptr, dump_off dump_offset)
Daniel Colascione's avatar
Daniel Colascione committed
1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619
{
  if (!ctx->flags.dump_object_contents)
    return;

  dump_push (&ctx->emacs_relocs,
             list3 (make_fixnum (RELOC_EMACS_DUMP_PTR_RAW),
                    dump_off_to_lisp (emacs_offset (emacs_ptr)),
                    dump_off_to_lisp (dump_offset)));
}

/* Add an emacs relocation that points into the dump.

   When the dump is loaded, the Lisp_Object at EMACS_ROOT in Emacs to
   point to VALUE.  VALUE can be any Lisp value; this function
   automatically queues the value for dumping if necessary.  */
static void
dump_emacs_reloc_to_lv (struct dump_context *ctx,
1620
			Lisp_Object const *emacs_ptr,
Daniel Colascione's avatar
Daniel Colascione committed
1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
                        Lisp_Object value)
{
  if (dump_object_self_representing_p (value))
    dump_emacs_reloc_immediate_lv (ctx, emacs_ptr, value);
  else
    {
      if (ctx->flags.dump_object_contents)
        /* Conditionally use RELOC_EMACS_EMACS_LV or
           RELOC_EMACS_DUMP_LV depending on where the target object
           lives.  We could just have decode_emacs_reloc pick the
           right type, but we might as well maintain the invariant
           that the types on ctx->emacs_relocs correspond to the types
           of emacs_relocs we actually emit.  */
1634 1635 1636 1637 1638 1639
	dump_push (&ctx->emacs_relocs,
		   list3 (make_fixnum (dump_object_emacs_ptr (value)
				       ? RELOC_EMACS_EMACS_LV
				       : RELOC_EMACS_DUMP_LV),
			  dump_off_to_lisp (emacs_offset (emacs_ptr)),
			  value));
Daniel Colascione's avatar
Daniel Colascione committed
1640 1641 1642 1643 1644 1645 1646
      dump_enqueue_object (ctx, value, WEIGHT_NONE);
    }
}

/* Add an emacs relocation that makes a raw pointer in Emacs point
   back into the Emacs image.  */
static void
1647
dump_emacs_reloc_to_emacs_ptr_raw (struct dump_context *ctx, void *emacs_ptr,
1648
				   void const *target_emacs_ptr)
Daniel Colascione's avatar
Daniel Colascione committed
1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694
{
  if (!ctx->flags.dump_object_contents)
    return;

  dump_push (&ctx->emacs_relocs,
             list3 (make_fixnum (RELOC_EMACS_EMACS_PTR_RAW),
                    dump_off_to_lisp (emacs_offset (emacs_ptr)),
                    dump_off_to_lisp (emacs_offset (target_emacs_ptr))));
}

/* Add an Emacs relocation that makes a raw pointer in Emacs point to
   a different part of Emacs.  */

enum dump_fixup_type
  {
    DUMP_FIXUP_LISP_OBJECT,
    DUMP_FIXUP_LISP_OBJECT_RAW,
    DUMP_FIXUP_PTR_DUMP_RAW,
    DUMP_FIXUP_BIGNUM_DATA,
  };

enum dump_lv_fixup_type
  {
    LV_FIXUP_LISP_OBJECT,
    LV_FIXUP_RAW_POINTER,
  };

/* Make something in the dump point to a lisp object.

   CTX is a dump context.  DUMP_OFFSET is the location in the dump to
   fix.  VALUE is the object to which the location in the dump
   should point.

   If FIXUP_SUBTYPE is LV_FIXUP_LISP_OBJECT, we expect a Lisp_Object
   at DUMP_OFFSET.  If it's LV_FIXUP_RAW_POINTER, we expect a pointer.
 */
static void
dump_remember_fixup_lv (struct dump_context *ctx,
                        dump_off dump_offset,
                        Lisp_Object value,
                        enum dump_lv_fixup_type fixup_subtype)
{
  if (!ctx->flags.dump_object_contents)
    return;

  dump_push (&ctx->fixups,
1695 1696 1697 1698 1699
	     list3 (make_fixnum (fixup_subtype == LV_FIXUP_LISP_OBJECT
				 ? DUMP_FIXUP_LISP_OBJECT
				 : DUMP_FIXUP_LISP_OBJECT_RAW),
		    dump_off_to_lisp (dump_offset),
		    value));
Daniel Colascione's avatar
Daniel Colascione committed
1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714
}

/* Remember to fix up the dump file such that the pointer-sized value
   at DUMP_OFFSET points to NEW_DUMP_OFFSET in the dump file and to
   its absolute address at runtime.  */
static void
dump_remember_fixup_ptr_raw (struct dump_context *ctx,
                             dump_off dump_offset,
                             dump_off new_dump_offset)
{
  if (!ctx->flags.dump_object_contents)
    return;

  /* We should not be generating relocations into the
     to-be-copied-into-Emacs dump region.  */
1715 1716 1717 1718
  eassert (ctx->header.discardable_start == 0
	   || new_dump_offset < ctx->header.discardable_start
	   || (ctx->header.cold_start != 0
	       && new_dump_offset >= ctx->header.cold_start));
Daniel Colascione's avatar
Daniel Colascione committed
1719 1720

  dump_push (&ctx->fixups,
1721 1722 1723
	     list3 (make_fixnum (DUMP_FIXUP_PTR_DUMP_RAW),
		    dump_off_to_lisp (dump_offset),
		    dump_off_to_lisp (new_dump_offset)));
Daniel Colascione's avatar
Daniel Colascione committed
1724 1725 1726
}

static void
1727 1728
dump_root_visitor (Lisp_Object const *root_ptr, enum gc_root_type type,
		   void *data)
Daniel Colascione's avatar
Daniel Colascione committed
1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759
{
  struct dump_context *ctx = data;
  Lisp_Object value = *root_ptr;
  if (type == GC_ROOT_C_SYMBOL)
    {
      eassert (dump_builtin_symbol_p (value));
      /* Remember to dump the object itself later along with all the
         rest of the copied-to-Emacs objects.  */
      DUMP_SET_REFERRER (ctx, build_string ("built-in symbol list"));
      dump_enqueue_object (ctx, value, WEIGHT_NONE);
      DUMP_CLEAR_REFERRER (ctx);
    }
  else
    {
      if (type == GC_ROOT_STATICPRO)
        Fputhash (dump_off_to_lisp (emacs_offset (root_ptr)),
                  Qt,
                  ctx->staticpro_table);
      if (root_ptr != &Vinternal_interpreter_environment)
        {
          DUMP_SET_REFERRER (ctx, dump_ptr_referrer ("emacs root", root_ptr));
          dump_emacs_reloc_to_lv (ctx, root_ptr, *root_ptr);
          DUMP_CLEAR_REFERRER (ctx);
        }
    }
}

/* Kick off the dump process by queuing up the static GC roots.  */
static void
dump_roots (struct dump_context *ctx)
{
Paul Eggert's avatar
Paul Eggert committed
1760 1761
  struct gc_root_visitor visitor = { .visit = dump_root_visitor,
				     .data = ctx };
Daniel Colascione's avatar
Daniel Colascione committed
1762 1763 1764
  visit_static_gc_roots (visitor);
}

1765 1766
#define PDUMPER_MAX_OBJECT_SIZE 2048

Daniel Colascione's avatar
Daniel Colascione committed
1767 1768 1769 1770 1771 1772 1773
static dump_off
field_relpos (const void *in_start, const void *in_field)
{
  ptrdiff_t in_start_val = (ptrdiff_t) in_start;
  ptrdiff_t in_field_val = (ptrdiff_t) in_field;
  eassert (in_start_val <= in_field_val);
  ptrdiff_t relpos = in_field_val - in_start_val;
1774 1775 1776 1777 1778 1779 1780 1781 1782
  /* The following assertion attempts to detect bugs whereby IN_START
     and IN_FIELD don't point to the same object/structure, on the
     assumption that a too-large difference between them is
     suspicious.  As of Apr 2019 the largest object we dump -- 'struct
     buffer' -- is slightly smaller than 1KB, and we want to leave
     some margin for future extensions.  If the assertion below is
     ever violated, make sure the two pointers indeed point into the
     same object, and if so, enlarge the value of PDUMPER_MAX_OBJECT_SIZE.  */
  eassert (relpos < PDUMPER_MAX_OBJECT_SIZE);
Daniel Colascione's avatar
Daniel Colascione committed
1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853
  return (dump_off) relpos;
}

static void
cpyptr (void *out, const void *in)
{
  memcpy (out, in, sizeof (void *));
}

/* Convenience macro for regular assignment.  */
#define DUMP_FIELD_COPY(out, in, name) \
  do                                   \
    {                                  \
      (out)->name = (in)->name;        \
    }                                  \
  while (0)

static void
dump_field_lv_or_rawptr (struct dump_context *ctx,
                         void *out,
                         const void *in_start,
                         const void *in_field,
                         /* opt */ const enum Lisp_Type *ptr_raw_type,
                         struct link_weight weight)
{
  eassert (ctx->obj_offset > 0);

  Lisp_Object value;
  dump_off relpos = field_relpos (in_start, in_field);
  void *out_field = (char *) out + relpos;
  bool is_ptr_raw = (ptr_raw_type != NULL);

  if (!is_ptr_raw)
    {
      memcpy (&value, in_field, sizeof (value));
      if (dump_object_self_representing_p (value))
        {
          memcpy (out_field, &value, sizeof (value));
          return;
        }
    }
  else
    {
      void *ptrval;
      cpyptr (&ptrval, in_field);
      if (ptrval == NULL)
        return; /* Nothing to do.  */
      switch (*ptr_raw_type)
        {
        case Lisp_Symbol:
          value = make_lisp_symbol (ptrval);
          break;
        case Lisp_String:
        case Lisp_Vectorlike:
        case Lisp_Cons:
        case Lisp_Float:
          value = make_lisp_ptr (ptrval, *ptr_raw_type);
          break;
        default:
          emacs_abort ();
        }
    }

  /* Now value is the Lisp_Object to which we want to point whether or
     not the field is a raw pointer (in which case we just synthesized
     the Lisp_Object outselves) or a Lisp_Object (in which case we
     just copied the thing).  Add a fixup or relocation.  */

  intptr_t out_value;
  dump_off out_field_offset = ctx->obj_offset + relpos;
  dump_off target_offset = dump_recall_object (ctx, value);
1854 1855
  if (DANGEROUS
      && target_offset > 0 && dump_object_emacs_ptr (value) == NULL)
Daniel Colascione's avatar
Daniel Colascione committed
1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933
    {
      /* We've already dumped the referenced object, so we can emit
         the value and a relocation directly instead of indirecting
         through a fixup.  */
      out_value = target_offset;
      if (is_ptr_raw)
        dump_reloc_dump_to_dump_ptr_raw (ctx, out_field_offset);
      else
        dump_reloc_dump_to_dump_lv (ctx, out_field_offset, XTYPE (value));
    }
  else
    {
      /* We don't know about the target object yet, so add a fixup.
         When we process the fixup, we'll have dumped the target
         object.  */
      out_value = (intptr_t) 0xDEADF00D;
      dump_remember_fixup_lv (ctx,
                              out_field_offset,
                              value,
                              ( is_ptr_raw
                                ? LV_FIXUP_RAW_POINTER
                                : LV_FIXUP_LISP_OBJECT ));
      dump_enqueue_object (ctx, value, weight);
    }

  memcpy (out_field, &out_value, sizeof (out_value));
}

/* Set a pointer field on an output object during dump.

   CTX is the dump context.  OFFSET is the offset at which the current
   object starts.  OUT is a pointer to the dump output object.
   IN_START is the start of the current Emacs object.  IN_FIELD is a
   pointer to the field in that object.  TYPE is the type of pointer
   to which IN_FIELD points.
 */
static void
dump_field_lv_rawptr (struct dump_context *ctx,
                      void *out,
                      const void *in_start,
                      const void *in_field,
                      enum Lisp_Type type,
                      struct link_weight weight)
{
  dump_field_lv_or_rawptr (ctx, out, in_start, in_field, &type, weight);
}

/* Set a Lisp_Object field on an output object during dump.

   CTX is a dump context.  OFFSET is the offset at which the current
   object starts.  OUT is a pointer to the dump output object.
   IN_START is the start of the current Emacs object.  IN_FIELD is a
   pointer to a Lisp_Object field in that object.

   Arrange for the dump to contain fixups and relocations such that,
   at load time, the given field of the output object contains a valid
   Lisp_Object pointing to the same notional object that *IN_FIELD
   contains now.

   See idomatic usage below.  */
static void
dump_field_lv (struct dump_context *ctx,
               void *out,
               const void *in_start,
               const Lisp_Object *in_field,
               struct link_weight weight)
{
  dump_field_lv_or_rawptr (ctx, out, in_start, in_field, NULL, weight);
}

/* Note that we're going to add a manual fixup for the given field
   later.  */
static void
dump_field_fixup_later (struct dump_context *ctx,
                        void *out,
                        const void *in_start,
                        const void *in_field)
{
Paul Eggert's avatar
Paul Eggert committed
1934
  /* TODO: more error checking.  */
Daniel Colascione's avatar
Daniel Colascione committed
1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953
  (void) field_relpos (in_start, in_field);
}

/* Mark an output object field, which is as wide as a poiner, as being
   fixed up to point to a specific offset in the dump.  */
static void
dump_field_ptr_to_dump_offset (struct dump_context *ctx,
                               void *out,
                               const void *in_start,
                               const void *in_field,
                               dump_off target_dump_offset)
{
  eassert (ctx->obj_offset > 0);
  if (!ctx->flags.dump_object_contents)
    return;

  dump_off relpos = field_relpos (in_start, in_field);
  dump_reloc_dump_to_dump_ptr_raw (ctx, ctx->obj_offset + relpos);
  intptr_t outval = target_dump_offset;
1954
  memcpy ((char *) out + relpos, &outval, sizeof (outval));
Daniel Colascione's avatar
Daniel Colascione committed
1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984
}

/* Mark a field as pointing to a place inside Emacs.

   CTX is the dump context.  OUT points to the out-object for the
   current dump function.  IN_START points to the start of the object
   being dumped.  IN_FIELD points to the field inside the object being
   dumped that we're dumping.  The contents of this field (which
   should be as wide as a pointer) are the Emacs pointer to dump.

 */
static void
dump_field_emacs_ptr (struct dump_context *ctx,
                      void *out,
                      const void *in_start,
                      const void *in_field)
{
  eassert (ctx->obj_offset > 0);
  if (!ctx->flags.dump_object_contents)
    return;

  dump_off relpos = field_relpos (in_start, in_field);
  void *abs_emacs_ptr;
  cpyptr (&abs_emacs_ptr, in_field);
  intptr_t rel_emacs_ptr = 0;
  if (abs_emacs_ptr)
    {
      rel_emacs_ptr = emacs_offset ((void *)abs_emacs_ptr);
      dump_reloc_dump_to_emacs_ptr_raw (ctx, ctx->obj_offset + relpos);
    }
1985
  cpyptr ((char *) out + relpos, &rel_emacs_ptr);
Daniel Colascione's avatar
Daniel Colascione committed
1986 1987 1988
}

static void
1989 1990 1991
_dump_object_start_pseudovector (struct dump_context *ctx,
				 union vectorlike_header *out_hdr,
				 const union vectorlike_header *in_hdr)
Daniel Colascione's avatar
Daniel Colascione committed
1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009
{
  eassert (in_hdr->size & PSEUDOVECTOR_FLAG);
  ptrdiff_t vec_size = vectorlike_nbytes (in_hdr);
  dump_object_start (ctx, out_hdr, (dump_off) vec_size);
  *out_hdr = *in_hdr;
}

/* Need a macro for alloca.  */
#define START_DUMP_PVEC(ctx, hdr, type, out)                  \
  const union vectorlike_header *_in_hdr = (hdr);             \
  type *out = alloca (vectorlike_nbytes (_in_hdr));           \
  _dump_object_start_pseudovector (ctx, &out->header, _in_hdr)

static dump_off
finish_dump_pvec (struct dump_context *ctx,
                  union vectorlike_header *out_hdr)
{
  ALLOW_IMPLICIT_CONVERSION;
2010 2011
  dump_off result = dump_object_finish (ctx, out_hdr,
					vectorlike_nbytes (out_hdr));
Daniel Colascione's avatar
Daniel Colascione committed
2012
  DISALLOW_IMPLICIT_CONVERSION;
2013
  return result;
Daniel Colascione's avatar
Daniel Colascione committed
2014 2015 2016
}

static void
2017 2018 2019
dump_pseudovector_lisp_fields (struct dump_context *ctx,
			       union vectorlike_header *out_hdr,
			       const union vectorlike_header *in_hdr)
Daniel Colascione's avatar
Daniel Colascione committed
2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032
{
  const struct Lisp_Vector *in = (const struct Lisp_Vector *) in_hdr;
  struct Lisp_Vector *out = (struct Lisp_Vector *) out_hdr;
  ptrdiff_t size = in->header.size;
  eassert (size & PSEUDOVECTOR_FLAG);
  size &= PSEUDOVECTOR_SIZE_MASK;
  for (ptrdiff_t i = 0; i < size; ++i)
    dump_field_lv (ctx, out, in, &in->contents[i], WEIGHT_STRONG);
}

static dump_off
dump_cons (struct dump_context *ctx, const struct Lisp_Cons *cons)
{
Paul Eggert's avatar
Paul Eggert committed
2033
#if CHECK_STRUCTS && !defined (HASH_Lisp_Cons_00EEE63F67)
2034
# error "Lisp_Cons changed. See CHECK_STRUCTS comment in config.h."