hash: Increase the number of possible overflow bitmaps by 8x.
Per a report from AP, it's not that hard to exhaust the supply of bitmap pages if you create a table with a hash index and then insert a few billion rows - and then you start getting errors when you try to insert additional rows. In the particular case reported by AP, there's another fix that we can make to improve recycling of overflow pages, which is another way to avoid the error, but there may be other cases where this problem happens and that fix won't help. So let's buy ourselves as much headroom as we can without rearchitecting anything.

The comments claim that the old limit was 64GB, but it was really only 32GB, because we didn't use all the bits in the page for bitmap bits - only the largest power of 2 that could fit after deducting space for the page header and so forth. Thus, we have 4kB per page for bitmap bits, not 8kB. The new limit is thus actually 8 times the old *real* limit but only 4 times the old *purported* limit.

Since this breaks on-disk compatibility, bump HASH_VERSION. We've already done this earlier in this release cycle, so this doesn't cause any incremental inconvenience for people using pg_upgrade from releases prior to v10. However, users who use pg_upgrade to reach 10beta3 or later from 10beta2 or earlier will need to REINDEX any hash indexes again.

Amit Kapila and Robert Haas

Discussion: http://postgr.es/m/20170704105728.mwb72jebfmok2nm2@zip.com.au
This commit is contained in:
parent
c30f1770a9
commit
620b49a16d
@ -43,9 +43,9 @@ ERROR: invalid overflow block number 5
|
||||
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
|
||||
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
|
||||
hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
|
||||
-[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
-[ RECORD 1 ]-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------
|
||||
magic | 105121344
|
||||
version | 3
|
||||
version | 4
|
||||
ntuples | 1
|
||||
bsize | 8152
|
||||
bmsize | 4096
|
||||
@ -58,7 +58,7 @@ firstfree | 0
|
||||
nmaps | 1
|
||||
procid | 450
|
||||
spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
|
||||
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
|
||||
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
|
||||
|
@ -134,7 +134,7 @@ create index test_hashidx on test using hash (b);
|
||||
select * from pgstathashindex('test_hashidx');
|
||||
version | bucket_pages | overflow_pages | bitmap_pages | unused_pages | live_items | dead_items | free_percent
|
||||
---------+--------------+----------------+--------------+--------------+------------+------------+--------------
|
||||
3 | 4 | 0 | 1 | 0 | 0 | 0 | 100
|
||||
4 | 4 | 0 | 1 | 0 | 0 | 0 | 100
|
||||
(1 row)
|
||||
|
||||
-- these should error with the wrong type
|
||||
@ -235,7 +235,7 @@ select pgstatindex('test_partition_idx');
|
||||
select pgstathashindex('test_partition_hash_idx');
|
||||
pgstathashindex
|
||||
---------------------
|
||||
(3,8,0,1,0,0,0,100)
|
||||
(4,8,0,1,0,0,0,100)
|
||||
(1 row)
|
||||
|
||||
drop table test_partitioned;
|
||||
|
@ -687,8 +687,13 @@ test=# SELECT * FROM hash_bitmap_info('con_hash_index', 2052);
|
||||
<function>hash_metapage_info</function> returns information stored
|
||||
in meta page of a <acronym>HASH</acronym> index. For example:
|
||||
<screen>
|
||||
test=# SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0));
|
||||
-[ RECORD 1 ]-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
test=# SELECT magic, version, ntuples, ffactor, bsize, bmsize, bmshift,
|
||||
test-# maxbucket, highmask, lowmask, ovflpoint, firstfree, nmaps, procid,
|
||||
test-# regexp_replace(spares::text, '(,0)*}', '}') as spares,
|
||||
test-# regexp_replace(mapp::text, '(,0)*}', '}') as mapp
|
||||
test-# FROM hash_metapage_info(get_raw_page('con_hash_index', 0));
|
||||
-[ RECORD 1 ]-------------------------------------------------------------------------------
|
||||
magic | 105121344
|
||||
version | 4
|
||||
ntuples | 500500
|
||||
@ -703,8 +708,8 @@ ovflpoint | 28
|
||||
firstfree | 1204
|
||||
nmaps | 1
|
||||
procid | 450
|
||||
spares | {0,0,0,0,0,0,1,1,1,1,1,1,1,1,3,4,4,4,45,55,58,59,508,567,628,704,1193,1202,1204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
mapp | {65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
spares | {0,0,0,0,0,0,1,1,1,1,1,1,1,1,3,4,4,4,45,55,58,59,508,567,628,704,1193,1202,1204}
|
||||
mapp | {65}
|
||||
</screen>
|
||||
</para>
|
||||
</listitem>
|
||||
|
@ -368,7 +368,7 @@ pending_tuples | 0
|
||||
<programlisting>
|
||||
test=> select * from pgstathashindex('con_hash_index');
|
||||
-[ RECORD 1 ]--+-----------------
|
||||
version | 2
|
||||
version | 4
|
||||
bucket_pages | 33081
|
||||
overflow_pages | 0
|
||||
bitmap_pages | 1
|
||||
|
@ -158,8 +158,7 @@ typedef HashScanOpaqueData *HashScanOpaque;
|
||||
#define HASH_METAPAGE 0 /* metapage is always block 0 */
|
||||
|
||||
#define HASH_MAGIC 0x6440640
|
||||
#define HASH_VERSION 3 /* 3 signifies multi-phased bucket allocation
|
||||
* to reduce doubling */
|
||||
#define HASH_VERSION 4
|
||||
|
||||
/*
|
||||
* spares[] holds the number of overflow pages currently allocated at or
|
||||
@ -182,10 +181,10 @@ typedef HashScanOpaqueData *HashScanOpaque;
|
||||
* after HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE).
|
||||
*
|
||||
* There is no particular upper limit on the size of mapp[], other than
|
||||
* needing to fit into the metapage. (With 8K block size, 128 bitmaps
|
||||
* limit us to 64 GB of overflow space...)
|
||||
* needing to fit into the metapage. (With 8K block size, 1024 bitmaps
|
||||
* limit us to 256 GB of overflow space...)
|
||||
*/
|
||||
#define HASH_MAX_BITMAPS 128
|
||||
#define HASH_MAX_BITMAPS 1024
|
||||
|
||||
#define HASH_SPLITPOINT_PHASE_BITS 2
|
||||
#define HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS)
|
||||
|
Loading…
x
Reference in New Issue
Block a user