diff --git a/Docs/source-object-backing-store b/Docs/source-object-backing-store
new file mode 100644
index 000000000..e55a99db3
--- /dev/null
+++ b/Docs/source-object-backing-store
@@ -0,0 +1,194 @@
+Source Object (low level) cache backing store
+=============================================
+
+Introduction
+------------
+
+The source object cache provides a system to extend the life of source
+objects (HTML files, images etc.) after they are no longer immediately
+in use.
+
+Only fetch types where we have well defined rules on caching are
+considered; in practice this limits us to HTTP(S). The section in
+RFC 2616 [1] on caching specifies these rules.
+
+To further extend the objects' lifetime they can be pushed into a
+backing store where the objects are available for reuse less quickly
+than from RAM but faster than retrieving them from the network again.
+
+The backing store implementation provides a key:value infrastructure
+with a simple store, retrieve and invalidate interface.
+
+Generic filesystem backing store
+--------------------------------
+
+Although the backing store interface is fully pluggable, a generic
+implementation is provided which stores objects on the filesystem in
+a hierarchy of directories.
+
+The option to alter the backing store format exists and is controlled
+by a version field. It is implementation defined what happens if a
+version mismatch occurs.
+
+As the backing store only holds cache data one should not expect a
+great deal of effort to be expended converting formats (i.e. the cache
+may simply be discarded).
+
+Layout version 1
+----------------
+
+An object has an identifier value generated from the URL (the NetSurf
+backing store uses the URL as the unique key). The value used is
+obtained using nsurl_hash(), which is currently a 32 bit FNV, so it is
+directly usable.
+
+This identifier is adequate to ensure the collision rate for the
+hashed URL values (a collision for every 2^16 URLs added) is
+sufficiently low that the overhead of returning the wrong object
+(which backing stores are permitted to do) is not significant.
+
+An entry list is maintained which contains all the metadata about a
+given identifier. This list is limited in length to constrain the
+resources necessary to maintain it. It is made persistent to avoid the
+overhead of reconstructing it at initialisation and to keep the data
+used to improve the eviction decisions.
+
+Each object is stored to and retrieved from the filesystem directly,
+using a filename generated from a base64url encoding of an address
+value. The object's address is derived from the identifier by cropping
+it to a shorter length.
+
+A mapping between the object address and its entry is maintained; this
+requires storage proportional to the size of the address space (i.e.
+two to the power of the address length in bits).
+
+The cropping length is stored in the control file with the default
+values set at compile time. This allows existing backing stores to
+continue operating with existing data independently of new default
+settings. This setting gives some ability to tune the default cache
+index size to values suitable for a specific host operating system.
+
+E.g. Linux based systems can easily cope with several megabytes of
+mmapped index but RISC OS might want to limit this to a few megabytes
+of heap at most.
+
+The files are stored on disc using their base64url address value.
+By creating a directory for each character of the encoded filename
+(except the last, which is of course the leafname) we create a
+directory structure where no directory has more than 64 entries.
+
+E.g. A 19 bit address of 0x1 would be base64url encoded into AAAB,
+resulting in the data being stored in a file path of
+"/store/prefix/data/B/A/A/BAAAAA".
+
+An address of 0x00040001 encodes to BAAB, giving a file path of
+"/store/prefix/meta/B/A/A/BAABAA".
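+
+For illustration, the sketch below (hypothetical helper code, not the
+implementation itself; the real logic is store_fname() in
+content/fs_backing_store.c) shows how an identifier maps to a path.
+It encodes the least significant six bits into the first character
+and assumes an address length of 19 to 24 bits, and therefore three
+directory levels:
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  static const char b64u[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+  /* hypothetical helper: print the storage path for an identifier */
+  static void print_path(const char *prefix, const char *dat, uint32_t ident)
+  {
+      char enc[7];
+      int i;
+
+      /* six bits per character, least significant bits first */
+      for (i = 0; i < 6; i++)
+          enc[i] = b64u[(ident >> (6 * i)) & 0x3f];
+      enc[6] = 0;
+
+      /* one directory per significant character except the last;
+       * the leafname is the full encoded identifier
+       */
+      printf("%s/%s/%c/%c/%c/%s\n",
+             prefix, dat, enc[0], enc[1], enc[2], enc);
+  }
+
+  /* print_path("/store/prefix", "data", 0x1) prints
+   *   /store/prefix/data/B/A/A/BAAAAA
+   * print_path("/store/prefix", "meta", 0x00040001) prints
+   *   /store/prefix/meta/B/A/A/BAABAA
+   */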
+
+Control files
+~~~~~~~~~~~~~
+
+control
++++++++
+A control file is used to hold a list of values describing how the
+other files in the backing store should be used.
+
+entries
++++++++
+
+This file contains a table of entries describing the files held on the
+filesystem.
+
+Each control file table entry is 28 bytes and consists of
+
+ - signed 64 bit value for last use time
+
+ - 32 bit full URL hash allowing for index reconstruction and
+   additional collision detection. Also the possibility of increasing
+   the ADDRESS_LENGTH although this would require renaming all the
+   existing files in the cache and is not currently implemented.
+
+ - unsigned 32 bit length for data
+
+ - unsigned 32 bit length for metadata
+
+ - unsigned 16 bit value for number of times used.
+
+ - unsigned 16 bit value for flags
+
+ - unsigned 16 bit value for data block index (unused)
+
+ - unsigned 16 bit value for metadata block index (unused)
+
+Address to entry index
+~~~~~~~~~~~~~~~~~~~~~~
+
+An entry index is held in RAM that allows looking up the address to
+map to an entry in the control file.
+
+The index is the only data structure whose size is directly dependent
+on the length of the hash, specifically:
+
+(2 ^ (ADDRESS_BITS - 3)) * ENTRY_BITS bytes
+
+where ADDRESS_BITS is how long the address is in bits and ENTRY_BITS
+is the number of bits used to store an entry index, which bounds how
+many entries the control file (and hence the whole cache) may hold.
+
+RISC OS values
+++++++++++++++
+
+By limiting ENTRY_BITS to 14 (16,384 entries) the entries list is
+limited to 448 kilobytes.
+
+The typical values for RISC OS would set ADDRESS_BITS to 18. This
+spreads the entries over 262144 hash values which uses 512 kilobytes
+for the index. Limiting the hash space like this reduces the
+effectiveness of the cache.
+
+A small ADDRESS_BITS value causes a collision (two URLs with the same
+address) to happen roughly for every 2 ^ (ADDRESS_BITS / 2) = 2 ^ 9 =
+512 objects stored. This roughly translates to a cache miss due to
+collision every ten pages navigated to.
+
+Larger systems
+++++++++++++++
+
+In general ENTRY_BITS is set to 16, as this limits the store to 65,536
+objects which, with an average object size of 8 kilobytes, yields half
+a gigabyte of disc usage; this is judged to be sufficient.
+
+For larger systems, e.g. those using the GTK frontend, we would most
+likely select an ADDRESS_BITS of 22, resulting in a collision roughly
+every 2,048 objects but with the index using some 8 megabytes.
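+
+To make these sizes concrete, the sketch below (illustrative only, not
+part of the implementation) computes the index and entry table memory
+for the RISC OS configuration described above, assuming the 16 bit
+entry index and 28 byte entries described earlier:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+      unsigned int address_bits = 18; /* RISC OS style configuration */
+      unsigned int entry_bits = 14;   /* 16,384 entries */
+
+      /* each index bucket holds a 16 bit (2 byte) entry index */
+      size_t index_bytes = ((size_t)1 << address_bits) * 2;
+      /* each entry in the entries file is 28 bytes */
+      size_t entry_bytes = ((size_t)1 << entry_bits) * 28;
+
+      /* prints "index 524288 bytes, entries 458752 bytes" */
+      printf("index %zu bytes, entries %zu bytes\n",
+             index_bytes, entry_bytes);
+      return 0;
+  }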
+
+Typical values
+--------------
+
+Example 1
+~~~~~~~~~
+
+For a store with 1034 objects generated from a random navigation of
+pages linked from the about:welcome page:
+
+Metadata total size is 593608 bytes, an average of 574 bytes per
+object. The majority of the storage is used to hold the URLs and
+headers.
+
+Data total size is 9180475 bytes, a mean of 8879 bytes, with 1648726
+bytes in the largest 10 entries; excluding those gives an average size
+of 7355 bytes.
+
+Example 2
+~~~~~~~~~
+
+355 pages were navigated in 80 minutes starting from the about:welcome
+page and a handful of additional sites (Google image search and
+Reddit).
+
+2018 objects were in the cache at quit, 400 of them from
+news.bbc.co.uk alone.
+
+Metadata total is 987,439 bytes, a mean of 489 bytes.
+
+Data total is 33,127,831 bytes, a mean of 16,416 bytes, including one
+single 5,000,811 byte GIF.
+
+The data total without that GIF is 28,127,020 bytes, a mean of 13,945
+bytes.
+
+[1] http://tools.ietf.org/html/rfc2616#section-13
\ No newline at end of file
diff --git a/Makefile.defaults b/Makefile.defaults index e11fa1fb6..97793318d 100644 --- a/Makefile.defaults +++ b/Makefile.defaults @@ -86,6 +86,11 @@ NETSURF_HOMEPAGE := "about:welcome" # Valid options: YES, NO NETSURF_USE_LIBICONV_PLUG := YES +# Enable building the source object cache filesystem based backing store +# implementation. +# Valid options: YES, NO +NETSURF_FS_BACKING_STORE := NO + # Initial CFLAGS. Optimisation level etc. tend to be target specific. CFLAGS := diff --git a/amiga/gui.c b/amiga/gui.c index ddca092b2..e43bca597 100644 --- a/amiga/gui.c +++ b/amiga/gui.c @@ -5294,7 +5294,7 @@ int main(int argc, char** argv) if (ami_locate_resource(messages, "Messages") == false) die("Cannot open Messages file"); - ret = netsurf_init(messages); + ret = netsurf_init(messages, NULL); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); } diff --git a/atari/gui.c b/atari/gui.c index e5f5770ba..98871f084 100644 --- a/atari/gui.c +++ b/atari/gui.c @@ -1126,7 +1126,7 @@ int main(int argc, char** argv) /* common initialisation */ LOG(("Initialising core...")); - ret = netsurf_init(messages); + ret = netsurf_init(messages, NULL); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); } diff --git a/beos/gui.cpp b/beos/gui.cpp index 4077a8580..56d5bfe99 100644 --- a/beos/gui.cpp +++ b/beos/gui.cpp @@ -1062,7 +1062,7 @@ int main(int argc, char** argv) /* common initialisation */ BPath messages = get_messages_path(); - ret = netsurf_init(messages.Path()); + ret = netsurf_init(messages.Path(), NULL); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); } @@ -1115,7 +1115,7 @@ int gui_init_replicant(int argc, char** argv) /* common initialisation */ BPath messages = get_messages_path(); - ret = netsurf_init(messages.Path()); + ret = netsurf_init(messages.Path(), NULL); if (ret != NSERROR_OK) { // FIXME: must not die when in replicant!
die("NetSurf failed to initialise"); diff --git a/cocoa/NetsurfApp.m b/cocoa/NetsurfApp.m index 3a9572309..f9cc98885 100644 --- a/cocoa/NetsurfApp.m +++ b/cocoa/NetsurfApp.m @@ -217,7 +217,7 @@ int main( int argc, char **argv ) nsoption_commandline(&argc, argv, NULL); /* common initialisation */ - error = netsurf_init(messages); + error = netsurf_init(messages, NULL); if (error != NSERROR_OK) { die("NetSurf failed to initialise"); } diff --git a/content/Makefile b/content/Makefile index 557e6c787..ab257eaea 100644 --- a/content/Makefile +++ b/content/Makefile @@ -1,6 +1,11 @@ # Content sources S_CONTENT := content.c content_factory.c dirlist.c fetch.c hlcache.c \ - llcache.c mimesniff.c urldb.c + llcache.c mimesniff.c urldb.c no_backing_store.c -S_CONTENT := $(addprefix content/,$(S_CONTENT)) \ No newline at end of file +# Make filesystem backing store available +ifeq ($(NETSURF_FS_BACKING_STORE),YES) + S_CONTENT += fs_backing_store.c +endif + +S_CONTENT := $(addprefix content/,$(S_CONTENT)) diff --git a/content/backing_store.h b/content/backing_store.h new file mode 100644 index 000000000..849e11aeb --- /dev/null +++ b/content/backing_store.h @@ -0,0 +1,100 @@ +/* + * Copyright 2014 Vincent Sanders + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** \file + * Low-level source data cache backing store interface + */ + +#ifndef NETSURF_CONTENT_LLCACHE_PRIVATE_H_ +#define NETSURF_CONTENT_LLCACHE_PRIVATE_H_ + +#include "content/llcache.h" + +/** storage control flags */ +enum backing_store_flags { + BACKING_STORE_NONE = 0, /**< no special processing */ + BACKING_STORE_META = 1, /**< data is metadata */ + BACKING_STORE_MMAP = 2, /**< when data is retrived this indicates the + * returned buffer may be memory mapped, + * flag must be cleared if the storage is + * allocated and is not memory mapped. + */ +}; + +/** low level cache backing store operation table + * + * The low level cache (source objects) has the capability to make + * objects and their metadata (headers etc) persistant by writing to a + * backing store using these operations. + */ +struct gui_llcache_table { + /** + * Initialise the backing store. + * + * @param parameters to configure backing store. + * @return NSERROR_OK on success or error code on faliure. + */ + nserror (*initialise)(const struct llcache_store_parameters *parameters); + + /** + * Finalise the backing store. + * + * @return NSERROR_OK on success or error code on faliure. + */ + nserror (*finalise)(void); + + /** + * Place an object in the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the obejct is stored. + * @param data The objects data. + * @param datalen The length of the \a data. + * @return NSERROR_OK on success or error code on faliure. 
+ */ + nserror (*store)(struct nsurl *url, enum backing_store_flags flags, + const uint8_t *data, const size_t datalen); + + /** + * Retrive an object from the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is retrived. + * @param data The objects data. + * @param datalen The length of the \a data retrieved. + * @return NSERROR_OK on success or error code on faliure. + */ + nserror (*fetch)(struct nsurl *url, enum backing_store_flags *flags, + uint8_t **data, size_t *datalen); + + /** + * Invalidate a source object from the backing store. + * + * The entry (if present in the backing store) must no longer + * be returned as a result to the fetch or meta operations. + * + * @param url The url is used as the unique primary key to invalidate. + * @return NSERROR_OK on success or error code on faliure. + */ + nserror (*invalidate)(struct nsurl *url); +}; + +extern struct gui_llcache_table* null_llcache_table; +extern struct gui_llcache_table* filesystem_llcache_table; + +#endif diff --git a/content/fs_backing_store.c b/content/fs_backing_store.c new file mode 100644 index 000000000..29856813f --- /dev/null +++ b/content/fs_backing_store.c @@ -0,0 +1,1197 @@ +/* + * Copyright 2014 Vincent Sanders + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** \file + * Low-level resource cache persistent storage implementation. + * + * file based backing store. + * + * \todo Consider improving eviction sorting to include objects size + * and remaining lifetime and other cost metrics. + * + * \todo make backing store have a more efficient small object storage. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/filepath.h" +#include "utils/file.h" +#include "utils/nsurl.h" +#include "utils/log.h" +#include "utils/utils.h" +#include "utils/messages.h" +#include "desktop/gui.h" + +#include "content/backing_store.h" + +/** Default number of bits of the ident to use in index hash */ +#define DEFAULT_IDENT_SIZE 20 + +/** Default number of bits to use for an entry index. */ +#define DEFAULT_ENTRY_SIZE 16 + +/** Backing store file format version */ +#define CONTROL_VERSION 100 + +/** Get address from ident */ +#define BS_ADDRESS(ident, state) ((ident) & ((1 << state->ident_bits) - 1)) + +/** Lookup store entry index from ident */ +#define BS_ENTRY_INDEX(ident, state) state->addrmap[(ident) & ((1 << state->ident_bits) - 1)] + +/** Get store entry from ident. */ +#define BS_ENTRY(ident, state) state->entries[state->addrmap[(ident) & ((1 << state->ident_bits) - 1)]] + +enum store_entry_flags { + STORE_ENTRY_FLAG_NONE = 0, +}; + +/** + * The type used to store index values refering to store entries. Care + * must be taken with this type as it is used to build address to + * entry mapping so changing the size will have large impacts on + * memory usage. 
+ */ +typedef uint16_t entry_index_t; + +/** + * The type used as a binary identifier for each entry derived from + * the url. A larger identifier will have fewer collisions but + * requires proportionately more storage. + */ +typedef uint32_t entry_ident_t; + +/** + * Backing store object index entry. + * + * @note Order is important to avoid structure packing overhead. + */ +struct store_entry { + int64_t last_used; /**< unix time the entry was last used */ + entry_ident_t ident; /**< entry identifier */ + uint32_t data_alloc; /**< currently allocated size of data on disc */ + uint32_t meta_alloc; /**< currently allocated size of metadata on disc */ + uint16_t use_count; /**< number of times this entry has been accessed */ + uint16_t flags; /**< entry flags (unused) */ + uint16_t data_block; /**< small object data block entry (unused) */ + uint16_t meta_block; /**< small object meta block entry (unused) */ +}; + +/** + * Parameters controlling the backing store. + */ +struct store_state { + char *path; /**< The path to the backing store */ + size_t limit; /**< The backing store upper bound target size */ + size_t hysteresis; /**< The hysteresis around the target size */ + + unsigned int ident_bits; /**< log2 number of bits to use for address. */ + + struct store_entry *entries; /**< store entries. */ + unsigned int entry_bits; /**< log2 number of bits in entry index. */ + unsigned int last_entry; /**< index of last usable entry. */ + + /** flag indicating if the entries have been made persistant + * since they were last changed. + */ + bool entries_dirty; + + /** URL identifier to entry index mapping. + * + * This is an open coded index on the entries url field and + * provides a computationaly inexpensive way to go from the + * url to an entry. + */ + entry_index_t *addrmap; + + uint64_t total_alloc; /**< total size of all allocated storage. */ + + size_t hit_count; /**< number of cache hits */ + uint64_t hit_size; /**< size of storage served */ + size_t miss_count; /**< number of cache misses */ + +}; + +/** + * Global storage state. + * + * @todo Investigate if there is a way to have a context rather than + * use a global. + */ +struct store_state *storestate; + + + +/** + * Remove a backing store entry from the entry table. + * + * This finds the store entry associated with the given key and + * removes it from the table. The removed entry is returned but is + * only valid until the next set_store_entry call. + * + * @param state The store state to use. + * @param url The value used as the unique key to search entries for. + * @param bse Pointer used to return value. + * @return NSERROR_OK and bse updated on succes or NSERROR_NOT_FOUND + * if no entry coresponds to the url. + */ +static nserror +remove_store_entry(struct store_state *state, + entry_ident_t ident, + struct store_entry **bse) +{ + entry_index_t sei; /* store entry index */ + + sei = BS_ENTRY_INDEX(ident, state); + if (sei == 0) { + LOG(("ident 0x%08x not in index", ident)); + return NSERROR_NOT_FOUND; + } + + if (state->entries[sei].ident != ident) { + /* entry ident did not match */ + LOG(("ident 0x%08x did not match entry index %d", ident, sei)); + return NSERROR_NOT_FOUND; + } + + /* sei is entry to be removed, we swap it to the end of the + * table so there are no gaps and the returned entry is held + * in storage with reasonable lifetime. 
+ */ + + /* remove entry from map */ + BS_ENTRY_INDEX(ident, state) = 0; + + /* global allocation accounting */ + state->total_alloc -= state->entries[sei].data_alloc; + state->total_alloc -= state->entries[sei].meta_alloc; + + state->last_entry--; + + if (sei == state->last_entry) { + /* the removed entry was the last one, how conveniant */ + *bse = &state->entries[sei]; + } else { + /* need to swap entries */ + struct store_entry tent; + + tent = state->entries[sei]; + state->entries[sei] = state->entries[state->last_entry]; + state->entries[state->last_entry] = tent; + + /* update map for moved entry */ + BS_ENTRY_INDEX(state->entries[sei].ident, state) = sei; + + *bse = &state->entries[state->last_entry]; + } + + return NSERROR_OK; +} + + +/** + * Generate a filename for an object. + * + * @param state The store state to use. + * @param ident The identifier to use. + * @return The filename string or NULL on allocation error. + */ +static char * +store_fname(struct store_state *state, + entry_ident_t ident, + enum backing_store_flags flags) +{ + char *fname = NULL; + uint8_t b64u_i[7]; /* base64 ident */ + uint8_t b64u_d[6][2]; /* base64 ident as separate components */ + const char *dat; + + /** Base64url encoding table */ + static const uint8_t encoding_table[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_' + }; + + /* base64 encode ident */ + b64u_i[0] = b64u_d[0][0] = encoding_table[(ident ) & 0x3f]; + b64u_i[1] = b64u_d[1][0] = encoding_table[(ident >> 6) & 0x3f]; + b64u_i[2] = b64u_d[2][0] = encoding_table[(ident >> 12) & 0x3f]; + b64u_i[3] = b64u_d[3][0] = encoding_table[(ident >> 18) & 0x3f]; + b64u_i[4] = b64u_d[4][0] = encoding_table[(ident >> 24) & 0x3f]; + b64u_i[5] = b64u_d[5][0] = encoding_table[(ident >> 30) & 0x3f]; + /* null terminate strings */ + b64u_i[6] = b64u_d[0][1] = b64u_d[1][1] = b64u_d[2][1] = + b64u_d[3][1] = b64u_d[4][1] = b64u_d[5][1] = 0; + + if ((flags & BACKING_STORE_META) != 0) { + dat = "meta"; + } else { + dat = "data"; + } + + /* number of chars with usefully encoded data in b64 */ + switch(((state->ident_bits + 5) / 6)) { + case 1: + netsurf_mkpath(&fname, NULL, 3, + state->path, + dat, + b64u_i); + break; + + case 2: + netsurf_mkpath(&fname, NULL, 4, + state->path, + dat, + b64u_d[0], + b64u_i); + break; + + case 3: + netsurf_mkpath(&fname, NULL, 5, + state->path, + dat, + b64u_d[0], + b64u_d[1], + b64u_i); + break; + + case 4: + netsurf_mkpath(&fname, NULL, 6, + state->path, + dat, + b64u_d[0], + b64u_d[1], + b64u_d[2], + b64u_i); + break; + + case 5: + netsurf_mkpath(&fname, NULL, 7, + state->path, + dat, + b64u_d[0], + b64u_d[1], + b64u_d[2], + b64u_d[3], + b64u_i); + break; + + case 6: + netsurf_mkpath(&fname, NULL, 8, + state->path, + dat, + b64u_d[0], + b64u_d[1], + b64u_d[2], + b64u_d[3], + b64u_d[4], + b64u_i); + break; + + default: + assert(false); + } + + return fname; +} + + +/** + * Remove the entry and files associated with an identifier. + * + * @param state The store state to use. + * @param ident The identifier to use. + * @return NSERROR_OK on sucess or error code on failure. 
+ */ +static nserror +unlink_ident(struct store_state *state, entry_ident_t ident) +{ + char *fname; + nserror ret; + struct store_entry *bse; + + /* LOG(("ident %08x", ident)); */ + + /* use the url hash as the entry identifier */ + ret = remove_store_entry(state, ident, &bse); + if (ret != NSERROR_OK) { + /* LOG(("entry not found")); */ + return ret; + } + + fname = store_fname(state, bse->ident, BACKING_STORE_META); + if (fname == NULL) { + return NSERROR_NOMEM; + } + unlink(fname); + free(fname); + + fname = store_fname(state, bse->ident, BACKING_STORE_NONE); + if (fname == NULL) { + return NSERROR_NOMEM; + } + unlink(fname); + free(fname); + + return NSERROR_OK; +} + + +/** + * Quick sort comparison. + */ +static int compar(const void *va, const void *vb) +{ + const struct store_entry *a = &BS_ENTRY(*(entry_ident_t *)va, storestate); + const struct store_entry *b = &BS_ENTRY(*(entry_ident_t *)vb, storestate); + + if (a->use_count < b->use_count) { + return -1; + } else if (a->use_count > b->use_count) { + return 1; + } + /* use count is the same - now consider last use time */ + + if (a->last_used < b->last_used) { + return -1; + } else if (a->last_used > b->last_used) { + return 1; + } + + /* they are the same */ + return 0; +} + + +/** + * Evict entries from backing store as per configuration. + * + * Entries are evicted to ensure the cache remains within the + * configured limits on size and number of entries. + * + * The approach is to check if the cache limits have been exceeded and + * if so build and sort list of entries to evict. The list is sorted + * by use count and then by age, so oldest object with least number of uses + * get evicted first. + * + * @param state The store state to use. + * @return NSERROR_OK on success or error code on failure. + */ +static nserror store_evict(struct store_state *state) +{ + entry_ident_t *elist; /* sorted list of entry identifiers */ + unsigned int ent; + unsigned int ent_count; + size_t removed; /* size of removed entries */ + nserror ret = NSERROR_OK; + + /* check if the cache has exceeded configured limit */ + if ((state->total_alloc < state->limit) && + (state->last_entry < (1U << state->entry_bits))) { + /* cache within limits */ + return NSERROR_OK; + } + + LOG(("Evicting entries to reduce %d by %d", + state->total_alloc, state->hysteresis)); + + /* allocate storage for the list */ + elist = malloc(sizeof(entry_ident_t) * state->last_entry); + if (elist == NULL) { + return NSERROR_NOMEM; + } + + /* sort the list avoiding entry 0 which is the empty sentinel */ + for (ent = 1; ent < state->last_entry; ent++) { + elist[ent - 1] = state->entries[ent].ident; + } + ent_count = ent - 1; /* important to keep this as the entry count will change when entries are removed */ + qsort(elist, ent_count, sizeof(entry_ident_t), compar); + + /* evict entries in listed order */ + removed = 0; + for (ent = 0; ent < ent_count; ent++) { + + removed += BS_ENTRY(elist[ent], state).data_alloc; + removed += BS_ENTRY(elist[ent], state).meta_alloc; + + ret = unlink_ident(state, elist[ent]); + if (ret != NSERROR_OK) { + break; + } + + if (removed > state->hysteresis) { + break; + } + } + + free(elist); + + LOG(("removed %d in %d entries", removed, ent)); + + return ret; +} + + +/** + * Lookup a backing store entry in the entry table from a url. + * + * This finds the store entry associated with the given + * key. Additionally if an entry is found it updates the usage data + * about the entry. + * + * @param state The store state to use. 
+ * @param url The value used as the unique key to search entries for. + * @param bse Pointer used to return value. + * @return NSERROR_OK and bse updated on success or NSERROR_NOT_FOUND + * if no entry corresponds to the url. + */ +static nserror +get_store_entry(struct store_state *state, nsurl *url, struct store_entry **bse) +{ + entry_ident_t ident; + unsigned int sei; /* store entry index */ + + LOG(("url:%s", nsurl_access(url))); + + /* use the url hash as the entry identifier */ + ident = nsurl_hash(url); + + sei = BS_ENTRY_INDEX(ident, state); + + if (sei == 0) { + return NSERROR_NOT_FOUND; + } + + if (state->entries[sei].ident != ident) { + /* entry ident did not match */ + LOG(("ident did not match entry")); + return NSERROR_NOT_FOUND; + } + + *bse = &state->entries[sei]; + + state->entries[sei].last_used = time(NULL); + state->entries[sei].use_count++; + + state->entries_dirty = true; + + return NSERROR_OK; +} + + +/** + * Set a backing store entry in the entry table from a url. + * + * This creates a backing store entry in the entry table for a url. + * + * @param url The value used as the unique key to search entries for. + * @param bse Pointer used to return value. + * @return NSERROR_OK and bse updated on succes or NSERROR_NOT_FOUND + * if no entry coresponds to the url. + */ +static nserror +set_store_entry(struct store_state *state, + nsurl *url, + enum backing_store_flags flags, + const uint8_t *data, + const size_t datalen, + struct store_entry **bse) +{ + entry_ident_t ident; + entry_index_t sei; /* store entry index */ + struct store_entry *se; + nserror ret; + bool isrep; /* is the store repalcing an existing entry or not */ + + LOG(("url:%s", nsurl_access(url))); + + /* evict entries as required and ensure there is at least one + * new entry available. + */ + ret = store_evict(state); + if (ret != NSERROR_OK) { + return ret; + } + + /* use the url hash as the entry identifier */ + ident = nsurl_hash(url); + + sei = BS_ENTRY_INDEX(ident, state); + + /** @todo Should this deal with cache eviction? */ + + if (sei == 0) { + /* allocating the next available entry */ + sei = state->last_entry; + state->last_entry++; + BS_ENTRY_INDEX(ident, state) = sei; + isrep = false; + } else { + /* updating or replacing existing entry */ + /** @todo should we be checking the entry ident + * matches the url. Thats a collision in the address + * mapping right? and is it important? + */ + isrep = true; + } + + se = &state->entries[sei]; + + se->ident = ident; + se->flags = STORE_ENTRY_FLAG_NONE; + se->use_count = 1; + se->last_used = time(NULL); + + /* account for allocation */ + if ((flags & BACKING_STORE_META) != 0) { + if (isrep) { + state->total_alloc -= se->meta_alloc; + } else { + se->data_alloc = 0; + } + se->meta_alloc = datalen; + } else { + if (isrep) { + state->total_alloc -= se->data_alloc; + } else { + se->meta_alloc = 0; + } + se->data_alloc = datalen; + } + state->total_alloc += datalen; + + state->entries_dirty = true; + + *bse = se; + + return NSERROR_OK; +} + + + + +/** + * Open a file using a store ident. + * + * @param state The store state to use. + * @param ident The identifier of the file to open. + * @param flags The backing store flags. + * @pram openflags The flags used with the open call. + * @return An fd from the open call or -1 on error. 
+ */ +static int +store_open(struct store_state *state, + uint32_t ident, + enum backing_store_flags flags, + int openflags) +{ + char *fname; + nserror ret; + int fd; + + fname = store_fname(state, ident, flags); + if (fname == NULL) { + LOG(("filename error")); + return -1; + } + + /* ensure path to file is usable */ + ret = filepath_mkdir_all(fname); + if (ret != NSERROR_OK) { + LOG(("file path \"%s\" could not be created", fname)); + free(fname); + return -1; + } + + LOG(("opening %s", fname)); + fd = open(fname, openflags, S_IRUSR | S_IWUSR); + + free(fname); + + return fd; +} + +/** + * Construct address ident to filesystem entry map + * + * To allow a filesystem entry to be found from it's identifier we + * construct an mapping index. This is a hash map from the entries URL + * (its unique key) to filesystem entry. + * + * As the entire entry list must be iterated over to construct the map + * we also compute the total storage in use. + * + * @param state The backing store global state. + * @return NSERROR_OK on sucess or NSERROR_NOMEM if the map storage + * could not be allocated. + */ +static nserror +build_entrymap(struct store_state *state) +{ + unsigned int eloop; + + LOG(("Allocating %d bytes for max of %d buckets", + (1 << state->ident_bits) * sizeof(entry_index_t), + 1 << state->ident_bits)); + + state->addrmap = calloc(1 << state->ident_bits, sizeof(entry_index_t)); + if (state->addrmap == NULL) { + return NSERROR_NOMEM; + } + + state->total_alloc = 0; + + for (eloop = 1; eloop < state->last_entry; eloop++) { + /* + LOG(("entry:%d ident:0x%08x used:%d", + eloop, + BS_ADDRESS(state->entries[eloop].ident, state), + state->entries[eloop].use_count)); + */ + + /* update the address map to point at the entry */ + BS_ENTRY_INDEX(state->entries[eloop].ident, state) = eloop; + + /* account for the storage space */ + state->total_alloc += state->entries[eloop].data_alloc + + state->entries[eloop].meta_alloc; + } + + return NSERROR_OK; +} + +/** + * Write filesystem entries to file. + * + * @param state The backing store state to read the entries from. + * @return NSERROR_OK on sucess or error code on faliure. + */ +static nserror write_entries(struct store_state *state) +{ + int fd; + char *fname = NULL; + nserror ret; + + if (state->entries_dirty == false) { + /* entries have not been updated since last write */ + return NSERROR_OK; + } + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "entries"); + if (ret != NSERROR_OK) { + return ret; + } + + fd = open(fname, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + free(fname); + if (fd == -1) { + return NSERROR_SAVE_FAILED; + } + + write(fd, state->entries, state->last_entry * sizeof(struct store_entry)); + close(fd); + + return NSERROR_OK; +} + +/** + * Read description entries into memory. + * + * @param state The backing store state to put the loaded entries in. + * @return NSERROR_OK on sucess or error code on faliure. 
+ */ +static nserror +read_entries(struct store_state *state) +{ + int fd; + ssize_t rd; + size_t entries_size; + char *fname = NULL; + nserror ret; + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "entries"); + if (ret != NSERROR_OK) { + return ret; + } + + entries_size = (1 << state->entry_bits) * sizeof(struct store_entry); + + LOG(("Allocating %d bytes for max of %d entries", + entries_size, 1 << state->entry_bits)); + + state->entries = calloc(1, entries_size); + if (state->entries == NULL) { + free(fname); + return NSERROR_NOMEM; + } + + fd = open(fname, O_RDWR); + free(fname); + if (fd != -1) { + rd = read(fd, state->entries, entries_size); + close(fd); + if (rd > 0) { + state->last_entry = rd / sizeof(struct store_entry); + LOG(("Read %d entries", state->last_entry)); + } + } else { + /* could rebuild entries from fs */ + state->last_entry = 1; + } + return NSERROR_OK; +} + + +/** + * Write the control file for the current state. + * + * @param state The state to write to the control file. + * @return NSERROR_OK on sucess or error code on faliure. + */ +static nserror +write_control(struct store_state *state) +{ + FILE *fcontrol; + nserror ret; + char *fname = NULL; + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "control"); + if (ret != NSERROR_OK) { + return ret; + } + + ret = filepath_mkdir_all(fname); + if (ret != NSERROR_OK) { + free(fname); + return ret; + } + + fcontrol = fopen(fname, "wb"); + + free(fname); + + if (fcontrol == NULL) { + return NSERROR_NOT_FOUND; + } + + fprintf(fcontrol, "%u%c", CONTROL_VERSION, 0); + fprintf(fcontrol, "%u%c", state->entry_bits, 0); + fprintf(fcontrol, "%u%c", state->ident_bits, 0); + fprintf(fcontrol, "%u%c", state->last_entry, 0); + + fclose(fcontrol); + + return NSERROR_OK; +} + + +/** + * Read and parse the control file. + * + * @param state The state to read from the control file. + * @return NSERROR_OK on sucess or error code on faliure. + */ +static nserror +read_control(struct store_state *state) +{ + nserror ret; + FILE *fcontrol; + unsigned int ctrlversion; + unsigned int addrbits; + unsigned int entrybits; + char *fname = NULL; + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "control"); + if (ret != NSERROR_OK) { + return ret; + } + + fcontrol = fopen(fname, "rb"); + + free(fname); + + if (fcontrol == NULL) { + /* unable to open control file */ + if (errno == ENOENT) { + return NSERROR_NOT_FOUND; + } else { + return NSERROR_INIT_FAILED; + } + } + + /* read control and setup new state */ + + /* first line is version */ + if (fscanf(fcontrol, "%u", &ctrlversion) != 1) { + goto control_error; + } + + if (ctrlversion != CONTROL_VERSION) { + goto control_error; + } + + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + /* second line is log2 max number of entries */ + if (fscanf(fcontrol, "%u", &entrybits) != 1) { + LOG(("c")); + goto control_error; + } + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + /* second line is log2 size of address hash */ + if (fscanf(fcontrol, "%u", &addrbits) != 1) { + LOG(("d")); + goto control_error; + } + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + fclose(fcontrol); + + state->entry_bits = entrybits; + state->ident_bits = addrbits; + + return NSERROR_OK; + +control_error: /* problem with the control file */ + + fclose(fcontrol); + + return NSERROR_INIT_FAILED; +} + + + + +/* Functions exported in the backing store table */ + +/** + * Initialise the backing store. + * + * @param parameters to configure backing store. 
+ * @return NSERROR_OK on success or error code on faliure. + */ +static nserror +initialise(const struct llcache_store_parameters *parameters) +{ + struct store_state *newstate; + nserror ret; + + /* check backing store is not already initialised */ + if (storestate != NULL) { + return NSERROR_INIT_FAILED; + } + + /* if we are not allowed any space simply give up on init */ + if (parameters->limit == 0) { + return NSERROR_OK; + } + + /* if the path to the cache directory is not set do not init */ + if (parameters->path == NULL) { + return NSERROR_OK; + } + + /* allocate new store state and set defaults */ + newstate = calloc(1, sizeof(struct store_state)); + if (newstate == NULL) { + return NSERROR_NOMEM; + } + + newstate->path = strdup(parameters->path); + newstate->limit = parameters->limit; + newstate->hysteresis = parameters->hysteresis; + + if (parameters->address_size == 0) { + newstate->ident_bits = DEFAULT_IDENT_SIZE; + } else { + newstate->ident_bits = parameters->address_size; + } + + if (parameters->entry_size == 0) { + newstate->entry_bits = DEFAULT_ENTRY_SIZE; + } else { + newstate->entry_bits = parameters->entry_size; + } + + ret = read_control(newstate); + if (ret != NSERROR_OK) { + LOG(("read control failed %s", messages_get_errorcode(ret))); + ret = write_control(newstate); + } + if (ret != NSERROR_OK) { + /* that went well obviously */ + free(newstate->path); + free(newstate); + return ret; + } + + /* ensure the maximum number of entries can be represented in + * the type available to store it. + */ + if (newstate->entry_bits > (8 * sizeof(entry_index_t))) { + newstate->entry_bits = (8 * sizeof(entry_index_t)); + } + + /* read filesystem entries */ + ret = read_entries(newstate); + if (ret != NSERROR_OK) { + /* that went well obviously */ + free(newstate->path); + free(newstate); + return ret; + } + + /* build entry hash map */ + ret = build_entrymap(newstate); + if (ret != NSERROR_OK) { + /* that obviously went well */ + free(newstate->path); + free(newstate); + return ret; + } + + storestate = newstate; + + LOG(("FS backing store init successful")); + + LOG(("path:%s limit:%d hyst:%d addr:%d entries:%d", newstate->path, newstate->limit, newstate->hysteresis, newstate->ident_bits, newstate->entry_bits)); + LOG(("Using %d/%d", newstate->total_alloc, newstate->limit)); + + return NSERROR_OK; +} + + +/** + * Finalise the backing store. + * + * @return NSERROR_OK on success. + */ +static nserror +finalise(void) +{ + if (storestate != NULL) { + write_entries(storestate); + + /* avoid division by zero */ + if (storestate->miss_count == 0) { + storestate->miss_count = 1; + } + LOG(("hits:%d misses:%d hit ratio:%d returned:%d bytes", + storestate->hit_count, storestate->miss_count, + storestate->hit_count / storestate->miss_count, + storestate->hit_size)); + + free(storestate->path); + free(storestate); + storestate = NULL; + } + return NSERROR_OK; +} + + +/** + * Place an object in the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is stored. + * @param data The objects source data. + * @param datalen The length of the \a data. + * @return NSERROR_OK on success or error code on faliure. 
+ */ +static nserror +store(nsurl *url, + enum backing_store_flags flags, + const uint8_t *data, + const size_t datalen) +{ + nserror ret; + struct store_entry *bse; + int fd; + + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + /* set the store entry up */ + ret = set_store_entry(storestate, url, flags, data, datalen, &bse); + if (ret != NSERROR_OK) { + LOG(("store entry setting failed")); + return ret; + } + + fd = store_open(storestate, bse->ident, flags, O_CREAT | O_WRONLY); + if (fd < 0) { + perror(""); + LOG(("Open failed %d",fd)); + return NSERROR_SAVE_FAILED; + } + + LOG(("Writing %d bytes from %p", datalen, data)); + write(fd, data, datalen); + + close(fd); + + return NSERROR_OK; +} + +/** + * Retrive an object from the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is stored. + * @param data The objects data. + * @param datalen The length of the \a data retrieved. + * @return NSERROR_OK on success or error code on faliure. + */ +static nserror +fetch(nsurl *url, + enum backing_store_flags *flags, + uint8_t **data_out, + size_t *datalen_out) +{ + nserror ret; + struct store_entry *bse; + uint8_t *data; + size_t datalen; + int fd; + ssize_t rd; + + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + ret = get_store_entry(storestate, url, &bse); + if (ret != NSERROR_OK) { + LOG(("entry not found")); + storestate->miss_count++; + return ret; + } + storestate->hit_count++; + + LOG(("retriving cache file for url:%s", nsurl_access(url))); + + fd = store_open(storestate, bse->ident, *flags, O_RDONLY); + if (fd < 0) { + LOG(("Open failed")); + /** @todo should this invalidate the entry? */ + return NSERROR_NOT_FOUND; + } + + data = *data_out; + datalen = *datalen_out; + + /* need to deal with buffers */ + if (data == NULL) { + if (datalen == 0) { + /* caller did not know the files length */ + if (((*flags) & BACKING_STORE_META) != 0) { + datalen = bse->meta_alloc; + } else { + datalen = bse->data_alloc; + } + } + + data = malloc(datalen); + if (data == NULL) { + close(fd); + return NSERROR_NOMEM; + } + } + + /** @todo should this check datalen is sufficient */ + + LOG(("Reading %d bytes into %p from file", datalen, data)); + + /** @todo this read should be an a loop */ + rd = read(fd, data, datalen); + if (rd <= 0) { + LOG(("read returned %d", rd)); + close(fd); + if ((*data_out) == NULL) { + free(data); + } + return NSERROR_NOT_FOUND; + } + + close(fd); + + storestate->hit_size += datalen; + + *data_out = data; + *datalen_out = datalen; + + return NSERROR_OK; +} + + +/** + * Invalidate a source object from the backing store. + * + * The entry (if present in the backing store) must no longer + * be returned as a result to the fetch or meta operations. + * + * @param url The url is used as the unique primary key to invalidate. + * @return NSERROR_OK on success or error code on faliure. 
+ */ +static nserror +invalidate(nsurl *url) +{ + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + LOG(("url:%s", nsurl_access(url))); + + return unlink_ident(storestate, nsurl_hash(url)); +} + + +static struct gui_llcache_table llcache_table = { + .initialise = initialise, + .finalise = finalise, + .store = store, + .fetch = fetch, + .invalidate = invalidate, +}; + +struct gui_llcache_table *filesystem_llcache_table = &llcache_table; diff --git a/content/hlcache.c b/content/hlcache.c index 5a3cc8583..23fb79562 100644 --- a/content/hlcache.c +++ b/content/hlcache.c @@ -339,9 +339,10 @@ static nserror hlcache_migrate_ctx(hlcache_retrieval_ctx *ctx, ctx->migrate_target = true; - if (effective_type != NULL && - hlcache_type_is_acceptable(effective_type, - ctx->accepted_types, &type)) { + if ((effective_type != NULL) && + hlcache_type_is_acceptable(effective_type, + ctx->accepted_types, + &type)) { error = hlcache_find_content(ctx, effective_type); if (error != NSERROR_OK && error != NSERROR_NEED_DATA) { if (ctx->handle->cb != NULL) { @@ -524,9 +525,7 @@ hlcache_initialise(const struct hlcache_parameters *hlcache_parameters) return NSERROR_NOMEM; } - ret = llcache_initialise(hlcache_parameters->cb, - hlcache_parameters->cb_ctx, - hlcache_parameters->limit); + ret = llcache_initialise(&hlcache_parameters->llcache); if (ret != NSERROR_OK) { free(hlcache); hlcache = NULL; diff --git a/content/hlcache.h b/content/hlcache.h index 41f1ed6f4..746b3c866 100644 --- a/content/hlcache.h +++ b/content/hlcache.h @@ -23,11 +23,12 @@ #ifndef NETSURF_CONTENT_HLCACHE_H_ #define NETSURF_CONTENT_HLCACHE_H_ -#include "content/content.h" -#include "content/llcache.h" #include "utils/errors.h" #include "utils/nsurl.h" +#include "content/content.h" +#include "content/llcache.h" + /** High-level cache handle */ typedef struct hlcache_handle hlcache_handle; @@ -44,18 +45,10 @@ typedef struct { } hlcache_event; struct hlcache_parameters { - llcache_query_callback cb; /**< Query handler for llcache */ - void *cb_ctx; /**< Pointer to llcache query handler data */ - /** How frequently the background cache clean process is run (ms) */ unsigned int bg_clean_time; - /** The target upper bound for the cache size */ - size_t limit; - - /** The hysteresis allowed round the target size */ - size_t hysteresis; - + struct llcache_parameters llcache; }; /** @@ -67,13 +60,13 @@ struct hlcache_parameters { * \return NSERROR_OK on success, appropriate error otherwise. */ typedef nserror (*hlcache_handle_callback)(hlcache_handle *handle, - const hlcache_event *event, void *pw); + const hlcache_event *event, void *pw); /** Flags for high-level cache object retrieval */ enum hlcache_retrieve_flag { - /* Note: low-level cache retrieval flags occupy the bottom 16 bits of - * the flags word. High-level cache flags occupy the top 16 bits. - * To avoid confusion, high-level flags are allocated from bit 31 down. + /* Note: low-level cache retrieval flags occupy the bottom 16 bits of + * the flags word. High-level cache flags occupy the top 16 bits. + * To avoid confusion, high-level flags are allocated from bit 31 down. */ /** It's permitted to convert this request into a download */ HLCACHE_RETRIEVE_MAY_DOWNLOAD = (1 << 31), @@ -84,7 +77,7 @@ enum hlcache_retrieve_flag { /** * Initialise the high-level cache, preparing the llcache also. 
* - * \param hlcache_parameters Settings to initialise cache with + * \param hlcache_parameters Settings to initialise cache with * \return NSERROR_OK on success, appropriate error otherwise. */ nserror hlcache_initialise(const struct hlcache_parameters *hlcache_parameters); @@ -133,7 +126,7 @@ nserror hlcache_poll(void); nserror hlcache_handle_retrieve(nsurl *url, uint32_t flags, nsurl *referer, llcache_post_data *post, hlcache_handle_callback cb, void *pw, - hlcache_child_context *child, + hlcache_child_context *child, content_type accepted_types, hlcache_handle **result); /** @@ -169,13 +162,13 @@ nserror hlcache_handle_replace_callback(hlcache_handle *handle, * \param handle Cache handle to dereference * \return Pointer to content object, or NULL if there is none * - * \todo This may not be correct. Ideally, the client should never need to - * directly access a content object. It may, therefore, be better to provide a - * bunch of veneers here that take a hlcache_handle and invoke the + * \todo This may not be correct. Ideally, the client should never need to + * directly access a content object. It may, therefore, be better to provide a + * bunch of veneers here that take a hlcache_handle and invoke the * corresponding content_ API. If there's no content object associated with the - * hlcache_handle (e.g. because the source data is still being fetched, so it - * doesn't exist yet), then these veneers would behave as a NOP. The important - * thing being that the client need not care about this possibility and can + * hlcache_handle (e.g. because the source data is still being fetched, so it + * doesn't exist yet), then these veneers would behave as a NOP. The important + * thing being that the client need not care about this possibility and can * just call the functions with impugnity. */ struct content *hlcache_handle_get_content(const hlcache_handle *handle); diff --git a/content/llcache.c b/content/llcache.c index 112a7fab9..653352d3f 100644 --- a/content/llcache.c +++ b/content/llcache.c @@ -17,26 +17,41 @@ */ /** \file - * Low-level resource cache (implementation) + * Low-level resource cache implementation + * + * This is the implementation of the low level cache. This cache + * stores source objects in memory and may use a persistant backing + * store to extend their lifetime. + * + * \todo fix writeout conditions and ordering. + * + * \todo support mmaped retrieve + * + * \todo instrument and (auto)tune + * + * \todo turn llcache debugging off + * */ #include #include #include - #include -#include "content/fetch.h" -#include "content/llcache.h" -#include "content/urldb.h" #include "utils/corestrings.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/nsurl.h" #include "utils/utils.h" +#include "desktop/gui_factory.h" + +#include "content/fetch.h" +#include "content/backing_store.h" +#include "content/urldb.h" /** Define to enable tracing of llcache operations. 
*/ -#undef LLCACHE_TRACE +//#undef LLCACHE_TRACE +#define LLCACHE_TRACE 1 #ifdef LLCACHE_TRACE #define LLCACHE_LOG(x) LOG(x) @@ -44,6 +59,9 @@ #define LLCACHE_LOG(x) #endif +#define LLCACHE_MIN_DISC_LIFETIME 3600 +#define LLCACHE_MAX_DISC_BANDWIDTH (512*1024) + /** State of a low-level cache object fetch */ typedef enum { LLCACHE_FETCH_INIT, /**< Initial state, before fetch */ @@ -96,19 +114,23 @@ typedef struct { bool outstanding_query; /**< Waiting for a query response */ } llcache_fetch_ctx; +/** validation control */ typedef enum { LLCACHE_VALIDATE_FRESH, /**< Only revalidate if not fresh */ LLCACHE_VALIDATE_ALWAYS, /**< Always revalidate */ LLCACHE_VALIDATE_ONCE /**< Revalidate once only */ } llcache_validate; +/** cache control value for invalid age */ +#define INVALID_AGE -1 + /** Cache control data */ typedef struct { time_t req_time; /**< Time of request */ time_t res_time; /**< Time of response */ + time_t fin_time; /**< Time of request completion */ time_t date; /**< Date: response header */ time_t expires; /**< Expires: response header */ -#define INVALID_AGE -1 int age; /**< Age: response header */ int max_age; /**< Max-Age Cache-control parameter */ llcache_validate no_cache; /**< No-Cache Cache-control parameter */ @@ -122,31 +144,49 @@ typedef struct { char *value; /**< Header value */ } llcache_header; +/** Current status of objects data */ +typedef enum { + LLCACHE_STATE_RAM = 0, /**< source data is stored in RAM only */ + LLCACHE_STATE_MMAP, /**< source data is mmaped (implies on disc too) */ + LLCACHE_STATE_DISC, /**< source data is stored on disc */ +} llcache_store_state; + /** Low-level cache object */ /** \todo Consider whether a list is a sane container */ struct llcache_object { - llcache_object *prev; /**< Previous in list */ - llcache_object *next; /**< Next in list */ + llcache_object *prev; /**< Previous in list */ + llcache_object *next; /**< Next in list */ - nsurl *url; /**< Post-redirect URL for object */ + nsurl *url; /**< Post-redirect URL for object */ /** \todo We need a generic dynamic buffer object */ - uint8_t *source_data; /**< Source data for object */ - size_t source_len; /**< Byte length of source data */ - size_t source_alloc; /**< Allocated size of source buffer */ + uint8_t *source_data; /**< Source data for object */ + size_t source_len; /**< Byte length of source data */ + size_t source_alloc; /**< Allocated size of source buffer */ - llcache_object_user *users; /**< List of users */ + llcache_store_state store_state; /**< where the data for the object is stored */ - llcache_fetch_ctx fetch; /**< Fetch context for object */ + llcache_object_user *users; /**< List of users */ - llcache_cache_control cache; /**< Cache control data for object */ - llcache_object *candidate; /**< Object to use, if fetch determines - * that it is still fresh */ - uint32_t candidate_count; /**< Count of objects this is a - * candidate for */ + llcache_fetch_ctx fetch; /**< Fetch context for object */ - llcache_header *headers; /**< Fetch headers */ - size_t num_headers; /**< Number of fetch headers */ + llcache_cache_control cache; /**< Cache control data for object */ + llcache_object *candidate; /**< Object to use, if fetch determines + * that it is still fresh + */ + uint32_t candidate_count; /**< Count of objects this is a + * candidate for + */ + + llcache_header *headers; /**< Fetch headers */ + size_t num_headers; /**< Number of fetch headers */ + + /* Instrumentation. 
These elemnts are strictly for information + * to improve the cache performance and to provide performace + * metrics. The values are non-authorative and must not be used to + * determine object lifetime etc. + */ + time_t last_used; /**< time the last user was removed from the object */ }; struct llcache_s { @@ -162,7 +202,17 @@ struct llcache_s { /** Head of the low-level uncached object list */ llcache_object *uncached_objects; + /** The target upper bound for the RAM cache size */ uint32_t limit; + + /** The minimum lifetime to consider sending objects to + * backing store. + */ + int minimum_lifetime; + + /** The maximum bandwidth to allow the backing store to use. */ + size_t bandwidth; + }; /** low level cache state */ @@ -261,6 +311,11 @@ static nserror llcache_object_remove_user(llcache_object *object, user->next = user->prev = NULL; + /* record the time the last user was removed from the object */ + if (object->users == NULL) { + object->last_used = time(NULL); + } + LLCACHE_LOG(("Removing user %p from %p", user, object)); return NSERROR_OK; @@ -711,6 +766,7 @@ static nserror llcache_object_refetch(llcache_object *object) /* Reset cache control data */ llcache_invalidate_cache_control_data(object); object->cache.req_time = time(NULL); + object->cache.fin_time = object->cache.req_time; /* Reset fetch state */ object->fetch.state = LLCACHE_FETCH_INIT; @@ -878,7 +934,7 @@ llcache_object_rfc2616_remaining_lifetime(const llcache_cache_control *cd) else freshness_lifetime = 0; - LLCACHE_LOG(("%d:%d", freshness_lifetime, current_age)); + /* LLCACHE_LOG(("%d:%d", freshness_lifetime, current_age)); */ if ((cd->no_cache == LLCACHE_VALIDATE_FRESH) && (freshness_lifetime > current_age)) { @@ -957,6 +1013,7 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, destination->cache.req_time = source->cache.req_time; destination->cache.res_time = source->cache.res_time; + destination->cache.fin_time = source->cache.fin_time; if (source->cache.date != 0) destination->cache.date = source->cache.date; @@ -979,6 +1036,367 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, return NSERROR_OK; } +/** + * Remove a low-level cache object from a cache list + * + * \param object Object to remove + * \param list List to remove from + * \return NSERROR_OK + */ +static nserror +llcache_object_remove_from_list(llcache_object *object, llcache_object **list) +{ + if (object == *list) + *list = object->next; + else + object->prev->next = object->next; + + if (object->next != NULL) + object->next->prev = object->prev; + + return NSERROR_OK; +} + +/** + * Retrieve source data for an object from persistant store if necessary. + * + * If an objects source data has been placed in the persistant store + * and the in memory copy freed this will attempt to retrive the + * source data. + * + * @param object the object to operate on. + * @return apropriate error code. + */ +static nserror llcache_persist_retrieve(llcache_object *object) +{ + enum backing_store_flags flags = BACKING_STORE_NONE; + + /* ensure the source data is present if necessary */ + if ((object->source_data != NULL) || + (object->store_state != LLCACHE_STATE_DISC)) { + /* source data does not require retriving from + * persistant store. 
+ */ + return NSERROR_OK; + } + + /* Source data for the object may be in the persiatant store */ + return guit->llcache->fetch(object->url, + &flags, + &object->source_data, + &object->source_len); +} + +/** + * Generate a serialised version of an objects metadata + * + * metadata includes object headers + */ +static nserror +llcache_serialise_metadata(llcache_object *object, + uint8_t **data_out, + size_t *datasize_out) +{ + size_t allocsize; + int datasize; + uint8_t *data; + char *op; + unsigned int hloop; + int use; + struct tm *ltm; + + allocsize = 10 + 1; /* object length */ + + allocsize += 10 + 1; /* request time */ + + allocsize += 10 + 1; /* response time */ + + allocsize += 10 + 1; /* completion time */ + + allocsize += 10 + 1; /* space for number of header entries */ + + allocsize += nsurl_length(object->url) + 1; + + for (hloop = 0 ; hloop < object->num_headers ; hloop++) { + allocsize += strlen(object->headers[hloop].name) + 1; + allocsize += strlen(object->headers[hloop].value) + 1; + } + + data = malloc(allocsize); + if (data == NULL) { + return NSERROR_NOMEM; + } + + op = (char *)data; + datasize = allocsize; + + /* the url, used for checking for collisions */ + use = snprintf(op, datasize, "%s%c", nsurl_access(object->url), 0); + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + + /* object size */ + use = snprintf(op, datasize, "%zu%c", object->source_len, 0); + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + + /* Time of request */ + ltm = localtime(&object->cache.req_time); + use = strftime(op, datasize, "%s", ltm); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* Time of response */ + ltm = localtime(&object->cache.res_time); + use = strftime(op, datasize, "%s", ltm); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* Time of completion */ + ltm = localtime(&object->cache.fin_time); + use = strftime(op, datasize, "%s", ltm); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* number of headers */ + use = snprintf(op, datasize, "%zu%c", object->num_headers, 0); + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + + /* headers */ + for (hloop = 0 ; hloop < object->num_headers ; hloop++) { + use = snprintf(op, datasize, + "%s:%s%c", + object->headers[hloop].name, + object->headers[hloop].value, + 0); + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + } + + LLCACHE_LOG(("Filled buffer with %d spare", datasize)); + + *data_out = data; + *datasize_out = allocsize - datasize; + + return NSERROR_OK; + +overflow: + /* somehow we overflowed the buffer - hth? */ + LOG(("Overflowed metadata buffer")); + free(data); + return NSERROR_INVALID; +} + +/** + * un-serialise an objects metadata. 
+ */ +static nserror +llcache_process_metadata(llcache_object *object) +{ + nserror res; + uint8_t *metadata = NULL; + size_t metadatalen = 0; + nsurl *metadataurl; + unsigned int line; + uint8_t *end; + char *ln; + int lnsize; + size_t num_headers; + size_t hloop; + struct tm ltm; + enum backing_store_flags flags = BACKING_STORE_META; + + LOG(("Retriving metadata")); + + /* attempt to retrieve object metadata from the backing store */ + res = guit->llcache->fetch(object->url, + &flags, + &metadata, + &metadatalen); + if (res != NSERROR_OK) { + return res; + } + + end = metadata + metadatalen; + + LOG(("Processing retrived data")); + + /* metadata line 1 is the url the metadata referrs to */ + line = 1; + ln = (char *)metadata; + lnsize = strlen(ln); + + if (lnsize < 7) + goto format_error; + + res = nsurl_create(ln, &metadataurl); + if (res != NSERROR_OK) { + free(metadata); + return res; + } + + if (nsurl_compare(object->url, metadataurl, NSURL_COMPLETE) != true) { + /* backing store returned the wrong object for the + * request. This may occour if the backing store had + * a collision in its stoage method. We cope with this + * by simply skipping caching of this object. + */ + + LOG(("Got metadata for %s instead of %s", + nsurl_access(metadataurl), + nsurl_access(object->url))); + + nsurl_unref(metadataurl); + + free(metadata); + + return NSERROR_BAD_URL; + } + nsurl_unref(metadataurl); + + + /* metadata line 2 is the objects length */ + line = 2; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (sscanf(ln, "%zu", &object->source_len) != 1)) + goto format_error; + object->source_alloc = metadatalen; + + /* metadata line 3 is the time of request */ + line = 3; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (strptime(ln, "%s", <m) == NULL)) + goto format_error; + object->cache.req_time = mktime(<m); + + /* metadata line 4 is the time of response */ + line = 4; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (strptime(ln, "%s", <m) == NULL)) + goto format_error; + object->cache.res_time = mktime(<m); + + /* metadata line 5 is the time of request completion */ + line = 5; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (strptime(ln, "%s", <m) == NULL)) + goto format_error; + object->cache.fin_time = mktime(<m); + + + /* metadata line 6 is the number of headers */ + line = 6; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (sscanf(ln, "%zu", &num_headers) != 1)) + goto format_error; + + + /* read headers */ + for (hloop = 0 ; hloop < num_headers; hloop++) { + line++; + ln += lnsize + 1; + lnsize = strlen(ln); + + res = llcache_fetch_process_header(object, (uint8_t *)ln, lnsize); + if (res != NSERROR_OK) { + free(metadata); + return res; + } + } + + free(metadata); + + /* object stored in backing store */ + object->store_state = LLCACHE_STATE_DISC; + + return NSERROR_OK; + +format_error: + LOG(("metadata error on line %d\n", line)); + free(metadata); + return NSERROR_INVALID; + +} + +/** + * attempt to retrieve an object from persistant storage. 
+ */ +static nserror +llcache_object_fetch_persistant(llcache_object *object, + uint32_t flags, + nsurl *referer, + const llcache_post_data *post, + uint32_t redirect_count) +{ + nserror error; + nsurl *referer_clone = NULL; + llcache_post_data *post_clone = NULL; + + object->cache.req_time = time(NULL); + object->cache.fin_time = object->cache.req_time; + + /* retrieve and process metadata */ + error = llcache_process_metadata(object); + if (error != NSERROR_OK) { + return error; + } + + /* entry came out of cache - need to setup object state */ + if (post != NULL) { + error = llcache_post_data_clone(post, &post_clone); + if (error != NSERROR_OK) + return error; + } + + if (referer != NULL) { + referer_clone = nsurl_ref(referer); + } + + object->fetch.flags = flags; + object->fetch.referer = referer_clone; + object->fetch.post = post_clone; + object->fetch.redirect_count = redirect_count; + + /* fetch is "finished" */ + object->fetch.state = LLCACHE_FETCH_COMPLETE; + object->fetch.fetch = NULL; + + return NSERROR_OK; +} + /** * Retrieve a potentially cached object * @@ -990,89 +1408,158 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, * \param result Pointer to location to recieve retrieved object * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_object_retrieve_from_cache(nsurl *url, uint32_t flags, - nsurl *referer, const llcache_post_data *post, - uint32_t redirect_count, llcache_object **result) +static nserror +llcache_object_retrieve_from_cache(nsurl *url, + uint32_t flags, + nsurl *referer, + const llcache_post_data *post, + uint32_t redirect_count, + llcache_object **result) { nserror error; llcache_object *obj, *newest = NULL; - LLCACHE_LOG(("Searching cache for %s (%x %p %p)", - nsurl_access(url), flags, referer, post)); + LLCACHE_LOG(("Searching cache for %s flags:%x referer:%s post:%p", + nsurl_access(url), flags, referer==NULL?"":nsurl_access(referer), post)); /* Search for the most recently fetched matching object */ for (obj = llcache->cached_objects; obj != NULL; obj = obj->next) { if ((newest == NULL || - obj->cache.req_time > newest->cache.req_time) && - nsurl_compare(obj->url, url, - NSURL_COMPLETE) == true) { + obj->cache.req_time > newest->cache.req_time) && + nsurl_compare(obj->url, url, + NSURL_COMPLETE) == true) { newest = obj; } } - if (newest != NULL && llcache_object_is_fresh(newest)) { - /* Found a suitable object, and it's still fresh, so use it */ - obj = newest; + /* No viable object found in cache create one and attempt to + * pull from persistant store. + */ + if (newest == NULL) { + LLCACHE_LOG(("No viable object found in cache")); - LLCACHE_LOG(("Found fresh %p", obj)); + error = llcache_object_new(url, &obj); + if (error != NSERROR_OK) + return error; + + /* attempt to retrieve object from persistant store */ + error = llcache_object_fetch_persistant(obj, flags, referer, post, redirect_count); + if (error == NSERROR_OK) { + LLCACHE_LOG(("retrived object from persistant store")); + + /* set object from persistant store as newest */ + newest = obj; + + /* Add new object to cached object list */ + llcache_object_add_to_list(obj, &llcache->cached_objects); + + } + /* else no object found and unretrivable from cache, + * fall through to start fetch + */ + } + + if ((newest != NULL) && (llcache_object_is_fresh(newest))) { + /* Found a suitable object, and it's still fresh */ + LLCACHE_LOG(("Found fresh %p", newest)); /* The client needs to catch up with the object's state. 
* This will occur the next time that llcache_poll is called. */ + + /* ensure the source data is present */ + error = llcache_persist_retrieve(newest); + if (error == NSERROR_OK) { + /* source data was sucessfully retrived from + * persistant store + */ + *result = newest; + + return NSERROR_OK; + } + + /* retrival of source data from persistant store + * failed, destroy cache object and fall though to + * cache miss to re-retch + */ + LLCACHE_LOG(("Persistant retrival failed for %p", newest)); + + llcache_object_remove_from_list(newest, &llcache->cached_objects); + llcache_object_destroy(newest); + + error = llcache_object_new(url, &obj); + if (error != NSERROR_OK) { + return error; + } } else if (newest != NULL) { /* Found a candidate object but it needs freshness validation */ - /* Create a new object */ + /* ensure the source data is present */ + error = llcache_persist_retrieve(newest); + if (error == NSERROR_OK) { + + /* Create a new object */ + error = llcache_object_new(url, &obj); + if (error != NSERROR_OK) + return error; + + LLCACHE_LOG(("Found candidate %p (%p)", obj, newest)); + + /* Clone candidate's cache data */ + error = llcache_object_clone_cache_data(newest, obj, true); + if (error != NSERROR_OK) { + llcache_object_destroy(obj); + return error; + } + + /* Record candidate, so we can fall back if it is still fresh */ + newest->candidate_count++; + obj->candidate = newest; + + /* Attempt to kick-off fetch */ + error = llcache_object_fetch(obj, flags, referer, post, + redirect_count); + if (error != NSERROR_OK) { + newest->candidate_count--; + llcache_object_destroy(obj); + return error; + } + + /* Add new object to cache */ + llcache_object_add_to_list(obj, &llcache->cached_objects); + + *result = obj; + + return NSERROR_OK; + } + + LLCACHE_LOG(("Persistant retrival failed for %p", newest)); + + /* retrival of source data from persistant store + * failed, destroy cache object and fall though to + * cache miss to re-retch + */ + llcache_object_remove_from_list(newest, + &llcache->cached_objects); + llcache_object_destroy(newest); + error = llcache_object_new(url, &obj); - if (error != NSERROR_OK) - return error; - - LLCACHE_LOG(("Found candidate %p (%p)", obj, newest)); - - /* Clone candidate's cache data */ - error = llcache_object_clone_cache_data(newest, obj, true); if (error != NSERROR_OK) { - llcache_object_destroy(obj); return error; } - - /* Record candidate, so we can fall back if it is still fresh */ - newest->candidate_count++; - obj->candidate = newest; - - /* Attempt to kick-off fetch */ - error = llcache_object_fetch(obj, flags, referer, post, - redirect_count); - if (error != NSERROR_OK) { - newest->candidate_count--; - llcache_object_destroy(obj); - return error; - } - - /* Add new object to cache */ - llcache_object_add_to_list(obj, &llcache->cached_objects); - } else { - /* No object found; create a new one */ - /* Create new object */ - error = llcache_object_new(url, &obj); - if (error != NSERROR_OK) - return error; - - LLCACHE_LOG(("Not found %p", obj)); - - /* Attempt to kick-off fetch */ - error = llcache_object_fetch(obj, flags, referer, post, - redirect_count); - if (error != NSERROR_OK) { - llcache_object_destroy(obj); - return error; - } - - /* Add new object to cache */ - llcache_object_add_to_list(obj, &llcache->cached_objects); } + /* Attempt to kick-off fetch */ + error = llcache_object_fetch(obj, flags, referer, post, redirect_count); + if (error != NSERROR_OK) { + llcache_object_destroy(obj); + return error; + } + + /* Add new object to cache 
*/ + llcache_object_add_to_list(obj, &llcache->cached_objects); + *result = obj; return NSERROR_OK; @@ -1098,8 +1585,8 @@ static nserror llcache_object_retrieve(nsurl *url, uint32_t flags, nsurl *defragmented_url; bool uncachable = false; - LLCACHE_LOG(("Retrieve %s (%x, %p, %p)", - nsurl_access(url), flags, referer, post)); + LLCACHE_LOG(("Retrieve %s (%x, %s, %p)", nsurl_access(url), flags, + referer==NULL?"":nsurl_access(referer), post)); /* Get rid of any url fragment */ @@ -1625,6 +2112,146 @@ static nserror llcache_fetch_ssl_error(llcache_object *object) return error; } +/** + * construct a sorted list of objects available for writeout operation + * + * The list contains fresh cacheable objects held in RAM with no + * pending fetches. Any objects with a remaining lifetime less than + * the configured minimum lifetime are simply not considered, they will + * become stale before pushing to backing store is worth the cost. + * + * \todo calculate useful cost metrics to improve sorting. + * + */ +static nserror +build_candidate_list(struct llcache_object ***lst_out, int *lst_len_out) +{ + llcache_object *object, *next; + struct llcache_object **lst; + int lst_len = 0; + int remaining_lifetime; + + lst = calloc(512, sizeof(struct llcache_object *)); + if (lst == NULL) + return NSERROR_NOMEM; + + for (object = llcache->cached_objects; object != NULL; object = next) { + next = object->next; + + remaining_lifetime = llcache_object_rfc2616_remaining_lifetime(&object->cache); + + /* cacehable objects with no pending fetches, not + * already on disc and with sufficient lifetime to + * make disc cache worthwile + */ + if ((object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_RAM) && + (remaining_lifetime > llcache->minimum_lifetime)) { + lst[lst_len] = object; + lst_len++; + if (lst_len == 512) + break; + } + } + + if (lst_len == 0) { + free(lst); + return NSERROR_NOT_FOUND; + } + + /* sort list here */ + + *lst_len_out = lst_len; + *lst_out = lst; + + return NSERROR_OK; +} + +static nserror +write_backing_store(struct llcache_object *object, size_t *written_out) +{ + nserror ret; + uint8_t *metadata; + size_t metadatasize; + + /* put object data in backing store */ + ret = guit->llcache->store(object->url, + BACKING_STORE_NONE, + object->source_data, + object->source_len); + if (ret != NSERROR_OK) { + /* unable to put source data in backing store */ + return ret; + } + + ret = llcache_serialise_metadata(object, &metadata, &metadatasize); + if (ret != NSERROR_OK) { + /* There has been a metadata serialisation error. Ensure the + * already written data object is invalidated. + */ + guit->llcache->invalidate(object->url); + return ret; + } + + ret = guit->llcache->store(object->url, + BACKING_STORE_META, + metadata, + metadatasize); + free(metadata); + if (ret != NSERROR_OK) { + /* There has been an error putting the metadata in the + * backing store. Ensure the data object is invalidated. + */ + guit->llcache->invalidate(object->url); + return ret; + } + object->store_state = LLCACHE_STATE_DISC; + + *written_out = object->source_len + metadatasize; + + return NSERROR_OK; +} + +/** + * possibly write objects data to backing store. 
+ */ +static void llcache_persist(void *p) +{ + nserror ret; + size_t size_written; + size_t total_written = 0; + struct llcache_object **lst; + int lst_count; + int idx; + + ret = build_candidate_list(&lst, &lst_count); + if (ret == NSERROR_OK) { + /* obtained a candidate list, make each object + * persistant in turn + */ + for (idx = 0; idx < lst_count; idx++) { + ret = write_backing_store(lst[idx], &size_written); + if (ret != NSERROR_OK) { + break; + } + total_written += size_written; + + if (total_written > llcache->bandwidth) { + /* The bandwidth limit has been reached. + * Writeout scheduled for the remaining objects + */ + guit->browser->schedule(1000, llcache_persist, NULL); + break; + } + } + + free(lst); + } +} + + /** * Handler for fetch events * @@ -1724,6 +2351,11 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) } llcache_object_cache_update(object); + + /* record when the fetch finished */ + object->cache.fin_time = time(NULL); + + guit->browser->schedule(5000, llcache_persist, NULL); } break; @@ -1833,26 +2465,6 @@ static llcache_object_user *llcache_object_find_user(const llcache_handle *handl return user; } -/** - * Remove a low-level cache object from a cache list - * - * \param object Object to remove - * \param list List to remove from - * \return NSERROR_OK - */ -static nserror llcache_object_remove_from_list(llcache_object *object, - llcache_object **list) -{ - if (object == *list) - *list = object->next; - else - object->prev->next = object->next; - - if (object->next != NULL) - object->next->prev = object->prev; - - return NSERROR_OK; -} /** * Determine if a low-level cache object resides in a given list @@ -2110,8 +2722,8 @@ static nserror llcache_object_notify_users(llcache_object *object) * \param snapshot Pointer to receive snapshot of \a object * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_object_snapshot(llcache_object *object, - llcache_object **snapshot) +static nserror +llcache_object_snapshot(llcache_object *object, llcache_object **snapshot) { llcache_object *newobj; nserror error; @@ -2162,6 +2774,35 @@ static nserror llcache_object_snapshot(llcache_object *object, return NSERROR_OK; } +/** + * total ram usage of object + */ +static inline uint32_t +total_object_size(llcache_object *object) +{ + uint32_t tot; + size_t hdrc; + + tot = sizeof(*object); + tot += nsurl_length(object->url); + + if (object->source_data != NULL) { + tot += object->source_len; + } + + tot += sizeof(llcache_header) * object->num_headers; + + for (hdrc = 0; hdrc < object->num_headers; hdrc++) { + if (object->headers[hdrc].name != NULL) { + tot += strlen(object->headers[hdrc].name); + } + if (object->headers[hdrc].value != NULL) { + tot += strlen(object->headers[hdrc].value); + } + } + + return tot; +} /****************************************************************************** * Public API * @@ -2169,6 +2810,8 @@ static nserror llcache_object_snapshot(llcache_object *object, /** * Attempt to clean the cache + * + * The memory cache cleaning discards objects in order of increasing value. 
*/ /* Exported interface documented in llcache.h */ void llcache_clean(void) @@ -2179,15 +2822,10 @@ void llcache_clean(void) LLCACHE_LOG(("Attempting cache clean")); - /* Candidates for cleaning are (in order of priority): - * - * 1) Uncacheable objects with no users - * 2) Stale cacheable objects with no users or pending fetches - * 3) Fresh cacheable objects with no users or pending fetches - */ - - /* 1) Uncacheable objects with no users or fetches */ - for (object = llcache->uncached_objects; object != NULL; object = next) { + /* Uncacheable objects with no users or fetches */ + for (object = llcache->uncached_objects; + object != NULL; + object = next) { next = object->next; /* The candidate count of uncacheable objects is always 0 */ @@ -2195,18 +2833,21 @@ void llcache_clean(void) (object->candidate_count == 0) && (object->fetch.fetch == NULL) && (object->fetch.outstanding_query == false)) { - LLCACHE_LOG(("Found victim %p", object)); + LLCACHE_LOG(("Discarding uncachable object with no users (%p) %s", object, nsurl_access(object->url))); llcache_object_remove_from_list(object, &llcache->uncached_objects); llcache_object_destroy(object); } else { - llcache_size += object->source_len + sizeof(*object); + llcache_size += total_object_size(object); } } - /* 2) Stale cacheable objects with no users or pending fetches */ - for (object = llcache->cached_objects; object != NULL; object = next) { + + /* Stale cacheable objects with no users or pending fetches */ + for (object = llcache->cached_objects; + object != NULL; + object = next) { next = object->next; remaining_lifetime = llcache_object_rfc2616_remaining_lifetime(&object->cache); @@ -2214,45 +2855,113 @@ void llcache_clean(void) if ((object->users == NULL) && (object->candidate_count == 0) && (object->fetch.fetch == NULL) && - (object->fetch.outstanding_query == false)) { - - if (remaining_lifetime > 0) { - /* object is fresh */ - llcache_size += object->source_len + sizeof(*object); - } else { - /* object is not fresh */ - LLCACHE_LOG(("Found stale cacheable object (%p) with no users or pending fetches", object)); + (object->fetch.outstanding_query == false) && + (remaining_lifetime <= 0)) { + /* object is stale */ + LLCACHE_LOG(("discarding stale cacheable object with no users or pending fetches (%p) %s", object, nsurl_access(object->url))); llcache_object_remove_from_list(object, &llcache->cached_objects); + + if (object->store_state == LLCACHE_STATE_DISC) { + guit->llcache->invalidate(object->url); + } + llcache_object_destroy(object); - } + } else { - llcache_size += object->source_len + sizeof(*object); + /* object has users so account for the storage */ + llcache_size += total_object_size(object); } } - /* 3) Fresh cacheable objects with no users or pending - * fetches, only if the cache exceeds the configured size. + /* if the cache limit is exceeded try to make some objects + * persistant so their RAM can be reclaimed in the next + * step */ - if (llcache->limit < llcache_size) { - for (object = llcache->cached_objects; object != NULL; - object = next) { - next = object->next; + if (llcache->limit < llcache_size) { + llcache_persist(NULL); + } - if ((object->users == NULL) && - (object->candidate_count == 0) && - (object->fetch.fetch == NULL) && - (object->fetch.outstanding_query == false)) { - LLCACHE_LOG(("Found victim %p", object)); + /* Source data of fresh cacheable objects with no users, no + * pending fetches and pushed to persistant store while the + * cache exceeds the configured size. 
+ */ + for (object = llcache->cached_objects; + ((llcache->limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_DISC)) { + free(object->source_data); + object->source_data = NULL; - llcache_size -= - object->source_len + sizeof(*object); + llcache_size -= object->source_len; - llcache_object_remove_from_list(object, + LLCACHE_LOG(("Freeing source data for %p len:%d", + object, + object->source_len)); + } + } + + /* Fresh cacheable objects with no users, no pending fetches + * and pushed to persistant store while the cache exceeds + * the configured size. Efectively just the object metadata. + */ + for (object = llcache->cached_objects; + ((llcache->limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_DISC) && + (object->source_data == NULL)) { + LLCACHE_LOG(("discarding backed object len:%d age:%d (%p) %s", + object->source_len, + time(NULL) - object->last_used, + object, + nsurl_access(object->url))); + + llcache_size -= total_object_size(object); + + llcache_object_remove_from_list(object, &llcache->cached_objects); - llcache_object_destroy(object); - } + llcache_object_destroy(object); + + } + } + + /* Fresh cacheable objects with no users or pending fetches + * while the cache exceeds the configured size. These are the + * most valuble objects as replacing them is a full network + * fetch + */ + for (object = llcache->cached_objects; + ((llcache->limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_RAM)) { + LLCACHE_LOG(("discarding fresh object len:%d age:%d (%p) %s", + object->source_len, + time(NULL) - object->last_used, + object, + nsurl_access(object->url))); + + llcache_size -= object->source_len + sizeof(*object); + + llcache_object_remove_from_list(object, + &llcache->cached_objects); + llcache_object_destroy(object); } } @@ -2261,20 +2970,23 @@ void llcache_clean(void) /* See llcache.h for documentation */ nserror -llcache_initialise(llcache_query_callback cb, void *pw, uint32_t llcache_limit) +llcache_initialise(const struct llcache_parameters *prm) { llcache = calloc(1, sizeof(struct llcache_s)); if (llcache == NULL) { return NSERROR_NOMEM; } - llcache->query_cb = cb; - llcache->query_cb_pw = pw; - llcache->limit = llcache_limit; + llcache->query_cb = prm->cb; + llcache->query_cb_pw = prm->cb_ctx; + llcache->limit = prm->limit; + llcache->minimum_lifetime = prm->minimum_lifetime; + llcache->bandwidth = prm->bandwidth; - LOG(("llcache initialised with a limit of %d bytes", llcache_limit)); + LOG(("llcache initialising with a limit of %d bytes", llcache->limit)); - return NSERROR_OK; + /* backing store initialisation */ + return guit->llcache->initialise(&prm->store); } /* See llcache.h for documentation */ @@ -2324,6 +3036,9 @@ void llcache_finalise(void) llcache_object_destroy(object); } + /* backing store finalisation */ + guit->llcache->finalise(); + free(llcache); llcache = NULL; } diff --git 
a/content/llcache.h b/content/llcache.h index 3d8232cae..a9ed1861a 100644 --- a/content/llcache.h +++ b/content/llcache.h @@ -76,7 +76,7 @@ typedef struct { } data; /**< Event data */ } llcache_event; -/** +/** * Client callback for low-level cache events * * \param handle Handle for which event is issued @@ -84,18 +84,18 @@ typedef struct { * \param pw Pointer to client-specific data * \return NSERROR_OK on success, appropriate error otherwise. */ -typedef nserror (*llcache_handle_callback)(llcache_handle *handle, +typedef nserror (*llcache_handle_callback)(llcache_handle *handle, const llcache_event *event, void *pw); /** Flags for low-level cache object retrieval */ enum llcache_retrieve_flag { /* Note: We're permitted a maximum of 16 flags which must reside in the - * bottom 16 bits of the flags word. See hlcache.h for further details. + * bottom 16 bits of the flags word. See hlcache.h for further details. */ /** Force a new fetch */ - LLCACHE_RETRIEVE_FORCE_FETCH = (1 << 0), + LLCACHE_RETRIEVE_FORCE_FETCH = (1 << 0), /** Requested URL was verified */ - LLCACHE_RETRIEVE_VERIFIABLE = (1 << 1), + LLCACHE_RETRIEVE_VERIFIABLE = (1 << 1), /**< No error pages */ LLCACHE_RETRIEVE_NO_ERROR_PAGES = (1 << 2), /**< Stream data (implies that object is not cacheable) */ @@ -149,13 +149,81 @@ typedef nserror (*llcache_query_response)(bool proceed, void *cbpw); * \param cbpw Opaque value to pass into \a cb * \return NSERROR_OK on success, appropriate error otherwise * - * \note This callback should return immediately. Once a suitable answer to - * the query has been obtained, the provided response callback should be + * \note This callback should return immediately. Once a suitable answer to + * the query has been obtained, the provided response callback should be * called. This is intended to be an entirely asynchronous process. */ typedef nserror (*llcache_query_callback)(const llcache_query *query, void *pw, llcache_query_response cb, void *cbpw); +/** + * Parameters to configure the low level cache backing store. + */ +struct llcache_store_parameters { + const char *path; /**< The path to the backing store */ + + size_t limit; /**< The backing store upper bound target size */ + size_t hysteresis; /**< The hysteresis around the target size */ + + /** log2 of the default maximum number of entries the cache + * can track. + * + * If unset this defaults to 16 (65536 entries) The cache + * control file takes precedence so cache data remains + * portable between builds with differing defaults. + */ + unsigned int entry_size; + + /** log2 of the default number of entries in the mapping between + * the url and cache entries. + * + * @note This is exposing an internal implementation detail of + * the filesystem based default backing store implementation. + * However it is likely any backing store implementation will + * need some way to map url to cache entries so it is a + * generally useful configuration value. + * + * Too small a value will cause unecessary collisions and + * cache misses and larger values cause proportionaly larger + * amounts of memory to be used. + * + * The "birthday paradox" means that the hash will experience + * a collision in every 2^(address_size/2) urls the cache + * stores. + * + * A value of 20 means one object stored in every 1024 will + * cause a collion and a cache miss while using two megabytes + * of storage. 
+ * + * If unset this defaults to 20 (1048576 entries using two + * megabytes) The cache control file takes precedence so cache + * data remains portable between builds with differing + * defaults. + */ + unsigned int address_size; +}; + +/** + * Parameters to configure the low level cache. + */ +struct llcache_parameters { + llcache_query_callback cb; /**< Query handler for llcache */ + void *cb_ctx; /**< Pointer to llcache query handler data */ + + size_t limit; /**< The target upper bound for the RAM cache size */ + size_t hysteresis; /**< The hysteresis around the target size */ + + int minimum_lifetime; /**< The minimum lifetime to consider + * sending objects to backing store. + */ + + size_t bandwidth; /**< The maximum bandwidth to allow the + * backing store to use. + */ + + struct llcache_store_parameters store; +}; + /** * Initialise the low-level cache * @@ -163,7 +231,7 @@ typedef nserror (*llcache_query_callback)(const llcache_query *query, void *pw, * \param pw Pointer to query handler data * \return NSERROR_OK on success, appropriate error otherwise. */ -nserror llcache_initialise(llcache_query_callback cb, void *pw, uint32_t llcache_limit); +nserror llcache_initialise(const struct llcache_parameters *parameters); /** * Finalise the low-level cache @@ -280,12 +348,12 @@ const uint8_t *llcache_handle_get_source_data(const llcache_handle *handle, * \return Header value, or NULL if header does not exist * * \todo Make the key an enumeration, to avoid needless string comparisons - * \todo Forcing the client to parse the header value seems wrong. - * Better would be to return the actual value part and an array of + * \todo Forcing the client to parse the header value seems wrong. + * Better would be to return the actual value part and an array of * key-value pairs for any additional parameters. * \todo Deal with multiple headers of the same key (e.g. Set-Cookie) */ -const char *llcache_handle_get_header(const llcache_handle *handle, +const char *llcache_handle_get_header(const llcache_handle *handle, const char *key); /** @@ -295,7 +363,7 @@ const char *llcache_handle_get_header(const llcache_handle *handle, * \param b Second handle * \return True if handles reference the same object, false otherwise */ -bool llcache_handle_references_same_object(const llcache_handle *a, +bool llcache_handle_references_same_object(const llcache_handle *a, const llcache_handle *b); #endif diff --git a/content/no_backing_store.c b/content/no_backing_store.c new file mode 100644 index 000000000..192101522 --- /dev/null +++ b/content/no_backing_store.c @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Vincent Sanders + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** \file + * Low-level resource cache null persistant storage implementation. 
+ */ + +#include "utils/nsurl.h" + +#include "content/backing_store.h" + + +/* default to disabled backing store */ +static nserror initialise(const struct llcache_store_parameters *parameters) +{ + return NSERROR_OK; +} + +static nserror finalise(void) +{ + return NSERROR_OK; +} + +static nserror store(nsurl *url, + enum backing_store_flags flags, + const uint8_t *data, + const size_t datalen) +{ + return NSERROR_SAVE_FAILED; +} + +static nserror fetch(nsurl *url, + enum backing_store_flags *flags, + uint8_t **data_out, + size_t *datalen_out) +{ + return NSERROR_NOT_FOUND; +} + +static nserror invalidate(nsurl *url) +{ + return NSERROR_NOT_FOUND; +} + +static struct gui_llcache_table llcache_table = { + .initialise = initialise, + .finalise = finalise, + .store = store, + .fetch = fetch, + .invalidate = invalidate, +}; + +struct gui_llcache_table *null_llcache_table = &llcache_table; diff --git a/desktop/gui.h b/desktop/gui.h index c0a5a4e21..24838c9df 100644 --- a/desktop/gui.h +++ b/desktop/gui.h @@ -69,6 +69,7 @@ struct hlcache_handle; struct download_context; struct nsurl; struct gui_file_table; +struct gui_llcache_table; typedef struct nsnsclipboard_styles { size_t start; /**< Start of run */ @@ -520,7 +521,6 @@ struct gui_browser_table { }; - /** * NetSurf operation function table * @@ -572,6 +572,15 @@ struct netsurf_table { * Provides routines for the interactive text search on a page. */ struct gui_search_table *search; + + /** + * Low level cache table. + * + * Used by the low level cache to push objects to persistant + * storage. The table is optional and may be NULL which + * uses the default implementation. + */ + struct gui_llcache_table *llcache; }; diff --git a/desktop/gui_factory.c b/desktop/gui_factory.c index 756f5dd0e..45d9516fa 100644 --- a/desktop/gui_factory.c +++ b/desktop/gui_factory.c @@ -17,6 +17,8 @@ */ #include "content/hlcache.h" +#include "content/backing_store.h" + #include "desktop/download.h" #include "desktop/gui_factory.h" #include "utils/file.h" @@ -25,7 +27,6 @@ struct netsurf_table *guit = NULL; - static void gui_default_window_set_title(struct gui_window *g, const char *title) { } @@ -400,6 +401,34 @@ static nserror verify_search_register(struct gui_search_table *gst) return NSERROR_OK; } +/** verify low level cache persistant backing store table is valid */ +static nserror verify_llcache_register(struct gui_llcache_table *glt) +{ + /* check table is present */ + if (glt == NULL) { + return NSERROR_BAD_PARAMETER; + } + + /* mandantory operations */ + if (glt->store == NULL) { + return NSERROR_BAD_PARAMETER; + } + if (glt->fetch == NULL) { + return NSERROR_BAD_PARAMETER; + } + if (glt->invalidate == NULL) { + return NSERROR_BAD_PARAMETER; + } + if (glt->initialise == NULL) { + return NSERROR_BAD_PARAMETER; + } + if (glt->finalise == NULL) { + return NSERROR_BAD_PARAMETER; + } + + return NSERROR_OK; +} + static nsurl *gui_default_get_resource_url(const char *path) { return NULL; @@ -622,6 +651,16 @@ nserror gui_factory_register(struct netsurf_table *gt) return err; } + /* llcache table */ + if (gt->llcache == NULL) { + /* set default backing store table */ + gt->llcache = null_llcache_table; + } + err = verify_llcache_register(gt->llcache); + if (err != NSERROR_OK) { + return err; + } + guit = gt; return NSERROR_OK; diff --git a/desktop/netsurf.c b/desktop/netsurf.c index 1c9d880f7..e82233a01 100644 --- a/desktop/netsurf.c +++ b/desktop/netsurf.c @@ -67,11 +67,23 @@ */ #define SPECULATE_SMALL 4096 -/* the time between cache clean runs in ms */ +/** 
the time between image cache clean runs in ms. */ #define IMAGE_CACHE_CLEAN_TIME (10 * 1000) +/** default time between content cache cleans. */ #define HL_CACHE_CLEAN_TIME (2 * IMAGE_CACHE_CLEAN_TIME) +/** default minimum object time before object is pushed to backing store. */ +#define LLCACHE_MIN_DISC_LIFETIME (60 * 30) + +/** default maximum bandwidth for backing store writeout. */ +#define LLCACHE_MAX_DISC_BANDWIDTH (128 * 1024) + +/** ensure there is a minimal amount of memory for source objetcs and + * decoded bitmaps. + */ +#define MINIMUM_MEMORY_CACHE_SIZE (2 * 1024 * 1024) + bool netsurf_quit = false; static void netsurf_lwc_iterator(lwc_string *str, void *pw) @@ -108,8 +120,6 @@ static nserror netsurf_llcache_query_handler(const llcache_query *query, return NSERROR_OK; } -#define MINIMUM_MEMORY_CACHE_SIZE (2 * 1024 * 1024) - /* exported interface documented in desktop/netsurf.h */ nserror netsurf_register(struct netsurf_table *table) { @@ -118,14 +128,17 @@ nserror netsurf_register(struct netsurf_table *table) } /* exported interface documented in desktop/netsurf.h */ -nserror netsurf_init(const char *messages) +nserror netsurf_init(const char *messages, const char *store_path) { - nserror error; + nserror ret; struct utsname utsname; - nserror ret = NSERROR_OK; struct hlcache_parameters hlcache_parameters = { .bg_clean_time = HL_CACHE_CLEAN_TIME, - .cb = netsurf_llcache_query_handler, + .llcache = { + .cb = netsurf_llcache_query_handler, + .minimum_lifetime = LLCACHE_MIN_DISC_LIFETIME, + .bandwidth = LLCACHE_MAX_DISC_BANDWIDTH, + } }; struct image_cache_parameters image_cache_parameters = { .bg_clean_time = IMAGE_CACHE_CLEAN_TIME, @@ -155,75 +168,86 @@ nserror netsurf_init(const char *messages) messages_load(messages); /* corestrings init */ - error = corestrings_init(); - if (error != NSERROR_OK) - return error; + ret = corestrings_init(); + if (ret != NSERROR_OK) + return ret; /* set up cache limits based on the memory cache size option */ - hlcache_parameters.limit = nsoption_int(memory_cache_size); + hlcache_parameters.llcache.limit = nsoption_int(memory_cache_size); - if (hlcache_parameters.limit < MINIMUM_MEMORY_CACHE_SIZE) { - hlcache_parameters.limit = MINIMUM_MEMORY_CACHE_SIZE; - LOG(("Setting minimum memory cache size to %d", - hlcache_parameters.limit)); + if (hlcache_parameters.llcache.limit < MINIMUM_MEMORY_CACHE_SIZE) { + hlcache_parameters.llcache.limit = MINIMUM_MEMORY_CACHE_SIZE; + LOG(("Setting minimum memory cache size %d", + hlcache_parameters.llcache.limit)); } /* image cache is 25% of total memory cache size */ - image_cache_parameters.limit = (hlcache_parameters.limit * 25) / 100; + image_cache_parameters.limit = (hlcache_parameters.llcache.limit * 25) / 100; /* image cache hysteresis is 20% of the image cache size */ image_cache_parameters.hysteresis = (image_cache_parameters.limit * 20) / 100; /* account for image cache use from total */ - hlcache_parameters.limit -= image_cache_parameters.limit; + hlcache_parameters.llcache.limit -= image_cache_parameters.limit; + + /* set backing store target limit */ + hlcache_parameters.llcache.store.limit = nsoption_int(disc_cache_size); + + /* set backing store hysterissi to 20% */ + hlcache_parameters.llcache.store.hysteresis = (hlcache_parameters.llcache.store.limit * 20) / 100;; + + /* set the path to the backing store */ + hlcache_parameters.llcache.store.path = store_path; /* image handler bitmap cache */ - error = image_cache_init(&image_cache_parameters); - if (error != NSERROR_OK) - return error; + ret = 
image_cache_init(&image_cache_parameters); + if (ret != NSERROR_OK) + return ret; /* content handler initialisation */ - error = nscss_init(); - if (error != NSERROR_OK) - return error; + ret = nscss_init(); + if (ret != NSERROR_OK) + return ret; - error = html_init(); - if (error != NSERROR_OK) - return error; + ret = html_init(); + if (ret != NSERROR_OK) + return ret; - error = image_init(); - if (error != NSERROR_OK) - return error; + ret = image_init(); + if (ret != NSERROR_OK) + return ret; - error = textplain_init(); - if (error != NSERROR_OK) - return error; + ret = textplain_init(); + if (ret != NSERROR_OK) + return ret; - error = mimesniff_init(); - if (error != NSERROR_OK) - return error; + ret = mimesniff_init(); + if (ret != NSERROR_OK) + return ret; url_init(); setlocale(LC_ALL, "C"); /* initialise the fetchers */ - error = fetch_init(); - if (error != NSERROR_OK) - return error; + ret = fetch_init(); + if (ret != NSERROR_OK) + return ret; /* Initialise the hlcache and allow it to init the llcache for us */ - hlcache_initialise(&hlcache_parameters); + ret = hlcache_initialise(&hlcache_parameters); + if (ret != NSERROR_OK) + return ret; /* Initialize system colours */ - error = ns_system_colour_init(); - if (error != NSERROR_OK) - return error; + ret = ns_system_colour_init(); + if (ret != NSERROR_OK) + return ret; js_initialise(); - return ret; + return NSERROR_OK; } diff --git a/desktop/netsurf.h b/desktop/netsurf.h index 60ec57845..6c6a22a86 100644 --- a/desktop/netsurf.h +++ b/desktop/netsurf.h @@ -43,7 +43,7 @@ nserror netsurf_register(struct netsurf_table *table); * @param messages path to translation mesage file. * @return NSERROR_OK on success or error code on faliure. */ -nserror netsurf_init(const char *messages); +nserror netsurf_init(const char *messages, const char *store_path); /** * Run event loop. diff --git a/framebuffer/gui.c b/framebuffer/gui.c index a3c4d9c30..251326dc2 100644 --- a/framebuffer/gui.c +++ b/framebuffer/gui.c @@ -1837,7 +1837,7 @@ main(int argc, char** argv) /* common initialisation */ messages = filepath_find(respaths, "Messages"); - ret = netsurf_init(messages); + ret = netsurf_init(messages, NULL); free(messages); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); diff --git a/gtk/Makefile.defaults b/gtk/Makefile.defaults index eb17cb127..b7382e71b 100644 --- a/gtk/Makefile.defaults +++ b/gtk/Makefile.defaults @@ -21,6 +21,9 @@ NETSURF_USE_NSSVG := AUTO # Valid options: YES, NO, AUTO NETSURF_USE_ROSPRITE := AUTO +# Enable building the source object cache filesystem based backing store. +NETSURF_FS_BACKING_STORE := YES + # Configuration overrides for Mac OS X ifeq ($(HOST),macosx) NETSURF_USE_LIBICONV_PLUG := NO diff --git a/gtk/gui.c b/gtk/gui.c index cde07bb7b..6bbbe25c8 100644 --- a/gtk/gui.c +++ b/gtk/gui.c @@ -44,6 +44,7 @@ #include "content/fetchers/resource.h" #include "content/hlcache.h" #include "content/urldb.h" +#include "content/backing_store.h" #include "desktop/browser.h" #include "desktop/gui.h" #include "desktop/netsurf.h" @@ -1100,11 +1101,111 @@ static nserror create_config_home(char **config_home_out) /* strip the trailing separator */ config_home[strlen(config_home) - 1] = 0; + LOG(("\"%s\"", config_home)); + *config_home_out = config_home; return NSERROR_OK; } +/** + * Get the path to the cache directory. + * + * @param cache_home_out Path to cache directory. + * @return NSERROR_OK on sucess and \a cache_home_out updated else error code. 
+ */ +static nserror get_cache_home(char **cache_home_out) +{ + nserror ret; + char *xdg_cache_dir; + char *cache_home; + char *home_dir; + + /* $XDG_CACHE_HOME defines the base directory relative to + * which user specific non-essential data files should be + * stored. + */ + xdg_cache_dir = getenv("XDG_CACHE_HOME"); + + if ((xdg_cache_dir == NULL) || (*xdg_cache_dir == 0)) { + /* If $XDG_CACHE_HOME is either not set or empty, a + * default equal to $HOME/.cache should be used. + */ + + home_dir = getenv("HOME"); + + /* the HOME envvar is required */ + if (home_dir == NULL) { + return NSERROR_NOT_DIRECTORY; + } + + ret = check_dirname(home_dir, ".cache/netsurf", &cache_home); + if (ret != NSERROR_OK) { + return ret; + } + } else { + ret = check_dirname(xdg_cache_dir, "netsurf", &cache_home); + if (ret != NSERROR_OK) { + return ret; + } + } + + LOG(("\"%s\"", cache_home)); + + *cache_home_out = cache_home; + return NSERROR_OK; +} + +static nserror create_cache_home(char **cache_home_out) +{ + char *cache_home = NULL; + char *home_dir; + char *xdg_cache_dir; + nserror ret; + + LOG(("Attempting to create configuration directory")); + + /* $XDG_CACHE_HOME defines the base directory + * relative to which user specific cache files + * should be stored. + */ + xdg_cache_dir = getenv("XDG_CACHE_HOME"); + + if ((xdg_cache_dir == NULL) || (*xdg_cache_dir == 0)) { + home_dir = getenv("HOME"); + + if ((home_dir == NULL) || (*home_dir == 0)) { + return NSERROR_NOT_DIRECTORY; + } + + ret = netsurf_mkpath(&cache_home, NULL, 4, home_dir, ".cache", "netsurf", "/"); + if (ret != NSERROR_OK) { + return ret; + } + } else { + ret = netsurf_mkpath(&cache_home, NULL, 3, xdg_cache_dir, "netsurf", "/"); + if (ret != NSERROR_OK) { + return ret; + } + } + + /* ensure all elements of path exist (the trailing / is required) */ + ret = filepath_mkdir_all(cache_home); + if (ret != NSERROR_OK) { + free(cache_home); + return ret; + } + + /* strip the trailing separator */ + cache_home[strlen(cache_home) - 1] = 0; + + LOG(("\"%s\"", cache_home)); + + *cache_home_out = cache_home; + + return NSERROR_OK; +} + static nserror nsgtk_option_init(int *pargc, char** argv) { nserror ret; @@ -1162,6 +1263,7 @@ static struct gui_browser_table nsgtk_browser_table = { int main(int argc, char** argv) { char *messages; + char *cache_home = NULL; nserror ret; struct netsurf_table nsgtk_table = { .browser = &nsgtk_browser_table, @@ -1170,6 +1272,7 @@ int main(int argc, char** argv) .download = nsgtk_download_table, .fetch = nsgtk_fetch_table, .search = nsgtk_search_table, + .llcache = filesystem_llcache_table, }; ret = netsurf_register(&nsgtk_table); @@ -1210,9 +1313,20 @@ int main(int argc, char** argv) /* Obtain path to messages */ messages = filepath_find(respaths, "Messages"); + /* Locate the correct user cache directory path */ + ret = get_cache_home(&cache_home); + if (ret == NSERROR_NOT_FOUND) { + /* no cache directory exists yet so try to create one */ + ret = create_cache_home(&cache_home); + } + if (ret != NSERROR_OK) { + LOG(("Unable to locate a cache directory.")); + } + /* core initialisation */ - ret = netsurf_init(messages); + ret = netsurf_init(messages, cache_home); free(messages); + free(cache_home); if (ret != NSERROR_OK) { fprintf(stderr, "NetSurf core failed to initialise (%s)\n", messages_get_errorcode(ret)); diff --git a/monkey/main.c b/monkey/main.c index fe703f226..10ec1baf8 100644 --- a/monkey/main.c +++ b/monkey/main.c @@ -155,7 +155,7 @@ main(int argc, char **argv) /* common initialisation */ messages = 
filepath_find(respaths, "Messages"); - ret = netsurf_init(messages); + ret = netsurf_init(messages, NULL); free(messages); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); diff --git a/riscos/gui.c b/riscos/gui.c index ee450fa65..9ee56e685 100644 --- a/riscos/gui.c +++ b/riscos/gui.c @@ -2542,7 +2542,7 @@ int main(int argc, char** argv) } /* common initialisation */ - ret = netsurf_init(path); + ret = netsurf_init(path, NULL); if (ret != NSERROR_OK) { die("NetSurf failed to initialise"); } diff --git a/windows/main.c b/windows/main.c index 7cd6339d7..7ad3d8550 100644 --- a/windows/main.c +++ b/windows/main.c @@ -164,7 +164,7 @@ WinMain(HINSTANCE hInstance, HINSTANCE hLastInstance, LPSTR lpcli, int ncmd) /* common initialisation */ messages = filepath_find(respaths, "messages"); - ret = netsurf_init(messages); + ret = netsurf_init(messages, NULL); free(messages); if (ret != NSERROR_OK) { free(options_file_location);
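
The serialised metadata produced by llcache_serialise_metadata() and parsed back by llcache_process_metadata() is simply a run of NUL-terminated fields: the url, the source length, the request/response/completion times, a header count, then one "name:value" field per header. The stand-alone sketch below reproduces that layout so the round trip can be seen in isolation; the url, sizes and the single header are invented, and the times are written as plain epoch seconds rather than via the strftime()/strptime() calls the patch uses.

/*
 * Minimal sketch (not the patch code itself) of the NUL-separated
 * metadata layout: url, source length, request time, response time,
 * completion time, header count, then one "name:value" per header.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>

int main(void)
{
	char meta[512];
	char *op = meta;
	int space = sizeof(meta);
	int use;
	time_t now = time(NULL);
	const char *ln;

	/* serialise: each snprintf writes an explicit NUL so the
	 * reader can later walk the buffer with strlen()
	 */
	use = snprintf(op, space, "%s%c", "http://example.org/s.css", 0);
	op += use; space -= use;
	use = snprintf(op, space, "%zu%c", (size_t)1234, 0);    /* source length */
	op += use; space -= use;
	use = snprintf(op, space, "%lld%c", (long long)now, 0); /* request time */
	op += use; space -= use;
	use = snprintf(op, space, "%lld%c", (long long)now, 0); /* response time */
	op += use; space -= use;
	use = snprintf(op, space, "%lld%c", (long long)now, 0); /* completion time */
	op += use; space -= use;
	use = snprintf(op, space, "%zu%c", (size_t)1, 0);       /* header count */
	op += use; space -= use;
	use = snprintf(op, space, "%s:%s%c", "Content-Type", "text/css", 0);
	op += use; space -= use;

	/* deserialise: step over each field exactly as
	 * llcache_process_metadata() does with ln += lnsize + 1
	 */
	ln = meta;
	for (int field = 0; field < 7; field++) {
		printf("field %d: \"%s\"\n", field, ln);
		ln += strlen(ln) + 1;
	}

	return 0;
}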
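
The llcache_store_parameters documentation quotes the sizing trade-off for entry_size and address_size; the arithmetic is easy to check with a few lines, assuming one map slot of entry_size bits per possible address (the store's real data structures may differ, this only reproduces the quoted numbers). With the documented defaults of 16 and 20 it gives 65536 trackable entries, a two megabyte url-to-entry map, and an expected collision roughly every 1024 stored objects.

/*
 * Back-of-envelope check of the llcache_store_parameters defaults.
 * Assumes one slot of entry_size bits per possible address; this is
 * not the store's actual layout, only the documented arithmetic.
 */
#include <stdio.h>

int main(void)
{
	unsigned int entry_size = 16;   /* log2 of max tracked entries  */
	unsigned int address_size = 20; /* log2 of url->entry map slots */

	unsigned long entries = 1UL << entry_size;
	unsigned long slots = 1UL << address_size;
	unsigned long map_bytes = slots * entry_size / 8;
	unsigned long collision_every = 1UL << (address_size / 2);

	printf("trackable entries : %lu\n", entries);        /* 65536 */
	printf("url map slots     : %lu\n", slots);          /* 1048576 */
	printf("url map size      : %lu bytes\n", map_bytes);/* 2 MiB */
	printf("collision roughly every %lu stored objects\n",
	       collision_every);                             /* 1024 */

	return 0;
}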
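
llcache_persist() caps how much it writes per pass: candidates are written until total_written exceeds llcache->bandwidth, after which the remainder is deferred to a pass rescheduled 1000ms later, so sustained writeout stays near the configured bandwidth (128 KiB per second with the LLCACHE_MAX_DISC_BANDWIDTH default). The toy simulation below, with invented object sizes and no real scheduler or backing store, shows the same batching behaviour, including the way a pass may overshoot the budget by at most one object because the check happens after the write.

/*
 * Toy simulation of the byte budget in llcache_persist().  Each pass
 * writes objects until the budget is exceeded, then defers the rest
 * to the next pass.  Sizes are invented; the real candidate list
 * comes from build_candidate_list().
 */
#include <stdio.h>
#include <stddef.h>

#define BANDWIDTH (128 * 1024) /* mirrors LLCACHE_MAX_DISC_BANDWIDTH */

int main(void)
{
	size_t objects[] = { 60000, 50000, 40000, 30000, 20000, 10000 };
	size_t count = sizeof(objects) / sizeof(objects[0]);
	size_t idx = 0;
	int pass = 0;

	while (idx < count) {
		size_t total_written = 0;

		pass++;
		while (idx < count) {
			total_written += objects[idx++];
			if (total_written > BANDWIDTH) {
				/* budget reached: remaining objects wait
				 * for the next scheduled pass
				 */
				break;
			}
		}
		printf("pass %d wrote %zu bytes\n", pass, total_written);
	}

	return 0;
}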