squashfs-fix-open-file-limit.patch

Upstream-Status: Backport

unsquashfs: fix open file limit

Previously, Unsquashfs relied on the to_writer queue being
set to 1000 entries to limit the open-file read-ahead to a
maximum of 500 files. For the default process limit of 1024 open
files this was perhaps acceptable, but it obviously breaks if ulimit
has been used to set the open file limit to below 504 (this includes
stdin, stdout, stderr and the Squashfs filesystem being unsquashed).

More importantly, setting the to_writer queue to 1000 to limit
the number of open files has always been an inherent performance
hit, because the to_writer queue queues blocks. It limits the
block read-ahead to 1000 blocks, irrespective of how many files
that represents. A single file containing more than 1000 blocks
will still be limited to a 1000-block read-ahead, even though the
data block cache can typically buffer more than this (at the
default data cache size of 256 Mbytes and the default block size
of 128 Kbytes, it can buffer 2048 blocks). Obviously the
caches serve more than just a read-ahead role (they also
cache decompressed blocks in case they're referenced later, e.g.
by duplicate files), but the artificial limit imposed on
the read-ahead by setting the to_writer queue to 1000 is
unnecessary.

This commit does away with the need to limit the to_writer queue
by introducing open_wait() and close_wake() calls which correctly
track the number of open files.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: yanjun.zhu <yanjun.zhu@windriver.com>
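
To see the gating idea in isolation before reading the diff, here is a
minimal, self-contained sketch of the counting-semaphore pattern that
open_wait() and close_wake() implement below; the gated_open/gated_close
and slot_* names, and the two-slot demo, are invented for illustration
and are not part of the patch:

/*
 * Sketch of the open-file gating pattern: a mutex-protected counter of
 * remaining open-file slots, with a condition variable to block callers
 * when no slots are free and to wake one waiter on each close.
 */
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static pthread_mutex_t slot_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t slot_free = PTHREAD_COND_INITIALIZER;
static int slots;	/* open-file slots still available */

/* Block until a slot is free, then open the file. */
static int gated_open(const char *path, int flags, mode_t mode)
{
	pthread_mutex_lock(&slot_mutex);
	while (slots == 0)
		pthread_cond_wait(&slot_free, &slot_mutex);
	slots--;
	pthread_mutex_unlock(&slot_mutex);

	return open(path, flags, mode);
}

/* Close the file and wake one thread waiting for a slot. */
static void gated_close(int fd)
{
	close(fd);

	pthread_mutex_lock(&slot_mutex);
	slots++;
	pthread_cond_signal(&slot_free);
	pthread_mutex_unlock(&slot_mutex);
}

int main(void)
{
	slots = 2;	/* pretend the limit allows only 2 open files */

	int fd1 = gated_open("/dev/null", O_WRONLY, 0);
	int fd2 = gated_open("/dev/null", O_WRONLY, 0);
	/* a third gated_open() here would block until a slot is freed */
	gated_close(fd1);
	gated_close(fd2);
	printf("both slots released\n");
	return 0;
}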
diff -urpN a/unsquashfs.c b/unsquashfs.c
--- a/unsquashfs.c	2012-11-30 15:31:29.000000000 +0800
+++ b/unsquashfs.c	2012-11-30 15:32:03.000000000 +0800
@@ -31,6 +31,8 @@
 #include <sys/sysinfo.h>
 #include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 
 struct cache *fragment_cache, *data_cache;
 struct queue *to_reader, *to_deflate, *to_writer, *from_writer;
@@ -784,6 +786,46 @@ failure:
 }
 
 
+pthread_mutex_t open_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t open_empty = PTHREAD_COND_INITIALIZER;
+int open_unlimited, open_count;
+#define OPEN_FILE_MARGIN 10
+
+
+void open_init(int count)
+{
+	open_count = count;
+	open_unlimited = count == -1;
+}
+
+
+int open_wait(char *pathname, int flags, mode_t mode)
+{
+	if (!open_unlimited) {
+		pthread_mutex_lock(&open_mutex);
+		while (open_count == 0)
+			pthread_cond_wait(&open_empty, &open_mutex);
+		open_count --;
+		pthread_mutex_unlock(&open_mutex);
+	}
+
+	return open(pathname, flags, mode);
+}
+
+
+void close_wake(int fd)
+{
+	close(fd);
+
+	if (!open_unlimited) {
+		pthread_mutex_lock(&open_mutex);
+		open_count ++;
+		pthread_cond_signal(&open_empty);
+		pthread_mutex_unlock(&open_mutex);
+	}
+}
+
+
 int write_file(struct inode *inode, char *pathname)
 {
 	unsigned int file_fd, i;
@@ -794,8 +836,8 @@ int write_file(struct inode *inode, char
 
 	TRACE("write_file: regular file, blocks %d\n", inode->blocks);
 
-	file_fd = open(pathname, O_CREAT | O_WRONLY | (force ? O_TRUNC : 0),
-		(mode_t) inode->mode & 0777);
+	file_fd = open_wait(pathname, O_CREAT | O_WRONLY |
+		(force ? O_TRUNC : 0), (mode_t) inode->mode & 0777);
 	if(file_fd == -1) {
 		ERROR("write_file: failed to create file %s, because %s\n",
 			pathname, strerror(errno));
@@ -1712,7 +1754,7 @@ void *writer(void *arg)
 			}
 		}
 
-		close(file_fd);
+		close_wake(file_fd);
 		if(failed == FALSE)
 			set_attributes(file->pathname, file->mode, file->uid,
 				file->gid, file->time, file->xattr, force);
@@ -1803,9 +1845,9 @@ void *progress_thread(void *arg)
 
 void initialise_threads(int fragment_buffer_size, int data_buffer_size)
 {
-	int i;
+	struct rlimit rlim;
+	int i, max_files, res;
 	sigset_t sigmask, old_mask;
-	int all_buffers_size = fragment_buffer_size + data_buffer_size;
 
 	sigemptyset(&sigmask);
 	sigaddset(&sigmask, SIGINT);
@@ -1841,10 +1883,86 @@ void initialise_threads(int fragment_buf
 		EXIT_UNSQUASH("Out of memory allocating thread descriptors\n");
 	deflator_thread = &thread[3];
 
-	to_reader = queue_init(all_buffers_size);
-	to_deflate = queue_init(all_buffers_size);
-	to_writer = queue_init(1000);
+	/*
+	 * dimensioning the to_reader and to_deflate queues. The size of
+	 * these queues is directly related to the amount of block
+	 * read-ahead possible. To_reader queues block read requests to
+	 * the reader thread and to_deflate queues block decompression
+	 * requests to the deflate thread(s) (once the block has been read by
+	 * the reader thread). The amount of read-ahead is determined by
+	 * the combined size of the data_block and fragment caches which
+	 * determine the total number of blocks which can be "in flight"
+	 * at any one time (either being read or being decompressed).
+	 *
+	 * The maximum file open limit, however, affects the read-ahead
+	 * possible, in that for normal sizes of the fragment and data block
+	 * caches, where the incoming files have few data blocks or one fragment
+	 * only, the file open limit is likely to be reached before the
+	 * caches are full. This means the worst case sizing of the combined
+	 * sizes of the caches is unlikely to ever be necessary. However, it is
+	 * obvious that read-ahead up to the data block cache size is always
+	 * possible irrespective of the file open limit, because a single file
+	 * could contain that number of blocks.
+	 *
+	 * Choosing the size as "file open limit + data block cache size" seems
+	 * to be a reasonable estimate. We can reasonably assume the maximum
+	 * likely read-ahead possible is data block cache size + one fragment
+	 * per open file.
+	 *
+	 * dimensioning the to_writer queue. The size of this queue is
+	 * directly related to the amount of block read-ahead possible.
+	 * However, unlike the to_reader and to_deflate queues, this is
+	 * complicated by the fact the to_writer queue not only contains
+	 * entries for fragments and data_blocks but it also contains
+	 * file entries, one per open file in the read-ahead.
+	 *
+	 * Choosing the size as "2 * (file open limit) +
+	 * data block cache size" seems to be a reasonable estimate.
+	 * We can reasonably assume the maximum likely read-ahead possible
+	 * is data block cache size + one fragment per open file, and then
+	 * we will have a file_entry for each open file.
+	 */
+	res = getrlimit(RLIMIT_NOFILE, &rlim);
+	if (res == -1) {
+		ERROR("failed to get open file limit! Defaulting to 1\n");
+		rlim.rlim_cur = 1;
+	}
+
+	if (rlim.rlim_cur != RLIM_INFINITY) {
+		/*
+		 * leave OPEN_FILE_MARGIN free (rlim_cur includes fds used by
+		 * stdin, stdout, stderr and the filesystem fd)
+		 */
+		if (rlim.rlim_cur <= OPEN_FILE_MARGIN)
+			/* no margin, use minimum possible */
+			max_files = 1;
+		else
+			max_files = rlim.rlim_cur - OPEN_FILE_MARGIN;
+	} else
+		max_files = -1;
+
+	/* set number of available files for use by open_wait and close_wake */
+	open_init(max_files);
+
+	/*
+	 * allocate to_reader, to_deflate and to_writer queues. Set based on
+	 * open file limit and cache size, unless open file limit is unlimited,
+	 * in which case set purely based on cache limits
+	 */
+	if (max_files != -1) {
+		to_reader = queue_init(max_files + data_buffer_size);
+		to_deflate = queue_init(max_files + data_buffer_size);
+		to_writer = queue_init(max_files * 2 + data_buffer_size);
+	} else {
+		int all_buffers_size = fragment_buffer_size + data_buffer_size;
+
+		to_reader = queue_init(all_buffers_size);
+		to_deflate = queue_init(all_buffers_size);
+		to_writer = queue_init(all_buffers_size * 2);
+	}
+
 	from_writer = queue_init(1);
+
 	fragment_cache = cache_init(block_size, fragment_buffer_size);
 	data_cache = cache_init(block_size, data_buffer_size);
 	pthread_create(&thread[0], NULL, reader, NULL);
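
To make the queue-sizing arithmetic above concrete, here is a small,
self-contained sketch that reproduces the calculation for the current
RLIMIT_NOFILE. The DATA_BUFFER_SIZE of 2048 blocks assumes the defaults
quoted in the commit message (256 Mbyte data cache, 128 Kbyte blocks);
the program itself is illustrative and not part of the patch:

/*
 * Worked example of the patch's queue sizing: derive max_files from the
 * soft RLIMIT_NOFILE (leaving OPEN_FILE_MARGIN fds free, as the patch
 * does), then print the resulting to_reader, to_deflate and to_writer
 * queue sizes.
 */
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#define OPEN_FILE_MARGIN 10
#define DATA_BUFFER_SIZE 2048	/* blocks: 256 MB cache / 128 KB blocks */

int main(void)
{
	struct rlimit rlim;
	long max_files;

	if (getrlimit(RLIMIT_NOFILE, &rlim) == -1) {
		perror("getrlimit");
		return 1;
	}

	if (rlim.rlim_cur == RLIM_INFINITY)
		max_files = -1;			/* unlimited */
	else if (rlim.rlim_cur <= OPEN_FILE_MARGIN)
		max_files = 1;			/* no margin left */
	else
		max_files = rlim.rlim_cur - OPEN_FILE_MARGIN;

	if (max_files == -1) {
		printf("open files unlimited: size queues from caches alone\n");
	} else {
		printf("max_files  = %ld\n", max_files);
		printf("to_reader  = %ld\n", max_files + DATA_BUFFER_SIZE);
		printf("to_deflate = %ld\n", max_files + DATA_BUFFER_SIZE);
		printf("to_writer  = %ld\n", 2 * max_files + DATA_BUFFER_SIZE);
	}
	return 0;
}

With the default ulimit of 1024 open files, for example, this prints
max_files = 1014, to_reader and to_deflate = 3062 entries, and
to_writer = 4076 entries.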