Whole document tree
    

Whole document tree

pthread_allocimpl.h Source File
Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

pthread_allocimpl.h

Go to the documentation of this file.
00001 // POSIX thread-related memory allocation -*- C++ -*-
00002 
00003 // Copyright (C) 2001 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 2, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License along
00017 // with this library; see the file COPYING.  If not, write to the Free
00018 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
00019 // USA.
00020 
00021 // As a special exception, you may use this file as part of a free software
00022 // library without restriction.  Specifically, if other files instantiate
00023 // templates or use macros or inline functions from this file, or you compile
00024 // this file and link it with other files to produce an executable, this
00025 // file does not by itself cause the resulting executable to be covered by
00026 // the GNU General Public License.  This exception does not however
00027 // invalidate any other reasons why the executable file might be covered by
00028 // the GNU General Public License.
00029 
00030 /*
00031  * Copyright (c) 1996
00032  * Silicon Graphics Computer Systems, Inc.
00033  *
00034  * Permission to use, copy, modify, distribute and sell this software
00035  * and its documentation for any purpose is hereby granted without fee,
00036  * provided that the above copyright notice appear in all copies and
00037  * that both that copyright notice and this permission notice appear
00038  * in supporting documentation.  Silicon Graphics makes no
00039  * representations about the suitability of this software for any
00040  * purpose.  It is provided "as is" without express or implied warranty.
00041  */
00042 
00043 #ifndef _CPP_BITS_PTHREAD_ALLOCIMPL_H
00044 #define _CPP_BITS_PTHREAD_ALLOCIMPL_H 1
00045 
00046 // Pthread-specific node allocator.
00047 // This is similar to the default allocator, except that free-list
00048 // information is kept separately for each thread, avoiding locking.
00049 // This should be reasonably fast even in the presence of threads.
00050 // The down side is that storage may not be well-utilized.
00051 // It is not an error to allocate memory in thread A and deallocate
00052 // it in thread B.  But this effectively transfers ownership of the memory,
00053 // so that it can only be reallocated by thread B.  Thus this can effectively
00054 // result in a storage leak if it's done on a regular basis.
00055 // It can also result in frequent sharing of
00056 // cache lines among processors, with potentially serious performance
00057 // consequences.
00058 
00059 #include <bits/c++config.h>
00060 #include <bits/std_cerrno.h>
00061 #include <bits/stl_alloc.h>
00062 #ifndef __RESTRICT
00063 #  define __RESTRICT
00064 #endif
00065 
00066 #include <new>
00067 
00068 namespace std
00069 {
00070 
00071 #define __STL_DATA_ALIGNMENT 8
00072 
// Free-list node.  While a block sits on a free list its bytes hold the
// link pointer; once handed out, the same bytes are the client's data.
// __STL_DATA_ALIGNMENT (8) is therefore the minimum allocation size.
union _Pthread_alloc_obj {
    union _Pthread_alloc_obj * __free_list_link;
    char __client_data[__STL_DATA_ALIGNMENT];    /* The client sees this.    */
};
00077 
00078 // Pthread allocators don't appear to the client to have meaningful
00079 // instances.  We do in fact need to associate some state with each
00080 // thread.  That state is represented by
00081 // _Pthread_alloc_per_thread_state<_Max_size>.
00082 
// Per-thread allocator state: one free list per size class
// (8, 16, ..., _Max_size bytes), plus a link used to recycle the whole
// structure once its owning thread exits.
template<size_t _Max_size>
struct _Pthread_alloc_per_thread_state {
  typedef _Pthread_alloc_obj __obj;
  // Number of size classes: _Max_size must be a multiple of the alignment.
  enum { _S_NFREELISTS = _Max_size/__STL_DATA_ALIGNMENT };
  _Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
  _Pthread_alloc_per_thread_state<_Max_size> * __next;
    // Free list link for list of available per thread structures.
    // When one of these becomes available for reuse due to thread
    // termination, any objects in its free list remain associated
    // with it.  The whole structure may then be used by a newly
    // created thread.
  _Pthread_alloc_per_thread_state() : __next(0)
  {
    // Start with every free list empty (cast drops volatile for memset).
    memset((void *)__free_list, 0, (size_t) _S_NFREELISTS * sizeof(__obj *));
  }
  // Returns an object of size __n, and possibly adds to size n free list.
  void *_M_refill(size_t __n);
};
00101 
00102 // Pthread-specific allocator.
00103 // The argument specifies the largest object size allocated from per-thread
00104 // free lists.  Larger objects are allocated using malloc_alloc.
00105 // Max_size must be a power of 2.
00106 template <size_t _Max_size = 128>
00107 class _Pthread_alloc_template {
00108 
00109 public: // but only for internal use:
00110 
00111   typedef _Pthread_alloc_obj __obj;
00112 
00113   // Allocates a chunk for nobjs of size size.  nobjs may be reduced
00114   // if it is inconvenient to allocate the requested number.
00115   static char *_S_chunk_alloc(size_t __size, int &__nobjs);
00116 
00117   enum {_S_ALIGN = __STL_DATA_ALIGNMENT};
00118 
00119   static size_t _S_round_up(size_t __bytes) {
00120     return (((__bytes) + (int) _S_ALIGN-1) & ~((int) _S_ALIGN - 1));
00121   }
00122   static size_t _S_freelist_index(size_t __bytes) {
00123     return (((__bytes) + (int) _S_ALIGN-1)/(int)_S_ALIGN - 1);
00124   }
00125 
00126 private:
00127   // Chunk allocation state. And other shared state.
00128   // Protected by _S_chunk_allocator_lock.
00129   static pthread_mutex_t _S_chunk_allocator_lock;
00130   static char *_S_start_free;
00131   static char *_S_end_free;
00132   static size_t _S_heap_size;
00133   static _Pthread_alloc_per_thread_state<_Max_size>* _S_free_per_thread_states;
00134   static pthread_key_t _S_key;
00135   static bool _S_key_initialized;
00136         // Pthread key under which per thread state is stored. 
00137         // Allocator instances that are currently unclaimed by any thread.
00138   static void _S_destructor(void *instance);
00139         // Function to be called on thread exit to reclaim per thread
00140         // state.
00141   static _Pthread_alloc_per_thread_state<_Max_size> *_S_new_per_thread_state();
00142         // Return a recycled or new per thread state.
00143   static _Pthread_alloc_per_thread_state<_Max_size> *_S_get_per_thread_state();
00144         // ensure that the current thread has an associated
00145         // per thread state.
00146   class _M_lock;
00147   friend class _M_lock;
00148   class _M_lock {
00149       public:
00150         _M_lock () { pthread_mutex_lock(&_S_chunk_allocator_lock); }
00151         ~_M_lock () { pthread_mutex_unlock(&_S_chunk_allocator_lock); }
00152   };
00153 
00154 public:
00155 
00156   /* n must be > 0      */
00157   static void * allocate(size_t __n)
00158   {
00159     __obj * volatile * __my_free_list;
00160     __obj * __RESTRICT __result;
00161     _Pthread_alloc_per_thread_state<_Max_size>* __a;
00162 
00163     if (__n > _Max_size) {
00164         return(malloc_alloc::allocate(__n));
00165     }
00166     if (!_S_key_initialized ||
00167         !(__a = (_Pthread_alloc_per_thread_state<_Max_size>*)
00168                                  pthread_getspecific(_S_key))) {
00169         __a = _S_get_per_thread_state();
00170     }
00171     __my_free_list = __a -> __free_list + _S_freelist_index(__n);
00172     __result = *__my_free_list;
00173     if (__result == 0) {
00174         void *__r = __a -> _M_refill(_S_round_up(__n));
00175         return __r;
00176     }
00177     *__my_free_list = __result -> __free_list_link;
00178     return (__result);
00179   };
00180 
00181   /* p may not be 0 */
00182   static void deallocate(void *__p, size_t __n)
00183   {
00184     __obj *__q = (__obj *)__p;
00185     __obj * volatile * __my_free_list;
00186     _Pthread_alloc_per_thread_state<_Max_size>* __a;
00187 
00188     if (__n > _Max_size) {
00189         malloc_alloc::deallocate(__p, __n);
00190         return;
00191     }
00192     if (!_S_key_initialized ||
00193         !(__a = (_Pthread_alloc_per_thread_state<_Max_size> *)
00194                 pthread_getspecific(_S_key))) {
00195         __a = _S_get_per_thread_state();
00196     }
00197     __my_free_list = __a->__free_list + _S_freelist_index(__n);
00198     __q -> __free_list_link = *__my_free_list;
00199     *__my_free_list = __q;
00200   }
00201 
00202   static void * reallocate(void *__p, size_t __old_sz, size_t __new_sz);
00203 
00204 } ;
00205 
00206 typedef _Pthread_alloc_template<> pthread_alloc;
00207 
00208 
00209 template <size_t _Max_size>
00210 void _Pthread_alloc_template<_Max_size>::_S_destructor(void * __instance)
00211 {
00212     _M_lock __lock_instance;    // Need to acquire lock here.
00213     _Pthread_alloc_per_thread_state<_Max_size>* __s =
00214         (_Pthread_alloc_per_thread_state<_Max_size> *)__instance;
00215     __s -> __next = _S_free_per_thread_states;
00216     _S_free_per_thread_states = __s;
00217 }
00218 
00219 template <size_t _Max_size>
00220 _Pthread_alloc_per_thread_state<_Max_size> *
00221 _Pthread_alloc_template<_Max_size>::_S_new_per_thread_state()
00222 {    
00223     /* lock already held here.  */
00224     if (0 != _S_free_per_thread_states) {
00225         _Pthread_alloc_per_thread_state<_Max_size> *__result =
00226                     _S_free_per_thread_states;
00227         _S_free_per_thread_states = _S_free_per_thread_states -> __next;
00228         return __result;
00229     } else {
00230         return new _Pthread_alloc_per_thread_state<_Max_size>;
00231     }
00232 }
00233 
00234 template <size_t _Max_size>
00235 _Pthread_alloc_per_thread_state<_Max_size> *
00236 _Pthread_alloc_template<_Max_size>::_S_get_per_thread_state()
00237 {
00238     /*REFERENCED*/
00239     _M_lock __lock_instance;    // Need to acquire lock here.
00240     int __ret_code;
00241     _Pthread_alloc_per_thread_state<_Max_size> * __result;
00242     if (!_S_key_initialized) {
00243         if (pthread_key_create(&_S_key, _S_destructor)) {
00244         std::__throw_bad_alloc();  // defined in funcexcept.h
00245         }
00246         _S_key_initialized = true;
00247     }
00248     __result = _S_new_per_thread_state();
00249     __ret_code = pthread_setspecific(_S_key, __result);
00250     if (__ret_code) {
00251       if (__ret_code == ENOMEM) {
00252     std::__throw_bad_alloc();
00253       } else {
00254     // EINVAL
00255     abort();
00256       }
00257     }
00258     return __result;
00259 }
00260 
00261 /* We allocate memory in large chunks in order to avoid fragmenting     */
00262 /* the malloc heap too much.                                            */
00263 /* We assume that size is properly aligned.                             */
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_chunk_alloc(size_t __size, int &__nobjs)
{
  {
    char * __result;
    size_t __total_bytes;
    size_t __bytes_left;
    /*REFERENCED*/
    _M_lock __lock_instance;         // Acquire lock for this routine

    __total_bytes = __size * __nobjs;
    __bytes_left = _S_end_free - _S_start_free;
    if (__bytes_left >= __total_bytes) {
        // Fast path: the shared pool covers the whole batch.
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else if (__bytes_left >= __size) {
        // Partial satisfaction: hand out as many whole objects as fit,
        // reporting the reduced count back through __nobjs.
        __nobjs = __bytes_left/__size;
        __total_bytes = __size * __nobjs;
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else {
        // Pool exhausted: grow by twice the request plus a term scaling
        // with total heap usage, so chunks get larger over time.
        size_t __bytes_to_get =
        2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
        // Try to make use of the left-over piece.
        if (__bytes_left > 0) {
            // Donate the aligned remainder (< __size bytes) to the
            // calling thread's matching free list rather than leak it.
            // NOTE(review): pthread_getspecific assumes this thread's
            // state is already installed -- true when reached via
            // allocate()/_M_refill(); verify for any other callers.
            _Pthread_alloc_per_thread_state<_Max_size>* __a = 
                (_Pthread_alloc_per_thread_state<_Max_size>*)
            pthread_getspecific(_S_key);
            __obj * volatile * __my_free_list =
                        __a->__free_list + _S_freelist_index(__bytes_left);

            ((__obj *)_S_start_free) -> __free_list_link = *__my_free_list;
            *__my_free_list = (__obj *)_S_start_free;
        }
#       ifdef _SGI_SOURCE
          // Try to get memory that's aligned on something like a
          // cache line boundary, so as to avoid parceling out
          // parts of the same line to different threads and thus
          // possibly different processors.
          {
            const int __cache_line_size = 128;  // probable upper bound
            __bytes_to_get &= ~(__cache_line_size-1);
            _S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get); 
            if (0 == _S_start_free) {
              _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
            }
          }
#       else  /* !SGI_SOURCE */
          _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
#       endif
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
    }
  }
  // lock is released here
  // Tail-recurse against the replenished pool; the retry recomputes how
  // many objects (possibly fewer than requested) it can actually serve.
  return(_S_chunk_alloc(__size, __nobjs));
}
00324 
00325 
00326 /* Returns an object of size n, and optionally adds to size n free list.*/
00327 /* We assume that n is properly aligned.                                */
00328 /* We hold the allocation lock.                                         */
template <size_t _Max_size>
void *_Pthread_alloc_per_thread_state<_Max_size>
::_M_refill(size_t __n)
{
    // Ask for a batch of up to 128 objects; _S_chunk_alloc may lower
    // __nobjs if the pool cannot supply that many.
    int __nobjs = 128;
    char * __chunk =
    _Pthread_alloc_template<_Max_size>::_S_chunk_alloc(__n, __nobjs);
    __obj * volatile * __my_free_list;
    __obj * __result;
    __obj * __current_obj, * __next_obj;
    int __i;

    if (1 == __nobjs)  {
        // Single object obtained: hand it straight back, nothing to list.
        return(__chunk);
    }
    __my_free_list = __free_list
         + _Pthread_alloc_template<_Max_size>::_S_freelist_index(__n);

    /* Build free list in chunk */
    // The first object goes to the caller; the remaining __nobjs - 1
    // objects are threaded in place into this thread's size-__n list.
      __result = (__obj *)__chunk;
      *__my_free_list = __next_obj = (__obj *)(__chunk + __n);
      for (__i = 1; ; __i++) {
        __current_obj = __next_obj;
        __next_obj = (__obj *)((char *)__next_obj + __n);
        if (__nobjs - 1 == __i) {
            // Last object: terminate the list.
            __current_obj -> __free_list_link = 0;
            break;
        } else {
            __current_obj -> __free_list_link = __next_obj;
        }
      }
    return(__result);
}
00362 
00363 template <size_t _Max_size>
00364 void *_Pthread_alloc_template<_Max_size>
00365 ::reallocate(void *__p, size_t __old_sz, size_t __new_sz)
00366 {
00367     void * __result;
00368     size_t __copy_sz;
00369 
00370     if (__old_sz > _Max_size
00371     && __new_sz > _Max_size) {
00372         return(realloc(__p, __new_sz));
00373     }
00374     if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return(__p);
00375     __result = allocate(__new_sz);
00376     __copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;
00377     memcpy(__result, __p, __copy_sz);
00378     deallocate(__p, __old_sz);
00379     return(__result);
00380 }
00381 
// Out-of-line definitions of the class-static shared state (one set per
// _Max_size instantiation).

// No per-thread states available for reuse initially.
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_free_per_thread_states = 0;

// Created lazily by _S_get_per_thread_state once _S_key_initialized.
template <size_t _Max_size>
pthread_key_t _Pthread_alloc_template<_Max_size>::_S_key;

template <size_t _Max_size>
bool _Pthread_alloc_template<_Max_size>::_S_key_initialized = false;

// Statically initialized mutex guarding all of the state above.
template <size_t _Max_size>
pthread_mutex_t _Pthread_alloc_template<_Max_size>::_S_chunk_allocator_lock
= PTHREAD_MUTEX_INITIALIZER;

// Shared pool starts empty; first allocation triggers _S_chunk_alloc.
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_start_free = 0;

template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_end_free = 0;

template <size_t _Max_size>
size_t _Pthread_alloc_template<_Max_size>
::_S_heap_size = 0;
00407 
00408 
00409 template <class _Tp>
00410 class pthread_allocator {
00411   typedef pthread_alloc _S_Alloc;          // The underlying allocator.
00412 public:
00413   typedef size_t     size_type;
00414   typedef ptrdiff_t  difference_type;
00415   typedef _Tp*       pointer;
00416   typedef const _Tp* const_pointer;
00417   typedef _Tp&       reference;
00418   typedef const _Tp& const_reference;
00419   typedef _Tp        value_type;
00420 
00421   template <class _NewType> struct rebind {
00422     typedef pthread_allocator<_NewType> other;
00423   };
00424 
00425   pthread_allocator() __STL_NOTHROW {}
00426   pthread_allocator(const pthread_allocator& a) __STL_NOTHROW {}
00427   template <class _OtherType>
00428     pthread_allocator(const pthread_allocator<_OtherType>&)
00429         __STL_NOTHROW {}
00430   ~pthread_allocator() __STL_NOTHROW {}
00431 
00432   pointer address(reference __x) const { return &__x; }
00433   const_pointer address(const_reference __x) const { return &__x; }
00434 
00435   // __n is permitted to be 0.  The C++ standard says nothing about what
00436   // the return value is when __n == 0.
00437   _Tp* allocate(size_type __n, const void* = 0) {
00438     return __n != 0 ? static_cast<_Tp*>(_S_Alloc::allocate(__n * sizeof(_Tp)))
00439                     : 0;
00440   }
00441 
00442   // p is not permitted to be a null pointer.
00443   void deallocate(pointer __p, size_type __n)
00444     { _S_Alloc::deallocate(__p, __n * sizeof(_Tp)); }
00445 
00446   size_type max_size() const __STL_NOTHROW 
00447     { return size_t(-1) / sizeof(_Tp); }
00448 
00449   void construct(pointer __p, const _Tp& __val) { new(__p) _Tp(__val); }
00450   void destroy(pointer _p) { _p->~_Tp(); }
00451 };
00452 
// Specialization for void: no references or allocation are possible,
// only the pointer typedefs and rebind remain.
template<>
class pthread_allocator<void> {
public:
  typedef size_t      size_type;
  typedef ptrdiff_t   difference_type;
  typedef void*       pointer;
  typedef const void* const_pointer;
  typedef void        value_type;

  template <class _NewType> struct rebind {
    typedef pthread_allocator<_NewType> other;
  };
};
00466 
// All instances of a given _Pthread_alloc_template share static state,
// so any two compare equal (memory from one is freeable by the other).
template <size_t _Max_size>
inline bool operator==(const _Pthread_alloc_template<_Max_size>&,
                       const _Pthread_alloc_template<_Max_size>&)
{
  return true;
}
00473 
00474 template <class _T1, class _T2>
00475 inline bool operator==(const pthread_allocator<_T1>&,
00476                        const pthread_allocator<_T2>& a2) 
00477 {
00478   return true;
00479 }
00480 
// Mirror of operator==: stateless allocators are never unequal.
template <class _T1, class _T2>
inline bool operator!=(const pthread_allocator<_T1>&,
                       const pthread_allocator<_T2>&)
{
  return false;
}
00487 
// Tell the container machinery that _Pthread_alloc_template is an
// instanceless (static) allocator and how to adapt it.
template <class _Tp, size_t _Max_size>
struct _Alloc_traits<_Tp, _Pthread_alloc_template<_Max_size> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max_size> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max_size> > 
          allocator_type;
};
00496 
// Rebinding case: an __allocator wrapper over the pthread allocator is
// still instanceless; retarget it at element type _Tp.
template <class _Tp, class _Atype, size_t _Max>
struct _Alloc_traits<_Tp, __allocator<_Atype, _Pthread_alloc_template<_Max> > >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max> > allocator_type;
};
00504 
// pthread_allocator itself is instanceless; it always maps onto the
// default (_Max_size == 128) _Pthread_alloc_template instantiation.
template <class _Tp, class _Atype>
struct _Alloc_traits<_Tp, pthread_allocator<_Atype> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<> > _Alloc_type;
  typedef pthread_allocator<_Tp> allocator_type;
};
00512 
00513 
00514 } // namespace std
00515 
00516 #endif /* _CPP_BITS_PTHREAD_ALLOCIMPL_H */
00517 
00518 // Local Variables:
00519 // mode:C++
00520 // End:

Generated on Mon Apr 8 03:11:30 2002 for libstdc++-v3 Source by doxygen 1.2.15