|
SkePU 0.7
|
#ifndef DEVICE_CU_H
#define DEVICE_CU_H

#ifdef SKEPU_CUDA

#include <iostream>
#include <cuda.h>
// cudaStream_t, cudaDeviceProp, cudaGetDeviceProperties and cudaGetErrorString
// are CUDA *runtime* API symbols. Include the runtime header explicitly so this
// header does not depend on nvcc's implicit include.
#include <cuda_runtime.h>

namespace skepu
{

/*!
 *  \class Device_CU
 *
 *  \brief Holds the properties of one CUDA device.
 *
 *  On construction the object queries the CUDA runtime for the properties of
 *  the device with the given index and caches them. The getters below only
 *  read the cached values; they never call into the CUDA runtime again.
 */
class Device_CU
{

public:
   /*!
    *  Stream associated with this device. It is initialized to 0 (the default
    *  stream) by the constructor. This class neither creates nor destroys a
    *  stream; code that assigns a created stream here owns its lifetime.
    */
   cudaStream_t stream;

private:
   int m_deviceID;              //!< Device index passed to the constructor.
   cudaDeviceProp m_deviceProp; //!< Cached device properties (all zero if the query failed).
   int m_maxThreads;            //!< Thread-per-block limit used for this device.
   int m_maxBlocks;             //!< Block limit: maxGridSize[0] of the device.

   /*!
    *  Queries the CUDA runtime for the properties of a device and stores them
    *  in m_deviceProp.
    *
    *  A failure is reported on std::cerr and is not fatal. In that case
    *  m_deviceProp is reset to all zeros so that later reads see well-defined
    *  values instead of indeterminate memory.
    *
    *  \param device Index of the device to query.
    */
   void getDeviceProps(int device)
   {
      const cudaError_t err = cudaGetDeviceProperties(&m_deviceProp, device);
      if (err != cudaSuccess)
      {
         // Do not rely on what the runtime left in the struct after a failure.
         m_deviceProp = cudaDeviceProp();
         std::cerr<<"getDeviceProps failed! (device "<<device<<": "<<cudaGetErrorString(err)<<")\n";
      }
   }

public:

   /*!
    *  Caches the properties of the device with index \p id and derives the
    *  thread and block limits from them.
    *
    *  If the property query fails, the cached properties are all zero and
    *  therefore getMaxThreads() and getMaxBlocks() return 0.
    *
    *  \param id Index of the CUDA device this object describes.
    */
   Device_CU(int id)
      : stream(0), m_deviceID(id), m_deviceProp(), m_maxThreads(0), m_maxBlocks(0)
   {
      getDeviceProps(id);

      // Devices of compute capability 1.0 and 1.1 are limited to 256 threads
      // per block here instead of the value reported by the runtime.
      // NOTE(review): the reason for this cap is not visible in this file.
      if(m_deviceProp.major == 1 && m_deviceProp.minor < 2)
      {
         m_maxThreads = 256;
      }
      else
      {
         m_maxThreads = m_deviceProp.maxThreadsPerBlock;
      }

      m_maxBlocks = m_deviceProp.maxGridSize[0];
      // std::cerr<<"m_maxThreads: "<<m_maxThreads<<", m_maxBlocks: "<<m_maxBlocks<<"\n";
   }

   /*!
    *  Does nothing; in particular, \p stream is not destroyed here.
    */
   ~Device_CU() {}

   /*!
    *  \return maxThreadsPerBlock as reported by the runtime. Unlike
    *  getMaxThreads(), this is not capped for compute capability 1.0/1.1.
    */
   int getMaxBlockSize() const {return m_deviceProp.maxThreadsPerBlock;}

   /*!
    *  \return The number of multiprocessors on the device (multiProcessorCount).
    */
   int getNumComputeUnits() const {return m_deviceProp.multiProcessorCount;}

   /*!
    *  \return The total global memory of the device (totalGlobalMem).
    */
   size_t getGlobalMemSize() const {return m_deviceProp.totalGlobalMem;}

   /*!
    *  \return The shared memory available per block (sharedMemPerBlock).
    */
   size_t getSharedMemPerBlock() const {return m_deviceProp.sharedMemPerBlock;}

   /*!
    *  \return The thread-per-block limit chosen in the constructor: 256 for
    *  compute capability 1.0/1.1, otherwise maxThreadsPerBlock.
    */
   int getMaxThreads() const {return m_maxThreads;}

   /*!
    *  \return The block limit chosen in the constructor (maxGridSize[0]).
    */
   int getMaxBlocks() const {return m_maxBlocks;}

   /*!
    *  \return The device index this object was constructed with.
    */
   int getDeviceID() const {return m_deviceID;}
};

}

#endif

#endif
1.7.4