END\n"); } 3"> END\n"); } 3">
Download presentation
Presentation is loading. Please wait.
Published byAmelia Valerie Matthews Modified over 9 years ago
1
USolids Multi-Union notes Marek Gayer 1
2
GetCandidatesVoxelArray method, on which Inside very much relies Finding voxel candidates did not delete buffers Error in creating the final bit mask: – ((unsigned int *)(maskResult))[iIndex] = ((unsigned int *)(maskX))[iIndex] & ((unsigned int *)(maskY))[iIndex] & ((unsigned int *)(maskX))[iIndex]; Was possible to merge with Intersect method => No buffers was in the end necessary at all Optimization and reducing of original 140 lines of code to 67 Performance of new method faster in hundreds % Root timer precision is +-0.01s => Number of points tested increased to 10M 2
3
New script to measure scalability based on splitting of Multi-Union integration with Root graphics void TestMultiUnion() { // TestMultiUnionWithGraphics(); return; int numNodes[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 34, 36, 38, 40, 45, 50, 60, 70, 80, 90, 100, 500, 1000, 5000, 8000}; int numNodesCount = sizeof (numNodes) / sizeof (int); // numNodesCount -= 3; ofstream nodes("nodes.dat"), times("times.dat"); for (int i = 0; i < numNodesCount; i++) { int numNodesImpl = numNodes[i]; double chrono = TestMultiUnionOneStep(numNodesImpl); nodes << numNodesImpl << endl; times << chrono << endl; } nodes.close(), times.close(); printf("[> END\n"); } 3
4
TestMultiUnionOneStep (listed simplified) based on splitting of Multi-Union integration with Root graphics double TestMultiUnionOneStep(int numNodesImpl) { UMultiUnion *multiUnion = CreateMultiUnion(numNodesImpl); multiUnion->Voxelize(); DisplayMultiUnionInfo(multiUnion); double bmin[3], bmax[3]; multiUnion->Extent(bmin, bmax); int npoints = 10000000; int n10 = npoints/10; UVector3 point; TStopwatch timer; timer.Start(); for (int i = 0; i < npoints; i++) { if (numNodesImpl > 100) if (n10 && (i % n10)==0) printf("test inside... %d%\n",int(100*i/npoints)); point.x = gRandom->Uniform(-2000, 2000); point.y = gRandom->Uniform(-2000, 2000); point.z = gRandom->Uniform(-2000, 2000); VUSolid::EnumInside result = multiUnion->Inside(point); } timer.Stop(); double chrono = timer.CpuTime(); return chrono; } 4
5
Scaling of Multi-Union Inside: source with original conditions by Jean-Marie 5
6
6
7
Multi-Union with 21 boxes. Faster than one box in original measurement 7
8
Scaling of Multi-Union – fixing the ranges to be really [-2000; 2000] 8
9
Scaling of Multi-Union – fixed the ranges to be really [-2000; 2000] 9
10
Profiling of Inside method content (listed simplified) VUSolid::EnumInside UMultiUnion::Inside(const UVector3 &aPoint) const { UVector3 tempPointConv; VUSolid::EnumInside tempInside = eOutside; bool boolSurface = false; vector vectorOutcome; vectorOutcome = fVoxels->GetCandidatesVoxelArrayOld(aPoint); fVoxels->GetCandidatesVoxelArray(aPoint, vectorOutcome); int limit = vectorOutcome.size(); for(int i = 0 ; i < limit ; i++) { VUSolid *tempSolid = ((*fNodes)[vectorOutcome[i]])->fSolid; UTransform3D *tempTransform = ((*fNodes)[vectorOutcome[i]])->fTransform; tempPointConv = tempTransform->LocalPoint(aPoint); tempInside = tempSolid->Inside(tempPointConv); if(tempInside == eSurface) boolSurface = true; if(tempInside == eInside) return eInside; } tempInside = boolSurface ? eSurface : eOutside; return tempInside; } 10
11
valgrind --tool=callgrind root.exe run.C kcachegrind [“New Implementation”] std::vector GetCandidatesVoxelArray(const UVector3 &point); 11
12
valgrind --tool=callgrind root.exe run.C kcachegrind [“New Implementation 2”] void GetCandidatesVoxelArray(const UVector3 &point, std::vector &list); 12
13
Touch up of sources Added missing deletes Adding references to objects in methods headers double* arrMin = new double[3] => double arrMin[3] In debug mode, all results are compared to InsideDummy to make sure of consistency iIndex, jIndex etc. => i,j Optimization of loops Removing underscores Avoiding calling Inside 2x times in Normal method Several similar, small changes 13
14
NEW DEVELOPMENTS 14
15
Removing 2 continue checks => slightly less performance 15
16
New performance setup All boxes are located in -1 to 1 for xyz container Volume ratio of test are / boxes is same, ratio of inside vs. outside points is a bit lower than ½, because boxes can collide each other Random points located in -1.1 to 1.1 for xyz 16
17
Optimization of voxel finder int limit = (bytesCorrected+1) / sizeof(unsigned int); for (int i = 0 ; i < limit; i++) { unsigned int mask; // Logic "and" of the masks along the 3 axes x, y, z: // removing if (! and continue => slightly slower if (!(mask = !maskXLeft ? maskX[i] : maskX[i] | maskXLeft[i])) continue; if (!(mask &= !maskYLeft ? maskY[i] : maskY[i] | maskYLeft[i])) continue; if (!(mask &= !maskZLeft ? maskZ[i] : maskZ[i] | maskZLeft[i])) continue; int currentBit = 8*sizeof(unsigned int)*i; for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (currentBit >= carNodes) return; int shifted = 1 << bit; // new if (mask & shifted) { list.push_back(currentBit); if (!(mask -= shifted)) break; // new } currentBit++; } 17
18
Optimization of voxel finder int limit = (bytesCorrected+1) / sizeof(unsigned int); for (int i = 0 ; i < limit; i++) { // Logic "and" of the masks along the 3 axes x, y, z: unsigned int mask = !maskXLeft ? maskX[i] : maskX[i] | maskXLeft[i]; if (!mask) continue; // removing this line => slightly slower mask &= !maskYLeft ? maskY[i] : maskY[i] | maskYLeft[i]; if (!mask) continue; // removing this line => slightly slower mask &= !maskZLeft ? maskZ[i] : maskZ[i] | maskZLeft[i]; if (!mask) continue; // removing this line => slightly slower int currentBit = 8*sizeof(unsigned int)*i; for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (currentBit >= carNodes) return; int shifted = 1 << bit; // new if (mask & shifted) { list.push_back(currentBit); if (!(mask -= shifted)) break; // new } currentBit++; } 18
19
sbtscale 19
20
sbtscale 20
21
Arrays improvements 1/2 Replace C style dynamic arrays (e.g. double *) with new and detete Using instead resizable std::vector for boxes and boundaries => no need for various temporary variables Verified that performance did not drop No new and delete constructs => no possible memory leaks by missing or wrong deletes 21
22
Arrays improvements 2/2 No indexes like “boxes[6*i + 3 + k]” anymore, since structure used: struct UVoxelBox { UVector3 hlen; // half length of the box UVector3 pos; // position of the box }; std::vector boxes; 22
23
Bitmasks abstrastion Bitmasks have special class UBits, based on adaptation of ROOT Tbits One for each whole axis Other alternatives, problematic for one or another reason… – boost dynamic array: – Vector, – bitset or other online free alternatives Makes more clean and clear code 23
24
Exclusions (“crossings”) Ubits data structure is used for making additional exclusion masks (and type) for excluding selected candidates Used in DistanceToIn and DistanceToOut Local var passed as parameter => thread safe Usage philosophy example: – UBits exclusion(voxels->GetBitsPerSlice()); – exclusion.ResetAllBits(true); // set all bits to 1 – exclusion.ResetBitNumber(int excludedCandidate=1); – VUSolid::EnumInside location = InsideWithExclusion(currentPoint, &exclusion); – exclusion.SetBitNumber(candidate); 24
25
Voxels bit masks memory padding Changed bitmask sizes increase for each voxel from 8bits (size of char) to 32bit/64bit (size of int) Faster memory addressing (each mask starts 4/8 bytes for each bitmask) and algorithm makes increase up to 10% in performance Possible to make faster loops, because we do not have to count bits We can and masks and compare them to zeros using whole unsigned int 25
26
1/3 Fast routines for finding set bit components of bitmask in u. int for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (mask & 1) { list.push_back(8*sizeof(unsigned int)*i+bit); if (!(mask >>= 1)) break; // new } else mask >>= 1; } 26
27
2/3 Fast routines for finding set bit components of bitmask in u. int for (int byte = 0; byte < (int) (sizeof(unsigned int)); byte++) { if (int maskByte = mask & 0xFF) { do { static const int firstBits[256] = { 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; int bit = firstBits[maskByte]; list.push_back(8*(sizeof(unsigned int)*i+ byte) + bit); maskByte -= 1 << bit; } while (maskByte); } mask >>= 8; } 27
28
3/3 Fast routines for finding set bit components of bitmask in u. int for (int byte = 0; byte < (int) (sizeof(unsigned int)); byte++) { if (int maskByte = mask & 0xFF) { for (int bit = 0; bit < 8; bit++) { if (maskByte & 1) { list.push_back(8*(sizeof(unsigned int)*i+ byte) + bit); if (!(maskByte >>= 1)) break; // new } else maskByte >>= 1; } mask >>= 8; } 28
29
Preferring references Except for bitmasks values, using references instead of pointers because in C++ “References are the preferred way of indirectly accessing a variable”. [ see e.g. C/C++ Pointers vs References, http://www.dgp.toronto.edu/~patrick/csc418/w i2004/notes/PointersVsRef.pdfhttp://www.dgp.toronto.edu/~patrick/csc418/w i2004/notes/PointersVsRef.pdf ] 29
30
Other effort at VoxelFinder/MultiUnion 1/2 Renamed all and removed some unnecessary fields in voxel finder UVoxelFinder is separated and independent on UMultiUnion and can be used separately UVoxelFinder Used as common C++ property of UMultiUnion Overall cleanup and reducing of code Fixed wrong construction of bit masks, which for certain points was making inconsistencies 30
31
Other effort at VoxelFinder/MultiUnion 2/2 Boundaries, bit slices counts and bitmasks are stored as arrays [0..2] This allowed to rewrite all loops to for cycles, instead of having special codes for each axis => significant reducing of code + readability Separate cycles for x, y, z merged when possible to one. Removed all “1e-4” type constants No need to assign fields in constructor, their own constructor properly initializes Removed UNode class 31
32
Inside => Inside very fast, integrity is checked vs. InsideDummy, and is 100% conformal Based on calling InsideWithExclusion with NULL as exclusion mask 32
33
Safety’s Their algorithm is simple With utilization of TGeoShapeAssembly::Safety and TGeoVoxelFinder, checkup and small fixes of J.M.’s Safety’s 33
34
Normal Code is straightforward Based on J.M. code + TGeoShapeAssembly::ComputeNormal If there is a solid from union where point on surface, return normal from that point Otherwise, find solid with smallest SafetyFromInside/ SafetyFromOutside to the point and return the normal for that solid and point 34
35
DistanceToIn Integrity is checked vs. DistanceToInDummy and is 100% conformal within VUSolid::Tolerance() Checked TGeoShapeAssembly::DistFromOutside + incomplete J.M. code + comments from Andrei Clear code divided to methods: voxels.DistanceToFirst, voxels.DistanceToNext, voxels.GetCandidatesVoxelArray and DistanceToInCandidates Algorithm based on calculating DistanceToNextVoxel in voxel finder: 1.Finding distance (“shift”) to first voxel on the direction 2.Shifting the point 3.Finding solids contained in the voxel for shifted point (candidates), if any 4.Computing minimum distance from original point for all candidates using solid- >DistanceToIn() 5.From now, do not take into account candidates that was already checked 6.If distance less than shift to next voxel return it, otherwise update minimal distance and go to step 2. 7.Return minimal distance (or infinity if it was not set) 35
36
DistanceToOut Integrity is checked vs. DistanceToOutDummy, and is 100% conformal within VUSolid::Tolerance() Improved non-functional J.M. code + comments from Andrei Based on: 1.Find solid from the voxel where the point is located on surface or inside with maximum distance to out, otherwise return -1 (we are outside) 2.Propagate the point using DistanceToOut of such solid 3.Cumulate the returned distance 4.If inside (exclude the current solid when figuring it out) another solid, go to 1., but there ignore the solid that we just passed from 5.Return cumulated distance 6.If we were outside, take into consideration voxels located +-tolerance on all axis from given point 36
37
Tests SBT cannot be used directly, no Geant4 multi- union Optical Escape used. Tests Inside, Safety’s, Distance’s but not Normal method It’s passing tests for boxes and orbs with count of objects 1 - 10000 37
38
Optical Escape, for Orbs/Boxes multi-unions, no particles escape 38
39
Optical Escape, for Orbs/Boxes multi-unions, no particles escape 39
40
Questions Is it possible to make.q in root in a script? (root -b -q run.C) VUSolid::Extent. Why not use UVector3 instead of double[3] Maybe make [] operator for UVector3? Uvector3 (usolids::tolerance, usolids::tolerance, usolids::tolerance) UVector3 (double x=usolids::tolerance, x, x); If you have this constructor, Uvector3(double) Then this will be automatically done by compiler (no warning) Uvector3 Vec = (Uvector3) usolids::tolerance; 40
41
Questions UVector3 /= operator, Set(value), SetNull(), Operator[] Using UVector3 inside loops 41
42
NEW DEVELOPMENTS 42
43
Voxel cells tolerance padding, by adding tolerance to solid extents Added tolerance to extent of each solid, from which boundaries are being created This removes many errors which are gained on points on borders, for example in case of Inside and Distance* methods. No hack attempts to get it work necessary anymore. Also it allows significant simplification of the core routine GetCandidatesVoxelArray 43
44
Boxes with tolerance padding vs. no padding performance The only practical difference is, that in worst case the number of boundaries doubles But because we use binary search, it does not have any significant performance impact Up to 4 percent for number of boxes > 1000 for inside e.g. {1.28 2.08 2.9} vs. {1.26 2 2.83} I have modified J.M. boxes test, to have zero spacing with each other and run test 1:10000 44
45
UUtils::BinarySearch (OLD) //______________________________________________________________________________ int UUtils::BinarySearch(int n, const double *array, double value) { // Binary search in an array of doubles. If match is found, function returns // position of element. If no match found, function gives nearest // element smaller than value. //if (array[n-1] == value) return n - 2; // patch, let us discuss it int nabove, nbelow, middle; nabove = n+1; nbelow = 0; while(nabove-nbelow > 1) { middle = (nabove+nbelow)/2; if(value == array[middle-1]) { nbelow = middle; break; } if (value < array[middle-1]) nabove = middle; else nbelow = middle; } return nbelow-1; } 45
46
UUtils::BinarySearch (NEW) int UUtils::BinarySearch(const std::vector &vec, double value) { vector ::const_iterator begin=vec.begin(), end=vec.end(); int res = upper_bound(begin, end, value) - begin - 1; #ifdef DEBUG int resold = UUtils::BinarySearch(vec.size(), &vec[0], value); if (res != resold) res = resold; #endif return res; } 46
47
Using std::upper_bound for binary search instead of UUtils::BinarySearch 47
48
Commented out this method, using std::sort better void UUtils::Sort(int n, const double* a, int* index, bool down) { // Sort the n elements of the array a of generic templated type Element. // In output the array index of type Index contains the indices of the sorted array. // If down is false sort in increasing order (default is decreasing order). // NOTE that the array index must be created with a length >= n // before calling this function. // NOTE also that the size type for n must be the same type used for the index array // (templated type Index) for(int i = 0; i < n; i++) { index[i] = i; } if ( down ) std::sort(index, index + n, CompareDesc (a) ); else std::sort(index, index + n, CompareAsc (a) ); } => std::sort(boundary.begin(), boundary.end()); 48
49
Vector access operator inline double UVector3::operator[] (int index) const { // TODO: test performance of both versions on Linux if (true) { double vec[3] = {x, y, z}; return vec[index]; } switch (index) { case 0: return x; case 1: return y; case 2: return z; default: return 0; } inline double &UVector3::operator[] (int index) { switch (index) { case 0: return x; case 1: return y; case 2: return z; default: return x; } 49
50
Integrated multi-union into SBT As a primary solid, G4UnionSolid is created In data-analysis and performance test, we traverse through this solid and we create – UMultiUnion – TGeoCompositeShape Advantage, by adding a new test, it is propagated to all 3 solid 50
51
sbtplot3d(Inside,USolids); 51
52
Polyhedron received from Geant4 - figure; sbtpolyhedra(Normal); 52
53
DistanceToIn was not conformal Problem #1 – DistanceToFirst did not work – => solution bounded multi-union extent, wrapped as Ubox, calling DistanceToIn double UVoxelFinder::DistanceToFirst(UVector3 &point, const UVector3 &direction) const { UVector3 pointShifted = point - boundingBoxCenter; double shift = boundingBox->DistanceToIn(pointShifted, direction); return shift; } Problem #2 – DistanceToNextVoxel not working. Rounding problems + bad algorithm I consider OpticalEscape to be less suitable for validity testing than SBT 53
54
sbtplot(SafetyFromOutside, Geant4, USolids); 54
55
sbtplot(Inside, Geant4, USolids); 55
56
sbtplot(Normal, Geant4, USolids); 56
57
figure; sbtpolyhedra(Normal); sbtvectors(Normal, Geant4); 57
58
figure; sbtpolyhedra(Normal); sbtvectors(Normal, USolids); 58
59
sbtplot(SafetyFromInside, Geant4, USolids); 59
60
sbtplot(DistanceToIn, Geant4, USolids); 60
61
sbtplot(DistanceToOut, Geant4, USolids); 61
62
sbtperf (for a multi-union with 10 boxes) 62
63
Other effort Extents changed to virtual void Extent (UVector3 &aMin, UVector3 &aMax) const; UVector3.Set(double value) => remove SetNull method Operator / for UVector3, to improve readability a bit 63
64
Questions UVector3, UVector3, UVector3 with template specialization? Or very simple vector UInt3, UIntTriplet Write wrapped loops even in performance critical methods and count with auto-compiler loops unwrapping? Vectorization of [1..3] in GetCandidatesVoxelArray 64
65
NEW DEVELOPMENTS 21.3.2012 65
66
Early exit conditions before making binary search => 10% faster 66
67
Added specialization of loop for number of bytes <= sizeof (int) 67
68
DistanceToIn very significantly improved, binary search minimized 68
69
Performance scalability measurements for MU(1..100) - 5 69
70
Performance scalability measurements for MU(1..100) - 10 70
71
Performance scalability measurements for MU(1..100) - 20 71
72
DAP Scalability - Inside 72
73
DAP Scalability - Normal 73
74
DAP Scalability - SafetyFromInside 74
75
DAP Scalability -SafetyFromOutside 75
76
DAP Scalability - DistanceToIn 76
77
DAP Scalability - DistanceToOut 77
78
DAP Scalability - Inside 78
79
DAP Scalability - Normal 79
80
DAP Scalability - SafetyFromInside 80
81
DAP Scalability -SafetyFromOutside 81
82
DAP Scalability - DistanceToIn 82
83
DAP Scalability - DistanceToOut 83
84
sbtplot(Inside, Geant4, Root);
85
sbtplot(Normal, Geant4, USolids);
86
figure; sbtpolyhedra(Normal); sbtvectors(Normal, USolids, None, None, 942, 1); sbtvectors(Normal, Geant4, None, None, 942, 1, 'r'); 86
87
sbtplot(SafetyFromInside, USolids, Geant4);
88
sbtplot3d(SafetyFromOutside,USolids, None, 26557, 1); % left sbtplot3d(SafetyFromOutside,Geant4, None, 26557, 1); % right 88 >> sbtplot(SafetyFromOutside,USolids, Geant4, 26557, 1); Evaluating differences for SafetyFromOutside (USolids-Geant4) Different point found, index 26557 difference is 2.191 Total number of different points for SafetyFromOutside (USolids-Geant4): 1
89
sbtplot(SafetyFromInside, USolids Geant4);
90
sbtplot(SafetyFromOutside,USolids Geant4);
91
sbtplot(DistanceToIn, USolids Geant4);
92
sbtplot(DistanceToOut, USolids Geant4);
93
Comments Surface points definition, because we use 10x up tolerance. DistanceToOut cannot be tested with outside points, because Geant4 generates errors in DistanceToOut for these points. Maybe we should add extra groups of close surface points, from both inside and outside – InsideCloseSurface, OutsideCloseSurface Sorting of random points, according to safety How to make displaced solid or union with only 1 solid in ROOT 93
Similar presentations
© 2025 SlidePlayer.com. Inc.
All rights reserved.