END\n"); } 3"> END\n"); } 3">

Presentation is loading. Please wait.

Presentation is loading. Please wait.

USolids Multi-Union notes Marek Gayer 1. GetCandidatesVoxelArray method, on which Inside very much relies Finding voxel candidates did not delete buffers.

Similar presentations


Presentation on theme: "USolids Multi-Union notes Marek Gayer 1. GetCandidatesVoxelArray method, on which Inside very much relies Finding voxel candidates did not delete buffers."— Presentation transcript:

1 USolids Multi-Union notes Marek Gayer 1

2 GetCandidatesVoxelArray method, on which Inside very much relies Finding voxel candidates did not delete buffers Error in creating the final bit mask: – ((unsigned int *)(maskResult))[iIndex] = ((unsigned int *)(maskX))[iIndex] & ((unsigned int *)(maskY))[iIndex] & ((unsigned int *)(maskX))[iIndex]; Was possible to merge with Intersect method => No buffers were in the end necessary at all Optimization and reduction of the original 140 lines of code to 67 Performance of the new method is faster by hundreds of % Root timer precision is +-0.01s => Number of points tested increased to 10M 2

3 New script to measure scalability based on splitting of Multi-Union integration with Root graphics void TestMultiUnion() { // TestMultiUnionWithGraphics(); return; int numNodes[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 34, 36, 38, 40, 45, 50, 60, 70, 80, 90, 100, 500, 1000, 5000, 8000}; int numNodesCount = sizeof (numNodes) / sizeof (int); // numNodesCount -= 3; ofstream nodes("nodes.dat"), times("times.dat"); for (int i = 0; i < numNodesCount; i++) { int numNodesImpl = numNodes[i]; double chrono = TestMultiUnionOneStep(numNodesImpl); nodes << numNodesImpl << endl; times << chrono << endl; } nodes.close(), times.close(); printf("[> END\n"); } 3

4 TestMultiUnionOneStep (listed simplified) based on splitting of Multi-Union integration with Root graphics double TestMultiUnionOneStep(int numNodesImpl) { UMultiUnion *multiUnion = CreateMultiUnion(numNodesImpl); multiUnion->Voxelize(); DisplayMultiUnionInfo(multiUnion); double bmin[3], bmax[3]; multiUnion->Extent(bmin, bmax); int npoints = 10000000; int n10 = npoints/10; UVector3 point; TStopwatch timer; timer.Start(); for (int i = 0; i < npoints; i++) { if (numNodesImpl > 100) if (n10 && (i % n10)==0) printf("test inside... %d%\n",int(100*i/npoints)); point.x = gRandom->Uniform(-2000, 2000); point.y = gRandom->Uniform(-2000, 2000); point.z = gRandom->Uniform(-2000, 2000); VUSolid::EnumInside result = multiUnion->Inside(point); } timer.Stop(); double chrono = timer.CpuTime(); return chrono; } 4

5 Scaling of Multi-Union Inside: source with original conditions by Jean-Marie 5

6 6

7 Multi-Union with 21 boxes. Faster than one box in original measurement 7

8 Scaling of Multi-Union – fixing the ranges to be really [-2000; 2000] 8

9 Scaling of Multi-Union – fixed the ranges to be really [-2000; 2000] 9

10 Profiling of Inside method content (listed simplified) VUSolid::EnumInside UMultiUnion::Inside(const UVector3 &aPoint) const { UVector3 tempPointConv; VUSolid::EnumInside tempInside = eOutside; bool boolSurface = false; vector vectorOutcome; vectorOutcome = fVoxels->GetCandidatesVoxelArrayOld(aPoint); fVoxels->GetCandidatesVoxelArray(aPoint, vectorOutcome); int limit = vectorOutcome.size(); for(int i = 0 ; i < limit ; i++) { VUSolid *tempSolid = ((*fNodes)[vectorOutcome[i]])->fSolid; UTransform3D *tempTransform = ((*fNodes)[vectorOutcome[i]])->fTransform; tempPointConv = tempTransform->LocalPoint(aPoint); tempInside = tempSolid->Inside(tempPointConv); if(tempInside == eSurface) boolSurface = true; if(tempInside == eInside) return eInside; } tempInside = boolSurface ? eSurface : eOutside; return tempInside; } 10

11 valgrind --tool=callgrind root.exe run.C kcachegrind [“New Implementation”] std::vector GetCandidatesVoxelArray(const UVector3 &point); 11

12 valgrind --tool=callgrind root.exe run.C kcachegrind [“New Implementation 2”] void GetCandidatesVoxelArray(const UVector3 &point, std::vector &list); 12

13 Touch up of sources Added missing deletes Adding references to objects in methods headers double* arrMin = new double[3] => double arrMin[3] In debug mode, all results are compared to InsideDummy to make sure of consistency iIndex, jIndex etc. => i,j Optimization of loops Removing underscores Avoiding calling Inside twice in Normal method Several similar, small changes 13

14 NEW DEVELOPMENTS 14

15 Removing 2 continue checks => slightly less performance 15

16 New performance setup All boxes are located in -1 to 1 for xyz container Volume ratio of test area / boxes is the same, ratio of inside vs. outside points is a bit lower than ½, because boxes can collide with each other Random points located in -1.1 to 1.1 for xyz 16

17 Optimization of voxel finder int limit = (bytesCorrected+1) / sizeof(unsigned int); for (int i = 0 ; i < limit; i++) { unsigned int mask; // Logic "and" of the masks along the 3 axes x, y, z: // removing if (! and continue => slightly slower if (!(mask = !maskXLeft ? maskX[i] : maskX[i] | maskXLeft[i])) continue; if (!(mask &= !maskYLeft ? maskY[i] : maskY[i] | maskYLeft[i])) continue; if (!(mask &= !maskZLeft ? maskZ[i] : maskZ[i] | maskZLeft[i])) continue; int currentBit = 8*sizeof(unsigned int)*i; for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (currentBit >= carNodes) return; int shifted = 1 << bit; // new if (mask & shifted) { list.push_back(currentBit); if (!(mask -= shifted)) break; // new } currentBit++; } 17

18 Optimization of voxel finder int limit = (bytesCorrected+1) / sizeof(unsigned int); for (int i = 0 ; i < limit; i++) { // Logic "and" of the masks along the 3 axes x, y, z: unsigned int mask = !maskXLeft ? maskX[i] : maskX[i] | maskXLeft[i]; if (!mask) continue; // removing this line => slightly slower mask &= !maskYLeft ? maskY[i] : maskY[i] | maskYLeft[i]; if (!mask) continue; // removing this line => slightly slower mask &= !maskZLeft ? maskZ[i] : maskZ[i] | maskZLeft[i]; if (!mask) continue; // removing this line => slightly slower int currentBit = 8*sizeof(unsigned int)*i; for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (currentBit >= carNodes) return; int shifted = 1 << bit; // new if (mask & shifted) { list.push_back(currentBit); if (!(mask -= shifted)) break; // new } currentBit++; } 18

19 sbtscale 19

20 sbtscale 20

21 Arrays improvements 1/2 Replaced C style dynamic arrays (e.g. double *) allocated with new and delete Using resizable std::vector instead for boxes and boundaries => no need for various temporary variables Verified that performance did not drop No new and delete constructs => no possible memory leaks by missing or wrong deletes 21

22 Arrays improvements 2/2 No indexes like “boxes[6*i + 3 + k]” anymore, since structure used: struct UVoxelBox { UVector3 hlen; // half length of the box UVector3 pos; // position of the box }; std::vector boxes; 22

23 Bitmasks abstraction Bitmasks have special class UBits, based on adaptation of ROOT TBits One for each whole axis Other alternatives, problematic for one or another reason… – boost dynamic array: – Vector, – bitset or other online free alternatives Makes the code cleaner and clearer 23

24 Exclusions (“crossings”) UBits data structure is used for making additional exclusion masks (and type) for excluding selected candidates Used in DistanceToIn and DistanceToOut Local var passed as parameter => thread safe Usage philosophy example: – UBits exclusion(voxels->GetBitsPerSlice()); – exclusion.ResetAllBits(true); // set all bits to 1 – exclusion.ResetBitNumber(int excludedCandidate=1); – VUSolid::EnumInside location = InsideWithExclusion(currentPoint, &exclusion); – exclusion.SetBitNumber(candidate); 24

25 Voxels bit masks memory padding Changed bitmask sizes increase for each voxel from 8bits (size of char) to 32bit/64bit (size of int) Faster memory addressing (each mask starts 4/8 bytes for each bitmask) and algorithm makes increase up to 10% in performance Possible to make faster loops, because we do not have to count bits We can AND the masks and compare them to zero using whole unsigned int 25

26 1/3 Fast routines for finding set bit components of bitmask in u. int for (int bit = 0; bit < (int) (8*sizeof(unsigned int)); bit++) { if (mask & 1) { list.push_back(8*sizeof(unsigned int)*i+bit); if (!(mask >>= 1)) break; // new } else mask >>= 1; } 26

27 2/3 Fast routines for finding set bit components of bitmask in u. int for (int byte = 0; byte < (int) (sizeof(unsigned int)); byte++) { if (int maskByte = mask & 0xFF) { do { static const int firstBits[256] = { 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; int bit = firstBits[maskByte]; list.push_back(8*(sizeof(unsigned int)*i+ byte) + bit); maskByte -= 1 << bit; } while (maskByte); } mask >>= 8; } 27

28 3/3 Fast routines for finding set bit components of bitmask in u. int for (int byte = 0; byte < (int) (sizeof(unsigned int)); byte++) { if (int maskByte = mask & 0xFF) { for (int bit = 0; bit < 8; bit++) { if (maskByte & 1) { list.push_back(8*(sizeof(unsigned int)*i+ byte) + bit); if (!(maskByte >>= 1)) break; // new } else maskByte >>= 1; } mask >>= 8; } 28

29 Preferring references Except for bitmasks values, using references instead of pointers because in C++ “References are the preferred way of indirectly accessing a variable”. [ see e.g. C/C++ Pointers vs References, http://www.dgp.toronto.edu/~patrick/csc418/wi2004/notes/PointersVsRef.pdf ] 29

30 Other effort at VoxelFinder/MultiUnion 1/2 Renamed all and removed some unnecessary fields in voxel finder UVoxelFinder is separated and independent on UMultiUnion and can be used separately UVoxelFinder Used as common C++ property of UMultiUnion Overall cleanup and reducing of code Fixed wrong construction of bit masks, which for certain points was making inconsistencies 30

31 Other effort at VoxelFinder/MultiUnion 2/2 Boundaries, bit slices counts and bitmasks are stored as arrays [0..2] This allowed to rewrite all loops to for cycles, instead of having special codes for each axis => significant reducing of code + readability Separate cycles for x, y, z merged when possible to one. Removed all “1e-4” type constants No need to assign fields in constructor, their own constructor properly initializes Removed UNode class 31

32 Inside => Inside very fast, integrity is checked vs. InsideDummy, and is 100% conformal Based on calling InsideWithExclusion with NULL as exclusion mask 32

33 Safety’s Their algorithm is simple With utilization of TGeoShapeAssembly::Safety and TGeoVoxelFinder, checkup and small fixes of J.M.’s Safety’s 33

34 Normal Code is straightforward Based on J.M. code + TGeoShapeAssembly::ComputeNormal If there is a solid from union where point on surface, return normal from that point Otherwise, find solid with smallest SafetyFromInside/ SafetyFromOutside to the point and return the normal for that solid and point 34

35 DistanceToIn Integrity is checked vs. DistanceToInDummy and is 100% conformal within VUSolid::Tolerance() Checked TGeoShapeAssembly::DistFromOutside + incomplete J.M. code + comments from Andrei Clear code divided to methods: voxels.DistanceToFirst, voxels.DistanceToNext, voxels.GetCandidatesVoxelArray and DistanceToInCandidates Algorithm based on calculating DistanceToNextVoxel in voxel finder: 1. Finding distance (“shift”) to first voxel on the direction 2. Shifting the point 3. Finding solids contained in the voxel for shifted point (candidates), if any 4. Computing minimum distance from original point for all candidates using solid->DistanceToIn() 5. From now, do not take into account candidates that were already checked 6. If distance less than shift to next voxel return it, otherwise update minimal distance and go to step 2. 7. Return minimal distance (or infinity if it was not set) 35

36 DistanceToOut Integrity is checked vs. DistanceToOutDummy, and is 100% conformal within VUSolid::Tolerance() Improved non-functional J.M. code + comments from Andrei Based on: 1.Find solid from the voxel where the point is located on surface or inside with maximum distance to out, otherwise return -1 (we are outside) 2.Propagate the point using DistanceToOut of such solid 3.Cumulate the returned distance 4.If inside (exclude the current solid when figuring it out) another solid, go to 1., but there ignore the solid that we just passed from 5.Return cumulated distance 6.If we were outside, take into consideration voxels located +-tolerance on all axis from given point 36

37 Tests SBT cannot be used directly, no Geant4 multi-union Optical Escape used. Tests Inside, Safety’s, Distance’s but not Normal method It’s passing tests for boxes and orbs with count of objects 1 - 10000 37

38 Optical Escape, for Orbs/Boxes multi-unions, no particles escape 38

39 Optical Escape, for Orbs/Boxes multi-unions, no particles escape 39

40 Questions Is it possible to make .q in root in a script? (root -b -q run.C) VUSolid::Extent. Why not use UVector3 instead of double[3] Maybe make [] operator for UVector3? UVector3 (usolids::tolerance, usolids::tolerance, usolids::tolerance) UVector3 (double x=usolids::tolerance, x, x); If you have this constructor, UVector3(double) Then this will be automatically done by compiler (no warning) UVector3 Vec = (UVector3) usolids::tolerance; 40

41 Questions UVector3 /= operator, Set(value), SetNull(), Operator[] Using UVector3 inside loops 41

42 NEW DEVELOPMENTS 42

43 Voxel cells tolerance padding, by adding tolerance to solid extents Added tolerance to extent of each solid, from which boundaries are being created This removes many errors which are gained on points on borders, for example in case of Inside and Distance* methods. No hack attempts to get it work necessary anymore. Also it allows significant simplification of the core routine GetCandidatesVoxelArray 43

44 Boxes with tolerance padding vs. no padding performance The only practical difference is, that in worst case the number of boundaries doubles But because we use binary search, it does not have any significant performance impact Up to 4 percent for number of boxes > 1000 for inside e.g. {1.28 2.08 2.9} vs. {1.26 2 2.83} I have modified J.M. boxes test, to have zero spacing with each other and run test 1:10000 44

45 UUtils::BinarySearch (OLD) //______________________________________________________________________________ int UUtils::BinarySearch(int n, const double *array, double value) { // Binary search in an array of doubles. If match is found, function returns // position of element. If no match found, function gives nearest // element smaller than value. //if (array[n-1] == value) return n - 2; // patch, let us discuss it int nabove, nbelow, middle; nabove = n+1; nbelow = 0; while(nabove-nbelow > 1) { middle = (nabove+nbelow)/2; if(value == array[middle-1]) { nbelow = middle; break; } if (value < array[middle-1]) nabove = middle; else nbelow = middle; } return nbelow-1; } 45

46 UUtils::BinarySearch (NEW) int UUtils::BinarySearch(const std::vector &vec, double value) { vector ::const_iterator begin=vec.begin(), end=vec.end(); int res = upper_bound(begin, end, value) - begin - 1; #ifdef DEBUG int resold = UUtils::BinarySearch(vec.size(), &vec[0], value); if (res != resold) res = resold; #endif return res; } 46

47 Using std::upper_bound for binary search instead of UUtils::BinarySearch 47

48 Commented out this method, using std::sort better void UUtils::Sort(int n, const double* a, int* index, bool down) { // Sort the n elements of the array a of generic templated type Element. // In output the array index of type Index contains the indices of the sorted array. // If down is false sort in increasing order (default is decreasing order). // NOTE that the array index must be created with a length >= n // before calling this function. // NOTE also that the size type for n must be the same type used for the index array // (templated type Index) for(int i = 0; i < n; i++) { index[i] = i; } if ( down ) std::sort(index, index + n, CompareDesc (a) ); else std::sort(index, index + n, CompareAsc (a) ); } => std::sort(boundary.begin(), boundary.end()); 48

49 Vector access operator inline double UVector3::operator[] (int index) const { // TODO: test performance of both versions on Linux if (true) { double vec[3] = {x, y, z}; return vec[index]; } switch (index) { case 0: return x; case 1: return y; case 2: return z; default: return 0; } inline double &UVector3::operator[] (int index) { switch (index) { case 0: return x; case 1: return y; case 2: return z; default: return x; } 49

50 Integrated multi-union into SBT As a primary solid, G4UnionSolid is created In data-analysis and performance test, we traverse through this solid and we create – UMultiUnion – TGeoCompositeShape Advantage, by adding a new test, it is propagated to all 3 solid 50

51 sbtplot3d(Inside,USolids); 51

52 Polyhedron received from Geant4 - figure; sbtpolyhedra(Normal); 52

53 DistanceToIn was not conformal Problem #1 – DistanceToFirst did not work – => solution bounded multi-union extent, wrapped as Ubox, calling DistanceToIn double UVoxelFinder::DistanceToFirst(UVector3 &point, const UVector3 &direction) const { UVector3 pointShifted = point - boundingBoxCenter; double shift = boundingBox->DistanceToIn(pointShifted, direction); return shift; } Problem #2 – DistanceToNextVoxel not working. Rounding problems + bad algorithm I consider OpticalEscape to be less suitable for validity testing than SBT 53

54 sbtplot(SafetyFromOutside, Geant4, USolids); 54

55 sbtplot(Inside, Geant4, USolids); 55

56 sbtplot(Normal, Geant4, USolids); 56

57 figure; sbtpolyhedra(Normal); sbtvectors(Normal, Geant4); 57

58 figure; sbtpolyhedra(Normal); sbtvectors(Normal, USolids); 58

59 sbtplot(SafetyFromInside, Geant4, USolids); 59

60 sbtplot(DistanceToIn, Geant4, USolids); 60

61 sbtplot(DistanceToOut, Geant4, USolids); 61

62 sbtperf (for a multi-union with 10 boxes) 62

63 Other effort Extents changed to virtual void Extent (UVector3 &aMin, UVector3 &aMax) const; UVector3.Set(double value) => remove SetNull method Operator / for UVector3, to improve readability a bit 63

64 Questions UVector3, UVector3, UVector3 with template specialization? Or very simple vector UInt3, UIntTriplet Write wrapped loops even in performance critical methods and count with auto-compiler loops unwrapping? Vectorization of [1..3] in GetCandidatesVoxelArray 64

65 NEW DEVELOPMENTS 21.3.2012 65

66 Early exit conditions before making binary search => 10% faster 66

67 Added specialization of loop for number of bytes <= sizeof (int) 67

68 DistanceToIn very significantly improved, binary search minimized 68

69 Performance scalability measurements for MU(1..100) - 5 69

70 Performance scalability measurements for MU(1..100) - 10 70

71 Performance scalability measurements for MU(1..100) - 20 71

72 DAP Scalability - Inside 72

73 DAP Scalability - Normal 73

74 DAP Scalability - SafetyFromInside 74

75 DAP Scalability -SafetyFromOutside 75

76 DAP Scalability - DistanceToIn 76

77 DAP Scalability - DistanceToOut 77

78 DAP Scalability - Inside 78

79 DAP Scalability - Normal 79

80 DAP Scalability - SafetyFromInside 80

81 DAP Scalability -SafetyFromOutside 81

82 DAP Scalability - DistanceToIn 82

83 DAP Scalability - DistanceToOut 83

84 sbtplot(Inside, Geant4, Root);

85 sbtplot(Normal, Geant4, USolids);

86 figure; sbtpolyhedra(Normal); sbtvectors(Normal, USolids, None, None, 942, 1); sbtvectors(Normal, Geant4, None, None, 942, 1, 'r'); 86

87 sbtplot(SafetyFromInside, USolids, Geant4);

88 sbtplot3d(SafetyFromOutside,USolids, None, 26557, 1); % left sbtplot3d(SafetyFromOutside,Geant4, None, 26557, 1); % right 88 >> sbtplot(SafetyFromOutside,USolids, Geant4, 26557, 1); Evaluating differences for SafetyFromOutside (USolids-Geant4) Different point found, index 26557 difference is 2.191 Total number of different points for SafetyFromOutside (USolids-Geant4): 1

89 sbtplot(SafetyFromInside, USolids Geant4);

90 sbtplot(SafetyFromOutside,USolids Geant4);

91 sbtplot(DistanceToIn, USolids Geant4);

92 sbtplot(DistanceToOut, USolids Geant4);

93 Comments Surface points definition, because we use 10x up tolerance. DistanceToOut cannot be tested with outside points, because Geant4 generates errors in DistanceToOut for these points. Maybe we should add extra groups of close surface points, from both inside and outside – InsideCloseSurface, OutsideCloseSurface Sorting of random points, according to safety How to make displaced solid or union with only 1 solid in ROOT 93


Download ppt "USolids Multi-Union notes Marek Gayer 1. GetCandidatesVoxelArray method, on which Inside very much relies Finding voxel candidates did not delete buffers."

Similar presentations


Ads by Google