From efa4f1644701093caa9bc980b2143a03d076f3ee Mon Sep 17 00:00:00 2001 From: Morten Hjorth-Jensen Date: Thu, 14 Mar 2024 22:11:08 +0100 Subject: [PATCH] update --- doc/pub/week9/html/._week9-bs000.html | 263 +- doc/pub/week9/html/._week9-bs001.html | 263 +- doc/pub/week9/html/._week9-bs002.html | 263 +- doc/pub/week9/html/._week9-bs003.html | 263 +- doc/pub/week9/html/._week9-bs004.html | 263 +- doc/pub/week9/html/._week9-bs005.html | 263 +- doc/pub/week9/html/._week9-bs006.html | 263 +- doc/pub/week9/html/._week9-bs007.html | 263 +- doc/pub/week9/html/._week9-bs008.html | 263 +- doc/pub/week9/html/._week9-bs009.html | 263 +- doc/pub/week9/html/._week9-bs010.html | 263 +- doc/pub/week9/html/._week9-bs011.html | 263 +- doc/pub/week9/html/._week9-bs012.html | 270 +- doc/pub/week9/html/._week9-bs013.html | 342 +- doc/pub/week9/html/._week9-bs014.html | 283 +- doc/pub/week9/html/._week9-bs015.html | 286 +- doc/pub/week9/html/._week9-bs016.html | 294 +- doc/pub/week9/html/._week9-bs017.html | 293 +- doc/pub/week9/html/._week9-bs018.html | 292 +- doc/pub/week9/html/._week9-bs019.html | 283 +- doc/pub/week9/html/._week9-bs020.html | 289 +- doc/pub/week9/html/._week9-bs021.html | 510 +- doc/pub/week9/html/._week9-bs022.html | 522 +- doc/pub/week9/html/._week9-bs023.html | 359 +- doc/pub/week9/html/._week9-bs024.html | 339 +- doc/pub/week9/html/._week9-bs025.html | 307 +- doc/pub/week9/html/._week9-bs026.html | 305 +- doc/pub/week9/html/._week9-bs027.html | 301 +- doc/pub/week9/html/._week9-bs028.html | 314 +- doc/pub/week9/html/._week9-bs029.html | 332 +- doc/pub/week9/html/._week9-bs030.html | 275 +- doc/pub/week9/html/._week9-bs031.html | 274 +- doc/pub/week9/html/._week9-bs032.html | 273 +- doc/pub/week9/html/._week9-bs033.html | 270 +- doc/pub/week9/html/._week9-bs034.html | 309 +- doc/pub/week9/html/._week9-bs035.html | 318 +- doc/pub/week9/html/._week9-bs036.html | 312 +- doc/pub/week9/html/._week9-bs037.html | 280 +- doc/pub/week9/html/._week9-bs038.html | 283 +- 
doc/pub/week9/html/._week9-bs039.html | 305 +- doc/pub/week9/html/._week9-bs040.html | 345 +- doc/pub/week9/html/._week9-bs041.html | 398 +- doc/pub/week9/html/._week9-bs042.html | 290 +- doc/pub/week9/html/._week9-bs043.html | 345 +- doc/pub/week9/html/._week9-bs044.html | 275 +- doc/pub/week9/html/._week9-bs045.html | 282 +- doc/pub/week9/html/._week9-bs046.html | 284 +- doc/pub/week9/html/._week9-bs047.html | 276 +- doc/pub/week9/html/._week9-bs048.html | 304 +- doc/pub/week9/html/._week9-bs049.html | 302 +- doc/pub/week9/html/._week9-bs050.html | 281 +- doc/pub/week9/html/._week9-bs051.html | 312 +- doc/pub/week9/html/._week9-bs052.html | 283 +- doc/pub/week9/html/._week9-bs053.html | 304 +- doc/pub/week9/html/._week9-bs054.html | 304 +- doc/pub/week9/html/._week9-bs055.html | 278 +- doc/pub/week9/html/._week9-bs056.html | 276 +- doc/pub/week9/html/._week9-bs057.html | 280 +- doc/pub/week9/html/._week9-bs058.html | 279 +- doc/pub/week9/html/._week9-bs059.html | 300 +- doc/pub/week9/html/._week9-bs060.html | 294 +- doc/pub/week9/html/._week9-bs061.html | 300 +- doc/pub/week9/html/._week9-bs062.html | 325 +- doc/pub/week9/html/._week9-bs063.html | 347 +- doc/pub/week9/html/._week9-bs064.html | 308 +- doc/pub/week9/html/._week9-bs065.html | 361 +- doc/pub/week9/html/._week9-bs066.html | 278 +- doc/pub/week9/html/._week9-bs067.html | 277 +- doc/pub/week9/html/._week9-bs068.html | 278 +- doc/pub/week9/html/._week9-bs069.html | 279 +- doc/pub/week9/html/._week9-bs070.html | 296 +- doc/pub/week9/html/._week9-bs071.html | 293 +- doc/pub/week9/html/._week9-bs072.html | 278 +- doc/pub/week9/html/._week9-bs073.html | 276 +- doc/pub/week9/html/._week9-bs074.html | 274 +- doc/pub/week9/html/._week9-bs075.html | 270 +- doc/pub/week9/html/._week9-bs076.html | 296 +- doc/pub/week9/html/._week9-bs077.html | 302 +- doc/pub/week9/html/._week9-bs078.html | 307 +- doc/pub/week9/html/._week9-bs079.html | 342 +- doc/pub/week9/html/._week9-bs080.html | 279 +- 
doc/pub/week9/html/._week9-bs081.html | 324 +- doc/pub/week9/html/._week9-bs082.html | 325 +- doc/pub/week9/html/._week9-bs083.html | 325 +- doc/pub/week9/html/._week9-bs084.html | 305 +- doc/pub/week9/html/._week9-bs085.html | 301 +- doc/pub/week9/html/._week9-bs086.html | 310 +- doc/pub/week9/html/._week9-bs087.html | 294 +- doc/pub/week9/html/._week9-bs088.html | 301 +- doc/pub/week9/html/._week9-bs089.html | 303 +- doc/pub/week9/html/._week9-bs090.html | 308 +- doc/pub/week9/html/._week9-bs091.html | 311 +- doc/pub/week9/html/._week9-bs092.html | 298 +- doc/pub/week9/html/._week9-bs093.html | 291 +- doc/pub/week9/html/._week9-bs094.html | 313 +- doc/pub/week9/html/._week9-bs095.html | 309 +- doc/pub/week9/html/._week9-bs096.html | 295 +- doc/pub/week9/html/._week9-bs097.html | 295 +- doc/pub/week9/html/._week9-bs098.html | 295 +- doc/pub/week9/html/._week9-bs099.html | 288 +- doc/pub/week9/html/._week9-bs100.html | 283 +- doc/pub/week9/html/._week9-bs101.html | 289 +- doc/pub/week9/html/._week9-bs102.html | 735 +- doc/pub/week9/html/._week9-bs103.html | 759 +- doc/pub/week9/html/._week9-bs104.html | 334 +- doc/pub/week9/html/._week9-bs105.html | 289 +- doc/pub/week9/html/._week9-bs106.html | 325 +- doc/pub/week9/html/._week9-bs107.html | 319 +- doc/pub/week9/html/._week9-bs108.html | 299 +- doc/pub/week9/html/._week9-bs109.html | 297 +- doc/pub/week9/html/._week9-bs110.html | 320 +- doc/pub/week9/html/._week9-bs111.html | 331 +- doc/pub/week9/html/._week9-bs112.html | 299 +- doc/pub/week9/html/._week9-bs113.html | 282 +- doc/pub/week9/html/._week9-bs114.html | 291 +- doc/pub/week9/html/._week9-bs115.html | 288 +- doc/pub/week9/html/._week9-bs116.html | 301 +- doc/pub/week9/html/._week9-bs117.html | 310 +- doc/pub/week9/html/._week9-bs118.html | 267 +- doc/pub/week9/html/._week9-bs119.html | 315 +- doc/pub/week9/html/._week9-bs120.html | 286 +- doc/pub/week9/html/._week9-bs121.html | 304 +- doc/pub/week9/html/._week9-bs122.html | 286 +- 
doc/pub/week9/html/._week9-bs123.html | 306 +- doc/pub/week9/html/._week9-bs124.html | 345 +- doc/pub/week9/html/._week9-bs125.html | 400 +- doc/pub/week9/html/._week9-bs126.html | 283 +- doc/pub/week9/html/._week9-bs127.html | 334 +- doc/pub/week9/html/._week9-bs128.html | 320 +- doc/pub/week9/html/._week9-bs129.html | 283 +- doc/pub/week9/html/._week9-bs130.html | 309 +- doc/pub/week9/html/._week9-bs131.html | 312 +- doc/pub/week9/html/._week9-bs132.html | 309 +- doc/pub/week9/html/._week9-bs133.html | 312 +- doc/pub/week9/html/._week9-bs134.html | 282 +- doc/pub/week9/html/._week9-bs135.html | 308 +- doc/pub/week9/html/._week9-bs136.html | 276 +- doc/pub/week9/html/._week9-bs137.html | 304 +- doc/pub/week9/html/._week9-bs138.html | 294 +- doc/pub/week9/html/._week9-bs139.html | 343 +- doc/pub/week9/html/._week9-bs140.html | 337 +- .../week9/html/._week9-bs141.html} | 267 +- doc/pub/week9/html/week9-bs.html | 263 +- doc/pub/week9/html/week9-reveal.html | 77 +- doc/pub/week9/html/week9-solarized.html | 81 +- doc/pub/week9/html/week9.html | 81 +- doc/pub/week9/ipynb/ipynb-week9-src.tar.gz | Bin 192 -> 192 bytes doc/pub/week9/ipynb/week9.ipynb | 821 ++- doc/pub/week9/pdf/week9-beamer.pdf | Bin 518679 -> 520359 bytes doc/pub/week9/pdf/week9.pdf | Bin 501561 -> 503615 bytes doc/src/week9/._week9-bs000.html | 710 -- doc/src/week9/._week9-bs001.html | 701 -- doc/src/week9/._week9-bs002.html | 697 -- doc/src/week9/._week9-bs003.html | 702 -- doc/src/week9/._week9-bs004.html | 698 -- doc/src/week9/._week9-bs005.html | 709 -- doc/src/week9/._week9-bs006.html | 696 -- doc/src/week9/._week9-bs007.html | 710 -- doc/src/week9/._week9-bs008.html | 708 -- doc/src/week9/._week9-bs009.html | 703 -- doc/src/week9/._week9-bs010.html | 713 -- doc/src/week9/._week9-bs011.html | 710 -- doc/src/week9/._week9-bs012.html | 705 -- doc/src/week9/._week9-bs013.html | 710 -- doc/src/week9/._week9-bs014.html | 700 -- doc/src/week9/._week9-bs015.html | 711 -- doc/src/week9/._week9-bs016.html | 
710 -- doc/src/week9/._week9-bs017.html | 714 -- doc/src/week9/._week9-bs018.html | 711 -- doc/src/week9/._week9-bs019.html | 711 -- doc/src/week9/._week9-bs020.html | 705 -- doc/src/week9/._week9-bs021.html | 932 --- doc/src/week9/._week9-bs022.html | 777 -- doc/src/week9/._week9-bs023.html | 703 -- doc/src/week9/._week9-bs024.html | 757 -- doc/src/week9/._week9-bs025.html | 729 -- doc/src/week9/._week9-bs026.html | 755 -- doc/src/week9/._week9-bs027.html | 737 -- doc/src/week9/._week9-bs028.html | 764 -- doc/src/week9/._week9-bs029.html | 707 -- doc/src/week9/._week9-bs030.html | 705 -- doc/src/week9/._week9-bs031.html | 706 -- doc/src/week9/._week9-bs032.html | 704 -- doc/src/week9/._week9-bs033.html | 705 -- doc/src/week9/._week9-bs034.html | 733 -- doc/src/week9/._week9-bs035.html | 714 -- doc/src/week9/._week9-bs036.html | 725 -- doc/src/week9/._week9-bs037.html | 726 -- doc/src/week9/._week9-bs038.html | 726 -- doc/src/week9/._week9-bs039.html | 700 -- doc/src/week9/._week9-bs040.html | 764 -- doc/src/week9/._week9-bs041.html | 801 --- doc/src/week9/._week9-bs042.html | 790 -- doc/src/week9/._week9-bs043.html | 724 -- doc/src/week9/._week9-bs044.html | 726 -- doc/src/week9/._week9-bs045.html | 731 -- doc/src/week9/._week9-bs046.html | 724 -- doc/src/week9/._week9-bs047.html | 725 -- doc/src/week9/._week9-bs048.html | 752 -- doc/src/week9/._week9-bs049.html | 727 -- doc/src/week9/._week9-bs050.html | 725 -- doc/src/week9/._week9-bs051.html | 710 -- doc/src/week9/._week9-bs052.html | 704 -- doc/src/week9/._week9-bs053.html | 725 -- doc/src/week9/._week9-bs054.html | 700 -- doc/src/week9/._week9-bs055.html | 703 -- doc/src/week9/._week9-bs056.html | 700 -- doc/src/week9/._week9-bs057.html | 705 -- doc/src/week9/._week9-bs058.html | 699 -- doc/src/week9/._week9-bs059.html | 722 -- doc/src/week9/._week9-bs060.html | 737 -- doc/src/week9/._week9-bs061.html | 714 -- doc/src/week9/._week9-bs062.html | 750 -- doc/src/week9/._week9-bs063.html | 764 -- 
doc/src/week9/._week9-bs064.html | 783 -- doc/src/week9/._week9-bs065.html | 707 -- doc/src/week9/._week9-bs066.html | 710 -- doc/src/week9/._week9-bs067.html | 708 -- doc/src/week9/._week9-bs068.html | 709 -- doc/src/week9/._week9-bs069.html | 711 -- doc/src/week9/._week9-bs070.html | 724 -- doc/src/week9/._week9-bs071.html | 708 -- doc/src/week9/._week9-bs072.html | 707 -- doc/src/week9/._week9-bs073.html | 710 -- doc/src/week9/._week9-bs074.html | 705 -- doc/src/week9/._week9-bs075.html | 704 -- doc/src/week9/._week9-bs076.html | 731 -- doc/src/week9/._week9-bs077.html | 754 -- doc/src/week9/._week9-bs078.html | 776 -- doc/src/week9/._week9-bs079.html | 703 -- doc/src/week9/._week9-bs080.html | 711 -- doc/src/week9/._week9-bs081.html | 754 -- doc/src/week9/._week9-bs082.html | 714 -- doc/src/week9/._week9-bs083.html | 752 -- doc/src/week9/._week9-bs084.html | 732 -- doc/src/week9/._week9-bs085.html | 710 -- doc/src/week9/._week9-bs086.html | 737 -- doc/src/week9/._week9-bs087.html | 732 -- doc/src/week9/._week9-bs088.html | 706 -- doc/src/week9/._week9-bs089.html | 734 -- doc/src/week9/._week9-bs090.html | 709 -- doc/src/week9/._week9-bs091.html | 737 -- doc/src/week9/._week9-bs092.html | 738 -- doc/src/week9/._week9-bs093.html | 742 -- doc/src/week9/._week9-bs094.html | 710 -- doc/src/week9/._week9-bs095.html | 738 -- doc/src/week9/._week9-bs096.html | 736 -- doc/src/week9/._week9-bs097.html | 738 -- doc/src/week9/._week9-bs098.html | 736 -- doc/src/week9/._week9-bs099.html | 731 -- doc/src/week9/._week9-bs100.html | 731 -- doc/src/week9/._week9-bs101.html | 739 -- doc/src/week9/._week9-bs102.html | 1177 --- doc/src/week9/._week9-bs103.html | 717 -- doc/src/week9/._week9-bs104.html | 760 -- doc/src/week9/._week9-bs105.html | 750 -- doc/src/week9/._week9-bs106.html | 702 -- doc/src/week9/._week9-bs107.html | 740 -- doc/src/week9/._week9-bs108.html | 736 -- doc/src/week9/._week9-bs109.html | 738 -- doc/src/week9/._week9-bs110.html | 765 -- 
doc/src/week9/._week9-bs111.html | 707 -- doc/src/week9/._week9-bs112.html | 733 -- doc/src/week9/._week9-bs113.html | 730 -- doc/src/week9/._week9-bs114.html | 738 -- doc/src/week9/._week9-bs115.html | 729 -- doc/src/week9/._week9-bs116.html | 709 -- doc/src/week9/._week9-bs117.html | 736 -- doc/src/week9/._week9-bs118.html | 736 -- doc/src/week9/._week9-bs119.html | 756 -- doc/src/week9/._week9-bs120.html | 751 -- doc/src/week9/._week9-bs121.html | 732 -- doc/src/week9/._week9-bs122.html | 735 -- doc/src/week9/._week9-bs123.html | 750 -- doc/src/week9/._week9-bs124.html | 820 --- doc/src/week9/._week9-bs125.html | 715 -- doc/src/week9/._week9-bs126.html | 707 -- doc/src/week9/._week9-bs127.html | 768 -- doc/src/week9/._week9-bs128.html | 733 -- doc/src/week9/._week9-bs129.html | 731 -- doc/src/week9/._week9-bs130.html | 759 -- doc/src/week9/._week9-bs131.html | 730 -- doc/src/week9/._week9-bs132.html | 754 -- doc/src/week9/._week9-bs133.html | 727 -- doc/src/week9/._week9-bs134.html | 732 -- doc/src/week9/._week9-bs135.html | 689 -- doc/src/week9/._week9-bs136.html | 700 -- doc/src/week9/._week9-bs137.html | 727 -- doc/src/week9/._week9-bs138.html | 726 -- doc/src/week9/._week9-bs139.html | 764 -- doc/src/week9/.week9.copyright | 1 - doc/src/week9/README.txt | 2 - ...85A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 20 - ...C7F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...4D20DC076E8BF450B81976EF4AD1C19D937.pygtex | 10 - ...AAE0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...E880DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...4790DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...2C30DC076E8BF450B81976EF4AD1C19D937.pygtex | 15 - ...6E50DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...B440DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...DC2B0CAD346A13BD81D9AF720CF234DAE08.pygtex | 68 - ...3BC0DC076E8BF450B81976EF4AD1C19D937.pygtex | 16 - ...A750DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...B390DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - 
...3E70DC076E8BF450B81976EF4AD1C19D937.pygtex | 11 - ...DA20DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...3060DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...1330DC076E8BF450B81976EF4AD1C19D937.pygtex | 15 - ...0CC0DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...30A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 460 -- ...ECC0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...29D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 13 - ...EB70DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...9600DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...BAE0DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...57E0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...B6D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 17 - ...9360DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...5F90DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...8FD0DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...10EB0CAD346A13BD81D9AF720CF234DAE08.pygtex | 10 - ...2A50DC076E8BF450B81976EF4AD1C19D937.pygtex | 9 - ...F79B0CAD346A13BD81D9AF720CF234DAE08.pygtex | 60 - ...B4A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...7460DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...9140DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...C800DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...A700DC076E8BF450B81976EF4AD1C19D937.pygtex | 16 - ...E2F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...88D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...7030DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...B260DC076E8BF450B81976EF4AD1C19D937.pygtex | 18 - ...9F30DC076E8BF450B81976EF4AD1C19D937.pygtex | 21 - ...B7A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 17 - ...05B0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...A5D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...5360DC076E8BF450B81976EF4AD1C19D937.pygtex | 12 - ...12F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...67D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 15 - ...2400DC076E8BF450B81976EF4AD1C19D937.pygtex | 19 - ...18F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...3F10DC076E8BF450B81976EF4AD1C19D937.pygtex | 22 - 
...7F00DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...D7A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...9510DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...F190DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...8890DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...AF50DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...5590DC076E8BF450B81976EF4AD1C19D937.pygtex | 19 - ...3350DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...B810DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...8980DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...2825FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex | 219 - ...73E0DC076E8BF450B81976EF4AD1C19D937.pygtex | 9 - ...D980DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...C440DC076E8BF450B81976EF4AD1C19D937.pygtex | 9 - ...2355FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex | 59 - ...E9C0DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...5600DC076E8BF450B81976EF4AD1C19D937.pygtex | 11 - ...D3C0DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...0AE0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...A7F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...8940DC076E8BF450B81976EF4AD1C19D937.pygtex | 8 - ...4A10DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...9060DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...A6A0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...4540DC076E8BF450B81976EF4AD1C19D937.pygtex | 17 - ...B640DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...71C0DC076E8BF450B81976EF4AD1C19D937.pygtex | 6 - ...7DC0DC076E8BF450B81976EF4AD1C19D937.pygtex | 10 - ...13F0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...0B40DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...C2C0DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...5F3B0CAD346A13BD81D9AF720CF234DAE08.pygtex | 81 - ...B890DC076E8BF450B81976EF4AD1C19D937.pygtex | 12 - ...1AF0DC076E8BF450B81976EF4AD1C19D937.pygtex | 7 - ...3E60DC076E8BF450B81976EF4AD1C19D937.pygtex | 18 - ...3A30DC076E8BF450B81976EF4AD1C19D937.pygtex | 4 - ...DC00DC076E8BF450B81976EF4AD1C19D937.pygtex | 13 - ...D4D0DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - 
...E6E0DC076E8BF450B81976EF4AD1C19D937.pygtex | 5 - ...1930DC076E8BF450B81976EF4AD1C19D937.pygtex | 10 - doc/src/week9/_minted-week9/default.pygstyle | 101 - doc/src/week9/ipynb-week9-src.tar.gz | Bin 192 -> 0 bytes doc/src/week9/programs/autocorr.py | 46 + .../{automersenne.cpp => autocorrelation.cpp} | 16 +- doc/src/week9/programs/mc.py | 54 - doc/src/week9/programs/out.dat | 1000 +++ doc/src/week9/programs/test.x | Bin 0 -> 58915 bytes doc/src/week9/reveal.js/.gitignore | 8 - doc/src/week9/reveal.js/.travis.yml | 5 - doc/src/week9/reveal.js/CONTRIBUTING.md | 23 - doc/src/week9/reveal.js/Gruntfile.js | 140 - doc/src/week9/reveal.js/LICENSE | 19 - doc/src/week9/reveal.js/README.md | 1052 --- doc/src/week9/reveal.js/bower.json | 27 - .../week9/reveal.js/css/images/cbc_footer.png | Bin 10008 -> 0 bytes .../week9/reveal.js/css/images/cbc_symbol.png | Bin 2946 -> 0 bytes .../reveal.js/css/images/simula_footer.png | Bin 2513 -> 0 bytes .../reveal.js/css/images/simula_logo.png | Bin 2138 -> 0 bytes .../reveal.js/css/images/simula_symbol.png | Bin 2138 -> 0 bytes .../week9/reveal.js/css/images/uio_footer.png | Bin 18189 -> 0 bytes .../week9/reveal.js/css/images/uio_symbol.png | Bin 11352 -> 0 bytes doc/src/week9/reveal.js/css/print/paper.css | 202 - doc/src/week9/reveal.js/css/print/pdf.css | 157 - doc/src/week9/reveal.js/css/reveal.css | 1886 ----- doc/src/week9/reveal.js/css/reveal.scss | 1319 ---- doc/src/week9/reveal.js/css/theme/README.md | 23 - doc/src/week9/reveal.js/css/theme/beige.css | 154 - .../week9/reveal.js/css/theme/beigesmall.css | 155 - doc/src/week9/reveal.js/css/theme/black.css | 273 - doc/src/week9/reveal.js/css/theme/blood.css | 180 - doc/src/week9/reveal.js/css/theme/cbc.css | 144 - .../week9/reveal.js/css/theme/darkgray.css | 153 - doc/src/week9/reveal.js/css/theme/default.css | 153 - doc/src/week9/reveal.js/css/theme/league.css | 279 - doc/src/week9/reveal.js/css/theme/moon.css | 153 - doc/src/week9/reveal.js/css/theme/night.css | 141 - 
doc/src/week9/reveal.js/css/theme/serif.css | 143 - doc/src/week9/reveal.js/css/theme/simple.css | 144 - doc/src/week9/reveal.js/css/theme/simula.css | 144 - doc/src/week9/reveal.js/css/theme/sky.css | 150 - .../week9/reveal.js/css/theme/solarized.css | 153 - .../reveal.js/css/theme/source/beige.scss | 50 - .../css/theme/source/beigesmall.scss | 51 - .../reveal.js/css/theme/source/black.scss | 49 - .../reveal.js/css/theme/source/blood.scss | 91 - .../week9/reveal.js/css/theme/source/cbc.scss | 39 - .../reveal.js/css/theme/source/darkgray.scss | 42 - .../reveal.js/css/theme/source/default.scss | 42 - .../reveal.js/css/theme/source/league.scss | 34 - .../reveal.js/css/theme/source/moon.scss | 68 - .../reveal.js/css/theme/source/night.scss | 35 - .../reveal.js/css/theme/source/serif.scss | 35 - .../reveal.js/css/theme/source/simple.scss | 38 - .../reveal.js/css/theme/source/simula.scss | 39 - .../week9/reveal.js/css/theme/source/sky.scss | 46 - .../reveal.js/css/theme/source/solarized.scss | 74 - .../reveal.js/css/theme/source/white.scss | 49 - doc/src/week9/reveal.js/css/theme/white.css | 273 - doc/src/week9/reveal.js/index.html | 411 -- doc/src/week9/reveal.js/js/reveal.js | 4508 ------------ doc/src/week9/reveal.js/lib/css/zenburn.css | 117 - .../reveal.js/lib/font/league-gothic/LICENSE | 2 - .../lib/font/league-gothic/league-gothic.css | 10 - .../lib/font/league-gothic/league-gothic.eot | Bin 25696 -> 0 bytes .../lib/font/league-gothic/league-gothic.ttf | Bin 64256 -> 0 bytes .../lib/font/league-gothic/league-gothic.woff | Bin 30764 -> 0 bytes .../lib/font/source-sans-pro/LICENSE | 45 - .../source-sans-pro-italic.eot | Bin 75720 -> 0 bytes .../source-sans-pro-italic.ttf | Bin 238084 -> 0 bytes .../source-sans-pro-italic.woff | Bin 98556 -> 0 bytes .../source-sans-pro-regular.eot | Bin 88070 -> 0 bytes .../source-sans-pro-regular.ttf | Bin 288008 -> 0 bytes .../source-sans-pro-regular.woff | Bin 114324 -> 0 bytes .../source-sans-pro-semibold.eot | Bin 89897 -> 0 
bytes .../source-sans-pro-semibold.ttf | Bin 284640 -> 0 bytes .../source-sans-pro-semibold.woff | Bin 115648 -> 0 bytes .../source-sans-pro-semibolditalic.eot | Bin 75706 -> 0 bytes .../source-sans-pro-semibolditalic.ttf | Bin 240944 -> 0 bytes .../source-sans-pro-semibolditalic.woff | Bin 98816 -> 0 bytes .../font/source-sans-pro/source-sans-pro.css | 39 - doc/src/week9/reveal.js/lib/js/classList.js | 2 - doc/src/week9/reveal.js/lib/js/head.min.js | 8 - doc/src/week9/reveal.js/lib/js/html5shiv.js | 7 - doc/src/week9/reveal.js/package.json | 45 - .../reveal.js/plugin/highlight/highlight.js | 30 - doc/src/week9/reveal.js/plugin/leap/leap.js | 159 - .../reveal.js/plugin/markdown/example.html | 129 - .../reveal.js/plugin/markdown/example.md | 31 - .../reveal.js/plugin/markdown/markdown.js | 393 - .../week9/reveal.js/plugin/markdown/marked.js | 6 - doc/src/week9/reveal.js/plugin/math/math.js | 64 - .../reveal.js/plugin/multiplex/client.js | 13 - .../week9/reveal.js/plugin/multiplex/index.js | 56 - .../reveal.js/plugin/multiplex/master.js | 51 - .../reveal.js/plugin/notes-server/client.js | 60 - .../reveal.js/plugin/notes-server/index.js | 66 - .../reveal.js/plugin/notes-server/notes.html | 396 - .../week9/reveal.js/plugin/notes/notes.html | 406 -- doc/src/week9/reveal.js/plugin/notes/notes.js | 122 - .../reveal.js/plugin/print-pdf/print-pdf.js | 48 - .../week9/reveal.js/plugin/remotes/remotes.js | 39 - .../week9/reveal.js/plugin/search/search.js | 196 - .../week9/reveal.js/plugin/zoom-js/zoom.js | 278 - .../reveal.js/test/examples/assets/image1.png | Bin 21991 -> 0 bytes .../reveal.js/test/examples/assets/image2.png | Bin 10237 -> 0 bytes .../reveal.js/test/examples/barebones.html | 41 - .../test/examples/embedded-media.html | 49 - .../week9/reveal.js/test/examples/math.html | 185 - .../test/examples/slide-backgrounds.html | 144 - .../test/examples/slide-transitions.html | 101 - doc/src/week9/reveal.js/test/qunit-1.12.0.css | 244 - 
doc/src/week9/reveal.js/test/qunit-1.12.0.js | 2212 ------ .../test-markdown-element-attributes.html | 134 - .../test/test-markdown-element-attributes.js | 46 - .../test/test-markdown-slide-attributes.html | 128 - .../test/test-markdown-slide-attributes.js | 47 - .../week9/reveal.js/test/test-markdown.html | 52 - doc/src/week9/reveal.js/test/test-markdown.js | 15 - doc/src/week9/reveal.js/test/test-pdf.html | 83 - doc/src/week9/reveal.js/test/test-pdf.js | 15 - doc/src/week9/reveal.js/test/test.html | 85 - doc/src/week9/reveal.js/test/test.js | 589 -- doc/src/week9/week9-beamer.pdf | Bin 518679 -> 0 bytes doc/src/week9/week9-beamer.tex | 5610 --------------- doc/src/week9/week9-bs.html | 710 -- doc/src/week9/week9-plain.tex | 5936 --------------- doc/src/week9/week9-reveal.html | 6265 ---------------- doc/src/week9/week9-solarized.html | 6276 ---------------- doc/src/week9/week9.dlog | 175 - doc/src/week9/week9.do.txt | 57 +- doc/src/week9/week9.html | 6353 ----------------- doc/src/week9/week9.ipynb | 6042 ---------------- doc/src/week9/week9.p.tex | 5966 ---------------- doc/src/week9/week9.pdf | Bin 501561 -> 0 bytes doc/src/week9/week9.tex | 5936 --------------- 518 files changed, 24141 insertions(+), 195860 deletions(-) rename doc/{src/week9/._week9-bs140.html => pub/week9/html/._week9-bs141.html} (87%) delete mode 100644 doc/src/week9/._week9-bs000.html delete mode 100644 doc/src/week9/._week9-bs001.html delete mode 100644 doc/src/week9/._week9-bs002.html delete mode 100644 doc/src/week9/._week9-bs003.html delete mode 100644 doc/src/week9/._week9-bs004.html delete mode 100644 doc/src/week9/._week9-bs005.html delete mode 100644 doc/src/week9/._week9-bs006.html delete mode 100644 doc/src/week9/._week9-bs007.html delete mode 100644 doc/src/week9/._week9-bs008.html delete mode 100644 doc/src/week9/._week9-bs009.html delete mode 100644 doc/src/week9/._week9-bs010.html delete mode 100644 doc/src/week9/._week9-bs011.html delete mode 100644 
doc/src/week9/._week9-bs012.html delete mode 100644 doc/src/week9/._week9-bs013.html delete mode 100644 doc/src/week9/._week9-bs014.html delete mode 100644 doc/src/week9/._week9-bs015.html delete mode 100644 doc/src/week9/._week9-bs016.html delete mode 100644 doc/src/week9/._week9-bs017.html delete mode 100644 doc/src/week9/._week9-bs018.html delete mode 100644 doc/src/week9/._week9-bs019.html delete mode 100644 doc/src/week9/._week9-bs020.html delete mode 100644 doc/src/week9/._week9-bs021.html delete mode 100644 doc/src/week9/._week9-bs022.html delete mode 100644 doc/src/week9/._week9-bs023.html delete mode 100644 doc/src/week9/._week9-bs024.html delete mode 100644 doc/src/week9/._week9-bs025.html delete mode 100644 doc/src/week9/._week9-bs026.html delete mode 100644 doc/src/week9/._week9-bs027.html delete mode 100644 doc/src/week9/._week9-bs028.html delete mode 100644 doc/src/week9/._week9-bs029.html delete mode 100644 doc/src/week9/._week9-bs030.html delete mode 100644 doc/src/week9/._week9-bs031.html delete mode 100644 doc/src/week9/._week9-bs032.html delete mode 100644 doc/src/week9/._week9-bs033.html delete mode 100644 doc/src/week9/._week9-bs034.html delete mode 100644 doc/src/week9/._week9-bs035.html delete mode 100644 doc/src/week9/._week9-bs036.html delete mode 100644 doc/src/week9/._week9-bs037.html delete mode 100644 doc/src/week9/._week9-bs038.html delete mode 100644 doc/src/week9/._week9-bs039.html delete mode 100644 doc/src/week9/._week9-bs040.html delete mode 100644 doc/src/week9/._week9-bs041.html delete mode 100644 doc/src/week9/._week9-bs042.html delete mode 100644 doc/src/week9/._week9-bs043.html delete mode 100644 doc/src/week9/._week9-bs044.html delete mode 100644 doc/src/week9/._week9-bs045.html delete mode 100644 doc/src/week9/._week9-bs046.html delete mode 100644 doc/src/week9/._week9-bs047.html delete mode 100644 doc/src/week9/._week9-bs048.html delete mode 100644 doc/src/week9/._week9-bs049.html delete mode 100644 
doc/src/week9/._week9-bs050.html delete mode 100644 doc/src/week9/._week9-bs051.html delete mode 100644 doc/src/week9/._week9-bs052.html delete mode 100644 doc/src/week9/._week9-bs053.html delete mode 100644 doc/src/week9/._week9-bs054.html delete mode 100644 doc/src/week9/._week9-bs055.html delete mode 100644 doc/src/week9/._week9-bs056.html delete mode 100644 doc/src/week9/._week9-bs057.html delete mode 100644 doc/src/week9/._week9-bs058.html delete mode 100644 doc/src/week9/._week9-bs059.html delete mode 100644 doc/src/week9/._week9-bs060.html delete mode 100644 doc/src/week9/._week9-bs061.html delete mode 100644 doc/src/week9/._week9-bs062.html delete mode 100644 doc/src/week9/._week9-bs063.html delete mode 100644 doc/src/week9/._week9-bs064.html delete mode 100644 doc/src/week9/._week9-bs065.html delete mode 100644 doc/src/week9/._week9-bs066.html delete mode 100644 doc/src/week9/._week9-bs067.html delete mode 100644 doc/src/week9/._week9-bs068.html delete mode 100644 doc/src/week9/._week9-bs069.html delete mode 100644 doc/src/week9/._week9-bs070.html delete mode 100644 doc/src/week9/._week9-bs071.html delete mode 100644 doc/src/week9/._week9-bs072.html delete mode 100644 doc/src/week9/._week9-bs073.html delete mode 100644 doc/src/week9/._week9-bs074.html delete mode 100644 doc/src/week9/._week9-bs075.html delete mode 100644 doc/src/week9/._week9-bs076.html delete mode 100644 doc/src/week9/._week9-bs077.html delete mode 100644 doc/src/week9/._week9-bs078.html delete mode 100644 doc/src/week9/._week9-bs079.html delete mode 100644 doc/src/week9/._week9-bs080.html delete mode 100644 doc/src/week9/._week9-bs081.html delete mode 100644 doc/src/week9/._week9-bs082.html delete mode 100644 doc/src/week9/._week9-bs083.html delete mode 100644 doc/src/week9/._week9-bs084.html delete mode 100644 doc/src/week9/._week9-bs085.html delete mode 100644 doc/src/week9/._week9-bs086.html delete mode 100644 doc/src/week9/._week9-bs087.html delete mode 100644 
doc/src/week9/._week9-bs088.html delete mode 100644 doc/src/week9/._week9-bs089.html delete mode 100644 doc/src/week9/._week9-bs090.html delete mode 100644 doc/src/week9/._week9-bs091.html delete mode 100644 doc/src/week9/._week9-bs092.html delete mode 100644 doc/src/week9/._week9-bs093.html delete mode 100644 doc/src/week9/._week9-bs094.html delete mode 100644 doc/src/week9/._week9-bs095.html delete mode 100644 doc/src/week9/._week9-bs096.html delete mode 100644 doc/src/week9/._week9-bs097.html delete mode 100644 doc/src/week9/._week9-bs098.html delete mode 100644 doc/src/week9/._week9-bs099.html delete mode 100644 doc/src/week9/._week9-bs100.html delete mode 100644 doc/src/week9/._week9-bs101.html delete mode 100644 doc/src/week9/._week9-bs102.html delete mode 100644 doc/src/week9/._week9-bs103.html delete mode 100644 doc/src/week9/._week9-bs104.html delete mode 100644 doc/src/week9/._week9-bs105.html delete mode 100644 doc/src/week9/._week9-bs106.html delete mode 100644 doc/src/week9/._week9-bs107.html delete mode 100644 doc/src/week9/._week9-bs108.html delete mode 100644 doc/src/week9/._week9-bs109.html delete mode 100644 doc/src/week9/._week9-bs110.html delete mode 100644 doc/src/week9/._week9-bs111.html delete mode 100644 doc/src/week9/._week9-bs112.html delete mode 100644 doc/src/week9/._week9-bs113.html delete mode 100644 doc/src/week9/._week9-bs114.html delete mode 100644 doc/src/week9/._week9-bs115.html delete mode 100644 doc/src/week9/._week9-bs116.html delete mode 100644 doc/src/week9/._week9-bs117.html delete mode 100644 doc/src/week9/._week9-bs118.html delete mode 100644 doc/src/week9/._week9-bs119.html delete mode 100644 doc/src/week9/._week9-bs120.html delete mode 100644 doc/src/week9/._week9-bs121.html delete mode 100644 doc/src/week9/._week9-bs122.html delete mode 100644 doc/src/week9/._week9-bs123.html delete mode 100644 doc/src/week9/._week9-bs124.html delete mode 100644 doc/src/week9/._week9-bs125.html delete mode 100644 
doc/src/week9/._week9-bs126.html delete mode 100644 doc/src/week9/._week9-bs127.html delete mode 100644 doc/src/week9/._week9-bs128.html delete mode 100644 doc/src/week9/._week9-bs129.html delete mode 100644 doc/src/week9/._week9-bs130.html delete mode 100644 doc/src/week9/._week9-bs131.html delete mode 100644 doc/src/week9/._week9-bs132.html delete mode 100644 doc/src/week9/._week9-bs133.html delete mode 100644 doc/src/week9/._week9-bs134.html delete mode 100644 doc/src/week9/._week9-bs135.html delete mode 100644 doc/src/week9/._week9-bs136.html delete mode 100644 doc/src/week9/._week9-bs137.html delete mode 100644 doc/src/week9/._week9-bs138.html delete mode 100644 doc/src/week9/._week9-bs139.html delete mode 100644 doc/src/week9/.week9.copyright delete mode 100644 doc/src/week9/README.txt delete mode 100644 doc/src/week9/_minted-week9/0A6C47F8068CEC3257BC594AE784D85A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/0CC5E88A797F0E8C126048DE4396FC7F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/0DAB6AD41FAED4AB2EF5C7571274C4D20DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/118CEE4B50FB428DD3D8549EB29F0AAE0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/1490AC872D3FB9D3137AC51EC16D9E880DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/1B144BE7E9B688D03F08657E119E94790DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/23DB3DD9992D3EA60D66BAB6E94FA2C30DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/23E6263994F51499BBF63FFD69B8B6E50DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/25191E82FD3D485EB84FA500B216EB440DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/260BBDD86FAABC6792995A2B95ABBDC2B0CAD346A13BD81D9AF720CF234DAE08.pygtex delete 
mode 100644 doc/src/week9/_minted-week9/2989E68D2545DF095F8938FE8589B3BC0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/2A64863B652EA217A1D3AE654024CA750DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/2D780435D05ED64219F7B38EA5F3AB390DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/2F38624B127E615C4039128AF3F503E70DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/32243EB5D4E7A20BB0A45AC237E18DA20DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/3258EE875704B37DC577CFA7C25BB3060DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/36E0347A38C0565D1FE358C3F94B01330DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/38B2F39EC7518890F1584A08825C00CC0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/39BDD06D518805E8CC3772EB120B830A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/3AC82BEB472EF31642CEE04E21F67ECC0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/3B98EBB88E64CD2C81DFDB84AFC1629D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/3E0C0C0FDDCEAEC0D593AF05C8D9AEB70DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/4236AEA215F4B524E5B6FFC80851B9600DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/485A5796695A934830A2325AA99ECBAE0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/49441808FB11013E51C438764FC0757E0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/4AE12D397D7F961596A3F89C28AC9B6D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 
doc/src/week9/_minted-week9/4E8DC707963D22FD986FA69987A6F9360DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/517C4E3BE1BFE4D4D8044EE7079745F90DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/521A0F94D0EF4F11AF58C10A6998F8FD0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/52788122147F40FDF64E3D34A4D5C10EB0CAD346A13BD81D9AF720CF234DAE08.pygtex delete mode 100644 doc/src/week9/_minted-week9/5358A6138325DBB0CB89E0A77CA092A50DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/5389DFE1B2AD5452E98B577558064F79B0CAD346A13BD81D9AF720CF234DAE08.pygtex delete mode 100644 doc/src/week9/_minted-week9/59BA9422B354E0D316E8A5D73BA20B4A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/5A6DAA7214BFCDCF8F733CF3810D47460DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/5B343579BCF9963505A1D3AA4E6149140DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/5F0C24C76C78B09EE11739CC23F2BC800DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/63FBD8276D325745F8C303CA3370CA700DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/668D6DF261A6E3F3CCB0BC5FE288FE2F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/6745E0E5044DEC5EAF2A777B2FA0488D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/69B258D5AC69935C85BD91E993EC77030DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/6E59941A3833E1FE1319E254324B7B260DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/726CE244F7A189BFA217714F4A9629F30DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 
doc/src/week9/_minted-week9/76ED9E3D3A3215F846384023116EBB7A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/7963ABE74028F8E391D17DA4CA04E05B0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/7BACEDF5BB1AC2AD961BDBC4D1F9DA5D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/7F07B82F53C725F23B54C8A7B79CF5360DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/81B54D900E1709D3194224A29260C12F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/81D7D0361B0B45516A69DF03C150C67D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/824B5B818A4AD2013ED199E479D7A2400DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/8755B283091CF6EFD1D61EA7DF38C18F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9178F05C3CEBD2807E143475DF5843F10DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/919233BB70F39100D4BA0CFC73CC37F00DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9342B1CB5D7A51D27802969C958F5D7A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9674C8246A2FF2BB6157963C01DD59510DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9C742881A3D82D5F30B5060457BB7F190DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9E7E1C037200BF7AFF999C0AAE9428890DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/9FC2D2E5AAA3432653F93E80B14BCAF50DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/A2C942DDC1F9CF9BD256911EFAA895590DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 
doc/src/week9/_minted-week9/A8E266067B144C7E384681ABF07B03350DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/A9979A25D5212B32BF91E8AA0504BB810DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/AE088E29FB4E738F6B44D236504058980DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/AE87E97195FD5FF205CB2008603BC2825FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex delete mode 100644 doc/src/week9/_minted-week9/B1072FF5A94C91CA0E7A2DEAC165673E0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/B14B88CDFDEFB1B74580189DAD406D980DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/B3374F1FF8EA8D36202B633FCF5DDC440DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/B34F6855F3305E747885C0F16A6DE2355FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex delete mode 100644 doc/src/week9/_minted-week9/BA9032551FF35EDAFF90CD33F6B15E9C0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C1059EB3F2FBA0A55E300ACD9ADA65600DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C381E2F8F93BD26F791AD665A0A8CD3C0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C3D4CC4B811D0A33C89CB1F8C986D0AE0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C4773C9EC90AFB10AA82F3F0707B2A7F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C50BA650723AA066C821FCC7380308940DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/C59B9D50DA2EFBDC04B033DE762F84A10DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/CAB640527971B6B57C1BE2A96BFCE9060DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 
doc/src/week9/_minted-week9/CF4F99DF7158F68DB3D95830ADC84A6A0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/D260B7D3534A177583278E39A1E8B4540DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/D40FF8DF66AE7E75C9595A8D5BACAB640DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/D83A68DEA822CDD8B1F913727E09D71C0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/DC0B57C0CB950072FDDB0AEC2171B7DC0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/DCC11AEB5F26E6AB9C552C2F9405813F0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/DDE70CA1C57A6186C2C2DC3C1B7D40B40DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/E0955720563C64B02F95CDF882E4FC2C0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/E181C2032ABBA4FD80D0BF5670AEE5F3B0CAD346A13BD81D9AF720CF234DAE08.pygtex delete mode 100644 doc/src/week9/_minted-week9/EBF64C0559909635066A31CFCA48FB890DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/EEA4F347F2C750B4F63E1210ECFA01AF0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/EF953C491D773131C133BD5EFEBF53E60DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/F42E70E67DFBF66F590D1AB1C6F2A3A30DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/F664E716A6DBB4386F9AE50DEF564DC00DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/FB6759D3D69E9EFF4E66FDAAE43F5D4D0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/FCA2371BE7E05424E97D799D6024AE6E0DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 
doc/src/week9/_minted-week9/FDACD668132974D45A08A486A6FCF1930DC076E8BF450B81976EF4AD1C19D937.pygtex delete mode 100644 doc/src/week9/_minted-week9/default.pygstyle delete mode 100644 doc/src/week9/ipynb-week9-src.tar.gz create mode 100644 doc/src/week9/programs/autocorr.py rename doc/src/week9/programs/{automersenne.cpp => autocorrelation.cpp} (86%) delete mode 100644 doc/src/week9/programs/mc.py create mode 100644 doc/src/week9/programs/out.dat create mode 100755 doc/src/week9/programs/test.x delete mode 100644 doc/src/week9/reveal.js/.gitignore delete mode 100644 doc/src/week9/reveal.js/.travis.yml delete mode 100644 doc/src/week9/reveal.js/CONTRIBUTING.md delete mode 100644 doc/src/week9/reveal.js/Gruntfile.js delete mode 100644 doc/src/week9/reveal.js/LICENSE delete mode 100644 doc/src/week9/reveal.js/README.md delete mode 100644 doc/src/week9/reveal.js/bower.json delete mode 100644 doc/src/week9/reveal.js/css/images/cbc_footer.png delete mode 100644 doc/src/week9/reveal.js/css/images/cbc_symbol.png delete mode 100644 doc/src/week9/reveal.js/css/images/simula_footer.png delete mode 100644 doc/src/week9/reveal.js/css/images/simula_logo.png delete mode 100644 doc/src/week9/reveal.js/css/images/simula_symbol.png delete mode 100644 doc/src/week9/reveal.js/css/images/uio_footer.png delete mode 100644 doc/src/week9/reveal.js/css/images/uio_symbol.png delete mode 100644 doc/src/week9/reveal.js/css/print/paper.css delete mode 100644 doc/src/week9/reveal.js/css/print/pdf.css delete mode 100644 doc/src/week9/reveal.js/css/reveal.css delete mode 100644 doc/src/week9/reveal.js/css/reveal.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/README.md delete mode 100644 doc/src/week9/reveal.js/css/theme/beige.css delete mode 100644 doc/src/week9/reveal.js/css/theme/beigesmall.css delete mode 100644 doc/src/week9/reveal.js/css/theme/black.css delete mode 100644 doc/src/week9/reveal.js/css/theme/blood.css delete mode 100644 doc/src/week9/reveal.js/css/theme/cbc.css delete 
mode 100644 doc/src/week9/reveal.js/css/theme/darkgray.css delete mode 100644 doc/src/week9/reveal.js/css/theme/default.css delete mode 100644 doc/src/week9/reveal.js/css/theme/league.css delete mode 100644 doc/src/week9/reveal.js/css/theme/moon.css delete mode 100644 doc/src/week9/reveal.js/css/theme/night.css delete mode 100644 doc/src/week9/reveal.js/css/theme/serif.css delete mode 100644 doc/src/week9/reveal.js/css/theme/simple.css delete mode 100644 doc/src/week9/reveal.js/css/theme/simula.css delete mode 100644 doc/src/week9/reveal.js/css/theme/sky.css delete mode 100644 doc/src/week9/reveal.js/css/theme/solarized.css delete mode 100644 doc/src/week9/reveal.js/css/theme/source/beige.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/beigesmall.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/black.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/blood.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/cbc.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/darkgray.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/default.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/league.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/moon.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/night.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/serif.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/simple.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/simula.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/sky.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/solarized.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/source/white.scss delete mode 100644 doc/src/week9/reveal.js/css/theme/white.css delete mode 100644 doc/src/week9/reveal.js/index.html delete mode 100644 doc/src/week9/reveal.js/js/reveal.js delete mode 100644 
doc/src/week9/reveal.js/lib/css/zenburn.css delete mode 100644 doc/src/week9/reveal.js/lib/font/league-gothic/LICENSE delete mode 100644 doc/src/week9/reveal.js/lib/font/league-gothic/league-gothic.css delete mode 100644 doc/src/week9/reveal.js/lib/font/league-gothic/league-gothic.eot delete mode 100644 doc/src/week9/reveal.js/lib/font/league-gothic/league-gothic.ttf delete mode 100644 doc/src/week9/reveal.js/lib/font/league-gothic/league-gothic.woff delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/LICENSE delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-italic.eot delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-italic.ttf delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-italic.woff delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-regular.eot delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-regular.ttf delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-regular.woff delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibold.eot delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibold.ttf delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibold.woff delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibolditalic.eot delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibolditalic.ttf delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro-semibolditalic.woff delete mode 100644 doc/src/week9/reveal.js/lib/font/source-sans-pro/source-sans-pro.css delete mode 100644 doc/src/week9/reveal.js/lib/js/classList.js delete mode 100644 doc/src/week9/reveal.js/lib/js/head.min.js delete mode 100644 doc/src/week9/reveal.js/lib/js/html5shiv.js delete mode 
100644 doc/src/week9/reveal.js/package.json delete mode 100644 doc/src/week9/reveal.js/plugin/highlight/highlight.js delete mode 100644 doc/src/week9/reveal.js/plugin/leap/leap.js delete mode 100644 doc/src/week9/reveal.js/plugin/markdown/example.html delete mode 100644 doc/src/week9/reveal.js/plugin/markdown/example.md delete mode 100644 doc/src/week9/reveal.js/plugin/markdown/markdown.js delete mode 100644 doc/src/week9/reveal.js/plugin/markdown/marked.js delete mode 100644 doc/src/week9/reveal.js/plugin/math/math.js delete mode 100644 doc/src/week9/reveal.js/plugin/multiplex/client.js delete mode 100644 doc/src/week9/reveal.js/plugin/multiplex/index.js delete mode 100644 doc/src/week9/reveal.js/plugin/multiplex/master.js delete mode 100644 doc/src/week9/reveal.js/plugin/notes-server/client.js delete mode 100644 doc/src/week9/reveal.js/plugin/notes-server/index.js delete mode 100644 doc/src/week9/reveal.js/plugin/notes-server/notes.html delete mode 100644 doc/src/week9/reveal.js/plugin/notes/notes.html delete mode 100644 doc/src/week9/reveal.js/plugin/notes/notes.js delete mode 100644 doc/src/week9/reveal.js/plugin/print-pdf/print-pdf.js delete mode 100644 doc/src/week9/reveal.js/plugin/remotes/remotes.js delete mode 100644 doc/src/week9/reveal.js/plugin/search/search.js delete mode 100644 doc/src/week9/reveal.js/plugin/zoom-js/zoom.js delete mode 100644 doc/src/week9/reveal.js/test/examples/assets/image1.png delete mode 100644 doc/src/week9/reveal.js/test/examples/assets/image2.png delete mode 100644 doc/src/week9/reveal.js/test/examples/barebones.html delete mode 100644 doc/src/week9/reveal.js/test/examples/embedded-media.html delete mode 100644 doc/src/week9/reveal.js/test/examples/math.html delete mode 100644 doc/src/week9/reveal.js/test/examples/slide-backgrounds.html delete mode 100644 doc/src/week9/reveal.js/test/examples/slide-transitions.html delete mode 100644 doc/src/week9/reveal.js/test/qunit-1.12.0.css delete mode 100644 
doc/src/week9/reveal.js/test/qunit-1.12.0.js delete mode 100644 doc/src/week9/reveal.js/test/test-markdown-element-attributes.html delete mode 100644 doc/src/week9/reveal.js/test/test-markdown-element-attributes.js delete mode 100644 doc/src/week9/reveal.js/test/test-markdown-slide-attributes.html delete mode 100644 doc/src/week9/reveal.js/test/test-markdown-slide-attributes.js delete mode 100644 doc/src/week9/reveal.js/test/test-markdown.html delete mode 100644 doc/src/week9/reveal.js/test/test-markdown.js delete mode 100644 doc/src/week9/reveal.js/test/test-pdf.html delete mode 100644 doc/src/week9/reveal.js/test/test-pdf.js delete mode 100644 doc/src/week9/reveal.js/test/test.html delete mode 100644 doc/src/week9/reveal.js/test/test.js delete mode 100644 doc/src/week9/week9-beamer.pdf delete mode 100644 doc/src/week9/week9-beamer.tex delete mode 100644 doc/src/week9/week9-bs.html delete mode 100644 doc/src/week9/week9-plain.tex delete mode 100644 doc/src/week9/week9-reveal.html delete mode 100644 doc/src/week9/week9-solarized.html delete mode 100644 doc/src/week9/week9.dlog delete mode 100644 doc/src/week9/week9.html delete mode 100644 doc/src/week9/week9.ipynb delete mode 100644 doc/src/week9/week9.p.tex delete mode 100644 doc/src/week9/week9.pdf delete mode 100644 doc/src/week9/week9.tex diff --git a/doc/pub/week9/html/._week9-bs000.html b/doc/pub/week9/html/._week9-bs000.html index eb84da37..0e3f3774 100644 --- a/doc/pub/week9/html/._week9-bs000.html +++ b/doc/pub/week9/html/._week9-bs000.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -689,7 +694,7 @@

    March 11-15

  • 9
  • 10
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs001.html b/doc/pub/week9/html/._week9-bs001.html index 78e442fe..ae04b3a2 100644 --- a/doc/pub/week9/html/._week9-bs001.html +++ b/doc/pub/week9/html/._week9-bs001.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -680,7 +685,7 @@

    Overview of week 11, Mar
  • 10
  • 11
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs002.html b/doc/pub/week9/html/._week9-bs002.html index d9e8847c..131e055d 100644 --- a/doc/pub/week9/html/._week9-bs002.html +++ b/doc/pub/week9/html/._week9-bs002.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -676,7 +681,7 @@

    Why resampling methods ?

  • 11
  • 12
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs003.html b/doc/pub/week9/html/._week9-bs003.html index 93beacca..3d38595d 100644 --- a/doc/pub/week9/html/._week9-bs003.html +++ b/doc/pub/week9/html/._week9-bs003.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -681,7 +686,7 @@

    Statistical analysis

  • 12
  • 13
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs004.html b/doc/pub/week9/html/._week9-bs004.html index 63514813..8f3fcb01 100644 --- a/doc/pub/week9/html/._week9-bs004.html +++ b/doc/pub/week9/html/._week9-bs004.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -677,7 +682,7 @@

    And why do we use such me
  • 13
  • 14
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs005.html b/doc/pub/week9/html/._week9-bs005.html index 8c6f395a..e156f0b4 100644 --- a/doc/pub/week9/html/._week9-bs005.html +++ b/doc/pub/week9/html/._week9-bs005.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -688,7 +693,7 @@

    Central limit theorem

  • 14
  • 15
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs006.html b/doc/pub/week9/html/._week9-bs006.html index b4cebfed..dd13cca3 100644 --- a/doc/pub/week9/html/._week9-bs006.html +++ b/doc/pub/week9/html/._week9-bs006.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -675,7 +680,7 @@

    Further remarks

  • 15
  • 16
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs007.html b/doc/pub/week9/html/._week9-bs007.html index ac828571..8b33cf09 100644 --- a/doc/pub/week9/html/._week9-bs007.html +++ b/doc/pub/week9/html/._week9-bs007.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -689,7 +694,7 @@

    Running many measurements

    16
  • 17
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs008.html b/doc/pub/week9/html/._week9-bs008.html index 5eae9dce..d6412ed2 100644 --- a/doc/pub/week9/html/._week9-bs008.html +++ b/doc/pub/week9/html/._week9-bs008.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -687,7 +692,7 @@

    Adding more definitions

  • 17
  • 18
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs009.html b/doc/pub/week9/html/._week9-bs009.html index 07e5de6b..8fa4ebad 100644 --- a/doc/pub/week9/html/._week9-bs009.html +++ b/doc/pub/week9/html/._week9-bs009.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -682,7 +687,7 @@

    Further rewriting

  • 18
  • 19
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs010.html b/doc/pub/week9/html/._week9-bs010.html index 14a3cd97..7f9d7ed5 100644 --- a/doc/pub/week9/html/._week9-bs010.html +++ b/doc/pub/week9/html/._week9-bs010.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -692,7 +697,7 @@

    The covariance term

  • 19
  • 20
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs011.html b/doc/pub/week9/html/._week9-bs011.html index 5980ce77..4ac93bb8 100644 --- a/doc/pub/week9/html/._week9-bs011.html +++ b/doc/pub/week9/html/._week9-bs011.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -689,7 +694,7 @@

    Rewriting the covariance t
  • 20
  • 21
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs012.html b/doc/pub/week9/html/._week9-bs012.html index c945828b..9c180a91 100644 --- a/doc/pub/week9/html/._week9-bs012.html +++ b/doc/pub/week9/html/._week9-bs012.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -655,9 +660,12 @@

    Introducing the cor \end{align*} $$ -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    +

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) +for a series of random numbers. We see that the function \( \kappa_d \) +approaches \( 0 \) as \( d\rightarrow \infty \). +

    -

    Note: code will be inserted here later.

    +

    In this case, our data are given by random numbers generated for the uniform distribution with \( x\in [0,1] \). Even with two random numbers being far away, we note that the correlation function is not zero.

    @@ -684,7 +692,7 @@

    Introducing the cor
  • 21
  • 22
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs013.html b/doc/pub/week9/html/._week9-bs013.html index 39f540a0..6e3ea03f 100644 --- a/doc/pub/week9/html/._week9-bs013.html +++ b/doc/pub/week9/html/._week9-bs013.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,22 +651,73 @@

     

     

     

    -

    Resampling methods: Blocking

    +

    Computing the correlation function

    + +

    This code is best seen with the jupyter-notebook

    + + +
    +
    +
    +
    +
    +
    #!/usr/bin/env python
    +import numpy as np
    +import matplotlib.mlab as mlab
    +import matplotlib.pyplot as plt
    +import random
    +
    +# initialize the rng with a seed, simple uniform distribution
    +random.seed() 
    +m = 10000
    +samplefactor = 1.0/m
    +x = np.zeros(m)   
    +MeanValue = 0.
    +VarValue = 0.
    +for i in range (m):
    +    value = random.random()
    +    x[i] = value
    +    MeanValue += value
    +    VarValue += value*value
     
    -

    The blocking method was made popular by Flyvbjerg and Pedersen (1989) -and has become one of the standard ways to estimate the variance -\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely -\( \widehat{\theta} = \overline{X} \), the mean value. -

    +MeanValue *= samplefactor +VarValue *= samplefactor +Variance = VarValue-MeanValue*MeanValue +STDev = np.sqrt(Variance) +print("MeanValue =", MeanValue) +print("Variance =", Variance) +print("Standard deviation =", STDev) -

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: -

    -$$ -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} -$$ +# Computing the autocorrelation function +autocorrelation = np.zeros(m) +darray = np.zeros(m) +for j in range (m): + sum = 0.0 + darray[j] = j + for k in range (m-j): + sum += (x[k]-MeanValue)*(x[k+j]-MeanValue ) + autocorrelation[j] = (sum/Variance)*samplefactor +# Visualize results +plt.plot(darray, autocorrelation,'ro') +plt.axis([0,m,-0.2, 1.1]) +plt.xlabel(r'$d$') +plt.ylabel(r'$\kappa_d$') +plt.title(r'autocorrelation function for RNG with uniform distribution') +plt.show() +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    @@ -689,7 +745,7 @@

    Resampling methods: Blocking
  • 22
  • 23
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs014.html b/doc/pub/week9/html/._week9-bs014.html index fa7b50aa..efd4536f 100644 --- a/doc/pub/week9/html/._week9-bs014.html +++ b/doc/pub/week9/html/._week9-bs014.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,24 @@

     

     

     

    -

    Why blocking?

    +

    Resampling methods: Blocking

    -

    The strength of the blocking method is when the number of -observations, \( n \) is large. For large \( n \), the complexity of dependent -bootstrapping scales poorly, but the blocking method does not, -moreover, it becomes more accurate the larger \( n \) is. +

    The blocking method was made popular by Flyvbjerg and Pedersen (1989) +and has become one of the standard ways to estimate the variance +\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely +\( \widehat{\theta} = \overline{X} \), the mean value.

    +

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. +Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: +

    +$$ +\begin{align*} +\hat{X} = (X_1,X_2,\cdots,X_n). +\end{align*} +$$ + +

    diff --git a/doc/pub/week9/html/._week9-bs015.html b/doc/pub/week9/html/._week9-bs015.html index 72673cc9..12693e8a 100644 --- a/doc/pub/week9/html/._week9-bs015.html +++ b/doc/pub/week9/html/._week9-bs015.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,24 +651,13 @@

     

     

     

    -

    Blocking Transformations

    -

    We now define the blocking transformations. The idea is to take the mean of subsequent -pair of elements from \( \boldsymbol{X} \) and form a new vector -\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of -subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and -so on. -Define \( \boldsymbol{X}_i \) recursively by: -

    - -$$ -\begin{align} -(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ -(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + -(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\tag{1} -\end{align} -$$ +

    Why blocking?

    +

    The strength of the blocking method is when the number of +observations, \( n \) is large. For large \( n \), the complexity of dependent +bootstrapping scales poorly, but the blocking method does not, +moreover, it becomes more accurate the larger \( n \) is. +

    @@ -690,7 +684,7 @@

    Blocking Transformations

  • 24
  • 25
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs016.html b/doc/pub/week9/html/._week9-bs016.html index 74235b3b..906e5dcc 100644 --- a/doc/pub/week9/html/._week9-bs016.html +++ b/doc/pub/week9/html/._week9-bs016.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,23 +651,24 @@

     

     

     

    -

    Blocking transformations

    - -

    The quantity \( \boldsymbol{X}_k \) is -subject to \( k \) blocking transformations. We now have \( d \) vectors -\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\vec X_{d-1} \) containing the subsequent -averages of observations. It turns out that if the components of -\( \boldsymbol{X} \) is a stationary time series, then the components of -\( \boldsymbol{X}_i \) is a stationary time series for all \( 0 \leq i \leq d-1 \) +

    Blocking Transformations

    +

    We now define the blocking transformations. The idea is to take the mean of subsequent +pairs of elements from \( \boldsymbol{X} \) and form a new vector +\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of +subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and +so on. +Define \( \boldsymbol{X}_i \) recursively by:

    -

    We can then compute the autocovariance, the variance, sample mean, and -number of observations for each \( i \). -Let \( \gamma_i, \sigma_i^2, -\overline{X}_i \) denote the covariance, variance and average of the -elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of -\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \). -

    +$$ +\begin{align} +(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ +(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + +(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 0 \leq i \leq d-2 +\tag{1} +\end{align} +$$ +

    @@ -689,7 +695,7 @@

    Blocking transformations

  • 25
  • 26
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs017.html b/doc/pub/week9/html/._week9-bs017.html index 628dc398..8c0340c4 100644 --- a/doc/pub/week9/html/._week9-bs017.html +++ b/doc/pub/week9/html/._week9-bs017.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,26 +651,22 @@

     

     

     

    -

    Blocking Transformations

    +

    Blocking transformations

    -

    Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since \( h =|i-j| \) -we can define +

    The quantity \( \boldsymbol{X}_k \) has been +subjected to \( k \) blocking transformations. We now have \( d \) vectors +\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\boldsymbol{X}_{d-1} \) containing the subsequent +averages of observations. It turns out that if the components of +\( \boldsymbol{X} \) form a stationary time series, then the components of +\( \boldsymbol{X}_i \) form a stationary time series for all \( 0 \leq i \leq d-1 \).

    -$$ -\begin{align} -\gamma_{k+1}(h) &= cov\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}cov\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \hspace{0.1cm} \mathrm{h = 0} -\tag{2}\\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \mathrm{else} -\tag{3} -\end{align} -$$ -

    The quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, \( \hat{X}_k \) is also asymptotic uncorrelated. Let's turn our attention to the variance of the sample -mean \( \mathrm{var}(\overline{X}) \). +

    We can then compute the autocovariance, the variance, sample mean, and +number of observations for each \( i \). +Let \( \gamma_i, \sigma_i^2, +\overline{X}_i \) denote the covariance, variance and average of the +elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of +\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \).

    @@ -693,7 +694,7 @@

    Blocking Transformations

  • 26
  • 27
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs018.html b/doc/pub/week9/html/._week9-bs018.html index ce4aec3d..4e45ebd0 100644 --- a/doc/pub/week9/html/._week9-bs018.html +++ b/doc/pub/week9/html/._week9-bs018.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,24 +651,27 @@

     

     

     

    -

    Blocking Transformations, getting there

    -

    We have

    +

    Blocking Transformations

    + +

    Using the +definition of the blocking transformation and the distributive +property of the covariance, it is clear that since \( h =|i-j| \) +we can define +

    $$ \begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\tag{4} +\gamma_{k+1}(h) &= cov\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ +&= \frac{1}{4}cov\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ +&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \hspace{0.1cm} \mathrm{h = 0} +\tag{2}\\ +&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \mathrm{else} +\tag{3} \end{align} $$ -

    The term \( e_k \) is called the truncation error:

    -$$ -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\tag{5} -\end{equation} -$$ - -

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    +

    Since the quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, \( \hat{X}_k \) is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample +mean \( \mathrm{var}(\overline{X}) \). +

    @@ -690,7 +698,7 @@

    Blocking Transfor
  • 27
  • 28
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs019.html b/doc/pub/week9/html/._week9-bs019.html index 0fd8978e..d48ac840 100644 --- a/doc/pub/week9/html/._week9-bs019.html +++ b/doc/pub/week9/html/._week9-bs019.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,24 +651,24 @@

     

     

     

    -

    Blocking Transformations, final expressions

    - -

    We can then wrap up

    +

    Blocking Transformations, getting there

    +

    We have

    $$ \begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\tag{6} +\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. +\tag{4} \end{align} $$ -

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    +

    The term \( e_k \) is called the truncation error:

    $$ -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \tag{7} -\end{align} +\begin{equation} +e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). +\tag{5} +\end{equation} $$ +

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    @@ -690,7 +695,7 @@

    Blocking Tran
  • 28
  • 29
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs020.html b/doc/pub/week9/html/._week9-bs020.html index 5dc169b1..4842f8e8 100644 --- a/doc/pub/week9/html/._week9-bs020.html +++ b/doc/pub/week9/html/._week9-bs020.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,24 @@

     

     

     

    -

    More on the blocking method

    +

    Blocking Transformations, final expressions

    + +

    We can then wrap up

    +$$ +\begin{align} +n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ +&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. +\tag{6} +\end{align} +$$ -

    Flyvbjerg and Petersen demonstrated that the sequence -\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjecture that the term -\( e_k \) can be made as small as we would like by making \( k \) (and hence -\( d \)) sufficiently large. The sequence is decreasing. -It means we can apply blocking transformations until -\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by -\( \widehat{\sigma}^2_k/n_k \). -

    +

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    +$$ +\begin{align} +\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \tag{7} +\end{align} +$$ -

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    @@ -684,7 +695,7 @@

    More on the blocking method
  • 29
  • 30
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs021.html b/doc/pub/week9/html/._week9-bs021.html index 12543a6e..5e17a01b 100644 --- a/doc/pub/week9/html/._week9-bs021.html +++ b/doc/pub/week9/html/._week9-bs021.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,245 +651,18 @@

     

     

     

    -

    Example code form last week

    - - -
    -
    -
    -
    -
    -
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    -# Using gaussian rng for new positions and Metropolis- Hastings 
    -# Added energy minimization
    -from math import exp, sqrt
    -from random import random, seed, normalvariate
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from mpl_toolkits.mplot3d import Axes3D
    -from matplotlib import cm
    -from matplotlib.ticker import LinearLocator, FormatStrFormatter
    -from scipy.optimize import minimize
    -import sys
    -import os
    -
    -# Where to save data files
    -PROJECT_ROOT_DIR = "Results"
    -DATA_ID = "Results/EnergyMin"
    -
    -if not os.path.exists(PROJECT_ROOT_DIR):
    -    os.mkdir(PROJECT_ROOT_DIR)
    -
    -if not os.path.exists(DATA_ID):
    -    os.makedirs(DATA_ID)
    -
    -def data_path(dat_id):
    -    return os.path.join(DATA_ID, dat_id)
    -
    -outfile = open(data_path("Energies.dat"),'w')
    -
    -
    -# Trial wave function for the 2-electron quantum dot in two dims
    -def WaveFunction(r,alpha,beta):
    -    r1 = r[0,0]**2 + r[0,1]**2
    -    r2 = r[1,0]**2 + r[1,1]**2
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = r12/(1+beta*r12)
    -    return exp(-0.5*alpha*(r1+r2)+deno)
    -
    -# Local energy  for the 2-electron quantum dot in two dims, using analytical local energy
    -def LocalEnergy(r,alpha,beta):
    -    
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12)
    -
    -# Derivate of wave function ansatz as function of variational parameters
    -def DerivativeWFansatz(r,alpha,beta):
    -    
    -    WfDer  = np.zeros((2), np.double)
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    WfDer[0] = -0.5*(r1+r2)
    -    WfDer[1] = -r12*r12*deno2
    -    return  WfDer
    -
    -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector
    -def QuantumForce(r,alpha,beta):
    +

    More on the blocking method

    - qforce = np.zeros((NumberParticles,Dimension), np.double) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12 - qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12 - return qforce - - -# Computing the derivative of the energy and the energy -def EnergyDerivative(x0): - - - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - EnergyDer = 0.0 - DeltaPsi = 0.0 - DerivativePsiE = 0.0 - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - - #Loop over MC MCcycles - for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - 
PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - DerPsi = DerivativeWFansatz(PositionOld,alpha,beta) - DeltaPsi += DerPsi - energy += DeltaE - DerivativePsiE += DerPsi*DeltaE - - # We calculate mean values - energy /= NumberMCcycles - DerivativePsiE /= NumberMCcycles - DeltaPsi /= NumberMCcycles - EnergyDer = 2*(DerivativePsiE-DeltaPsi*energy) - return EnergyDer - - -# Computing the expectation value of the local energy -def Energy(x0): - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - - #Loop over MC MCcycles - for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see 
whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - energy += DeltaE - if Printout: - outfile.write('%f\n' %(energy/(MCcycle+1.0))) - # We calculate mean values - energy /= NumberMCcycles - return energy - -#Here starts the main program with variable declarations -NumberParticles = 2 -Dimension = 2 -# seed for rng generator -seed() -# Monte Carlo cycles for parameter optimization -Printout = False -NumberMCcycles= 10000 -# guess for variational parameters -x0 = np.array([0.9,0.2]) -# Using Broydens method to find optimal parameters -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True}) -x0 = res.x -# Compute the energy again with the optimal parameters and increased number of Monte Cycles -NumberMCcycles= 2**19 -Printout = True -FinalEnergy = Energy(x0) -EResult = np.array([FinalEnergy,FinalEnergy]) -outfile.close() -#nice printout with Pandas -import pandas as pd -from pandas import DataFrame -data ={'Optimal Parameters':x0, 'Final Energy':EResult} -frame = pd.DataFrame(data) -print(frame) -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    Flyvbjerg and Petersen demonstrated that the sequence +\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjecture that the term +\( e_k \) can be made as small as we would like by making \( k \) (and hence +\( d \)) sufficiently large. The sequence is decreasing. +It means we can apply blocking transformations until +\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by +\( \widehat{\sigma}^2_k/n_k \). +

    +

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    @@ -911,7 +689,7 @@

    Example code form last week
  • 30
  • 31
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs022.html b/doc/pub/week9/html/._week9-bs022.html index e49fae5f..16e84161 100644 --- a/doc/pub/week9/html/._week9-bs022.html +++ b/doc/pub/week9/html/._week9-bs022.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,7 @@

     

     

     

    -

    Resampling analysis

    - -

    The next step is then to use the above data sets and perform a -resampling analysis using the blocking method -The blocking code, based on the article of Marius Jonsson is given here -

    - +

    Example code from last week

    @@ -660,60 +659,221 @@

    Resampling analysis

    -
    # Common imports
    +  
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    +# Using gaussian rng for new positions and Metropolis- Hastings 
    +# Added energy minimization
    +from math import exp, sqrt
    +from random import random, seed, normalvariate
    +import numpy as np
    +import matplotlib.pyplot as plt
    +from mpl_toolkits.mplot3d import Axes3D
    +from matplotlib import cm
    +from matplotlib.ticker import LinearLocator, FormatStrFormatter
    +from scipy.optimize import minimize
    +import sys
     import os
     
    -# Where to save the figures and data files
    +# Where to save data files
    +PROJECT_ROOT_DIR = "Results"
     DATA_ID = "Results/EnergyMin"
     
    +if not os.path.exists(PROJECT_ROOT_DIR):
    +    os.mkdir(PROJECT_ROOT_DIR)
    +
    +if not os.path.exists(DATA_ID):
    +    os.makedirs(DATA_ID)
    +
     def data_path(dat_id):
         return os.path.join(DATA_ID, dat_id)
     
    -infile = open(data_path("Energies.dat"),'r')
    -
    -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    -from numpy.linalg import inv
    -
    -def block(x):
    -    # preliminaries
    -    n = len(x)
    -    d = int(log2(n))
    -    s, gamma = zeros(d), zeros(d)
    -    mu = mean(x)
    -
    -    # estimate the auto-covariance and variances 
    -    # for each blocking transformation
    -    for i in arange(0,d):
    -        n = len(x)
    -        # estimate autocovariance of x
    -        gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )
    -        # estimate variance of x
    -        s[i] = var(x)
    -        # perform blocking transformation
    -        x = 0.5*(x[0::2] + x[1::2])
    -   
    -    # generate the test observator M_k from the theorem
    -    M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] )  )[::-1]
    -
    -    # we need a list of magic numbers
    -    q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])
    -
    -    # use magic to determine when we should have stopped blocking
    -    for k in arange(0,d):
    -        if(M[k] < q[k]):
    -            break
    -    if (k >= d-1):
    -        print("Warning: Use more data")
    -    return mu, s[k]/2**(d-k)
    -
    -
    -x = loadtxt(infile)
    -(mean, var) = block(x) 
    -std = sqrt(var)
    +outfile = open(data_path("Energies.dat"),'w')
    +
    +
    +# Trial wave function for the 2-electron quantum dot in two dims
    +def WaveFunction(r,alpha,beta):
    +    """Evaluate the trial wave function for two particles in two dimensions.
    +
    +    r is indexed as r[particle, coordinate]. The value returned is
    +    exp(-0.5*alpha*(|r_0|^2 + |r_1|^2) + r12/(1 + beta*r12)), where r12 is
    +    the distance between the two particles.
    +    """
    +    dx = r[0,0]-r[1,0]
    +    dy = r[0,1]-r[1,1]
    +    separation = sqrt(dx**2 + dy**2)
    +    radius_sq_first = r[0,0]**2 + r[0,1]**2
    +    radius_sq_second = r[1,0]**2 + r[1,1]**2
    +    # Two pieces of the exponent: the alpha-dependent Gaussian part and the
    +    # beta-dependent two-body part.
    +    gaussian_part = -0.5*alpha*(radius_sq_first+radius_sq_second)
    +    correlation_part = separation/(1+beta*separation)
    +    return exp(gaussian_part+correlation_part)
    +
    +# Local energy  for the 2-electron quantum dot in two dims, using analytical local energy
    +def LocalEnergy(r,alpha,beta):
    +    """Return the closed-form local energy at configuration r.
    +
    +    r is indexed as r[particle, coordinate]; alpha and beta are the two
    +    variational parameters. The expression is undefined when the two
    +    particles coincide, because it divides by their separation r12.
    +    """
    +    radius_sq_sum = (r[0,0]**2 + r[0,1]**2) + (r[1,0]**2 + r[1,1]**2)
    +    separation = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    +    damping = 1.0/(1+beta*separation)
    +    damping_sq = damping*damping
    +    # The four summands are added left to right, as in the one-line formula.
    +    quadratic_term = 0.5*(1-alpha*alpha)*radius_sq_sum
    +    alpha_term = 2.0*alpha
    +    inverse_separation = 1.0/separation
    +    correlation_term = damping_sq*(alpha*separation-damping_sq+2*beta*damping-1.0/separation)
    +    return quadratic_term + alpha_term + inverse_separation + correlation_term
    +
    +# Derivate of wave function ansatz as function of variational parameters
    +def DerivativeWFansatz(r,alpha,beta):
    +    """Return the derivatives of the exponent of WaveFunction w.r.t. (alpha, beta).
    +
    +    Element 0 is -0.5*(|r_0|^2 + |r_1|^2) and element 1 is
    +    -r12^2/(1 + beta*r12)^2, packed in a length-2 double array. alpha is
    +    accepted for a uniform signature but does not enter either expression.
    +    """
    +    radius_sq_first = (r[0,0]**2 + r[0,1]**2)
    +    radius_sq_second = (r[1,0]**2 + r[1,1]**2)
    +    separation = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    +    damping = 1.0/(1+beta*separation)
    +    damping_sq = damping*damping
    +    d_alpha = -0.5*(radius_sq_first+radius_sq_second)
    +    d_beta = -separation*separation*damping_sq
    +    return np.array([d_alpha, d_beta], np.double)
    +
    +# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector
    +def QuantumForce(r,alpha,beta):
    +    """Return the quantum force 2*grad(Psi)/Psi for both particles.
    +
    +    r is indexed as r[particle, coordinate]; the result has shape
    +    (NumberParticles, Dimension), one force vector per particle.
    +
    +    For Psi = exp(-0.5*alpha*(|r_0|^2+|r_1|^2) + r12/(1+beta*r12)) the gradient
    +    of the exponent with respect to r_i is the SUM of
    +      * -alpha*r_i                       (Gaussian part), and
    +      * +/-(r_0-r_1)/(r12*(1+beta*r12)^2) (two-body part, + for particle 0),
    +    and the force is twice that sum. The earlier version multiplied the two
    +    contributions together, which is not the gradient of this wave function.
    +    The expression is undefined when the particles coincide (r12 == 0).
    +    """
    +    qforce = np.zeros((NumberParticles,Dimension), np.double)
    +    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    +    deno = 1.0/(1+beta*r12)
    +    # Two-body contribution acting on particle 0; particle 1 feels the opposite.
    +    correlation = 2*(r[0,:]-r[1,:])*deno*deno/r12
    +    qforce[0,:] = -2*r[0,:]*alpha + correlation
    +    qforce[1,:] = -2*r[1,:]*alpha - correlation
    +    return qforce
    +    
    +
    +# Computing the derivative of the energy and the energy 
    +def EnergyDerivative(x0):
    +    """Estimate the gradient of the energy with respect to (alpha, beta).
    +
    +    x0 holds the variational parameters as x0[0] = alpha, x0[1] = beta.
    +    NumberMCcycles Metropolis-Hastings sweeps are run with drifted Gaussian
    +    proposals, moving one particle at a time. After each sweep the local
    +    energy E_L and the parameter derivatives d from DerivativeWFansatz are
    +    accumulated, and the function returns the length-2 array
    +    2*(<d*E_L> - <d>*<E_L>).
    +
    +    Reads the globals NumberParticles, Dimension and NumberMCcycles.
    +    """
    +    # Parameters in the Fokker-Planck simulation of the quantum force
    +    D = 0.5
    +    TimeStep = 0.05
    +
    +    alpha = x0[0]
    +    beta = x0[1]
    +    energy = 0.0
    +    # Array accumulators, one entry per variational parameter
    +    DeltaPsi = np.zeros(2, np.double)
    +    DerivativePsiE = np.zeros(2, np.double)
    +
    +    #Initial position
    +    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    +    for i in range(NumberParticles):
    +        for j in range(Dimension):
    +            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    +    wfold = WaveFunction(PositionOld,alpha,beta)
    +    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    +
    +    #Loop over MC cycles
    +    for MCcycle in range(NumberMCcycles):
    +        #Trial position moving one particle at the time
    +        for i in range(NumberParticles):
    +            # Start every trial from the current accepted configuration.
    +            # Otherwise the other particle's row would still hold a
    +            # previously rejected proposal, and the trial wave function and
    +            # force would describe a configuration the walker is not in.
    +            PositionNew = PositionOld.copy()
    +            for j in range(Dimension):
    +                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    +                                       QuantumForceOld[i,j]*TimeStep*D
    +            wfnew = WaveFunction(PositionNew,alpha,beta)
    +            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    +            # Log of the ratio of proposal densities for the moved particle
    +            GreensFunction = 0.0
    +            for j in range(Dimension):
    +                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    +                                  (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    +                                   PositionNew[i,j]+PositionOld[i,j])
    +
    +            GreensFunction = exp(GreensFunction)
    +            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    +            #Metropolis-Hastings test to see whether we accept the move
    +            if random() <= ProbabilityRatio:
    +                # PositionNew differs from PositionOld only in row i, so the
    +                # whole trial state can be adopted. The force is taken for
    +                # both particles because it depends on their separation.
    +                PositionOld = PositionNew
    +                QuantumForceOld = QuantumForceNew
    +                wfold = wfnew
    +        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    +        DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)
    +        DeltaPsi += DerPsi
    +        energy += DeltaE
    +        DerivativePsiE += DerPsi*DeltaE
    +
    +    # We calculate mean values
    +    energy /= NumberMCcycles
    +    DerivativePsiE /= NumberMCcycles
    +    DeltaPsi /= NumberMCcycles
    +    EnergyDer  = 2*(DerivativePsiE-DeltaPsi*energy)
    +    return EnergyDer
    +
    +
    +# Computing the expectation value of the local energy 
    +def Energy(x0):
    +    """Estimate the expectation value of the local energy for x0 = (alpha, beta).
    +
    +    NumberMCcycles Metropolis-Hastings sweeps are run with drifted Gaussian
    +    proposals, moving one particle at a time; the mean of LocalEnergy over
    +    the sweeps is returned. When the global Printout is true, the running
    +    mean after each sweep is written as one line to the global outfile.
    +
    +    Reads the globals NumberParticles, Dimension, NumberMCcycles, Printout
    +    and outfile.
    +    """
    +    # Parameters in the Fokker-Planck simulation of the quantum force
    +    D = 0.5
    +    TimeStep = 0.05
    +
    +    alpha = x0[0]
    +    beta = x0[1]
    +    energy = 0.0
    +
    +    #Initial position
    +    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    +    for i in range(NumberParticles):
    +        for j in range(Dimension):
    +            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    +    wfold = WaveFunction(PositionOld,alpha,beta)
    +    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    +
    +    #Loop over MC cycles
    +    for MCcycle in range(NumberMCcycles):
    +        #Trial position moving one particle at the time
    +        for i in range(NumberParticles):
    +            # Start every trial from the current accepted configuration.
    +            # Otherwise the other particle's row would still hold a
    +            # previously rejected proposal, and the trial wave function and
    +            # force would describe a configuration the walker is not in.
    +            PositionNew = PositionOld.copy()
    +            for j in range(Dimension):
    +                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    +                                       QuantumForceOld[i,j]*TimeStep*D
    +            wfnew = WaveFunction(PositionNew,alpha,beta)
    +            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    +            # Log of the ratio of proposal densities for the moved particle
    +            GreensFunction = 0.0
    +            for j in range(Dimension):
    +                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    +                                  (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    +                                   PositionNew[i,j]+PositionOld[i,j])
    +
    +            GreensFunction = exp(GreensFunction)
    +            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    +            #Metropolis-Hastings test to see whether we accept the move
    +            if random() <= ProbabilityRatio:
    +                # PositionNew differs from PositionOld only in row i, so the
    +                # whole trial state can be adopted. The force is taken for
    +                # both particles because it depends on their separation.
    +                PositionOld = PositionNew
    +                QuantumForceOld = QuantumForceNew
    +                wfold = wfnew
    +        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    +        energy += DeltaE
    +        if Printout:
    +            # NOTE(review): this writes the running mean, not the individual
    +            # sample DeltaE -- confirm that is what the later analysis of
    +            # Energies.dat expects.
    +            outfile.write('%f\n' %(energy/(MCcycle+1.0)))
    +    # We calculate mean values
    +    energy /= NumberMCcycles
    +    return energy
    +
    +# Here starts the main program with variable declarations
    +# System size; both names are read as globals by QuantumForce, Energy and EnergyDerivative
    +NumberParticles = 2
    +Dimension = 2
    +# Seed the random number generator (no argument given)
    +seed()
    +# Monte Carlo cycles for parameter optimization; no file output during the optimization
    +Printout = False
    +NumberMCcycles= 10000
    +# Initial guess for the variational parameters (alpha, beta)
    +x0 = np.array([0.9,0.2])
    +# Using the BFGS quasi-Newton method, with EnergyDerivative supplying the gradient, to find optimal parameters
    +res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True})
    +x0 = res.x
    +# Compute the energy again with the optimal parameters and an increased number of Monte Carlo cycles,
    +# this time writing the running mean energy to outfile
    +NumberMCcycles= 2**19
    +Printout = True
    +FinalEnergy = Energy(x0)
    +# The energy is repeated so that the array has the same length as x0 in the table built below
    +EResult = np.array([FinalEnergy,FinalEnergy])
    +outfile.close()
    +#nice printout with Pandas
     import pandas as pd
     from pandas import DataFrame
    -data ={'Mean':[mean], 'STDev':[std]}
    -frame = pd.DataFrame(data,index=['Values'])
    +data ={'Optimal Parameters':x0, 'Final Energy':EResult}
    +frame = pd.DataFrame(data)
     print(frame)
     
    @@ -756,7 +916,7 @@

    Resampling analysis

  • 31
  • 32
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs023.html b/doc/pub/week9/html/._week9-bs023.html index 3a01dcc7..0b5c9315 100644 --- a/doc/pub/week9/html/._week9-bs023.html +++ b/doc/pub/week9/html/._week9-bs023.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,91 @@

     

     

     

    -

    Content

    -
      -
    • Simple compiler options
    • -
    • Tools to benchmark your code
    • -
    • Machine architectures
    • -
    • What is vectorization?
    • -
    • How to measure code performance
    • -
    • Parallelization with OpenMP
    • -
    • Parallelization with MPI
    • -
    • Vectorization and parallelization, examples
    • -
    +

    Resampling analysis

    + +

    The next step is then to use the above data sets and perform a +resampling analysis using the blocking method. +The blocking code, based on the article of Marius Jonsson, is given here. +

    + + + +
    +
    +
    +
    +
    +
    # Common imports
    +import os
    +
    +# Directory the data files are read from (see the open() call below)
    +DATA_ID = "Results/EnergyMin"
    +
    +def data_path(dat_id):
    +    return os.path.join(DATA_ID, dat_id)
    +
    +infile = open(data_path("Energies.dat"),'r')
    +
    +from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    +from numpy.linalg import inv
    +
    +def block(x):
    +    # preliminaries: d = int(log2(n)) blocking levels, mu = mean of the input series
    +    n = len(x)
    +    d = int(log2(n))
    +    s, gamma = zeros(d), zeros(d)
    +    mu = mean(x)
    +
    +    # estimate the auto-covariance and variances 
    +    # for each blocking transformation
    +    for i in arange(0,d):
    +        n = len(x)
    +        # estimate the lag-1 autocovariance of x (neighbouring elements, about mu)
    +        gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )
    +        # estimate variance of x
    +        s[i] = var(x)
    +        # perform blocking transformation: average neighbouring pairs, halving len(x)
    +        x = 0.5*(x[0::2] + x[1::2])
    +   
    +    # test statistic M_k from the theorem: M[k] is the sum over levels j >= k of (gamma[j]/s[j])**2 * 2**(d-j)
    +    M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] )  )[::-1]
    +
    +    # thresholds q[k] compared against M[k] below (they look like 99% chi-squared quantiles for 1..30 dof - TODO confirm)
    +    q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])
    +
    +    # find the first blocking level k with M[k] < q[k]; k stays at d-1 if no level passes
    +    for k in arange(0,d):
    +        if(M[k] < q[k]):
    +            break
    +    if (k >= d-1):
    +        print("Warning: Use more data")
    +    return mu, s[k]/2**(d-k)
    +
    +
    +x = loadtxt(infile)
    +(mean, var) = block(x)  # NOTE(review): rebinds the numpy mean/var imported above, which block() uses - a second block() call would fail
    +std = sqrt(var)
    +import pandas as pd
    +from pandas import DataFrame
    +data ={'Mean':[mean], 'STDev':[std]}
    +frame = pd.DataFrame(data,index=['Values'])
    +print(frame)
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    diff --git a/doc/pub/week9/html/._week9-bs024.html b/doc/pub/week9/html/._week9-bs024.html index 2d43eb0d..0e967e9e 100644 --- a/doc/pub/week9/html/._week9-bs024.html +++ b/doc/pub/week9/html/._week9-bs024.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,71 +651,17 @@

     

     

     

    -

    Optimization and profiling

    -
    -
    - - -

    Till now we have not paid much attention to speed and possible optimization possibilities -inherent in the various compilers. We have compiled and linked as -

    - - -
    -
    -
    -
    -
    -
    c++  -c  mycode.cpp
    -c++  -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For Fortran replace with for example gfortran or ifort. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. -

    - -

    It is instructive to look up the compiler manual for further instructions by writing

    - - -
    -
    -
    -
    -
    -
    man c++
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - +

    Content

    +
      +
    • Simple compiler options
    • +
    • Tools to benchmark your code
    • +
    • Machine architectures
    • +
    • What is vectorization?
    • +
    • How to measure code performance
    • +
    • Parallelization with OpenMP
    • +
    • Parallelization with MPI
    • +
    • Vectorization and parallelization, examples
    • +

    diff --git a/doc/pub/week9/html/._week9-bs025.html b/doc/pub/week9/html/._week9-bs025.html index 1121a3ae..a7e0303c 100644 --- a/doc/pub/week9/html/._week9-bs025.html +++ b/doc/pub/week9/html/._week9-bs025.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,13 @@

     

     

     

    -

    More on optimization

    +

    Optimization and profiling

    -

    We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identify potential parallelism, include automatic vectorization or replace a division with a reciprocal -and a multiplication if this speeds up the code. + +

    Till now we have not paid much attention to speed and possible optimization possibilities +inherent in the various compilers. We have compiled and linked as

    @@ -662,8 +666,8 @@

    More on optimization

    -
    c++  -O3 -c  mycode.cpp
    -c++  -O3 -o  mycode.exe  mycode.o
    +  
    c++  -c  mycode.cpp
    +c++  -o  mycode.exe  mycode.o
     
    @@ -679,7 +683,36 @@

    More on optimization

    -

    This (other options are -O2 or -Ofast) is the recommended option.

    +

    For Fortran replace with for example gfortran or ifort. +This is what we call a flat compiler option and should be used when we develop the code. +It produces normally a very large and slow code when translated to machine instructions. +We use this option for debugging and for establishing the correct program output because +every operation is done precisely as the user specified it. +

    + +

    It is instructive to look up the compiler manual for further instructions by writing

    + + +
    +
    +
    +
    +
    +
    man c++
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -708,7 +741,7 @@

    More on optimization

  • 34
  • 35
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs026.html b/doc/pub/week9/html/._week9-bs026.html index bed0c48d..1a7dec8e 100644 --- a/doc/pub/week9/html/._week9-bs026.html +++ b/doc/pub/week9/html/._week9-bs026.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,14 @@

     

     

     

    -

    Optimization and profiling

    +

    More on optimization

    -

    It is also useful to profile your program under the development stage. -You would then compile with +

    We have additional compiler options for optimization. These may include procedure inlining where +performance may be improved, moving constants inside loops outside the loop, +identify potential parallelism, include automatic vectorization or replace a division with a reciprocal +and a multiplication if this speeds up the code.

    @@ -660,32 +667,8 @@

    Optimization and profiling <
    -
    c++  -pg -O3 -c  mycode.cpp
    -c++  -pg -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -

    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    After you have run the code you can obtain the profiling information via

    - - -
    -
    -
    -
    -
    -
    gprof mycode.exe >  ProfileOutput
    +  
    c++  -O3 -c  mycode.cpp
    +c++  -O3 -o  mycode.exe  mycode.o
     
    @@ -701,14 +684,10 @@

    Optimization and profiling <

    -

    When you have profiled properly your code, you must take out this option as it -slows down performance. -For memory tests use valgrind. An excellent environment for all these aspects, and much more, is Qt creator. -

    +

    This (other options are -O2 or -Ofast) is the recommended option.

    -

    diff --git a/doc/pub/week9/html/._week9-bs027.html b/doc/pub/week9/html/._week9-bs027.html index 19c3cbe9..24965cae 100644 --- a/doc/pub/week9/html/._week9-bs027.html +++ b/doc/pub/week9/html/._week9-bs027.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,11 +651,11 @@

     

     

     

    -

    Optimization and debugging

    +

    Optimization and profiling

    -

    Adding debugging options is a very useful alternative during the development stage of a program. +

    It is also useful to profile your program during the development stage. You would then compile with

    @@ -660,8 +665,8 @@

    Optimization and debugging <
    -
    c++  -g -O0 -c  mycode.cpp
    -c++  -g -O0 -o  mycode.exe  mycode.o
    +  
    c++  -pg -O3 -c  mycode.cpp
    +c++  -pg -O3 -o  mycode.exe  mycode.o
     
    @@ -677,15 +682,33 @@

    Optimization and debugging <

    -

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    +

    After you have run the code you can obtain the profiling information via

    + + +
    +
    +
    +
    +
    +
    gprof mycode.exe >  ProfileOutput
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -
    -
    - -

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag -ftrapv does this for the CLANG compiler on OS X operating systems. +

    Once you have properly profiled your code, you must remove this option as it +slows down performance. +For memory tests use Valgrind. An excellent environment for all these aspects, and much more, is Qt Creator.

    @@ -716,7 +739,7 @@

    Optimization and debugging <
  • 36
  • 37
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs028.html b/doc/pub/week9/html/._week9-bs028.html index c6698375..d46fb065 100644 --- a/doc/pub/week9/html/._week9-bs028.html +++ b/doc/pub/week9/html/._week9-bs028.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,16 +651,13 @@

     

     

     

    -

    Other hints

    +

    Optimization and debugging

    -

    In general, irrespective of compiler options, it is useful to

    -
      -
    • avoid if tests or calls to functions inside loops, if possible.
    • -
    • avoid multiplication with constants inside loops if possible
    • -
    -

    Here is an example of a part of a program where specific operations lead to a slower code

    +

    Adding debugging options is a very useful alternative during the development stage of a program. +You would then compile with +

    @@ -663,11 +665,8 @@

    Other hints

    -
    k = n-1;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] +c*d;
    -    e = g[k];
    -}
    +  
    c++  -g -O0 -c  mycode.cpp
    +c++  -g -O0 -o  mycode.exe  mycode.o
     
    @@ -683,36 +682,15 @@

    Other hints

    -

    A better code is

    - - -
    -
    -
    -
    -
    -
    temp = c*d;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] + temp;
    -}
    -e = g[n-1];
    -
    +

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -

    Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. +

    +
    + +

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. +The flag -ftrapv does this for the CLANG compiler on OS X operating systems.

    @@ -743,7 +721,7 @@

    Other hints

  • 37
  • 38
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs029.html b/doc/pub/week9/html/._week9-bs029.html index c7c38c51..4234f2e9 100644 --- a/doc/pub/week9/html/._week9-bs029.html +++ b/doc/pub/week9/html/._week9-bs029.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,74 @@

     

     

     

    -

    Vectorization and the basic idea behind parallel computing

    +

    Other hints

    -

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

    +

    In general, irrespective of compiler options, it is useful to

      -
    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • -
    • Multiple processors are involved to solve a global problem.
    • -
    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    • +
    • avoid if tests or calls to functions inside loops, if possible.
    • +
    • avoid multiplication with constants inside loops, if possible.
    -

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    +

    Here is an example of a part of a program where specific operations lead to slower code:

    + + +
    +
    +
    +
    +
    +
    k = n-1;
    +for (i = 0; i < n; i++){
    +    a[i] = b[i] +c*d;
    +    e = g[k];
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    A better code is

    + + +
    +
    +
    +
    +
    +
    temp = c*d;
    +for (i = 0; i < n; i++){
    +    a[i] = b[i] + temp;
    +}
    +e = g[n-1];
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    Here we avoid a repeated multiplication inside a loop. +Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. +

    @@ -686,7 +748,7 @@

    38
  • 39
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs030.html b/doc/pub/week9/html/._week9-bs030.html index 02a3522a..acb6bc60 100644 --- a/doc/pub/week9/html/._week9-bs030.html +++ b/doc/pub/week9/html/._week9-bs030.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,21 @@

     

     

     

    -

    A rough classification of hardware models

    +

    Vectorization and the basic idea behind parallel computing

    - +

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

      -
    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • -
    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • -
    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    • +
    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • +
    • Multiple processors are involved to solve a global problem.
    • +
    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    +

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    +

    diff --git a/doc/pub/week9/html/._week9-bs031.html b/doc/pub/week9/html/._week9-bs031.html index 00d8962c..5e0210ee 100644 --- a/doc/pub/week9/html/._week9-bs031.html +++ b/doc/pub/week9/html/._week9-bs031.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,20 +651,19 @@

     

     

     

    -

    Shared memory and distributed memory

    +

    A rough classification of hardware models

    -

    One way of categorizing modern parallel computers is to look at the memory configuration.

    +
      -
    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • -
    • In distributed memory systems each CPU has its own memory.
    • +
    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • +
    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • +
    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    -

    The CPUs are connected by some network and may exchange messages.

    -

    diff --git a/doc/pub/week9/html/._week9-bs032.html b/doc/pub/week9/html/._week9-bs032.html index 9f51a05f..e85de6fb 100644 --- a/doc/pub/week9/html/._week9-bs032.html +++ b/doc/pub/week9/html/._week9-bs032.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,20 @@

     

     

     

    -

    Different parallel programming paradigms

    +

    Shared memory and distributed memory

    - +

    One way of categorizing modern parallel computers is to look at the memory configuration.

      -
    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • -
    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    • +
    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • +
    • In distributed memory systems each CPU has its own memory.
    +

    The CPUs are connected by some network and may exchange messages.

    +

    diff --git a/doc/pub/week9/html/._week9-bs033.html b/doc/pub/week9/html/._week9-bs033.html index 2e6b278d..657b4d24 100644 --- a/doc/pub/week9/html/._week9-bs033.html +++ b/doc/pub/week9/html/._week9-bs033.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,18 @@

     

     

     

    -

    Different parallel programming paradigms

    +

    Different parallel programming paradigms

      -
    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • -
    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • +
    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • +
    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    -

    diff --git a/doc/pub/week9/html/._week9-bs034.html b/doc/pub/week9/html/._week9-bs034.html index a5e43a70..5bf7eed2 100644 --- a/doc/pub/week9/html/._week9-bs034.html +++ b/doc/pub/week9/html/._week9-bs034.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,47 +650,19 @@

     

     

     

    - -

    What is vectorization?

    -

    Vectorization is a special -case of Single Instructions Multiple Data (SIMD) to denote a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied with SIMD instructions. -

    - -

    Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at the time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example -

    + +

    Different parallel programming paradigms

    +
    +
    + - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    +
      +
    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • +
    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • +
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -

    If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at the time. -

    @@ -712,7 +689,7 @@

    What is vectorization?

  • 43
  • 44
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs035.html b/doc/pub/week9/html/._week9-bs035.html index 4b8dbfca..199528c5 100644 --- a/doc/pub/week9/html/._week9-bs035.html +++ b/doc/pub/week9/html/._week9-bs035.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,28 +651,47 @@

     

     

     

    -

    Number of elements that can acted upon

    -

    A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. +

    What is vectorization?

    +

    Vectorization is a special +case of Single Instruction Multiple Data (SIMD) to denote a single +instruction stream capable of operating on multiple data elements in +parallel. +We can think of vectorization as the unrolling of loops accompanied by SIMD instructions.

    -

    The number of elements which can be operated on in parallel -range from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. +

    Vectorization is the process of converting an algorithm that performs scalar operations +(typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. +Consider the following example +

    + + +
    +
    +
    +
    +
    +
    for (i = 0; i < n; i++){
    +    a[i] = b[i] + c[i];
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    If the code is not vectorized, the compiler will simply start with the first element and +then perform subsequent additions operating on one address in memory at a time.

    -

    IN summary, our instructions operate on 128 bit (16 byte) operands

    -
      -
    • 4 floats or ints
    • -
    • 2 doubles
    • -
    • Data paths 128 bits vide for vector unit
    • -

    diff --git a/doc/pub/week9/html/._week9-bs036.html b/doc/pub/week9/html/._week9-bs036.html index 66e077b3..ba692384 100644 --- a/doc/pub/week9/html/._week9-bs036.html +++ b/doc/pub/week9/html/._week9-bs036.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,39 +651,28 @@

     

     

     

    -

    Number of elements that can acted upon, examples

    -

    We start with the simple scalar operations given by

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized and we have a 128-bit register to store a 32 bits floating point number, -it means that we have \( 3\times 32 \) bits that are not used. +

    Number of elements that can be acted upon

    +

    A SIMD instruction can operate on multiple data elements in one single instruction. +It uses the so-called 128-bit SIMD floating-point register. +In this sense, vectorization adds some form of parallelism since one instruction is applied +to many parts of say a vector.

    -

    We have thus unused space in our SIMD registers. These registers could hold three additional integers.

    +

    The number of elements which can be operated on in parallel +ranges from four single-precision floating point data elements in so-called +Streaming SIMD Extensions and two double-precision floating-point data +elements in Streaming SIMD Extensions 2 to sixteen byte operations in +a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length +ranges from 2 to 16, depending on the instruction extensions used and +on the data type. +

    +

    In summary, our instructions operate on 128-bit (16-byte) operands

    +
      +
    • 4 floats or ints
    • +
    • 2 doubles
    • +
    • Data paths 128 bits wide for vector unit
    • +

    diff --git a/doc/pub/week9/html/._week9-bs037.html b/doc/pub/week9/html/._week9-bs037.html index a3465561..0a1df454 100644 --- a/doc/pub/week9/html/._week9-bs037.html +++ b/doc/pub/week9/html/._week9-bs037.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,8 +651,8 @@

     

     

     

    -

    Operation counts for scalar operation

    -

    The code

    +

    Number of elements that can be acted upon, examples

    +

    We start with the simple scalar operations given by

    @@ -673,13 +678,12 @@

    Operation counts f

    -

    has for \( n \) repeats

    -
      -
    1. one load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    +

    If the code is not vectorized and we have a 128-bit register to store a 32-bit floating-point number, +it means that we have \( 3\times 32 \) bits that are not used. +

    + +

    We thus have unused space in our SIMD registers. These registers could hold three additional 32-bit numbers.

    +

    diff --git a/doc/pub/week9/html/._week9-bs038.html b/doc/pub/week9/html/._week9-bs038.html index 91d5dede..188b9361 100644 --- a/doc/pub/week9/html/._week9-bs038.html +++ b/doc/pub/week9/html/._week9-bs038.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,10 +651,8 @@

     

     

     

    -

    Number of elements that can be acted upon, examples

    -

    If we vectorize the code, we can perform, with a 128-bit register, four simultaneous operations, that is -we have -

    +

    Operation counts for scalar operation

    +

    The code

    @@ -657,11 +660,8 @@

    Number o
    -
    for (i = 0; i < n; i+=4){
    +  
    for (i = 0; i < n; i++){
         a[i] = b[i] + c[i];
    -    a[i+1] = b[i+1] + c[i+1];
    -    a[i+2] = b[i+2] + c[i+2];
    -    a[i+3] = b[i+3] + c[i+3];
     }
     
    @@ -678,8 +678,13 @@

    Number o

    -

    Four additions are now done in a single step.

    - +

    has for \( n \) repeats

    +
      +
    1. one load for \( c[i] \) in address 1
    2. +
    3. one load for \( b[i] \) in address 2
    4. +
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. +
    7. store \( a[i] \) in address 2
    8. +

    diff --git a/doc/pub/week9/html/._week9-bs039.html b/doc/pub/week9/html/._week9-bs039.html index 15837176..00f9fab2 100644 --- a/doc/pub/week9/html/._week9-bs039.html +++ b/doc/pub/week9/html/._week9-bs039.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,40 @@

     

     

     

    -

    Number of operations when vectorized

    -

    For \( n/4 \) repeats assuming floats or integers

    -
      -
    1. one vector load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    +

    Number of elements that can be acted upon, examples

    +

    If we vectorize the code, we can perform four simultaneous operations with a 128-bit register, that is, +we have +

    + + +
    +
    +
    +
    +
    +
    for (i = 0; i < n; i+=4){
    +    a[i] = b[i] + c[i];
    +    a[i+1] = b[i+1] + c[i+1];
    +    a[i+2] = b[i+2] + c[i+2];
    +    a[i+3] = b[i+3] + c[i+3];
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    Four additions are now done in a single step.

    +

    diff --git a/doc/pub/week9/html/._week9-bs040.html b/doc/pub/week9/html/._week9-bs040.html index f32f8c13..7be9cd74 100644 --- a/doc/pub/week9/html/._week9-bs040.html +++ b/doc/pub/week9/html/._week9-bs040.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,79 +650,15 @@

     

     

     

    - -

    A simple test case with and without vectorization

    -

    We implement these operations in a simple c++ program that computes at the end the norm of a vector.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double *a, *b, *c;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -// Allocate space for the vectors to be used
    -    a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -  // Set up values for vectors  a and b
    -  for (int i = 0; i < n; i++){
    -    double angle = 2.0*M_PI*i/ (( double ) n);
    -    a[i] = s*(sin(angle) + cos(angle));
    -    b[i] =  s*sin(2.0*angle);
    -    c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (int i = 0; i < n; i++){
    -    c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  double Norm2 = 0.0;
    -  for (int i = 0; i < n; i++){
    -    Norm2  += c[i]*c[i];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
    -  cout << "  Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - + +

    Number of operations when vectorized

    +

    For \( n/4 \) repeats assuming floats or integers

    +
      +
    1. one vector load for \( c[i] \) in address 1
    2. +
    3. one vector load for \( b[i] \) in address 2
    4. +
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. +
    7. store \( a[i] \) in address 2
    8. +

    diff --git a/doc/pub/week9/html/._week9-bs041.html b/doc/pub/week9/html/._week9-bs041.html index d2c79367..79251ed5 100644 --- a/doc/pub/week9/html/._week9-bs041.html +++ b/doc/pub/week9/html/._week9-bs041.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,99 +650,63 @@

     

     

     

    - -

    Compiling with and without vectorization

    -

    We can compile and link without vectorization using the clang c++ compiler

    - - -
    -
    -
    -
    -
    -
    clang -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    + +

    A simple test case with and without vectorization

    +

    We implement these operations in a simple C++ program that, at the end, computes the norm of a vector.

    -

    and with vectorization (and additional optimizations)

    - +
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. -

    +
    #include <cstdlib>
    +#include <iostream>
    +#include <cmath>
    +#include <iomanip>
    +#include "time.h"
     
    -
    -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 10000000
    -Time used  for norm computation=0.04720500000
    -Compphys:~ hjensen$ ./novec.x 10000000
    -Time used  for norm computation=0.03311700000
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This particular C++ compiler speeds up the above loop operations with a factor of 1.5 -Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 1000000000
    -Time used  for norm computation=58.41391100
    -Compphys:~ hjensen$ ./novec.x 1000000000
    -Time used  for norm computation=46.51295300
    +using namespace std; // note use of namespace
    +int main (int argc, char* argv[])
    +{
    +  // Read the vector length n from the first command-line argument (argv[1] is used without checking argc)
    +  int n = atoi(argv[1]);
    +  double s = 1.0/sqrt( (double) n);
    +  double *a, *b, *c;
    +  // Start timing; the measured interval also covers the allocation and initialization below
    +  clock_t start, finish;
    +  start = clock();
    +// Allocate space for the three vectors a, b and c, each of length n
    +    a = new double [n]; b = new double [n]; c = new double [n];
    +  // NOTE(review): no parallel directive appears in this listing; the loops below are written as plain serial loops
    +  // Set up values for vectors a and b (both scaled by s) and initialize c to zero
    +  for (int i = 0; i < n; i++){
    +    double angle = 2.0*M_PI*i/ (( double ) n);
    +    a[i] = s*(sin(angle) + cos(angle));
    +    b[i] =  s*sin(2.0*angle);
    +    c[i] = 0.0;
    +  }
    +  // Then perform the elementwise vector addition c = c + a + b
    +  for (int i = 0; i < n; i++){
    +    c[i] += a[i]+b[i];
    +  }
    +  // Accumulate the sum of squares of c; no square root is taken, so Norm2 holds the squared 2-norm
    +  double Norm2 = 0.0;
    +  for (int i = 0; i < n; i++){
    +    Norm2  += c[i]*c[i];
    +  }
    +  finish = clock();
    +  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    +  cout << setiosflags(ios::showpoint | ios::uppercase);
    +  cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
    +  cout << "  Norm-2  = " << Norm2 << endl;
    +  // Release the three heap-allocated vectors
    +  delete[] a;
    +  delete[] b;
    +  delete[] c;
    +  return 0;
    +}
     
    @@ -753,7 +722,6 @@

    Compiling with

    -

    We will discuss these issues further in the next slides.

    @@ -780,7 +748,7 @@

    Compiling with
  • 50
  • 51
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs042.html b/doc/pub/week9/html/._week9-bs042.html index 1d38757d..6735a963 100644 --- a/doc/pub/week9/html/._week9-bs042.html +++ b/doc/pub/week9/html/._week9-bs042.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,8 +651,8 @@

     

     

     

    -

    Compiling with and without vectorization using clang

    -

    We can compile and link without vectorization with clang compiler

    +

    Compiling with and without vectorization

    +

    We can compile and link without vectorization using the clang c++ compiler

    @@ -655,7 +660,7 @@

    Com
    -
    clang++ -o -fno-vectorize novec.x vecexample.cpp
    +  
    clang++ -o novec.x vecexample.cpp
     
    @@ -671,7 +676,7 @@

    Com

    -

    and with vectorization

    +

    and with vectorization (and additional optimizations)

    @@ -695,7 +700,9 @@

    Com

    -

    We can also add vectorization analysis, see for example

    +

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. +The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. +

    @@ -703,7 +710,10 @@

    Com
    -
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    +  
    Compphys:~ hjensen$ ./vec.x 10000000
    +Time used  for norm computation=0.04720500000
    +Compphys:~ hjensen$ ./novec.x 10000000
    +Time used  for norm computation=0.03311700000
     
    @@ -719,7 +729,9 @@

    Com

    -

    or figure out if vectorization was missed

    +

    This particular C++ compiler speeds up the above loop operations by a factor of 1.5. +Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. +

    @@ -727,7 +739,10 @@

    Com
    -
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
    +  
    Compphys:~ hjensen$ ./vec.x 1000000000
    +Time used  for norm computation=58.41391100
    +Compphys:~ hjensen$ ./novec.x 1000000000
    +Time used  for norm computation=46.51295300
     
    @@ -743,6 +758,7 @@

    Com

    +

    We will discuss these issues further in the next slides.

    @@ -769,7 +785,7 @@

    Com
  • 51
  • 52
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs043.html b/doc/pub/week9/html/._week9-bs043.html index a8f6a19d..1b3009c0 100644 --- a/doc/pub/week9/html/._week9-bs043.html +++ b/doc/pub/week9/html/._week9-bs043.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,12 +650,81 @@

     

     

     

    - -

    Automatic vectorization and vectorization inhibitors, criteria

    + +

    Compiling with and without vectorization using clang

    +

    We can compile and link without vectorization with the clang compiler

    + + +
    +
    +
    +
    +
    +
    clang++ -fno-vectorize -o novec.x vecexample.cpp
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    and with vectorization

    + + +
    +
    +
    +
    +
    +
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    We can also add vectorization analysis, see for example

    -

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    + +
    +
    +
    +
    +
    +
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -

    An important criteria is that the loop counter \( n \) is known at the entry of the loop.

    +

    or figure out if vectorization was missed

    @@ -658,9 +732,7 @@

    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -  }
    +  
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
     
    @@ -676,7 +748,6 @@

    Automatic vectorization and vectorization inhibitors, exit criteria

    +

    Automatic vectorization and vectorization inhibitors, criteria

    + +

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    -

    An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop -

    +

    An important criterion is that the loop count \( n \) is known at the entry of the loop.

    @@ -661,7 +665,6 @@

      for (int j = 0; j < n; j++) {
         a[j] = cos(j*1.0);
    -    if (a[j] < 0 ) break;
       }
     

    @@ -678,7 +681,7 @@

    -

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    +

    The variable \( n \) does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.

    @@ -705,7 +708,7 @@

    53
  • 54
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs045.html b/doc/pub/week9/html/._week9-bs045.html index 208b044e..55f0934d 100644 --- a/doc/pub/week9/html/._week9-bs045.html +++ b/doc/pub/week9/html/._week9-bs045.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,11 +651,11 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, straight-line code

    +

    Automatic vectorization and vectorization inhibitors, exit criteria

    -

    SIMD instructions perform the same type of operations multiple times. -A switch statement leads thus to a non-vectorizable loop since different statemens cannot branch. -The following code can however be vectorized since the if statement is implemented as a masked assignment. +

    An exit statement should in general be avoided. +If the exit statement contains data-dependent conditions, the loop cannot be vectorized. +The following is an example of a non-vectorizable loop

    @@ -660,13 +665,8 @@

      for (int j = 0; j < n; j++) {
    -    double x  = cos(j*1.0);
    -    if (x > 0 ) {
    -       a[j] =  x*sin(j*2.0); 
    -    }
    -    else {
    -       a[j] = 0.0;
    -    }
    +    a[j] = cos(j*1.0);
    +    if (a[j] < 0 ) break;
       }
     
    @@ -683,7 +683,7 @@

    -

    These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as switch, go to, or return statements or if constructs that cannot be treated as masked assignments.

    +

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    @@ -710,7 +710,7 @@

    54
  • 55
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs046.html b/doc/pub/week9/html/._week9-bs046.html index 69aac9c7..6cec5c34 100644 --- a/doc/pub/week9/html/._week9-bs046.html +++ b/doc/pub/week9/html/._week9-bs046.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,9 +651,12 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, nested loops

    +

    Automatic vectorization and vectorization inhibitors, straight-line code

    -

    Only the innermost loop of the following example is vectorized

    +

    SIMD instructions perform the same type of operations multiple times. +A switch statement thus leads to a non-vectorizable loop since different statements cannot branch. +The following code can, however, be vectorized since the if statement is implemented as a masked assignment. +

    @@ -656,10 +664,14 @@

    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    +  
      for (int j = 0; j < n; j++) {
    +    double x  = cos(j*1.0);
    +    if (x > 0 ) {
    +       a[j] =  x*sin(j*2.0); 
    +    }
    +    else {
    +       a[j] = 0.0;
    +    }
       }
     
    @@ -676,7 +688,7 @@

    -

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    +

    These operations can be performed for all data elements, but only those elements for which the mask evaluates to true are stored. In general, one should avoid branches such as switch, goto, or return statements, or if constructs that cannot be treated as masked assignments.

    @@ -703,7 +715,7 @@

    55
  • 56
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs047.html b/doc/pub/week9/html/._week9-bs047.html index cfb30d7b..b645b560 100644 --- a/doc/pub/week9/html/._week9-bs047.html +++ b/doc/pub/week9/html/._week9-bs047.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,9 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, function calls

    +

    Automatic vectorization and vectorization inhibitors, nested loops

    -

    Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable -

    +

    Only the innermost loop of the following example is vectorized

    @@ -660,7 +662,9 @@

      for (int i = 0; i < n; i++) {
    -      a[i] = log10(i)*cos(i);
    +      for (int j = 0; j < n; j++) {
    +           a[i][j] += b[i][j];
    +      }  
       }
     
    @@ -677,7 +681,7 @@

    -

    Similarly, inline functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called.

    +

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    @@ -704,7 +708,7 @@

    56
  • 57
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs048.html b/doc/pub/week9/html/._week9-bs048.html index a733da36..cd158c6c 100644 --- a/doc/pub/week9/html/._week9-bs048.html +++ b/doc/pub/week9/html/._week9-bs048.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,10 +651,11 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, data dependencies

    +

    Automatic vectorization and vectorization inhibitors, function calls

    -

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code +

    Calls to programmer-defined functions ruin vectorization. However, calls to intrinsic functions like +\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc. are allowed since they are normally efficiently vectorized. +The following example is fully vectorizable

    @@ -658,9 +664,8 @@

    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i] = a[i-1] + b;
    +  
      for (int i = 0; i < n; i++) {
    +      a[i] = log10(i)*cos(i);
       }
     
    @@ -677,34 +682,7 @@

    -

    is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value \( a[i-1] \) computed during the iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values \( a[0] \), \( a[1] \), \( a[2] \) and \( a[3] \) from the previous loop will be loaded into the right-hand side and produce wrong results. That is, we have

    - - -
    -
    -
    -
    -
    -
       a[1] = a[0] + b;
    -   a[2] = a[1] + b;
    -   a[3] = a[2] + b;
    -   a[4] = a[3] + b;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and if the two first iterations are executed at the same by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    +

    Similarly, inline functions defined by the programmer allow for vectorization since the function statements are glued into the actual place where the function is called.

    @@ -731,7 +709,7 @@

    57
  • 58
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs049.html b/doc/pub/week9/html/._week9-bs049.html index 81e0eddb..d042bf15 100644 --- a/doc/pub/week9/html/._week9-bs049.html +++ b/doc/pub/week9/html/._week9-bs049.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,10 +651,10 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, more data dependencies

    +

    Automatic vectorization and vectorization inhibitors, data dependencies

    -

    On the other hand, a so-called -write-after-read statement can be vectorized. The following code +

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called +read-after-write statement with an explicit flow dependency cannot be vectorized. The following code

    @@ -660,7 +665,7 @@

      double b = 15.;
       for (int i = 1; i < n; i++) {
    -      a[i-1] = a[i] + b;
    +      a[i] = a[i-1] + b;
       }
     

    @@ -677,9 +682,34 @@

    +
    +
    +
    +
    +
    +
       a[1] = a[0] + b;
    +   a[2] = a[1] + b;
    +   a[3] = a[2] + b;
    +   a[4] = a[3] + b;
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    and if the first two iterations are executed at the same time by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    @@ -706,7 +736,7 @@

    58
  • 59
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs050.html b/doc/pub/week9/html/._week9-bs050.html index 41ad4205..4da14c8c 100644 --- a/doc/pub/week9/html/._week9-bs050.html +++ b/doc/pub/week9/html/._week9-bs050.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,11 +651,11 @@

     

     

     

    -

    Automatic vectorization and vectorization inhibitors, memory stride

    - -

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    +

    Automatic vectorization and vectorization inhibitors, more data dependencies

    -

    When dealing with arrays, you should also avoid memory stride, since this slows down considerably vectorization. When you access array element, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here

    +

    On the other hand, a so-called +write-after-read statement can be vectorized. The following code +

    @@ -658,10 +663,9 @@

    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    +  
      double b = 15.;
    +  for (int i = 1; i < n; i++) {
    +      a[i-1] = a[i] + b;
       }
     
    @@ -678,6 +682,9 @@

    +

    is an example of an anti-dependency (write-after-read) that can be vectorized since no iteration with a higher value of \( i \) +can complete before an iteration with a lower value of \( i \). However, such code leads to problems with parallelization. +

    @@ -704,7 +711,7 @@

    59
  • 60
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs051.html b/doc/pub/week9/html/._week9-bs051.html index 650c41f8..ad31792b 100644 --- a/doc/pub/week9/html/._week9-bs051.html +++ b/doc/pub/week9/html/._week9-bs051.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,23 +651,38 @@

     

     

     

    -

    Memory management

    -

    The main memory contains the program data

    -
      -
    1. Cache memory contains a copy of the main memory data
    2. -
    3. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    4. -
    5. Registers contain working data only
    6. -
        -
      • Modern CPUs perform most or all operations only on data in register
      • -
      -
    7. Multiple Cache memories contain a copy of the main memory data
    8. -
        -
      • Cache items accessed by their address in main memory
      • -
      • L1 cache is the fastest but has the least capacity
      • -
      • L2, L3 provide intermediate performance/size tradeoffs
      • -
      -
    -

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    +

    Automatic vectorization and vectorization inhibitors, memory stride

    + +

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    + +

    When dealing with arrays, you should also avoid memory stride, since this considerably slows down vectorization. When you access array elements, write for example the inner loop so that it vectorizes using unit stride, that is, so that it accesses successive array elements in memory, as shown here

    + + +
    +
    +
    +
    +
    +
      for (int i = 0; i < n; i++) {
    +      for (int j = 0; j < n; j++) {
    +           a[i][j] += b[i][j];
    +      }  
    +  }
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    @@ -689,7 +709,7 @@

    Memory management

  • 60
  • 61
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs052.html b/doc/pub/week9/html/._week9-bs052.html index 3a54f757..eae214a4 100644 --- a/doc/pub/week9/html/._week9-bs052.html +++ b/doc/pub/week9/html/._week9-bs052.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,23 @@

     

     

     

    -

    Memory and communication

    - +

    Memory management

    +

    The main memory contains the program data

      -
    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. -
    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. +
    5. Cache memory contains a copy of the main memory data
    6. +
    7. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    8. +
    9. Registers contain working data only
    10. +
        +
      • Modern CPUs perform most or all operations only on data in register
      • +
      +
    11. Multiple Cache memories contain a copy of the main memory data
      • -
      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • -
      • Even if you do not use the data, it is moved and occupies space in the cache
      • +
      • Cache items accessed by their address in main memory
      • +
      • L1 cache is the fastest but has the least capacity
      • +
      • L2, L3 provide intermediate performance/size tradeoffs
    -

    Many of these performance features are not captured in most programming languages.

    +

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    @@ -683,7 +694,7 @@

    Memory and communication

  • 61
  • 62
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs053.html b/doc/pub/week9/html/._week9-bs053.html index fe227f06..c31e48ca 100644 --- a/doc/pub/week9/html/._week9-bs053.html +++ b/doc/pub/week9/html/._week9-bs053.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,38 +651,17 @@

     

     

     

    -

    Measuring performance

    - -

    How do we measure performance? What is wrong with this code to time a loop?

    - - -
    -
    -
    -
    -
    -
      clock_t start, finish;
    -  start = clock();
    -  for (int j = 0; j < i; j++) {
    -    a[j] = b[j]+b[j]*c[j];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    Memory and communication

    +
      +
    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. +
    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. +
        +
      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • +
      • Even if you do not use the data, it is moved and occupies space in the cache
      • +
      +
    +

    Many of these performance features are not captured in most programming languages.

    @@ -704,7 +688,7 @@

    Measuring performance

  • 62
  • 63
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs054.html b/doc/pub/week9/html/._week9-bs054.html index 3b7c0212..28a382af 100644 --- a/doc/pub/week9/html/._week9-bs054.html +++ b/doc/pub/week9/html/._week9-bs054.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,39 @@

     

     

     

    -

    Problems with measuring time

    -
      -
    1. Timers are not infinitely accurate
    2. -
    3. All clocks have a granularity, the minimum time that they can measure
    4. -
    5. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    6. -
    7. Always know what your clock granularity is
    8. -
    9. Ensure that your measurement is for a long enough duration (say 100 times the tick)
    10. -
    +

    Measuring performance

    + +

    How do we measure performance? What is wrong with this code to time a loop?

    + + +
    +
    +
    +
    +
    +
      clock_t start, finish;
    +  start = clock();
    +  for (int j = 0; j < i; j++) {
    +    a[j] = b[j]+b[j]*c[j];
    +  }
    +  finish = clock();
    +  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    diff --git a/doc/pub/week9/html/._week9-bs055.html b/doc/pub/week9/html/._week9-bs055.html index b5e0ffa4..4015fc88 100644 --- a/doc/pub/week9/html/._week9-bs055.html +++ b/doc/pub/week9/html/._week9-bs055.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,16 +651,13 @@

     

     

     

    -

    Problems with cold start

    - -

    What happens when the code is executed? The assumption is that the code is ready to -execute. But -

    +

    Problems with measuring time

      -
    1. Code may still be on disk, and not even read into memory.
    2. -
    3. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    4. -
    5. Multiple tests often necessary to ensure that cold start effects are not present
    6. -
    7. Special effort often required to ensure data in the intended part of the memory hierarchy.
    8. +
    9. Timers are not infinitely accurate
    10. +
    11. All clocks have a granularity, the minimum time that they can measure
    12. +
    13. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    14. +
    15. Always know what your clock granularity is
    16. +
    17. Ensure that your measurement is for a long enough duration (say 100 times the tick)

    @@ -682,7 +684,7 @@

    Problems with cold start

  • 64
  • 65
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs056.html b/doc/pub/week9/html/._week9-bs056.html index 3e234143..f2dc16d3 100644 --- a/doc/pub/week9/html/._week9-bs056.html +++ b/doc/pub/week9/html/._week9-bs056.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,16 @@

     

     

     

    -

    Problems with smart compilers

    +

    Problems with cold start

    +

    What happens when the code is executed? The assumption is that the code is ready to +execute. But +

      -
    1. If the result of the computation is not used, the compiler may eliminate the code
    2. -
    3. Performance will look impossibly fantastic
    4. -
    5. Even worse, eliminate some of the code so the performance looks plausible
    6. -
    7. Ensure that the results are (or may be) used.
    8. +
    9. Code may still be on disk, and not even read into memory.
    10. +
    11. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    12. +
    13. Multiple tests often necessary to ensure that cold start effects are not present
    14. +
    15. Special effort often required to ensure data in the intended part of the memory hierarchy.

    @@ -679,7 +687,7 @@

    Problems with smart compil
  • 65
  • 66
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs057.html b/doc/pub/week9/html/._week9-bs057.html index 8c76800c..48a43c66 100644 --- a/doc/pub/week9/html/._week9-bs057.html +++ b/doc/pub/week9/html/._week9-bs057.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,13 @@

     

     

     

    -

    Problems with interference

    +

    Problems with smart compilers

    +
      -
    1. Other activities are sharing your processor
    2. -
        -
      • Operating system, system demons, other users
      • -
      • Some parts of the hardware do not always perform with exactly the same performance
      • -
      -
    3. Make multiple tests and report
    4. -
    5. Easy choices include
    6. -
        -
      • Average tests represent what users might observe over time
      • -
      +
    7. If the result of the computation is not used, the compiler may eliminate the code
    8. +
    9. Performance will look impossibly fantastic
    10. +
    11. Even worse, eliminate some of the code so the performance looks plausible
    12. +
    13. Ensure that the results are (or may be) used.

    @@ -684,7 +684,7 @@

    Problems with interference 66
  • 67
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs058.html b/doc/pub/week9/html/._week9-bs058.html index 716f9a3f..f90670fb 100644 --- a/doc/pub/week9/html/._week9-bs058.html +++ b/doc/pub/week9/html/._week9-bs058.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,18 @@

     

     

     

    -

    Problems with measuring performance

    +

    Problems with interference

      -
    1. Accurate, reproducible performance measurement is hard
    2. -
    3. Think carefully about your experiment:
    4. -
    5. What is it, precisely, that you want to measure?
    6. -
    7. How representative is your test to the situation that you are trying to measure?
    8. +
    9. Other activities are sharing your processor
    10. +
        +
      • Operating system, system demons, other users
      • +
      • Some parts of the hardware do not always perform with exactly the same performance
      • +
      +
    11. Make multiple tests and report
    12. +
    13. Easy choices include
    14. +
        +
      • Average tests represent what users might observe over time
      • +

    @@ -678,7 +689,7 @@

    Problems with measur
  • 67
  • 68
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs059.html b/doc/pub/week9/html/._week9-bs059.html index 17184cda..5cfcdab0 100644 --- a/doc/pub/week9/html/._week9-bs059.html +++ b/doc/pub/week9/html/._week9-bs059.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,36 +651,13 @@

     

     

     

    -

    Thomas algorithm for tridiagonal linear algebra equations

    -
    -
    - -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -
    -
    - - +

    Problems with measuring performance

    +
      +
    1. Accurate, reproducible performance measurement is hard
    2. +
    3. Think carefully about your experiment:
    4. +
    5. What is it, precisely, that you want to measure?
    6. +
    7. How representative is your test to the situation that you are trying to measure?
    8. +

    diff --git a/doc/pub/week9/html/._week9-bs060.html b/doc/pub/week9/html/._week9-bs060.html index e66e5ddf..0ddb7005 100644 --- a/doc/pub/week9/html/._week9-bs060.html +++ b/doc/pub/week9/html/._week9-bs060.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,34 +651,19 @@

     

     

     

    -

    Thomas algorithm, forward substitution

    +

    Thomas algorithm for tridiagonal linear algebra equations

    -

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    -$$ - a_i = 0, -$$ - - -$$ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -$$ - -

    and

    -$$ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. -$$ - -

    At this point the simplified equation, with only an upper triangular matrix takes the form

    $$ \left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} + b_0 & c_0 & & & \\ + a_0 & b_1 & c_1 & & \\ + & & \ddots & & \\ + & & a_{m-3} & b_{m-2} & c_{m-2} \\ + & & & a_{m-2} & b_{m-1} + \end{array} \right) +\left( \begin{array}{c} x_0 \\ x_1 \\ \vdots \\ @@ -716,7 +706,7 @@

    Thomas algorithm,
  • 69
  • 70
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs061.html b/doc/pub/week9/html/._week9-bs061.html index 49cf568e..6b545a8c 100644 --- a/doc/pub/week9/html/._week9-bs061.html +++ b/doc/pub/week9/html/._week9-bs061.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,23 +651,46 @@

     

     

     

    -

    Thomas algorithm, backward substitution

    +

    Thomas algorithm, forward substitution

    -

    The next step is the backward substitution step. The last row is multiplied by \( c_{N-3}/b_{N-2} \) and subtracted from the second to last row, thus eliminating \( c_{N-3} \) from the second to last row. The general backward substitution procedure is

    +

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    +$$ + a_i = 0, $$ - c_i = 0, + + +$$ + b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, $$ -

    and

    +

    and

    $$ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i + f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. $$ -

    All that remains to be computed is the solution, which is the very straightforward process of

    +

    At this point the simplified equation, with only an upper triangular matrix, takes the form

    $$ -x_i = \frac{f_i}{b_i} +\left( \begin{array}{ccccc} + b_0 & c_0 & & & \\ + & b_1 & c_1 & & \\ + & & \ddots & & \\ + & & & b_{m-2} & c_{m-2} \\ + & & & & b_{m-1} + \end{array} \right)\left( \begin{array}{c} + x_0 \\ + x_1 \\ + \vdots \\ + x_{m-2} \\ + x_{m-1} + \end{array} \right)=\left( \begin{array}{c} + f_0 \\ + f_1 \\ + \vdots \\ + f_{m-2} \\ + f_{m-1} \\ + \end{array} \right) $$
    @@ -693,7 +721,7 @@

    Thomas algorithm,
  • 70
  • 71
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs062.html b/doc/pub/week9/html/._week9-bs062.html index dd041aa0..f30523bb 100644 --- a/doc/pub/week9/html/._week9-bs062.html +++ b/doc/pub/week9/html/._week9-bs062.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,60 +651,24 @@

     

     

     

    -

    Thomas algorithm and counting of operations (floating point and memory)

    +

    Thomas algorithm, backward substitution

    +

    The next step is the backward substitution step. The last row is multiplied by \( c_{N-3}/b_{N-2} \) and subtracted from the second to last row, thus eliminating \( c_{N-3} \) from the second to last row. The general backward substitution procedure is

    +$$ + c_i = 0, +$$ -

    In this specific case we have the following memory and floating-point operations

    - -
      -
    • Memory Reads: \( 14(N-2) \);
    • -
    • Memory Writes: \( 4(N-2) \);
    • -
    • Subtractions: \( 3(N-2) \);
    • -
    • Multiplications: \( 3(N-2) \);
    • -
    • Divisions: \( 4(N-2) \).
    • -
    -
    -
    - +

    and

    +$$ + f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i +$$ -
    -
    - - - -
    -
    -
    -
    -
    -
    // Forward substitution    
    -// Note that we can simplify by precalculating a[i-1]/b[i-1]
    -  for (int i=1; i < n; i++) {
    -     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    -     f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1];
    -  }
    -  x[n-1] = f[n-1] / b[n-1];
    -  // Backwards substitution                                                           
    -  for (int i = n-2; i >= 0; i--) {
    -     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    -     x[i] = f[i]/b[i];
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    All that remains to be computed is the solution, which is the very straightforward process of

    +$$ +x_i = \frac{f_i}{b_i} +$$
    @@ -729,7 +698,7 @@

    71
  • 72
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs063.html b/doc/pub/week9/html/._week9-bs063.html index 81516243..b94aa0bd 100644 --- a/doc/pub/week9/html/._week9-bs063.html +++ b/doc/pub/week9/html/._week9-bs063.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,62 +651,46 @@

     

     

     

    -

    Example: Transpose of a matrix

    +

    Thomas algorithm and counting of operations (floating point and memory)

    +
    +
    + +

    In this specific case we have the following counts of memory and floating-point operations

    - +
      +
    • Memory Reads: \( 14(N-2) \);
    • +
    • Memory Writes: \( 4(N-2) \);
    • +
    • Subtractions: \( 3(N-2) \);
    • +
    • Multiplications: \( 3(N-2) \);
    • +
    • Divisions: \( 4(N-2) \).
    • +
    +
    +
    + + +
    +
    + + +
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double **A, **B;
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -  }
    -  // Set up values for matrix A
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      A[i][j] =  cos(i*1.0)*sin(j*3.0);
    -    }
    -  }
    -  clock_t start, finish;
    -  start = clock();
    -  // Then compute the transpose
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      B[i][j]= A[j][i];
    -    }
    -  }
    -
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for setting up transpose of matrix=" << timeused  << endl;
    -
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  return 0;
    -}
    +  
    // Forward substitution    
    +// Note that we can simplify by precalculating a[i-1]/b[i-1]
    +  for (int i=1; i < n; i++) {
    +     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    +     f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1];
    +  }
    +  x[n-1] = f[n-1] / b[n-1];
    +  // Backwards substitution                                                           
    +  for (int i = n-2; i >= 0; i--) {
    +     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    +     x[i] = f[i]/b[i];
    +  }
     
    @@ -716,6 +705,8 @@

    72
  • 73
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs064.html b/doc/pub/week9/html/._week9-bs064.html index 7d9d3169..e86d86b7 100644 --- a/doc/pub/week9/html/._week9-bs064.html +++ b/doc/pub/week9/html/._week9-bs064.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,11 +651,10 @@

     

     

     

    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain C++ memory allocation. At the end it computes the Frobenius norm.

    +

    Example: Transpose of a matrix

    - +
    @@ -667,58 +671,40 @@

    73
  • 74
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs065.html b/doc/pub/week9/html/._week9-bs065.html index 48608261..b586773f 100644 --- a/doc/pub/week9/html/._week9-bs065.html +++ b/doc/pub/week9/html/._week9-bs065.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,94 @@

     

     

     

    -

    How do we define speedup? Simplest form

    -
    -
    - -
      -
    • Speedup measures the ratio of performance between two objects
    • -
    • Versions of same code, with different number of processors
    • -
    • Serial and vector versions
    • -
    • Try different programing languages, c++ and Fortran
    • -
    • Two algorithms computing the same result
    • -
    +

    Matrix-matrix multiplication

    +

    This is the matrix-matrix multiplication code with plain C++ memory allocation. At the end it computes the Frobenius norm.

    + + + +
    +
    +
    +
    +
    +
    #include <cstdlib>
    +#include <iostream>
    +#include <cmath>
    +#include <iomanip>
    +#include "time.h"
    +
    +using namespace std; // note use of namespace
    +int main (int argc, char* argv[]) // times C = B*A for n x n matrices and prints the Frobenius norm of C
    +{
    +  // Read the dimension n of the square matrices from the first command-line argument
    +  int n = atoi(argv[1]); // NOTE(review): argv[1] is read without checking argc
    +  double s = 1.0/sqrt( (double) n); // scale factor 1/sqrt(n) applied to the entries of A
    +  double **A, **B, **C;
    +  // Start timing (the timed region covers allocation, initialization, multiplication and the norm)
    +  clock_t start, finish;
    +  start = clock();
    +  // Allocate space for the three n x n matrices A, B and C, one row at a time
    +  A = new double*[n]; B = new double*[n]; C = new double*[n];
    +  for (int i = 0; i < n; i++){
    +    A[i] = new double[n];
    +    B[i] = new double[n];
    +    C[i] = new double[n];
    +  }
    +  // Set up values for matrix A and store its transpose in B; C is not zeroed here, it is fully assigned in the multiplication loop
    +  for (int i = 0; i < n; i++){
    +    for (int j = 0; j < n; j++) {
    +      double angle = 2.0*M_PI*i*j/ (( double ) n); // NOTE(review): M_PI is not standard C++ - confirm the compiler provides it
    +      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    +      B[j][i] =  A[i][j];
    +    }
    +  }
    +  // Then perform the matrix-matrix multiplication C = B*A
    +  for (int i = 0; i < n; i++){
    +    for (int j = 0; j < n; j++) {
    +      double sum = 0.0;
    +       for (int k = 0; k < n; k++) {
    +           sum += B[i][k]*A[k][j];
    +       }
    +       C[i][j] = sum;
    +    }
    +  }
    +  // Compute now the Frobenius norm of C: square root of the sum of its squared entries
    +  double Fsum = 0.0;
    +  for (int i = 0; i < n; i++){
    +    for (int j = 0; j < n; j++) {
    +      Fsum += C[i][j]*C[i][j];
    +    }
    +  }
    +  Fsum = sqrt(Fsum);
    +  finish = clock(); // stop timing
    +  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); // clock ticks converted to seconds
    +  cout << setiosflags(ios::showpoint | ios::uppercase);
    +  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << timeused  << endl; // setw(20) applies to the label string, not to timeused
    +  cout << "  Frobenius norm  = " << Fsum << endl;
    +  // Free up space: release every row first, then the arrays of row pointers
    +  for (int i = 0; i < n; i++){
    +    delete[] A[i];
    +    delete[] B[i];
    +    delete[] C[i];
    +  }
    +  delete[] A;
    +  delete[] B;
    +  delete[] C;
    +  return 0;
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -686,7 +767,7 @@

    How do we define
  • 74
  • 75
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs066.html b/doc/pub/week9/html/._week9-bs066.html index 455f5be4..b54fbc5d 100644 --- a/doc/pub/week9/html/._week9-bs066.html +++ b/doc/pub/week9/html/._week9-bs066.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,16 @@

     

     

     

    -

    How do we define speedup? Correct baseline

    +

    How do we define speedup? Simplest form

    -

    The key is choosing the correct baseline for comparison

    -
      -
    • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
      • -
      • For parallel applications, this is much harder:
      • -
          -
        • Choice of algorithm, decomposition, performance of baseline case etc.
        • -
        -
      +
    • Speedup measures the ratio of performance between two objects
    • +
    • Versions of same code, with different number of processors
    • +
    • Serial and vector versions
    • +
    • Try different programming languages, C++ and Fortran
    • +
    • Two algorithms computing the same result
    @@ -689,7 +691,7 @@

    How do we defi
  • 75
  • 76
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs067.html b/doc/pub/week9/html/._week9-bs067.html index f231b190..639d28f1 100644 --- a/doc/pub/week9/html/._week9-bs067.html +++ b/doc/pub/week9/html/._week9-bs067.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,19 @@

     

     

     

    -

    Parallel speedup

    +

    How do we define speedup? Correct baseline

    -

    For parallel applications, speedup is typically defined as

    +

    The key is choosing the correct baseline for comparison

      -
    • Speedup \( =T_1/T_p \)
    • -
    -

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

    +
  • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
    • -
    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    • +
    • For parallel applications, this is much harder:
    • +
        +
      • Choice of algorithm, decomposition, performance of baseline case etc.
      • +
      +
    @@ -687,7 +694,7 @@

    Parallel speedup

  • 76
  • 77
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs068.html b/doc/pub/week9/html/._week9-bs068.html index 7f0fefe9..ca5cbddd 100644 --- a/doc/pub/week9/html/._week9-bs068.html +++ b/doc/pub/week9/html/._week9-bs068.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,17 @@

     

     

     

    -

    Speedup and memory

    +

    Parallel speedup

    -

    The speedup on \( p \) processors can -be greater than \( p \) if memory usage is optimal! -Consider the case of a memorybound computation with \( M \) words of memory -

    +

    For parallel applications, speedup is typically defined as

    +
      +
    • Speedup \( =T_1/T_p \)
    • +
    +

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

      -
    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • -
    • \( T_1 \) uses the main memory bandwidth
    • -
    • \( T_p \) uses the appropriate cache bandwidth
    • +
    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    @@ -688,7 +692,7 @@

    Speedup and memory

  • 77
  • 78
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs069.html b/doc/pub/week9/html/._week9-bs069.html index 1ca12bc3..fa2eda0b 100644 --- a/doc/pub/week9/html/._week9-bs069.html +++ b/doc/pub/week9/html/._week9-bs069.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Computing the norm of a vector":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,20 +651,18 @@

     

     

     

    -

    Upper bounds on speedup

    +

    Speedup and memory

    -

    Assume that almost all parts of a code are perfectly -parallelizable (fraction \( f \)). The remainder, -fraction \( (1-f) \) cannot be parallelized at all. -

    - -

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take -time \( Wf/p \) on \( p \) processors. +

    The speedup on \( p \) processors can +be greater than \( p \) if memory usage is optimal! +Consider the case of a memory-bound computation with \( M \) words of memory

      -
    • What is the maximum possible speedup as a function of \( f \)?
    • +
    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • +
    • \( T_1 \) uses the main memory bandwidth
    • +
    • \( T_p \) uses the appropriate cache bandwidth
    @@ -690,7 +693,7 @@

    Upper bounds on speedup

  • 78
  • 79
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs070.html b/doc/pub/week9/html/._week9-bs070.html index 01108c04..1e9c8be8 100644 --- a/doc/pub/week9/html/._week9-bs070.html +++ b/doc/pub/week9/html/._week9-bs070.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Computing the norm of a vector":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,34 +651,21 @@

     

     

     

    -

    Amdahl's law

    +

    Upper bounds on speedup

    -

    On one processor we have

    -$$ -T_1 = (1-f)W + fW = W -$$ - -

    On \( p \) processors we have

    -$$ -T_p = (1-f)W + \frac{fW}{p}, -$$ - -

    resulting in a speedup of

    -$$ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -$$ - -

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    -$$ -\frac{1}{1-f}, -$$ +

    Assume that almost all parts of a code are perfectly +parallelizable (fraction \( f \)). The remainder, +fraction \( (1-f) \) cannot be parallelized at all. +

    -

    meaning that if -if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup -is \( 1/(1-.99)=100 \)! +

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take +time \( Wf/p \) on \( p \) processors.

    +
      +
    • What is the maximum possible speedup as a function of \( f \)?
    • +
    @@ -703,7 +695,7 @@

    Amdahl's law

  • 79
  • 80
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs071.html b/doc/pub/week9/html/._week9-bs071.html index fed561cb..8c108688 100644 --- a/doc/pub/week9/html/._week9-bs071.html +++ b/doc/pub/week9/html/._week9-bs071.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,33 @@

     

     

     

    -

    How much is parallelizable

    +

    Amdahl's law

    -

    If any non-parallel code slips into the -application, the parallel -performance is limited. -

    +

    On one processor we have

    +$$ +T_1 = (1-f)W + fW = W +$$ + +

    On \( p \) processors we have

    +$$ +T_p = (1-f)W + \frac{fW}{p}, +$$ + +

    resulting in a speedup of

    +$$ +\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} +$$ + +

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    +$$ +\frac{1}{1-f}, +$$ -

    In many simulations, however, the fraction of non-parallelizable work -is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. +

    meaning that if +if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup +is \( 1/(1-.99)=100 \)!

    @@ -687,7 +708,7 @@

    How much is parallelizable 80
  • 81
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs072.html b/doc/pub/week9/html/._week9-bs072.html index 8f094ac5..96560a49 100644 --- a/doc/pub/week9/html/._week9-bs072.html +++ b/doc/pub/week9/html/._week9-bs072.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,18 @@

     

     

     

    -

    Today's situation of parallel computing

    +

    How much is parallelizable

    +

    If any non-parallel code slips into the +application, the parallel +performance is limited. +

    -
      -
    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • -
    • Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.
    • -
    • Modern nodes have nowadays several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This leads often to codes which use both MPI and OpenMP.
    • -
    -

    Our lectures will focus on both MPI and OpenMP.

    +

    In many simulations, however, the fraction of non-parallelizable work +is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. +

    @@ -686,7 +692,7 @@

    Today's situatio
  • 81
  • 82
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs073.html b/doc/pub/week9/html/._week9-bs073.html index 6acdfb79..8dd27bf4 100644 --- a/doc/pub/week9/html/._week9-bs073.html +++ b/doc/pub/week9/html/._week9-bs073.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,20 +651,17 @@

     

     

     

    -

    Overhead present in parallel computing

    +

    Today's situation of parallel computing

      -
    • Uneven load balance: not all the processors can perform useful work at all time.
    • -
    • Overhead of synchronization
    • -
    • Overhead of communication
    • -
    • Extra computation due to parallelization
    • +
    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • +
    • Message passing is a mature and widely accepted programming paradigm. It often provides an efficient match to the hardware. It is primarily used for distributed memory systems, but can also be used on shared memory systems.
    • +
    • Modern nodes nowadays have several cores, which makes it interesting to use both shared memory (within a given node) and distributed memory (several nodes with communication). This often leads to codes which use both MPI and OpenMP.
    -

    Due to the above overhead and that certain parts of a sequential -algorithm cannot be parallelized we may not achieve an optimal parallelization. -

    +

    Our lectures will focus on both MPI and OpenMP.

    @@ -689,7 +691,7 @@

    Overhead present
  • 82
  • 83
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs074.html b/doc/pub/week9/html/._week9-bs074.html index 0d951384..8db9633e 100644 --- a/doc/pub/week9/html/._week9-bs074.html +++ b/doc/pub/week9/html/._week9-bs074.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,15 +651,20 @@

     

     

     

    -

    Parallelizing a sequential algorithm

    +

    Overhead present in parallel computing

      -
    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part,
    • -
    • Distribute the global work and data among \( P \) processors.
    • +
    • Uneven load balance: not all the processors can perform useful work at all times.
    • +
    • Overhead of synchronization
    • +
    • Overhead of communication
    • +
    • Extra computation due to parallelization
    +

    Due to the above overhead, and because certain parts of a sequential +algorithm cannot be parallelized, we may not achieve an optimal parallelization. +

    @@ -684,7 +694,7 @@

    Parallelizing a seq
  • 83
  • 84
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs075.html b/doc/pub/week9/html/._week9-bs075.html index 5266e16f..1ba98b6c 100644 --- a/doc/pub/week9/html/._week9-bs075.html +++ b/doc/pub/week9/html/._week9-bs075.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,14 @@

     

     

     

    -

    Strategies

    +

    Parallelizing a sequential algorithm

    +
      -
    • Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • -
    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    • +
    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part,
    • +
    • Distribute the global work and data among \( P \) processors.
    @@ -683,7 +689,7 @@

    Strategies

  • 84
  • 85
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs076.html b/doc/pub/week9/html/._week9-bs076.html index e5f746fb..4728ce93 100644 --- a/doc/pub/week9/html/._week9-bs076.html +++ b/doc/pub/week9/html/._week9-bs076.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,41 +651,14 @@

     

     

     

    -

    How do I run MPI on a PC/Laptop? MPI

    +

    Strategies

    -

    To install MPI is rather easy on hardware running unix/linux as operating systems, follow simply the instructions from the OpenMPI website. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -

      -
    • Compile with mpicxx/mpic++ or mpif90
    • +
    • Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • +
    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    - - -
    -
    -
    -
    -
    -
      # Compile and link
    -  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    -  #  run code with for example 8 processes using mpirun/mpiexec
    -  mpiexec -n 8 ./nameofprog.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    @@ -710,7 +688,7 @@

    How do I run MPI on
  • 85
  • 86
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs077.html b/doc/pub/week9/html/._week9-bs077.html index 2d14eea9..4372d24b 100644 --- a/doc/pub/week9/html/._week9-bs077.html +++ b/doc/pub/week9/html/._week9-bs077.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,17 +651,15 @@

     

     

     

    -

    Can I do it on my own PC/laptop? OpenMP installation

    +

    How do I run MPI on a PC/Laptop? MPI

    -

    If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: +

    To install MPI is rather easy on hardware running unix/linux as operating systems, follow simply the instructions from the OpenMPI website. See also subsequent slides. +When you have made sure you have installed MPI on your PC/laptop,

    -
      -
    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • -
    • For OS X users however, install libomp
    • +
    • Compile with mpicxx/mpic++ or mpif90
    @@ -665,31 +668,10 @@

    Can
    -
      brew install libomp
    -
    -
    -
    -
    -

    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and compile and link as

    - - -
    -
    -
    -
    -
    -
    c++ -o <name executable> <name program.cpp>  -lomp
    +  
      # Compile and link
    +  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    +  #  run code with for example 8 processes using mpirun/mpiexec
    +  mpiexec -n 8 ./nameofprog.x
     
    @@ -733,7 +715,7 @@

    Can
  • 86
  • 87
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs078.html b/doc/pub/week9/html/._week9-bs078.html index f3e68e76..e69a5295 100644 --- a/doc/pub/week9/html/._week9-bs078.html +++ b/doc/pub/week9/html/._week9-bs078.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,46 +651,26 @@

     

     

     

    -

    Installing MPI

    +

    Can I do it on my own PC/laptop? OpenMP installation

    -

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    - - -
    -
    -
    -
    -
    -
      sudo apt-get install libopenmpi-dev
    -  sudo apt-get install openmpi-bin
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew +

    If you wish to install MPI and OpenMP +on your laptop/PC, we recommend the following:

    +
      +
    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • +
    • For OS X users however, install libomp
    • +
    +
    -
       brew install openmpi
    +  
      brew install libomp
     
    @@ -701,7 +686,7 @@

    Installing MPI

    -

    When running an executable (code.x), run as

    +

    and compile and link as

    @@ -709,7 +694,7 @@

    Installing MPI

    -
      mpirun -n 10 ./code.x
    +  
    c++ -o <name executable> <name program.cpp>  -lomp
     
    @@ -724,8 +709,6 @@

    Installing MPI

    - -

    where we indicate that we want the number of processes to be 10.

    @@ -755,7 +738,7 @@

    Installing MPI

  • 87
  • 88
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs079.html b/doc/pub/week9/html/._week9-bs079.html index 9564c933..fae0892b 100644 --- a/doc/pub/week9/html/._week9-bs079.html +++ b/doc/pub/week9/html/._week9-bs079.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,86 @@

     

     

     

    -

    Installing MPI and using Qt

    +

    Installing MPI

    -

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    +

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    + + +
    +
    +
    +
    +
    +
      sudo apt-get install libopenmpi-dev
    +  sudo apt-get install openmpi-bin
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    For OS X users, install brew (after having installed xcode and gcc, needed for the +gfortran compiler of openmpi) and then install with brew +

    + + +
    +
    +
    +
    +
    +
       brew install openmpi
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    When running an executable (code.x), run as

    + + +
    +
    +
    +
    +
    +
      mpirun -n 10 ./code.x
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -

    You may need to tell Qt where openmpi is stored.

    +

    where we indicate that we want the number of processes to be 10.

    @@ -682,7 +760,7 @@

    Installing MPI and using Qt
  • 88
  • 89
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs080.html b/doc/pub/week9/html/._week9-bs080.html index 07a4adcb..35aea472 100644 --- a/doc/pub/week9/html/._week9-bs080.html +++ b/doc/pub/week9/html/._week9-bs080.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,25 +651,17 @@

     

     

     

    -

    What is Message Passing Interface (MPI)?

    +

    Installing MPI and using Qt

    +

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    -

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. -

    - -

    MPI programs should be able to run -on all possible machines and run all MPI implementetations without change. -

    - -

    An MPI computation is a collection of processes communicating with messages.

    +

    You may need to tell Qt where openmpi is stored.

    +

    diff --git a/doc/pub/week9/html/._week9-bs081.html b/doc/pub/week9/html/._week9-bs081.html index 6752638a..89d1ffab 100644 --- a/doc/pub/week9/html/._week9-bs081.html +++ b/doc/pub/week9/html/._week9-bs081.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,67 +651,24 @@

     

     

     

    -

    Going Parallel with MPI

    +

    What is Message Passing Interface (MPI)?

    -

    Task parallelism: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. -

    -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding +

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions +or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ +library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked +with the MPI library.

    - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    +

    MPI programs should be able to run +on all possible machines and with all MPI implementations without change. +

    - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    +

    An MPI computation is a collection of processes communicating with messages.

    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -

    @@ -733,7 +695,7 @@

    Going Parallel with MPI

  • 90
  • 91
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs082.html b/doc/pub/week9/html/._week9-bs082.html index 70705a4a..730002d1 100644 --- a/doc/pub/week9/html/._week9-bs082.html +++ b/doc/pub/week9/html/._week9-bs082.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,24 +651,64 @@

     

     

     

    -

    MPI is a library

    +

    Going Parallel with MPI

    -

    MPI is a library specification for the message passing interface, -proposed as a standard. +

    Task parallelism: the work of a global problem can be divided +into a number of independent tasks, which rarely need to synchronize. +Monte Carlo simulations or numerical integration are examples of this.

    -
      -
    • independent of hardware;
    • -
    • not a language or compiler specification;
    • -
    • not a specific implementation or product.
    • -
    -

    A message passing standard for portability and ease-of-use. -Designed for high performance. +

    MPI is a message-passing library where all the routines +have corresponding C/C++-binding

    -

    Insert communication and synchronization functions where necessary.

    + +
    +
    +
    +
    +
    +
       MPI_Command_name
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    + + +
    +
    +
    +
    +
    +
       MPI_COMMAND_NAME
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -693,7 +738,7 @@

    MPI is a library

  • 91
  • 92
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs083.html b/doc/pub/week9/html/._week9-bs083.html index 4dec0aed..ebc4c678 100644 --- a/doc/pub/week9/html/._week9-bs083.html +++ b/doc/pub/week9/html/._week9-bs083.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,62 +651,24 @@

     

     

     

    -

    Bindings to MPI routines

    +

    MPI is a library

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding +

    MPI is a library specification for the message passing interface, +proposed as a standard.

    - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +
      +
    • independent of hardware;
    • +
    • not a language or compiler specification;
    • +
    • not a specific implementation or product.
    • +
    +

    A message passing standard for portability and ease-of-use. +Designed for high performance. +

    -

    The discussion in these slides focuses on the C++ binding.

    +

    Insert communication and synchronization functions where necessary.

    @@ -731,7 +698,7 @@

    Bindings to MPI routines

  • 92
  • 93
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs084.html b/doc/pub/week9/html/._week9-bs084.html index 4ba1f0d6..cb940ddf 100644 --- a/doc/pub/week9/html/._week9-bs084.html +++ b/doc/pub/week9/html/._week9-bs084.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,15 +651,14 @@

     

     

     

    -

    Communicator

    +

    Bindings to MPI routines

    -
      -
    • A group of MPI processes with a name (context).
    • -
    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • -
    • By default the communicator contains all the MPI processes.
    • -
    + +

    MPI is a message-passing library where all the routines +have corresponding C/C++-binding +

    @@ -662,7 +666,7 @@

    Communicator

    -
      MPI_COMM_WORLD 
    +  
       MPI_Command_name
     
    @@ -678,10 +682,31 @@

    Communicator

    -
      -
    • Mechanism to identify subset of processes.
    • -
    • Promotes modular design of parallel libraries.
    • -
    +

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    + + +
    +
    +
    +
    +
    +
       MPI_COMMAND_NAME
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    The discussion in these slides focuses on the C++ binding.

    @@ -711,7 +736,7 @@

    Communicator

  • 93
  • 94
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs085.html b/doc/pub/week9/html/._week9-bs085.html index 81713681..ef8641c2 100644 --- a/doc/pub/week9/html/._week9-bs085.html +++ b/doc/pub/week9/html/._week9-bs085.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,41 @@

     

     

     

    -

    Some of the most important MPI functions

    +

    Communicator

    +
      +
    • A group of MPI processes with a name (context).
    • +
    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • +
    • By default the communicator contains all the MPI processes.
    • +
    + + +
    +
    +
    +
    +
    +
      MPI_COMM_WORLD 
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
      -
    • \( MPI\_Init \) - initiate an MPI computation
    • -
    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • -
    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • -
    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • -
    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • -
    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • -
    • \( MPI\_reduce \) or \( MPI\_Allreduce \), send and receive messages
    • +
    • Mechanism to identify subset of processes.
    • +
    • Promotes modular design of parallel libraries.
    @@ -689,7 +716,7 @@

    Some of the mos
  • 94
  • 95
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs086.html b/doc/pub/week9/html/._week9-bs086.html index 0d4d72c4..66430072 100644 --- a/doc/pub/week9/html/._week9-bs086.html +++ b/doc/pub/week9/html/._week9-bs086.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,47 +651,20 @@

     

     

     

    -

    The first MPI C/C++ program

    +

    Some of the most important MPI functions

    -

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    - - -
    -
    -
    -
    -
    -
    using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -int main (int nargs, char* args[])
    -{
    -int numprocs, my_rank;
    -//   MPI initializations
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -     << numprocs << endl;
    -//  End MPI
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +
      +
    • \( MPI\_Init \) - initiate an MPI computation
    • +
    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • +
    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • +
    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • +
    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • +
    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • +
    • \( MPI\_reduce \) or \( MPI\_Allreduce \), send and receive messages
    • +
    @@ -716,7 +694,7 @@

    95
  • 96
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs087.html b/doc/pub/week9/html/._week9-bs087.html index 134f35cc..8bc18d84 100644 --- a/doc/pub/week9/html/._week9-bs087.html +++ b/doc/pub/week9/html/._week9-bs087.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,28 +651,33 @@

     

     

     

    -

    The Fortran program

    +

    The first MPI C/C++ program

    - +

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    + +
    -
    PROGRAM hello
    -INCLUDE "mpif.h"
    -INTEGER:: size, my_rank, ierr
    -
    -CALL  MPI_INIT(ierr)
    -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    -CALL MPI_FINALIZE(ierr)
    -
    -END PROGRAM hello
    +  
    using namespace std;
    +#include <mpi.h>
    +#include <iostream>
    +int main (int nargs, char* args[])
    +{
    +int numprocs, my_rank;
    +//   MPI initializations
    +MPI_Init (&nargs, &args);
    +MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    +MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    +cout << "Hello world, I have  rank " << my_rank << " out of " 
    +     << numprocs << endl;
    +//  End MPI
    +MPI_Finalize ();
     
    @@ -711,7 +721,7 @@

    The Fortran program

  • 96
  • 97
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs088.html b/doc/pub/week9/html/._week9-bs088.html index 4475cf1e..7756a36b 100644 --- a/doc/pub/week9/html/._week9-bs088.html +++ b/doc/pub/week9/html/._week9-bs088.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,16 +651,42 @@

     

     

     

    -

    Note 1

    +

    The Fortran program

    -
      -
    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • -
    • It is the operating system which opts for an ordering.
    • -
    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • -
    + +
    +
    +
    +
    +
    +
    PROGRAM hello
    +INCLUDE "mpif.h"
    +INTEGER:: size, my_rank, ierr
    +
    +CALL  MPI_INIT(ierr)
    +CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    +CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    +WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    +CALL MPI_FINALIZE(ierr)
    +
    +END PROGRAM hello
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -685,7 +716,7 @@

    Note 1

  • 97
  • 98
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs089.html b/doc/pub/week9/html/._week9-bs089.html index 64cbb14f..fe94052b 100644 --- a/doc/pub/week9/html/._week9-bs089.html +++ b/doc/pub/week9/html/._week9-bs089.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,44 +651,16 @@

     

     

     

    -

    Ordered output with MPIBarrier

    +

    Note 1

    - - -
    -
    -
    -
    -
    -
    int main (int nargs, char* args[])
    -{
    - int numprocs, my_rank, i;
    - MPI_Init (&nargs, &args);
    - MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    - for (i = 0; i < numprocs; i++) {}
    - MPI_Barrier (MPI_COMM_WORLD);
    - if (i == my_rank) {
    - cout << "Hello world, I have  rank " << my_rank << 
    -        " out of " << numprocs << endl;}
    -      MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +
      +
    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • +
    • It is the operating system which opts for an ordering.
    • +
    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • +
    @@ -713,7 +690,7 @@

    98
  • 99
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs090.html b/doc/pub/week9/html/._week9-bs090.html index ad85091c..6797164e 100644 --- a/doc/pub/week9/html/._week9-bs090.html +++ b/doc/pub/week9/html/._week9-bs090.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,44 @@

     

     

     

    -

    Note 2

    +

    Ordered output with MPIBarrier

    -
      -
    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • -
    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • -
    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • -
    -

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -

    + + + +
    +
    +
    +
    +
    +
    int main (int nargs, char* args[])
    +{
    + int numprocs, my_rank, i;
    + MPI_Init (&nargs, &args);
    + MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    + MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    + for (i = 0; i < numprocs; i++) {}
    + MPI_Barrier (MPI_COMM_WORLD);
    + if (i == my_rank) {
    + cout << "Hello world, I have  rank " << my_rank << 
    +        " out of " << numprocs << endl;}
    +      MPI_Finalize ();
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -688,7 +718,7 @@

    Note 2

  • 99
  • 100
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs091.html b/doc/pub/week9/html/._week9-bs091.html index 2dfc5223..13af8105 100644 --- a/doc/pub/week9/html/._week9-bs091.html +++ b/doc/pub/week9/html/._week9-bs091.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,47 +651,19 @@

     

     

     

    -

    Ordered output

    +

    Note 2

    - - - -
    -
    -
    -
    -
    -
    .....
    -int numprocs, my_rank, flag;
    -MPI_Status status;
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -if (my_rank > 0)
    -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    -           MPI_COMM_WORLD, &status);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -<< numprocs << endl;
    -if (my_rank < numprocs-1)
    -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    -          100, MPI_COMM_WORLD);
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +
      +
    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • +
    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • +
    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • +
    +

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there +are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized +action. +

    @@ -716,7 +693,7 @@

    100
  • 101
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs092.html b/doc/pub/week9/html/._week9-bs092.html index ae90d140..44c0d25c 100644 --- a/doc/pub/week9/html/._week9-bs092.html +++ b/doc/pub/week9/html/._week9-bs092.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,24 +651,33 @@

     

     

     

    -

    Note 3

    +

    Ordered output

    -

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ -is defined as -

    - +
    -
    int MPI_Send(void *buf, int count, 
    -             MPI_Datatype datatype, 
    -             int dest, int tag, MPI_Comm comm)}
    +  
    .....
    +int numprocs, my_rank, flag;
    +MPI_Status status;
    +MPI_Init (&nargs, &args);
    +MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    +MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    +if (my_rank > 0)
    +MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    +           MPI_COMM_WORLD, &status);
    +cout << "Hello world, I have  rank " << my_rank << " out of " 
    +<< numprocs << endl;
    +if (my_rank < numprocs-1)
    +MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    +          100, MPI_COMM_WORLD);
    +MPI_Finalize ();
     
    @@ -678,16 +692,6 @@

    Note 3

    - -

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable buf is the variable we wish to send while count -is the number of variables we are passing. If we are passing only a single value, this should be 1. -

    - -

    If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) -since we are actually passing 100 values. -

    @@ -717,7 +721,7 @@

    Note 3

  • 101
  • 102
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs093.html b/doc/pub/week9/html/._week9-bs093.html index f2e07cd3..e098f294 100644 --- a/doc/pub/week9/html/._week9-bs093.html +++ b/doc/pub/week9/html/._week9-bs093.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,13 @@

     

     

     

    -

    Note 4

    +

    Note 3

    -

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) -is similar to the send call. +

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ +is defined as

    @@ -661,9 +666,9 @@

    Note 4

    -
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    -            int source, 
    -            int tag, MPI_Comm comm, MPI_Status *status )
    +  
    int MPI_Send(void *buf, int count, 
    +             MPI_Datatype datatype, 
    +             int dest, int tag, MPI_Comm comm)
     
    @@ -679,18 +684,14 @@

    Note 4

    -

    The arguments that are different from those in MPI\_SEND are -buf which is the name of the variable where you will be storing the received data, -source which replaces the destination in the send command. This is the return ID of the sender. -

    - -

    Finally, we have used \( MPI\_Status\_status \), -where one can check if the receive was completed. +

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. +The variable buf is the variable we wish to send while count +is the number of variables we are passing. If we are passing only a single value, this should be 1.

    -

    The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. +

    If we transfer an array, it is the overall size of the array. +For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) +since we are actually passing 100 values.

    @@ -721,7 +722,7 @@

    Note 4

  • 102
  • 103
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs094.html b/doc/pub/week9/html/._week9-bs094.html index 62f58770..fe42c604 100644 --- a/doc/pub/week9/html/._week9-bs094.html +++ b/doc/pub/week9/html/._week9-bs094.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,20 +651,52 @@

     

     

     

    -

    Numerical integration in parallel

    +

    Note 4

    -
      -
    • The code example computes \( \pi \) using the trapezoidal rules.
    • -
    • The trapezoidal rule
    • -
    -$$ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -$$ +

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) +is similar to the send call. +

    + + +
    +
    +
    +
    +
    +
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    +            int source, 
    +            int tag, MPI_Comm comm, MPI_Status *status )
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    The arguments that are different from those in MPI\_SEND are +buf which is the name of the variable where you will be storing the received data, +source which replaces the destination in the send command. This is the return ID of the sender. +

    + +

    Finally, we have used \( MPI\_Status\_status \), +where one can check if the receive was completed. +

    -

    Click on this link for the full program.

    +

    The output of this code is the same as the previous example, but now +process 0 sends a message to process 1, which forwards it further +to process 2, and so forth. +

    @@ -689,7 +726,7 @@

    103
  • 104
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs095.html b/doc/pub/week9/html/._week9-bs095.html index a463281d..1d8bb287 100644 --- a/doc/pub/week9/html/._week9-bs095.html +++ b/doc/pub/week9/html/._week9-bs095.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,48 +651,20 @@

     

     

     

    -

    Dissection of trapezoidal rule with \( MPI\_reduce \)

    +

    Numerical integration in parallel

    +
      +
    • The code example computes \( \pi \) using the trapezoidal rules.
    • +
    • The trapezoidal rule
    • +
    +$$ + I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). +$$ - -
    -
    -
    -
    -
    -
    //    Trapezoidal rule and numerical integration usign MPI
    -using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -
    -//     Here we define various functions called by the main program
    -
    -double int_function(double );
    -double trapezoidal_rule(double , double , int , double (*)(double));
    -
    -//   Main function begins here
    -int main (int nargs, char* args[])
    -{
    -  int n, local_n, numprocs, my_rank; 
    -  double a, b, h, local_a, local_b, total_sum, local_sum;   
    -  double  time_start, time_end, total_time;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    Click on this link for the full program.

    @@ -717,7 +694,7 @@

    Dissectio
  • 104
  • 105
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs096.html b/doc/pub/week9/html/._week9-bs096.html index 4f371e52..75ada8f0 100644 --- a/doc/pub/week9/html/._week9-bs096.html +++ b/doc/pub/week9/html/._week9-bs096.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Dissection of trapezoidal rule

    +

    Dissection of trapezoidal rule with \( MPI\_reduce \)

    @@ -658,20 +663,22 @@

    Dissection of trapezoidal
    -
      //  MPI initializations
    -  MPI_Init (&nargs, &args);
    -  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -  time_start = MPI_Wtime();
    -  //  Fixed values for a, b and n 
    -  a = 0.0 ; b = 1.0;  n = 1000;
    -  h = (b-a)/n;    // h is the same for all processes 
    -  local_n = n/numprocs;  
    -  // make sure n > numprocs, else integer division gives zero
    -  // Length of each process' interval of
    -  // integration = local_n*h.  
    -  local_a = a + my_rank*local_n*h;
    -  local_b = local_a + local_n*h;
    +  
    //    Trapezoidal rule and numerical integration usign MPI
    +using namespace std;
    +#include <mpi.h>
    +#include <iostream>
    +
    +//     Here we define various functions called by the main program
    +
    +double int_function(double );
    +double trapezoidal_rule(double , double , int , double (*)(double));
    +
    +//   Main function begins here
    +int main (int nargs, char* args[])
    +{
    +  int n, local_n, numprocs, my_rank; 
    +  double a, b, h, local_a, local_b, total_sum, local_sum;   
    +  double  time_start, time_end, total_time;
     
    @@ -715,7 +722,7 @@

    Dissection of trapezoidal
  • 105
  • 106
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs097.html b/doc/pub/week9/html/._week9-bs097.html index 2e93defe..fe035e74 100644 --- a/doc/pub/week9/html/._week9-bs097.html +++ b/doc/pub/week9/html/._week9-bs097.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Integrating with MPI

    +

    Dissection of trapezoidal rule

    @@ -658,22 +663,20 @@

    Integrating with MPI

    -
      total_sum = 0.0;
    -  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    -                               &int_function); 
    -  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    -              MPI_SUM, 0, MPI_COMM_WORLD);
    -  time_end = MPI_Wtime();
    -  total_time = time_end-time_start;
    -  if ( my_rank == 0) {
    -    cout << "Trapezoidal rule = " <<  total_sum << endl;
    -    cout << "Time = " <<  total_time  
    -         << " on number of processors: "  << numprocs  << endl;
    -  }
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  // end of main program
    +  
      //  MPI initializations
    +  MPI_Init (&nargs, &args);
    +  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    +  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    +  time_start = MPI_Wtime();
    +  //  Fixed values for a, b and n 
    +  a = 0.0 ; b = 1.0;  n = 1000;
    +  h = (b-a)/n;    // h is the same for all processes 
    +  local_n = n/numprocs;  
    +  // make sure n > numprocs, else integer division gives zero
    +  // Length of each process' interval of
    +  // integration = local_n*h.  
    +  local_a = a + my_rank*local_n*h;
    +  local_b = local_a + local_n*h;
     
    @@ -717,7 +720,7 @@

    Integrating with MPI

  • 106
  • 107
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs098.html b/doc/pub/week9/html/._week9-bs098.html index 896e921e..7c64e909 100644 --- a/doc/pub/week9/html/._week9-bs098.html +++ b/doc/pub/week9/html/._week9-bs098.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,11 @@

     

     

     

    -

    How do I use \( MPI\_reduce \)?

    +

    Integrating with MPI

    -

    Here we have used

    @@ -659,8 +663,22 @@

    How do I use \( MPI\_reduce \)?
    -
    MPI_reduce( void *senddata, void* resultdata, int count, 
    -     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
    +  
      total_sum = 0.0;
    +  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    +                               &int_function); 
    +  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    +              MPI_SUM, 0, MPI_COMM_WORLD);
    +  time_end = MPI_Wtime();
    +  total_time = time_end-time_start;
    +  if ( my_rank == 0) {
    +    cout << "Trapezoidal rule = " <<  total_sum << endl;
    +    cout << "Time = " <<  total_time  
    +         << " on number of processors: "  << numprocs  << endl;
    +  }
    +  // End MPI
    +  MPI_Finalize ();  
    +  return 0;
    +}  // end of main program
     
    @@ -675,17 +693,6 @@

    How do I use \( MPI\_reduce \)?

    - -

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable \( count \) represents the total dimensionality, 1 in case of just one variable, -while \( MPI\_Datatype \) -defines the type of variable which is sent and received. -

    - -

    The new feature is \( MPI\_Op \). It defines the type -of operation we want to do. -

    @@ -715,7 +722,7 @@

    How do I use \( MPI\_reduce \)?
  • 107
  • 108
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs099.html b/doc/pub/week9/html/._week9-bs099.html index 5f9228b0..bacc3a90 100644 --- a/doc/pub/week9/html/._week9-bs099.html +++ b/doc/pub/week9/html/._week9-bs099.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,18 +651,12 @@

     

     

     

    -

    More on \( MPI\_Reduce \)

    +

    How do I use \( MPI\_reduce \)?

    -

    In our case, since we are summing -the rectangle contributions from every process we define \( MPI\_Op = MPI\_SUM \). -If we have an array or matrix we can search for the largest og smallest element by sending either \( MPI\_MAX \) or -\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer -\( MPI\_MAXLOC \) or \( MPI\_MINOC \). If we want the product we write \( MPI\_PROD \). -

    -

    \( MPI\_Allreduce \) is defined as

    +

    Here we have used

    @@ -665,8 +664,8 @@

    More on \( MPI\_Reduce \)

    -
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    -          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
    +  
    MPI_reduce( void *senddata, void* resultdata, int count, 
    +     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
     
    @@ -681,6 +680,17 @@

    More on \( MPI\_Reduce \)

    + +

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address +of the variable or the first element of an array. If they are arrays they need to have the same size. +The variable \( count \) represents the total dimensionality, 1 in case of just one variable, +while \( MPI\_Datatype \) +defines the type of variable which is sent and received. +

    + +

    The new feature is \( MPI\_Op \). It defines the type +of operation we want to do. +

    @@ -710,7 +720,7 @@

    More on \( MPI\_Reduce \)

  • 108
  • 109
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs100.html b/doc/pub/week9/html/._week9-bs100.html index e4e70a3c..7824b404 100644 --- a/doc/pub/week9/html/._week9-bs100.html +++ b/doc/pub/week9/html/._week9-bs100.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,27 +651,27 @@

     

     

     

    -

    Dissection of trapezoidal rule

    +

    More on \( MPI\_Reduce \)

    - -

    We use \( MPI\_reduce \) to collect data from each process. Note also the use of the function -\( MPI\_Wtime \). +

    In our case, since we are summing +the rectangle contributions from every process we define \( MPI\_Op = MPI\_SUM \). +If we have an array or matrix we can search for the largest or smallest element by sending either \( MPI\_MAX \) or +\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer +\( MPI\_MAXLOC \) or \( MPI\_MINLOC \). If we want the product we write \( MPI\_PROD \).

    +

    \( MPI\_Allreduce \) is defined as

    +
    -
    //  this function defines the function to integrate
    -double int_function(double x)
    -{
    -  double value = 4./(1.+x*x);
    -  return value;
    -} // end of function to evaluate
    +  
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    +          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
     
    @@ -710,7 +715,7 @@

    Dissection of trapezoidal
  • 109
  • 110
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs101.html b/doc/pub/week9/html/._week9-bs101.html index 6370ee56..18efcbb3 100644 --- a/doc/pub/week9/html/._week9-bs101.html +++ b/doc/pub/week9/html/._week9-bs101.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -651,30 +656,22 @@

    Dissection of trapezoidal
    +

    We use \( MPI\_reduce \) to collect data from each process. Note also the use of the function +\( MPI\_Wtime \). +

    +
    -
    //  this function defines the trapezoidal rule
    -double trapezoidal_rule(double a, double b, int n, 
    -                         double (*func)(double))
    +  
    //  this function defines the function to integrate
    +double int_function(double x)
     {
    -  double trapez_sum;
    -  double fa, fb, x, step;
    -  int    j;
    -  step=(b-a)/((double) n);
    -  fa=(*func)(a)/2. ;
    -  fb=(*func)(b)/2. ;
    -  trapez_sum=0.;
    -  for (j=1; j <= n-1; j++){
    -    x=j*step+a;
    -    trapez_sum+=(*func)(x);
    -  }
    -  trapez_sum=(trapez_sum+fb+fa)*step;
    -  return trapez_sum;
    -}  // end trapezoidal_rule 
    +  double value = 4./(1.+x*x);
    +  return value;
    +} // end of function to evaluate
     
    @@ -718,7 +715,7 @@

    Dissection of trapezoidal
  • 110
  • 111
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs102.html b/doc/pub/week9/html/._week9-bs102.html index e3f6cc51..9cd81039 100644 --- a/doc/pub/week9/html/._week9-bs102.html +++ b/doc/pub/week9/html/._week9-bs102.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    The quantum dot program for two electrons

    +

    Dissection of trapezoidal rule

    @@ -657,462 +662,24 @@

    -
    // Variational Monte Carlo for atoms with importance sampling, slater det
    -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    -#include "mpi.h"
    -#include <cmath>
    -#include <random>
    -#include <string>
    -#include <iostream>
    -#include <fstream>
    -#include <iomanip>
    -#include "vectormatrixclass.h"
    -
    -using namespace  std;
    -// output file as global variable
    -ofstream ofile;  
    -// the step length and its squared inverse for the second derivative 
    -//  Here we define global variables  used in various functions
    -//  These can be changed by using classes
    -int Dimension = 2; 
    -int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    -
    -// declaration of functions 
    -
    -// The Mc sampling for the variational Monte Carlo 
    -void  MonteCarloSampling(int, double &, double &, Vector &);
    -
    -// The variational wave function
    -double  WaveFunction(Matrix &, Vector &);
    -
    -// The local energy 
    -double  LocalEnergy(Matrix &, Vector &);
    -
    -// The quantum force
    -void  QuantumForce(Matrix &, Matrix &, Vector &);
    -
    -
    -// inline function for single-particle wave function
    -inline double SPwavefunction(double r, double alpha) { 
    -   return exp(-alpha*r*0.5);
    -}
    -
    -// inline function for derivative of single-particle wave function
    -inline double DerivativeSPwavefunction(double r, double alpha) { 
    -  return -r*alpha;
    -}
    -
    -// function for absolute value of relative distance
    -double RelativeDistance(Matrix &r, int i, int j) { 
    -      double r_ij = 0;  
    -      for (int k = 0; k < Dimension; k++) { 
    -	r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));
    -      }
    -      return sqrt(r_ij); 
    -}
    -
    -// inline function for derivative of Jastrow factor
    -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    -  return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));
    -}
    -
    -// function for square of position of single particle
    -double singleparticle_pos2(Matrix &r, int i) { 
    -    double r_single_particle = 0;
    -    for (int j = 0; j < Dimension; j++) { 
    -      r_single_particle  += r(i,j)*r(i,j);
    -    }
    -    return r_single_particle;
    -}
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    -
    -static double sqrarg;
    -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    -
    -
    -static double maxarg1,maxarg2;
    -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    -        (maxarg1) : (maxarg2))
    -
    -
    -// Begin of main program   
    -
    -int main(int argc, char* argv[])
    -{
    -
    -  //  MPI initializations
    -  int NumberProcesses, MyRank, NumberMCsamples;
    -  MPI_Init (&argc, &argv);
    -  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    -  double StartTime = MPI_Wtime();
    -  if (MyRank == 0 && argc <= 1) {
    -    cout << "Bad Usage: " << argv[0] << 
    -      " Read also output file on same line and number of Monte Carlo cycles" << endl;
    -  }
    -  // Read filename and number of Monte Carlo cycles from the command line
    -  if (MyRank == 0 && argc > 2) {
    -    string filename = argv[1]; // first command line argument after name of program
    -    NumberMCsamples  = atoi(argv[2]);
    -    string fileout = filename;
    -    string argument = to_string(NumberMCsamples);
    -    // Final filename as filename+NumberMCsamples
    -    fileout.append(argument);
    -    ofile.open(fileout);
    -  }
    -  // broadcast the number of  Monte Carlo samples
    -  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    -  // Two variational parameters only
    -  Vector VariationalParameters(2);
    -  int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; 
    -  // Loop over variational parameters
    -  for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){
    -    for (double beta = 0.1; beta <= 0.5; beta +=0.05){
    -      VariationalParameters(0) = alpha;  // value of alpha
    -      VariationalParameters(1) = beta;  // value of beta
    -      //  Do the mc sampling  and accumulate data with MPI_Reduce
    -      double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2;
    -      LocalProcessEnergy = LocalProcessEnergy2 = 0.0;
    -      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    -      //  Collect data in total averages
    -      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      // Print out results  in case of Master node, set to MyRank = 0
    -      if ( MyRank == 0) {
    -	double Energy = TotalEnergy/( (double)NumberProcesses);
    -	double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    -	double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error
    -	ofile << setiosflags(ios::showpoint | ios::uppercase);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    -	ofile << setw(15) << setprecision(8) << Energy;
    -	ofile << setw(15) << setprecision(8) << Variance;
    -	ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    -      }
    -    }
    -  }
    -  double EndTime = MPI_Wtime();
    -  double TotalTime = EndTime-StartTime;
    -  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    -  if (MyRank == 0)  ofile.close();  // close output file
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  //  end of main function
    -
    -
    -// Monte Carlo sampling with the Metropolis algorithm  
    -
    -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    -{
    -
    - // Initialize the seed and call the Mersienne algo
    -  std::random_device rd;
    -  std::mt19937_64 gen(rd());
    -  // Set up the uniform distribution for x \in [[0, 1]
    -  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    -  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    -  // diffusion constant from Schroedinger equation
    -  double D = 0.5; 
    -  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    -  // allocate matrices which contain the position of the particles  
    -  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    -  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    -  double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 0.0;
    -  //  initial trial positions
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int j = 0; j < Dimension; j++) {
    -      OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep);
    -    }
    -  }
    -  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    -  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    -  // loop over monte carlo cycles 
    -  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ 
    -    // new position 
    -    for (int i = 0; i < NumberParticles; i++) { 
    -      for (int j = 0; j < Dimension; j++) {
    -	// gaussian deviate to compute new positions using a given timestep
    -	NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -	//	NewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -      }  
    -      //  for the other particles we need to set the position to the old position since
    -      //  we move only one particle at the time
    -      for (int k = 0; k < NumberParticles; k++) {
    -	if ( k != i) {
    -	  for (int j = 0; j < Dimension; j++) {
    -	    NewPosition(k,j) = OldPosition(k,j);
    -	  }
    -	} 
    -      }
    -      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); 
    -      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    -      //  we compute the log of the ratio of the greens functions to be used in the 
    -      //  Metropolis-Hastings algorithm
    -      double GreensFunction = 0.0;            
    -      for (int j = 0; j < Dimension; j++) {
    -	GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    -	  (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    -      }
    -      GreensFunction = exp(GreensFunction);
    -      // The Metropolis test is performed by moving one particle at the time
    -      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { 
    -	for (int  j = 0; j < Dimension; j++) {
    -	  OldPosition(i,j) = NewPosition(i,j);
    -	  OldQuantumForce(i,j) = NewQuantumForce(i,j);
    -	}
    -	OldWaveFunction = NewWaveFunction;
    -      }
    -    }  //  end of loop over particles
    -    // compute local energy  
    -    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    -    // update energies
    -    Energy += DeltaE;
    -    EnergySquared += DeltaE*DeltaE;
    -  }   // end of loop over MC trials   
    -  // update the energy average and its squared 
    -  cumulative_e = Energy/NumberMCsamples;
    -  cumulative_e2 = EnergySquared/NumberMCsamples;
    -}   // end MonteCarloSampling function  
    -
    -
    -// Function to compute the squared wave function and the quantum force
    -
    -double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    -{
    -  double wf = 0.0;
    -  // full Slater determinant for two particles, replace with Slater det for more particles 
    -  wf  = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    -  // contribution from Jastrow factor
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j))));
    -    }
    -  }
    -  return wf;
    -}
    -
    -// Function to calculate the local energy without numerical derivation of kinetic energy
    -
    -double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    +  
    //  this function defines the trapezoidal rule
    +double trapezoidal_rule(double a, double b, int n, 
    +                         double (*func)(double))
     {
    -
    -  // compute the kinetic and potential energy from the single-particle part
    -  // for a many-electron system this has to be replaced by a Slater determinant
    -  // The absolute value of the interparticle length
    -  Matrix length( NumberParticles, NumberParticles);
    -  // Set up interparticle distance
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for(int j = i+1; j < NumberParticles; j++){
    -      length(i,j) = RelativeDistance(r, i, j);
    -      length(j,i) =  length(i,j);
    -    }
    -  }
    -  double KineticEnergy = 0.0;
    -  // Set up kinetic energy from Slater and Jastrow terms
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int k = 0; k < Dimension; k++) {
    -      double sum1 = 0.0; 
    -      for(int j = 0; j < NumberParticles; j++){
    -	if ( j != i) {
    -	  sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));
    -    }
    -  }
    -  KineticEnergy += -2*VariationalParameters(0)*NumberParticles;
    -  for (int i = 0; i < NumberParticles-1; i++) {
    -      for (int j = i+1; j < NumberParticles; j++) {
    -        KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) );
    -      }
    -  }
    -  KineticEnergy *= -0.5;
    -  // Set up potential energy, external potential + eventual electron-electron repulsion
    -  double PotentialEnergy = 0;
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    double DistanceSquared = singleparticle_pos2(r, i);
    -    PotentialEnergy += 0.5*DistanceSquared;  // sp energy HO part, note it has the oscillator frequency set to 1!
    -  }
    -  // Add the electron-electron repulsion
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      PotentialEnergy += 1.0/length(i,j);          
    -    }
    -  }
    -  double LocalE = KineticEnergy+PotentialEnergy;
    -  return LocalE;
    -}
    -
    -// Compute the analytical expression for the quantum force
    -void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    -{
    -  // compute the first derivative 
    -  for (int i = 0; i < NumberParticles; i++) {
    -    for (int k = 0; k < Dimension; k++) {
    -      // single-particle part, replace with Slater det for larger systems
    -      double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0));
    -      //  Jastrow factor contribution
    -      double Jsum = 0.0;
    -      for (int j = 0; j < NumberParticles; j++) {
    -	if ( j != i) {
    -	  Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      qforce(i,k) = 2.0*(Jsum+sppart);
    -    }
    -  }
    -} // end of QuantumForce function
    -
    -
    -#define ITMAX 200
    -#define EPS 3.0e-8
    -#define TOLX (4*EPS)
    -#define STPMX 100.0
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    -{
    -
    -  int check,i,its,j;
    -  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    -  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    -  Matrix hessian(n,n);
    -
    -  fp=(*func)(p);
    -  (*dfunc)(p,g);
    -  for (i = 0;i < n;i++) {
    -    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    -    hessian(i,i)=1.0;
    -    xi(i) = -g(i);
    -    sum += p(i)*p(i);
    -  }
    -  stpmax=STPMX*FMAX(sqrt(sum),(double)n);
    -  for (its=1;its<=ITMAX;its++) {
    -    *iter=its;
    -    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func);
    -    fp = *fret;
    -    for (i = 0; i< n;i++) {
    -      xi(i)=pnew(i)-p(i);
    -      p(i)=pnew(i);
    -    }
    -    test=0.0;
    -    for (i = 0;i< n;i++) {
    -      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);
    -      if (temp > test) test=temp;
    -    }
    -    if (test < TOLX) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i);
    -    (*dfunc)(p,g);
    -    test=0.0;
    -    den=FMAX(*fret,1.0);
    -    for (i=0;i<n;i++) {
    -      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den;
    -      if (temp > test) test=temp;
    -    }
    -    if (test < gtol) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i)-dg(i);
    -    for (i=0;i<n;i++) {
    -      hdg(i)=0.0;
    -      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j);
    -    }
    -    fac=fae=sumdg=sumxi=0.0;
    -    for (i=0;i<n;i++) {
    -      fac += dg(i)*xi(i);
    -      fae += dg(i)*hdg(i);
    -      sumdg += SQR(dg(i));
    -      sumxi += SQR(xi(i));
    -    }
    -    if (fac*fac > EPS*sumdg*sumxi) {
    -      fac=1.0/fac;
    -      fad=1.0/fae;
    -      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i);
    -      for (i=0;i<n;i++) {
    -	for (j=0;j<n;j++) {
    -	  hessian(i,j) += fac*xi(i)*xi(j)
    -	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j);
    -	}
    -      }
    -    }
    -    for (i=0;i<n;i++) {
    -      xi(i)=0.0;
    -      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j);
    -    }
    -  }
    -  cout << "too many iterations in dfpmin" << endl;
    -}
    -#undef ITMAX
    -#undef EPS
    -#undef TOLX
    -#undef STPMX
    -
    -#define ALF 1.0e-4
    -#define TOLX 1.0e-7
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -	    double *f, double stpmax, int *check, double (*func)(Vector &p))
    -{
    -  int i;
    -  double a,alam,alam2,alamin,b,disc,f2,fold2,rhs1,rhs2,slope,sum,temp,
    -    test,tmplam;
    -
    -  *check=0;
    -  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    -  sum=sqrt(sum);
    -  if (sum > stpmax)
    -    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    -  for (slope=0.0,i=0;i<n;i++)
    -    slope += g(i)*p(i);
    -  test=0.0;
    -  for (i=0;i<n;i++) {
    -    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    -    if (temp > test) test=temp;
    -  }
    -  alamin=TOLX/test;
    -  alam=1.0;
    -  for (;;) {
    -    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    -    *f=(*func)(x);
    -    if (alam < alamin) {
    -      for (i=0;i<n;i++) x(i)=xold(i);
    -      *check=1;
    -      return;
    -    } else if (*f <= fold+ALF*alam*slope) return;
    -    else {
    -      if (alam == 1.0)
    -	tmplam = -slope/(2.0*(*f-fold-slope));
    -      else {
    -	rhs1 = *f-fold-alam*slope;
    -	rhs2=f2-fold2-alam2*slope;
    -	a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    -	b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    -	if (a == 0.0) tmplam = -slope/(2.0*b);
    -	else {
    -	  disc=b*b-3.0*a*slope;
    -	  if (disc<0.0) cout << "Roundoff problem in lnsrch." << endl;
    -	  else tmplam=(-b+sqrt(disc))/(3.0*a);
    -	}
    -	if (tmplam>0.5*alam)
    -	  tmplam=0.5*alam;
    -      }
    -    }
    -    alam2=alam;
    -    f2 = *f;
    -    fold2=fold;
    -    alam=FMAX(tmplam,0.1*alam);
    +  double trapez_sum;
    +  double fa, fb, x, step;
    +  int    j;
    +  step=(b-a)/((double) n);
    +  fa=(*func)(a)/2. ;
    +  fb=(*func)(b)/2. ;
    +  trapez_sum=0.;
    +  for (j=1; j <= n-1; j++){
    +    x=j*step+a;
    +    trapez_sum+=(*func)(x);
       }
    -}
    -#undef ALF
    -#undef TOLX
    +  trapez_sum=(trapez_sum+fb+fa)*step;
    +  return trapez_sum;
    +}  // end trapezoidal_rule 
     
    @@ -1156,7 +723,7 @@

    111
  • 112
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs103.html b/doc/pub/week9/html/._week9-bs103.html index 8fac6fa4..d861032c 100644 --- a/doc/pub/week9/html/._week9-bs103.html +++ b/doc/pub/week9/html/._week9-bs103.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,27 +651,487 @@

     

     

     

    -

    What is OpenMP

    +

    The quantum dot program for two electrons

    -
      -
    • OpenMP provides high-level thread programming
    • -
    • Multiple cooperating threads are allowed to run simultaneously
    • -
    • Threads are created and destroyed dynamically in a fork-join pattern
    • -
        -
      • An OpenMP program consists of a number of parallel regions
      • -
      • Between two parallel regions there is only one master thread
      • -
      • In the beginning of a parallel region, a team of new threads is spawned
      • -
      -
    • The newly spawned threads work simultaneously with the master thread
    • -
    • At the end of a parallel region, the new threads are destroyed
    • -
    -

    Many good tutorials online and excellent textbook

    -
      -
    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. -
    3. Many tutorials online like OpenMP official site
    4. -
    + + +
    +
    +
    +
    +
    +
    // Variational Monte Carlo for atoms with importance sampling, slater det
    +// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    +#include "mpi.h"
    +#include <cmath>
    +#include <random>
    +#include <string>
    +#include <iostream>
    +#include <fstream>
    +#include <iomanip>
    +#include "vectormatrixclass.h"
    +
    +using namespace  std;
    +// output file as global variable
    +ofstream ofile;  
    +// the step length and its squared inverse for the second derivative 
    +//  Here we define global variables  used in various functions
    +//  These can be changed by using classes
    +int Dimension = 2; 
    +int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    +
    +// declaration of functions 
    +
    +// The Mc sampling for the variational Monte Carlo 
    +void  MonteCarloSampling(int, double &, double &, Vector &);
    +
    +// The variational wave function
    +double  WaveFunction(Matrix &, Vector &);
    +
    +// The local energy 
    +double  LocalEnergy(Matrix &, Vector &);
    +
    +// The quantum force
    +void  QuantumForce(Matrix &, Matrix &, Vector &);
    +
    +
    +// Single-particle wave function: returns exp(-alpha*r/2).
    +// WaveFunction() in this file passes the squared distance from the origin as r.
    +inline double SPwavefunction(double r, double alpha) {
    +  const double exponent = -alpha*r*0.5;
    +  return exp(exponent);
    +}
    +
    +// Returns -r*alpha. LocalEnergy() and QuantumForce() call this with a single
    +// Cartesian coordinate r(i,k) as r and add the result to the Jastrow derivative.
    +inline double DerivativeSPwavefunction(double r, double alpha) {
    +  const double derivative = -r*alpha;
    +  return derivative;
    +}
    +
    +// Euclidean distance between particles i and j, taken over the first
    +// Dimension columns of the position matrix r.
    +double RelativeDistance(Matrix &r, int i, int j) {
    +  double squared = 0;
    +  int k = 0;
    +  while (k < Dimension) {
    +    const double diff = r(i,k)-r(j,k);
    +    squared += diff*diff;
    +    ++k;
    +  }
    +  return sqrt(squared);
    +}
    +
    +// Component k of the Jastrow derivative for the pair (i, j):
    +//   (r(i,k) - r(j,k)) / (r_ij * (1 + beta*r_ij)^2),
    +// where r_ij is the distance returned by RelativeDistance(r, i, j).
    +inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    +  const double rij = RelativeDistance(r, i, j);
    +  const double separation = r(i,k)-r(j,k);
    +  return separation/(rij*pow(1.0+beta*rij,2));
    +}
    +
    +// Squared distance of particle i from the origin: the sum of r(i,j)^2 over
    +// the first Dimension columns of r.
    +double singleparticle_pos2(Matrix &r, int i) {
    +  double squared = 0;
    +  int j = 0;
    +  while (j < Dimension) {
    +    squared += r(i,j)*r(i,j);
    +    ++j;
    +  }
    +  return squared;
    +}
    +
    +void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    +		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    +
    +void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    +	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    +// Helper macros; each stores its arguments in file-scope statics so that every argument expression is evaluated exactly once.
    +static double sqrarg;  // scratch storage written by SQR
    +#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    +// SQR(a) above yields 0.0 when a compares equal to 0.0 and a*a otherwise.
    +// FMAX(a,b) below yields the larger of a and b (b when the two compare equal).
    +static double maxarg1,maxarg2;  // scratch storage written by FMAX
    +#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    +        (maxarg1) : (maxarg2))
    +
    +
    +// Begin of main program
    +//
    +// Usage: <program> <output file prefix> <number of Monte Carlo cycles per process>
    +//
    +// Rank 0 parses the command line and opens the output file, whose name is the
    +// prefix with the cycle count appended. Whether the input was usable is then
    +// broadcast, so that on bad input every process finalizes MPI and returns 1
    +// instead of running with an uninitialized cycle count and no output file.
    +// Each process samples the same grid of variational parameters (alpha, beta);
    +// the per-process averages are summed onto rank 0 with MPI_Reduce and written
    +// as one line per grid point: alpha, beta, energy, variance, standard deviation.
    +
    +int main(int argc, char* argv[])
    +{
    +
    +  //  MPI initializations
    +  int NumberProcesses, MyRank;
    +  int NumberMCsamples = 0;  // stays 0 unless rank 0 reads a positive value
    +  MPI_Init (&argc, &argv);
    +  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    +  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    +  double StartTime = MPI_Wtime();
    +  // Read filename and number of Monte Carlo cycles from the command line (rank 0 only)
    +  int InputOK = 0;
    +  if (MyRank == 0) {
    +    if (argc > 2) NumberMCsamples = atoi(argv[2]);
    +    if (NumberMCsamples <= 0) {
    +      cout << "Bad Usage: " << argv[0] <<
    +        " Read also output file on same line and number of Monte Carlo cycles" << endl;
    +    } else {
    +      string filename = argv[1]; // first command line argument after name of program
    +      string fileout = filename;
    +      // Final filename as filename+NumberMCsamples
    +      fileout.append(to_string(NumberMCsamples));
    +      ofile.open(fileout);
    +      if (ofile.is_open()) {
    +        InputOK = 1;
    +      } else {
    +        cout << "Could not open output file " << fileout << endl;
    +      }
    +    }
    +  }
    +  // Every process must take the same decision, so share rank 0's verdict
    +  MPI_Bcast (&InputOK, 1, MPI_INT, 0, MPI_COMM_WORLD);
    +  if (!InputOK) {
    +    MPI_Finalize ();
    +    return 1;
    +  }
    +  // broadcast the number of  Monte Carlo samples
    +  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    +  // Two variational parameters only
    +  Vector VariationalParameters(2);
    +  int TotalNumberMCsamples = NumberMCsamples*NumberProcesses;
    +  // Loop over variational parameters
    +  for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){
    +    for (double beta = 0.1; beta <= 0.5; beta +=0.05){
    +      VariationalParameters(0) = alpha;  // value of alpha
    +      VariationalParameters(1) = beta;  // value of beta
    +      //  Do the mc sampling  and accumulate data with MPI_Reduce
    +      double TotalEnergy = 0.0, TotalEnergySquared = 0.0;
    +      double LocalProcessEnergy = 0.0, LocalProcessEnergy2 = 0.0;
    +      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    +      //  Collect data in total averages
    +      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    +      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    +      // Print out results  in case of Master node, set to MyRank = 0
    +      if ( MyRank == 0) {
    +        // Each process contributed an average, so divide the sums by the number of processes
    +        double Energy = TotalEnergy/( (double)NumberProcesses);
    +        double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    +        double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error
    +        ofile << setiosflags(ios::showpoint | ios::uppercase);
    +        ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    +        ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    +        ofile << setw(15) << setprecision(8) << Energy;
    +        ofile << setw(15) << setprecision(8) << Variance;
    +        ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    +      }
    +    }
    +  }
    +  double EndTime = MPI_Wtime();
    +  double TotalTime = EndTime-StartTime;
    +  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    +  if (MyRank == 0)  ofile.close();  // close output file
    +  // End MPI
    +  MPI_Finalize ();
    +  return 0;
    +}  //  end of main function
    +
    +
    +// Monte Carlo sampling with the Metropolis-Hastings algorithm, moving one
    +// particle at a time with a drift term given by the quantum force.
    +//
    +//   NumberMCsamples        number of Monte Carlo cycles; each cycle proposes one
    +//                          move per particle and then samples the local energy
    +//   cumulative_e           output: local energy averaged over the cycles
    +//   cumulative_e2          output: squared local energy averaged over the cycles
    +//   VariationalParameters  forwarded unchanged to WaveFunction, QuantumForce
    +//                          and LocalEnergy
    +
    +void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    +{
    +
    +  // Seed a 64-bit Mersenne Twister engine from the system random device
    +  std::random_device rd;
    +  std::mt19937_64 gen(rd());
    +  // Uniform distribution used for the acceptance test
    +  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    +  // Standard normal distribution used for the random part of each move
    +  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    +  // diffusion constant from Schroedinger equation
    +  double D = 0.5;
    +  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    +  const double SqrtTimestep = sqrt(timestep);  // loop invariant, computed once
    +  // allocate matrices which contain the position of the particles
    +  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    +  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    +  double Energy = 0.0; double EnergySquared = 0.0;
    +  //  initial trial positions
    +  for (int i = 0; i < NumberParticles; i++) {
    +    for (int j = 0; j < Dimension; j++) {
    +      OldPosition(i,j) = Normaldistribution(gen)*SqrtTimestep;
    +    }
    +  }
    +  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    +  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    +  // loop over monte carlo cycles
    +  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){
    +    // new position
    +    for (int i = 0; i < NumberParticles; i++) {
    +      for (int j = 0; j < Dimension; j++) {
    +        // gaussian deviate to compute new positions using a given timestep
    +        NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*SqrtTimestep+OldQuantumForce(i,j)*timestep*D;
    +      }
    +      //  for the other particles we need to set the position to the old position since
    +      //  we move only one particle at the time
    +      for (int k = 0; k < NumberParticles; k++) {
    +        if ( k != i) {
    +          for (int j = 0; j < Dimension; j++) {
    +            NewPosition(k,j) = OldPosition(k,j);
    +          }
    +        }
    +      }
    +      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters);
    +      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    +      //  we compute the log of the ratio of the greens functions to be used in the
    +      //  Metropolis-Hastings algorithm
    +      double GreensFunction = 0.0;
    +      for (int j = 0; j < Dimension; j++) {
    +        GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    +          (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    +      }
    +      GreensFunction = exp(GreensFunction);
    +      // The Metropolis test is performed by moving one particle at the time
    +      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) {
    +        for (int  j = 0; j < Dimension; j++) {
    +          OldPosition(i,j) = NewPosition(i,j);
    +        }
    +        // Only particle i moved, but the Jastrow term in QuantumForce couples
    +        // the particles, so the force on every particle has changed. Copy all
    +        // rows; otherwise the next proposal uses a stale drift.
    +        for (int k = 0; k < NumberParticles; k++) {
    +          for (int j = 0; j < Dimension; j++) {
    +            OldQuantumForce(k,j) = NewQuantumForce(k,j);
    +          }
    +        }
    +        OldWaveFunction = NewWaveFunction;
    +      }
    +    }  //  end of loop over particles
    +    // compute local energy
    +    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    +    // update energies
    +    Energy += DeltaE;
    +    EnergySquared += DeltaE*DeltaE;
    +  }   // end of loop over MC trials
    +  // update the energy average and its squared
    +  cumulative_e = Energy/NumberMCsamples;
    +  cumulative_e2 = EnergySquared/NumberMCsamples;
    +}   // end MonteCarloSampling function
    +
    +
    +// Trial wave function (not squared) for the two-particle system.
    +//
    +// Returns the product of the single-particle functions of particles 0 and 1,
    +// both evaluated with VariationalParameters(0), times one Jastrow factor
    +// exp(r_ij/(1 + VariationalParameters(1)*r_ij)) per particle pair.
    +
    +double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    +{
    +  // single-particle part for two particles, replace with Slater det for more particles
    +  double value = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    +  // contribution from Jastrow factor, one term per pair i < j
    +  for (int i = 0; i < NumberParticles-1; i++) {
    +    for (int j = i+1; j < NumberParticles; j++) {
    +      const double rij = RelativeDistance(r, i, j);
    +      value *= exp(rij/((1.0+VariationalParameters(1)*rij)));
    +    }
    +  }
    +  return value;
    +}
    +
    +// Function to calculate the local energy without numerical derivation of kinetic energy
    +//
    +// r holds the particle positions, VariationalParameters(0) is used in the
    +// single-particle terms and VariationalParameters(1) in the Jastrow terms.
    +// Returns kinetic plus potential energy, where the potential is
    +// 0.5*|r_i|^2 per particle plus 1/r_ij per pair.
    +
    +double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    +{
    +  const double alpha = VariationalParameters(0);
    +  const double beta = VariationalParameters(1);
    +
    +  // Symmetric table of interparticle distances; only off-diagonal entries are set
    +  Matrix length( NumberParticles, NumberParticles);
    +  for (int i = 0; i < NumberParticles-1; i++) {
    +    for(int j = i+1; j < NumberParticles; j++){
    +      const double rij = RelativeDistance(r, i, j);
    +      length(i,j) = rij;
    +      length(j,i) = rij;
    +    }
    +  }
    +
    +  // Kinetic energy: squared first-derivative terms from the single-particle
    +  // and Jastrow parts, for a many-electron system replace by a Slater determinant
    +  double Kinetic = 0.0;
    +  for (int i = 0; i < NumberParticles; i++) {
    +    for (int k = 0; k < Dimension; k++) {
    +      double jastrowGradient = 0.0;
    +      for(int j = 0; j < NumberParticles; j++){
    +        if ( j == i) continue;
    +        jastrowGradient += JastrowDerivative(r, beta, i, j, k);
    +      }
    +      const double gradient = jastrowGradient+DerivativeSPwavefunction(r(i,k),alpha);
    +      Kinetic += gradient*gradient;
    +    }
    +  }
    +  // Second-derivative term of the single-particle part
    +  Kinetic += -2*alpha*NumberParticles;
    +  // Second-derivative term of the Jastrow part, one contribution per pair
    +  for (int i = 0; i < NumberParticles-1; i++) {
    +    for (int j = i+1; j < NumberParticles; j++) {
    +      const double rij = length(i,j);
    +      Kinetic += 2.0/(pow(1.0 + beta*rij,2))*(1.0/rij-2*beta/(1+beta*rij) );
    +    }
    +  }
    +  Kinetic *= -0.5;
    +
    +  // Potential energy: external oscillator potential, note the oscillator frequency is set to 1!
    +  double Potential = 0;
    +  for (int i = 0; i < NumberParticles; i++) {
    +    Potential += 0.5*singleparticle_pos2(r, i);
    +  }
    +  // Add the electron-electron repulsion
    +  for (int i = 0; i < NumberParticles-1; i++) {
    +    for (int j = i+1; j < NumberParticles; j++) {
    +      Potential += 1.0/length(i,j);
    +    }
    +  }
    +  return Kinetic+Potential;
    +}
    +
    +// Analytical quantum force: for every particle i and component k, writes
    +//   qforce(i,k) = 2*(sum over j != i of JastrowDerivative(r, beta, i, j, k)
    +//                    + DerivativeSPwavefunction(r(i,k), alpha))
    +// with alpha = VariationalParameters(0) and beta = VariationalParameters(1).
    +void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    +{
    +  for (int i = 0; i < NumberParticles; i++) {
    +    for (int k = 0; k < Dimension; k++) {
    +      // single-particle part, replace with Slater det for larger systems
    +      const double singleParticleTerm = DerivativeSPwavefunction(r(i,k),VariationalParameters(0));
    +      //  Jastrow factor contribution from all other particles
    +      double jastrowTerm = 0.0;
    +      for (int j = 0; j < NumberParticles; j++) {
    +        if ( j == i) continue;
    +        jastrowTerm += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    +      }
    +      qforce(i,k) = 2.0*(jastrowTerm+singleParticleTerm);
    +    }
    +  }
    +} // end of QuantumForce function
    +// dfpmin: quasi-Newton minimization of func starting from p (n components), with gradient supplied by dfunc.
    +// On return p holds the last accepted point, *iter the number of iterations started and *fret the value written by the last lnsrch call.
    +#define ITMAX 200  // maximum number of iterations of the main loop
    +#define EPS 3.0e-8  // enters TOLX and the test that guards the matrix update
    +#define TOLX (4*EPS)  // threshold on the largest relative change in p
    +#define STPMX 100.0  // scale factor for the maximum step length given to lnsrch
    +
    +void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    +	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    +{
    +// check is filled in by lnsrch but is not inspected in this function
    +  int check,i,its,j;
    +  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    +  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    +  Matrix hessian(n,n);
    +// Start: function value and gradient at p, identity matrix, first direction xi = -g
    +  fp=(*func)(p);
    +  (*dfunc)(p,g);
    +  for (i = 0;i < n;i++) {
    +    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    +    hessian(i,i)=1.0;
    +    xi(i) = -g(i);
    +    sum += p(i)*p(i);
    +  }
    +  stpmax=STPMX*FMAX(sqrt(sum),(double)n);
    +  for (its=1;its<=ITMAX;its++) {
    +    *iter=its;
    +    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func);  // line search along xi; new point in pnew, its value in *fret
    +    fp = *fret;
    +    for (i = 0; i< n;i++) {
    +      xi(i)=pnew(i)-p(i);  // xi now holds the step actually taken
    +      p(i)=pnew(i);
    +    }
    +    test=0.0;
    +    for (i = 0;i< n;i++) {
    +      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);
    +      if (temp > test) test=temp;
    +    }
    +    if (test < TOLX) {  // largest relative change in p is below TOLX: stop
    +      return;
    +    }
    +    for (i=0;i<n;i++) dg(i)=g(i);  // keep the old gradient
    +    (*dfunc)(p,g);
    +    test=0.0;
    +    den=FMAX(*fret,1.0);
    +    for (i=0;i<n;i++) {
    +      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den;
    +      if (temp > test) test=temp;
    +    }
    +    if (test < gtol) {  // largest scaled gradient component is below gtol: stop
    +      return;
    +    }
    +    for (i=0;i<n;i++) dg(i)=g(i)-dg(i);  // dg = change in gradient
    +    for (i=0;i<n;i++) {
    +      hdg(i)=0.0;
    +      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j);  // hdg = hessian*dg
    +    }
    +    fac=fae=sumdg=sumxi=0.0;
    +    for (i=0;i<n;i++) {
    +      fac += dg(i)*xi(i);
    +      fae += dg(i)*hdg(i);
    +      sumdg += SQR(dg(i));
    +      sumxi += SQR(xi(i));
    +    }
    +    if (fac*fac > EPS*sumdg*sumxi) {  // update the matrix only when fac is not too small relative to dg and xi
    +      fac=1.0/fac;
    +      fad=1.0/fae;
    +      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i);
    +      for (i=0;i<n;i++) {
    +	for (j=0;j<n;j++) {
    +	  hessian(i,j) += fac*xi(i)*xi(j)
    +	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j);
    +	}
    +      }
    +    }
    +    for (i=0;i<n;i++) {
    +      xi(i)=0.0;
    +      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j);  // next direction xi = -hessian*g
    +    }
    +  }
    +  cout << "too many iterations in dfpmin" << endl;  // reached only when ITMAX iterations ran without meeting either stopping test
    +}
    +#undef ITMAX
    +#undef EPS
    +#undef TOLX
    +#undef STPMX
    +
    +#define ALF 1.0e-4
    +#define TOLX 1.0e-7
    +
    +// Backtracking line search along direction p, starting from xold.
    +//
    +//   n       number of components
    +//   xold    starting point
    +//   fold    function value at xold, used in the sufficient-decrease test
    +//   g       gradient at xold; its dot product with p gives the initial slope
    +//   p       search direction; rescaled in place if its length exceeds stpmax
    +//   x       output: the new point xold + alam*p
    +//   f       output: function value at x
    +//   stpmax  maximum allowed length of the step
    +//   check   output: 1 if the step became smaller than the minimum step
    +//           (x is then reset to xold), 0 otherwise
    +//   func    function to be minimized
    +//
    +// The first backtrack uses a quadratic model, later ones a cubic model built
    +// from the two most recent trial steps. If the cubic's discriminant is
    +// negative the step is halved, so tmplam is never read uninitialized.
    +void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    +	    double *f, double stpmax, int *check, double (*func)(Vector &p))
    +{
    +  int i;
    +  double a,alam,alam2=0.0,alamin,b,disc,f2=0.0,fold2=0.0,rhs1,rhs2,slope,sum,temp,
    +    test,tmplam=0.0;
    +
    +  *check=0;
    +  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    +  sum=sqrt(sum);
    +  // Scale the direction down if the attempted step is too long
    +  if (sum > stpmax)
    +    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    +  for (slope=0.0,i=0;i<n;i++)
    +    slope += g(i)*p(i);
    +  test=0.0;
    +  for (i=0;i<n;i++) {
    +    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    +    if (temp > test) test=temp;
    +  }
    +  alamin=TOLX/test;  // smallest step fraction that is still tried
    +  alam=1.0;          // always try the full step first
    +  for (;;) {
    +    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    +    *f=(*func)(x);
    +    if (alam < alamin) {
    +      for (i=0;i<n;i++) x(i)=xold(i);
    +      *check=1;
    +      return;
    +    } else if (*f <= fold+ALF*alam*slope) return;
    +    else {
    +      if (alam == 1.0)
    +        tmplam = -slope/(2.0*(*f-fold-slope));
    +      else {
    +        rhs1 = *f-fold-alam*slope;
    +        rhs2=f2-fold2-alam2*slope;
    +        a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    +        b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    +        if (a == 0.0) tmplam = -slope/(2.0*b);
    +        else {
    +          disc=b*b-3.0*a*slope;
    +          if (disc<0.0) {
    +            cout << "Roundoff problem in lnsrch." << endl;
    +            // No real minimizer of the cubic model: fall back to halving the step
    +            tmplam=0.5*alam;
    +          }
    +          else tmplam=(-b+sqrt(disc))/(3.0*a);
    +        }
    +        if (tmplam>0.5*alam)
    +          tmplam=0.5*alam;
    +      }
    +    }
    +    alam2=alam;
    +    f2 = *f;
    +    fold2=fold;
    +    alam=FMAX(tmplam,0.1*alam);  // never shrink the step by more than a factor 10
    +  }
    +}
    +#undef ALF
    +#undef TOLX
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -696,7 +1161,7 @@

    What is OpenMP

  • 112
  • 113
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs104.html b/doc/pub/week9/html/._week9-bs104.html index 6e219014..fad71a10 100644 --- a/doc/pub/week9/html/._week9-bs104.html +++ b/doc/pub/week9/html/._week9-bs104.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,70 +651,27 @@

     

     

     

    -

    Getting started, things to remember

    +

    What is OpenMP

      -
    • Remember the header file
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Insert compiler directives in C++ syntax as
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp...
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Compile with for example c++ -fopenmp code.cpp
    • -
    • Execute
    • +
    • OpenMP provides high-level thread programming
    • +
    • Multiple cooperating threads are allowed to run simultaneously
    • +
    • Threads are created and destroyed dynamically in a fork-join pattern
      • -
      • Remember to assign the environment variable OMP NUM THREADS
      • -
      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      • +
      • An OpenMP program consists of a number of parallel regions
      • +
      • Between two parallel regions there is only one master thread
      • +
      • In the beginning of a parallel region, a team of new threads is spawned
      +
    • The newly spawned threads work simultaneously with the master thread
    • +
    • At the end of a parallel region, the new threads are destroyed
    +

    Many good tutorials online and excellent textbook

    +
      +
    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. +
    3. Many tutorials online like OpenMP official site
    4. +
    @@ -739,7 +701,7 @@

    Getting started, thin
  • 113
  • 114
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs105.html b/doc/pub/week9/html/._week9-bs105.html index 13aecc4f..f549b3a4 100644 --- a/doc/pub/week9/html/._week9-bs105.html +++ b/doc/pub/week9/html/._week9-bs105.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,9 +651,12 @@

     

     

     

    -

    OpenMP syntax

    +

    Getting started, things to remember

    +
    +
    +
      -
    • Mostly directives
    • +
    • Remember the header file
    @@ -657,7 +665,7 @@

    OpenMP syntax

    -
    #pragma omp construct [ clause ...]
    +  
    #include <omp.h>
     
    @@ -674,7 +682,7 @@

    OpenMP syntax

      -
    • Some functions and types
    • +
    • Insert compiler directives in C++ syntax as
    @@ -683,7 +691,7 @@

    OpenMP syntax

    -
    #include <omp.h>
    +  
    #pragma omp...
     
    @@ -700,10 +708,17 @@

    OpenMP syntax

      -
    • Most apply to a block of code
    • -
    • Specifically, a structured block
    • -
    • Enter at top, exit at bottom only, exit(), abort() permitted
    • +
    • Compile with for example c++ -fopenmp code.cpp
    • +
    • Execute
    • +
        +
      • Remember to assign the environment variable OMP NUM THREADS
      • +
      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      +
    +
    +
    + +

    diff --git a/doc/pub/week9/html/._week9-bs106.html b/doc/pub/week9/html/._week9-bs106.html index 85a3adf4..d6dcfdd2 100644 --- a/doc/pub/week9/html/._week9-bs106.html +++ b/doc/pub/week9/html/._week9-bs106.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,15 +651,63 @@

     

     

     

    -

    Different OpenMP styles of parallelism

    -

    OpenMP supports several different ways to specify thread parallelism

    +

    OpenMP syntax

    +
      +
    • Mostly directives
    • +
    + + +
    +
    +
    +
    +
    +
    #pragma omp construct [ clause ...]
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +
      +
    • Some functions and types
    • +
    + + +
    +
    +
    +
    +
    +
    #include <omp.h>
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
      -
    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • -
    • Parallel loops: Special case for loops, simplifies data parallel code
    • -
    • Task parallelism, new in OpenMP 3
    • -
    • Several ways to manage thread coordination, including Master regions and Locks
    • -
    • Memory model for shared data
    • +
    • Most apply to a block of code
    • +
    • Specifically, a structured block
    • +
    • Enter at top, exit at bottom only, exit(), abort() permitted

    @@ -681,7 +734,7 @@

    Different OpenMP
  • 115
  • 116
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs107.html b/doc/pub/week9/html/._week9-bs107.html index 83cdccb7..79a2bcec 100644 --- a/doc/pub/week9/html/._week9-bs107.html +++ b/doc/pub/week9/html/._week9-bs107.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,54 +651,16 @@

     

     

     

    -

    General code structure

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -main ()
    -{
    -int var1, var2, var3;
    -/* serial code */
    -/* ... */
    -/* start of a parallel region */
    -#pragma omp parallel private(var1, var2) shared(var3)
    -{
    -/* ... */
    -}
    -/* more serial code */
    -/* ... */
    -/* another parallel region */
    -#pragma omp parallel
    -{
    -/* ... */
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - +

    Different OpenMP styles of parallelism

    +

    OpenMP supports several different ways to specify thread parallelism

    +
      +
    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • +
    • Parallel loops: Special case for loops, simplifies data parallel code
    • +
    • Task parallelism, new in OpenMP 3
    • +
    • Several ways to manage thread coordination, including Master regions and Locks
    • +
    • Memory model for shared data
    • +

    diff --git a/doc/pub/week9/html/._week9-bs108.html b/doc/pub/week9/html/._week9-bs108.html index 8302350d..1d0b4d43 100644 --- a/doc/pub/week9/html/._week9-bs108.html +++ b/doc/pub/week9/html/._week9-bs108.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,10 @@

     

     

     

    -

    Parallel region

    +

    General code structure

    -
      -
    • A parallel region is a block of code that is executed by a team of threads
    • -
    • The following compiler directive creates a parallel region
    • -
    @@ -661,7 +662,25 @@

    Parallel region

    -
    #pragma omp parallel { ... }
    +  
    #include <omp.h>
    +main ()
    +{
    +int var1, var2, var3;
    +/* serial code */
    +/* ... */
    +/* start of a parallel region */
    +#pragma omp parallel private(var1, var2) shared(var3)
    +{
    +/* ... */
    +}
    +/* more serial code */
    +/* ... */
    +/* another parallel region */
    +#pragma omp parallel
    +{
    +/* ... */
    +}
    +}
     
    @@ -676,16 +695,6 @@

    Parallel region

    - -
      -
    • Clauses can be added at the end of the directive
    • -
    • Most often used clauses:
    • -
        -
      • default(shared) or default(none)
      • -
      • public(list of variables)
      • -
      • private(list of variables)
      • -
      -
    @@ -715,7 +724,7 @@

    Parallel region

  • 117
  • 118
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs109.html b/doc/pub/week9/html/._week9-bs109.html index 0535ac7e..f1681d76 100644 --- a/doc/pub/week9/html/._week9-bs109.html +++ b/doc/pub/week9/html/._week9-bs109.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,10 +651,14 @@

     

     

     

    -

    Hello world, not again, please!

    +

    Parallel region

    +
      +
    • A parallel region is a block of code that is executed by a team of threads
    • +
    • The following compiler directive creates a parallel region
    • +
    @@ -657,23 +666,7 @@

    Hello world, not again, ple
    -
    #include <omp.h>
    -#include <cstdio>
    -int main (int argc, char *argv[])
    -{
    -int th_id, nthreads;
    -#pragma omp parallel private(th_id) shared(nthreads)
    -{
    -th_id = omp_get_thread_num();
    -printf("Hello World from thread %d\n", th_id);
    -#pragma omp barrier
    -if ( th_id == 0 ) {
    -nthreads = omp_get_num_threads();
    -printf("There are %d threads\n",nthreads);
    -}
    -}
    -return 0;
    -}
    +  
    #pragma omp parallel { ... }
     
    @@ -688,6 +681,16 @@

    Hello world, not again, ple

    + +
      +
    • Clauses can be added at the end of the directive
    • +
    • Most often used clauses:
    • +
        +
      • default(shared) or default(none)
      • +
      • shared(list of variables)
      • +
      • private(list of variables)
      • +
      +
    @@ -717,7 +720,7 @@

    Hello world, not again, ple
  • 118
  • 119
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs110.html b/doc/pub/week9/html/._week9-bs110.html index 9be952fd..e8b32751 100644 --- a/doc/pub/week9/html/._week9-bs110.html +++ b/doc/pub/week9/html/._week9-bs110.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Hello world, yet another variant

    +

    Hello world, not again, please!

    @@ -657,17 +662,21 @@

    Hello world, yet another
    -
    #include <cstdio>
    -#include <omp.h>
    -int main(int argc, char *argv[]) 
    +  
    #include <omp.h>
    +#include <cstdio>
    +int main (int argc, char *argv[])
     {
    - omp_set_num_threads(4); 
    -#pragma omp parallel
    - {
    -   int id = omp_get_thread_num();
    -   int nproc = omp_get_num_threads(); 
    -   cout << "Hello world with id number and processes " <<  id <<  nproc << endl;
    - } 
    +int th_id, nthreads;
    +#pragma omp parallel private(th_id) shared(nthreads)
    +{
    +th_id = omp_get_thread_num();
    +printf("Hello World from thread %d\n", th_id);
    +#pragma omp barrier
    +if ( th_id == 0 ) {
    +nthreads = omp_get_num_threads();
    +printf("There are %d threads\n",nthreads);
    +}
    +}
     return 0;
     }
     
    @@ -684,37 +693,6 @@

    Hello world, yet another

    - -

    Variables declared outside of the parallel region are shared by all threads -If a variable like id is declared outside of the -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel, 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    it would have been shared by various the threads, possibly causing erroneous output

    -
      -
    • Why? What would go wrong? Why do we add possibly?
    • -

    @@ -744,7 +722,7 @@

    Hello world, yet another
  • 119
  • 120
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs111.html b/doc/pub/week9/html/._week9-bs111.html index f62dcabf..d4155535 100644 --- a/doc/pub/week9/html/._week9-bs111.html +++ b/doc/pub/week9/html/._week9-bs111.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_Reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,16 +651,74 @@

     

     

     

    -

    Important OpenMP library routines

    +

    Hello world, yet another variant

    + +
    +
    +
    +
    +
    +
    #include <cstdio>
    +#include <omp.h>
    +int main(int argc, char *argv[]) 
    +{
    + omp_set_num_threads(4); 
    +#pragma omp parallel
    + {
    +   int id = omp_get_thread_num();
    +   int nproc = omp_get_num_threads(); 
    +   cout << "Hello world with id number and processes " <<  id <<  nproc << endl;
    + } 
    +return 0;
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    Variables declared outside of the parallel region are shared by all threads +If a variable like id is declared outside of the +

    + + +
    +
    +
    +
    +
    +
    #pragma omp parallel, 
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    it would have been shared by the various threads, possibly causing erroneous output

      -
    • int omp get num threads (), returns the number of threads inside a parallel region
    • -
    • int omp get thread num (), returns the a thread for each thread inside a parallel region
    • -
    • void omp set num threads (int), sets the number of threads to be used
    • -
    • void omp set nested (int), turns nested parallelism on/off
    • +
    • Why? What would go wrong? Why do we add possibly?
    @@ -686,7 +749,7 @@

    Important OpenMP libra
  • 120
  • 121
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs112.html b/doc/pub/week9/html/._week9-bs112.html index dc1368f0..98a93ad7 100644 --- a/doc/pub/week9/html/._week9-bs112.html +++ b/doc/pub/week9/html/._week9-bs112.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_Reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,42 +651,16 @@

     

     

     

    -

    Private variables

    +

    Important OpenMP library routines

    -

    Private clause can be used to make thread- private versions of such variables:

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel private(id)
    -{
    - int id = omp_get_thread_num();
    - cout << "My thread num" << id << endl; 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
      -
    • What is their value on entry? Exit?
    • -
    • OpenMP provides ways to control that
    • -
    • Can use default(none) to require the sharing of each variable to be described
    • +
    • int omp_get_num_threads(), returns the number of threads inside a parallel region
    • +
    • int omp_get_thread_num(), returns the thread number (ID) of the calling thread inside a parallel region
    • +
    • void omp_set_num_threads(int), sets the number of threads to be used
    • +
    • void omp_set_nested(int), turns nested parallelism on/off
    @@ -712,7 +691,7 @@

    Private variables

  • 121
  • 122
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs113.html b/doc/pub/week9/html/._week9-bs113.html index 5786409f..05119049 100644 --- a/doc/pub/week9/html/._week9-bs113.html +++ b/doc/pub/week9/html/._week9-bs113.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,11 +651,11 @@

     

     

     

    -

    Master region

    +

    Private variables

    -

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    +

    The private clause can be used to make thread-private versions of such variables:

    @@ -658,13 +663,10 @@

    Master region

    -
    #pragma omp parallel 
    +  
    #pragma omp parallel private(id)
     {
    -  #pragma omp master
    -   {
    -      int id = omp_get_thread_num();
    -      cout << "My thread num" << id << endl; 
    -   } 
    + int id = omp_get_thread_num();
    + cout << "My thread num" << id << endl; 
     }
     
    @@ -680,6 +682,12 @@

    Master region

    + +
      +
    • What is their value on entry? Exit?
    • +
    • OpenMP provides ways to control that
    • +
    • Can use default(none) to require the sharing of each variable to be described
    • +
    @@ -709,7 +717,7 @@

    Master region

  • 122
  • 123
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs114.html b/doc/pub/week9/html/._week9-bs114.html index 8de6d5b0..6be465bf 100644 --- a/doc/pub/week9/html/._week9-bs114.html +++ b/doc/pub/week9/html/._week9-bs114.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,13 +651,11 @@

     

     

     

    -

    Parallel for loop

    +

    Master region

    -
      -
    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • -
    +

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    @@ -660,7 +663,14 @@

    Parallel for loop

    -
    #pragma omp for
    +  
    #pragma omp parallel 
    +{
    +  #pragma omp master
    +   {
    +      int id = omp_get_thread_num();
    +      cout << "My thread num" << id << endl; 
    +   } 
    +}
     
    @@ -675,19 +685,6 @@

    Parallel for loop

    - -
      -
    • Clauses can be added, such as
    • -
        -
      • schedule(static, chunk size)
      • -
      • schedule(dynamic, chunk size)
      • -
      • schedule(guided, chunk size) (non-deterministic allocation)
      • -
      • schedule(runtime)
      • -
      • private(list of variables)
      • -
      • reduction(operator:variable)
      • -
      • nowait
      • -
      -
    @@ -717,7 +714,7 @@

    Parallel for loop

  • 123
  • 124
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs115.html b/doc/pub/week9/html/._week9-bs115.html index 5ed23399..73a4714d 100644 --- a/doc/pub/week9/html/._week9-bs115.html +++ b/doc/pub/week9/html/._week9-bs115.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Vector norm":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,13 @@

     

     

     

    -

    Parallel computations and loops

    - +

    Parallel for loop

    -

    OpenMP provides an easy way to parallelize a loop

    +
      +
    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • +
    @@ -659,8 +665,7 @@

    Parallel computations an
    -
    #pragma omp parallel for
    -  for (i=0; i<n; i++) c[i] = a[i];
    +  
    #pragma omp for
     
    @@ -676,9 +681,18 @@

    Parallel computations an

    -

    OpenMP handles index variable (no need to declare in for loop or make private)

    - -

    Which thread does which values? Several options.

    +
      +
    • Clauses can be added, such as
    • +
        +
      • schedule(static, chunk size)
      • +
      • schedule(dynamic, chunk size)
      • +
      • schedule(guided, chunk size) (non-deterministic allocation)
      • +
      • schedule(runtime)
      • +
      • private(list of variables)
      • +
      • reduction(operator:variable)
      • +
      • nowait
      • +
      +
    @@ -708,7 +722,7 @@

    Parallel computations an
  • 124
  • 125
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs116.html b/doc/pub/week9/html/._week9-bs116.html index 604c97f6..00bc14ff 100644 --- a/doc/pub/week9/html/._week9-bs116.html +++ b/doc/pub/week9/html/._week9-bs116.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Vector norm":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,19 +651,39 @@

     

     

     

    -

    Scheduling of loop computations

    +

    Parallel computations and loops

    -

    We can let the OpenMP runtime decide. The decision is about how the loop iterates are scheduled -and OpenMP defines three choices of loop scheduling: -

    -
      -
    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. -
    3. Dynamic: Selection made at runtime
    4. -
    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. -
    +

    OpenMP provides an easy way to parallelize a loop

    + + +
    +
    +
    +
    +
    +
    #pragma omp parallel for
    +  for (i=0; i<n; i++) c[i] = a[i];
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    OpenMP handles the index variable (no need to declare it in the for loop or make it private)

    + +

    Which thread does which values? Several options.

    @@ -688,7 +713,7 @@

    Scheduling of loop comp
  • 125
  • 126
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs117.html b/doc/pub/week9/html/._week9-bs117.html index 7ff2f4e8..bf18e198 100644 --- a/doc/pub/week9/html/._week9-bs117.html +++ b/doc/pub/week9/html/._week9-bs117.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,46 +651,19 @@

     

     

     

    -

    Example code for loop scheduling

    +

    Scheduling of loop computations

    +
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    -chunk = CHUNKSIZE;
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(dynamic,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    We can let the OpenMP runtime decide. The decision is about how the loop iterates are scheduled +and OpenMP defines three choices of loop scheduling: +

    +
      +
    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. +
    3. Dynamic: Selection made at runtime
    4. +
    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. +
    @@ -715,7 +693,7 @@

    Example code for loop s
  • 126
  • 127
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs118.html b/doc/pub/week9/html/._week9-bs118.html index 65988f01..838948e0 100644 --- a/doc/pub/week9/html/._week9-bs118.html +++ b/doc/pub/week9/html/._week9-bs118.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Example code for loop scheduling, guided instead of dynamic

    +

    Example code for loop scheduling

    @@ -668,7 +673,7 @@

    = CHUNKSIZE; #pragma omp parallel shared(a,b,c,chunk) private(i) { -#pragma omp for schedule(guided,chunk) +#pragma omp for schedule(dynamic,chunk) for (i=0; i < N; i++) c[i] = a[i] + b[i]; } /* end of parallel region */ } @@ -715,7 +720,7 @@

    127
  • 128
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs119.html b/doc/pub/week9/html/._week9-bs119.html index 94d0fadf..101c7cdf 100644 --- a/doc/pub/week9/html/._week9-bs119.html +++ b/doc/pub/week9/html/._week9-bs119.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,44 +651,10 @@

     

     

     

    -

    More on Parallel for loop

    +

    Example code for loop scheduling, guided instead of dynamic

    -
      -
    • The number of loop iterations must be deterministic; break, return, exit, and goto are not allowed inside the for-loop
    • -
    • The loop index is private to each thread
    • -
    • A reduction variable is special
    • -
        -
      • During the for-loop there is a local private copy in each thread
      • -
      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • -
      -
    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • -
    - - -
    -
    -
    -
    -
    -
    // #pragma omp parallel and #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    can be combined into

    @@ -691,7 +662,21 @@

    More on Parallel for loop

    -
    #pragma omp parallel for
    +  
    #include <omp.h>
    +#define CHUNKSIZE 100
    +#define N 1000
    +int main (int argc, char *argv[])
    +{
    +int i, chunk;
    +float a[N], b[N], c[N];
    +for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    +chunk = CHUNKSIZE;
    +#pragma omp parallel shared(a,b,c,chunk) private(i)
    +{
    +#pragma omp for schedule(guided,chunk)
    +for (i=0; i < N; i++) c[i] = a[i] + b[i];
    +} /* end of parallel region */
    +}
     
    @@ -735,7 +720,7 @@

    More on Parallel for loop

    128
  • 129
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs120.html b/doc/pub/week9/html/._week9-bs120.html index e3df3b26..bfec2fc1 100644 --- a/doc/pub/week9/html/._week9-bs120.html +++ b/doc/pub/week9/html/._week9-bs120.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,20 @@

     

     

     

    -

    What can happen with this loop?

    - +

    More on Parallel for loop

    -

    What happens with code like this

    +
      +
    • The number of loop iterations must be deterministic; break, return, exit, and goto are not allowed inside the for-loop
    • +
    • The loop index is private to each thread
    • +
    • A reduction variable is special
    • +
        +
      • During the for-loop there is a local private copy in each thread
      • +
      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • +
      +
    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • +
    @@ -659,8 +672,7 @@

    What can happen with this
    -
    #pragma omp parallel for
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    +  
    // #pragma omp parallel and #pragma omp for
     
    @@ -676,7 +688,7 @@

    What can happen with this

    -

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race conditions between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    +

    can be combined into

    @@ -684,9 +696,7 @@

    What can happen with this
    -
    sum = 0.0;
    -#pragma omp parallel for reduction(+:sum)
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    +  
    #pragma omp parallel for
     
    @@ -730,7 +740,7 @@

    What can happen with this
  • 129
  • 130
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs121.html b/doc/pub/week9/html/._week9-bs121.html index 04dff569..bde5c625 100644 --- a/doc/pub/week9/html/._week9-bs121.html +++ b/doc/pub/week9/html/._week9-bs121.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code from last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can be acted upon
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can be acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,37 @@

     

     

     

    -

    Inner product

    +

    What can happen with this loop?

    +
    -$$ -\sum_{i=0}^{n-1} a_ib_i -$$ +

    What happens with code like this

    + + +
    +
    +
    +
    +
    +
    #pragma omp parallel for
    +for (i=0; i<n; i++) sum += a[i]*a[i];
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    @@ -661,13 +689,9 @@

    Inner product

    -
    int i;
    -double sum = 0.;
    -/* allocating and initializing arrays */
    -/* ... */
    -#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    - for (i=0; i<N; i++) sum += a[i]*b[i];
    -}
    +  
    sum = 0.0;
    +#pragma omp parallel for reduction(+:sum)
    +for (i=0; i<n; i++) sum += a[i]*a[i];
     
    @@ -711,7 +735,7 @@

    Inner product

  • 130
  • 131
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs122.html b/doc/pub/week9/html/._week9-bs122.html index f39aa7a7..bf3698cc 100644 --- a/doc/pub/week9/html/._week9-bs122.html +++ b/doc/pub/week9/html/._week9-bs122.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,12 +651,14 @@

     

     

     

    -

    Different threads do different tasks

    +

    Inner product

    +$$ +\sum_{i=0}^{n-1} a_ib_i +$$ -

    Different threads do different tasks independently, each section is executed by one thread.

    @@ -659,17 +666,12 @@

    Different threads d
    -
    #pragma omp parallel
    -{
    -#pragma omp sections
    -{
    -#pragma omp section
    -funcA ();
    -#pragma omp section
    -funcB ();
    -#pragma omp section
    -funcC ();
    -}
    +  
    int i;
    +double sum = 0.;
    +/* allocating and initializing arrays */
    +/* ... */
    +#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    + for (i=0; i<N; i++) sum += a[i]*b[i];
     }
     
    @@ -714,7 +716,7 @@

    Different threads d
  • 131
  • 132
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs123.html b/doc/pub/week9/html/._week9-bs123.html index a6542487..fa34e463 100644 --- a/doc/pub/week9/html/._week9-bs123.html +++ b/doc/pub/week9/html/._week9-bs123.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,36 +651,12 @@

     

     

     

    -

    Single execution

    +

    Different threads do different tasks

    - -
    -
    -
    -
    -
    -
    #pragma omp single { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The code is executed by one thread only, no guarantee which thread

    - -

    Can introduce an implicit barrier at the end

    +

    Different threads do different tasks independently, each section is executed by one thread.

    @@ -683,7 +664,18 @@

    Single execution

    -
    #pragma omp master { ... }
    +  
    #pragma omp parallel
    +{
    +#pragma omp sections
    +{
    +#pragma omp section
    +funcA ();
    +#pragma omp section
    +funcB ();
    +#pragma omp section
    +funcC ();
    +}
    +}
     
    @@ -698,8 +690,6 @@

    Single execution

    - -

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    @@ -729,7 +719,7 @@

    Single execution

  • 132
  • 133
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs124.html b/doc/pub/week9/html/._week9-bs124.html index 7fafa115..06c7555f 100644 --- a/doc/pub/week9/html/._week9-bs124.html +++ b/doc/pub/week9/html/._week9-bs124.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Coordination and synchronization

    +

    Single execution

    @@ -657,7 +662,7 @@

    Coordination and synchr
    -
    #pragma omp barrier
    +  
    #pragma omp single { ... }
     
    @@ -673,57 +678,9 @@

    Coordination and synchr

    -

    Synchronization, must be encountered by all threads in a team (or none)

    - - -
    -
    -
    -
    -
    -
    #pragma omp ordered { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is another form of synchronization (in sequential order). -The form -

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    The code is executed by one thread only, no guarantee which thread

    -

    and

    +

    Can introduce an implicit barrier at the end

    @@ -731,7 +688,7 @@

    Coordination and synchr
    -
    #pragma omp atomic { single assignment statement }
    +  
    #pragma omp master { ... }
     
    @@ -747,29 +704,7 @@

    Coordination and synchr

    -

    is more efficient than

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    @@ -799,7 +734,7 @@

    Coordination and synchr
  • 133
  • 134
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs125.html b/doc/pub/week9/html/._week9-bs125.html index 862c4125..3e0347d5 100644 --- a/doc/pub/week9/html/._week9-bs125.html +++ b/doc/pub/week9/html/._week9-bs125.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,25 +651,130 @@

     

     

     

    -

    Data scope

    +

    Coordination and synchronization

    -
      -
    • OpenMP data scope attribute clauses:
    • -
        -
      • shared
      • -
      • private
      • -
      • firstprivate
      • -
      • lastprivate
      • -
      • reduction
      • -
      -
    -

    What are the purposes of these attributes

    -
      -
    • define how and which variables are transferred to a parallel region (and back)
    • -
    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    • -
    + + +
    +
    +
    +
    +
    +
    #pragma omp barrier
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    Synchronization, must be encountered by all threads in a team (or none)

    + + +
    +
    +
    +
    +
    +
    #pragma omp ordered { a block of codes }
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    is another form of synchronization (in sequential order). +The form +

    + + +
    +
    +
    +
    +
    +
    #pragma omp critical { a block of codes }
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    and

    + + +
    +
    +
    +
    +
    +
    #pragma omp atomic { single assignment statement }
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    is more efficient than

    + + +
    +
    +
    +
    +
    +
    #pragma omp critical
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    @@ -694,7 +804,7 @@

    Data scope

  • 134
  • 135
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs126.html b/doc/pub/week9/html/._week9-bs126.html index 68256fe4..77b27ffe 100644 --- a/doc/pub/week9/html/._week9-bs126.html +++ b/doc/pub/week9/html/._week9-bs126.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,16 +651,24 @@

     

     

     

    -

    Some remarks

    +

    Data scope

    -
      -
    • When entering a parallel region, the private clause ensures each thread having its own new variable instances. The new variables are assumed to be uninitialized.
    • -
    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • -
    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • -
    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    • +
    • OpenMP data scope attribute clauses:
    • +
        +
      • shared
      • +
      • private
      • +
      • firstprivate
      • +
      • lastprivate
      • +
      • reduction
      • +
      +
    +

    What are the purposes of these attributes

    +
      +
    • define how and which variables are transferred to a parallel region (and back)
    • +
    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    @@ -686,7 +699,7 @@

    Some remarks

  • 135
  • 136
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs127.html b/doc/pub/week9/html/._week9-bs127.html index cd237ae7..50319dae 100644 --- a/doc/pub/week9/html/._week9-bs127.html +++ b/doc/pub/week9/html/._week9-bs127.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,77 +651,16 @@

     

     

     

    -

    Parallelizing nested for-loops

    +

    Some remarks

      -
    • Serial code
    • -
    - - -
    -
    -
    -
    -
    -
    for (i=0; i<100; i++)
    -    for (j=0; j<100; j++)
    -        a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Parallelization
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for private(j)
    -for (i=0; i<100; i++)
    -    for (j=0; j<100; j++)
    -       a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Why not parallelize the inner loop? to save overhead of repeated thread forks-joins
    • -
    • Why must j be private? To avoid race condition among the threads
    • +
    • When entering a parallel region, the private clause ensures that each thread has its own new instance of each listed variable. These new variables are assumed to be uninitialized.
    • +
    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • +
    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • +
    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    @@ -747,7 +691,7 @@

    Parallelizing nested for-
  • 136
  • 137
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs128.html b/doc/pub/week9/html/._week9-bs128.html index bbde9f39..b7079ebb 100644 --- a/doc/pub/week9/html/._week9-bs128.html +++ b/doc/pub/week9/html/._week9-bs128.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,14 +651,14 @@

     

     

     

    -

    Nested parallelism

    +

    Parallelizing nested for-loops

    -

    When a thread in a parallel region encounters another parallel construct, it -may create a new team of threads and become the master of the new -team. -

    + +
      +
    • Serial code
    • +
    @@ -661,13 +666,42 @@

    Nested parallelism

    -
    #pragma omp parallel num_threads(4)
    -{
    -/* .... */
    -#pragma omp parallel num_threads(2)
    -{
    -//  
    +  
    for (i=0; i<100; i++)
    +    for (j=0; j<100; j++)
    +        a[i][j] = b[i][j] + c[i][j];
    +    }
     }
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + + +
      +
    • Parallelization
    • +
    + + +
    +
    +
    +
    +
    +
    #pragma omp parallel for private(j)
    +for (i=0; i<100; i++)
    +    for (j=0; j<100; j++)
    +       a[i][j] = b[i][j] + c[i][j];
    +    }
     }
     
    @@ -683,6 +717,12 @@

    Nested parallelism

    + + +
      +
    • Why not parallelize the inner loop? To save the overhead of repeated thread fork-joins
    • +
    • Why must j be private? To avoid a race condition among the threads
    • +
    @@ -712,7 +752,7 @@

    Nested parallelism

  • 137
  • 138
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs129.html b/doc/pub/week9/html/._week9-bs129.html index d6c9e935..9ef8d9b2 100644 --- a/doc/pub/week9/html/._week9-bs129.html +++ b/doc/pub/week9/html/._week9-bs129.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,10 +651,14 @@

     

     

     

    -

    Parallel tasks

    +

    Nested parallelism

    +

    When a thread in a parallel region encounters another parallel construct, it +may create a new team of threads and become the master of the new +team. +

    @@ -657,16 +666,14 @@

    Parallel tasks

    -
    #pragma omp task 
    -#pragma omp parallel shared(p_vec) private(i)
    +  
    #pragma omp parallel num_threads(4)
     {
    -#pragma omp single
    +/* .... */
    +#pragma omp parallel num_threads(2)
     {
    -for (i=0; i<N; i++) {
    -  double r = random_number();
    -  if (p_vec[i] > r) {
    -#pragma omp task
    -   do_work (p_vec[i]);
    +//  
    +}
    +}
     
    @@ -710,7 +717,7 @@

    Parallel tasks

  • 138
  • 139
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs130.html b/doc/pub/week9/html/._week9-bs130.html index 3d44e13a..9855846c 100644 --- a/doc/pub/week9/html/._week9-bs130.html +++ b/doc/pub/week9/html/._week9-bs130.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,39 +651,10 @@

     

     

     

    -

    Common mistakes

    +

    Parallel tasks

    -

    Race condition

    - - -
    -
    -
    -
    -
    -
    int nthreads;
    -#pragma omp parallel shared(nthreads)
    -{
    -nthreads = omp_get_num_threads();
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Deadlock

    @@ -686,15 +662,16 @@

    Common mistakes

    -
    #pragma omp parallel
    +  
    #pragma omp task 
    +#pragma omp parallel shared(p_vec) private(i)
     {
    -...
    -#pragma omp critical
    +#pragma omp single
     {
    -...
    -#pragma omp barrier
    -}
    -}
    +for (i=0; i<N; i++) {
    +  double r = random_number();
    +  if (p_vec[i] > r) {
    +#pragma omp task
    +   do_work (p_vec[i]);
     
    @@ -738,7 +715,7 @@

    Common mistakes

  • 139
  • 140
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs131.html b/doc/pub/week9/html/._week9-bs131.html index f1d5da69..eb274ee8 100644 --- a/doc/pub/week9/html/._week9-bs131.html +++ b/doc/pub/week9/html/._week9-bs131.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,16 +650,40 @@

     

     

     

    - -

    Not all computations are simple

    + +

    Common mistakes

    -

    Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads -

    +

    Race condition

    + + +
    +
    +
    +
    +
    +
    int nthreads;
    +#pragma omp parallel shared(nthreads)
    +{
    +nthreads = omp_get_num_threads();
    +}
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -

    An example is finding the location and value of the largest element in an array

    +

    Deadlock

    @@ -662,11 +691,14 @@

    Not all computations are
    -
    for (i=0; i<n; i++) { 
    -   if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -   }
    +  
    #pragma omp parallel
    +{
    +...
    +#pragma omp critical
    +{
    +...
    +#pragma omp barrier
    +}
     }
     
    @@ -710,6 +742,8 @@

    Not all computations are
  • 139
  • 140
  • 141
  • +
  • ...
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs132.html b/doc/pub/week9/html/._week9-bs132.html index 26b84107..c0c628c5 100644 --- a/doc/pub/week9/html/._week9-bs132.html +++ b/doc/pub/week9/html/._week9-bs132.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,40 +651,15 @@

     

     

     

    -

    Not all computations are simple, competing threads

    +

    Not all computations are simple

    -

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    -
      -
    1. OpenMP provides several ways to coordinate access to shared values
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +

    Not all computations are simple loops where the data can be evenly +divided among threads without any dependencies between threads +

    -
      -
    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. -
    +

    An example is finding the location and value of the largest element in an array

    @@ -687,7 +667,12 @@

    Not al
    -
    #pragma omp critical
    +  
    for (i=0; i<n; i++) { 
    +   if (x[i] > maxval) {
    +      maxval = x[i];
    +      maxloc = i; 
    +   }
    +}
     
    @@ -702,11 +687,6 @@

    Not al

    - -
      -
    1. Only one thread at a time can execute the following block
    2. -
    -

    Atomic may be faster than critical but depends on hardware

    @@ -734,6 +714,7 @@

    Not al
  • 139
  • 140
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs133.html b/doc/pub/week9/html/._week9-bs133.html index b6b41ca9..68a3bc94 100644 --- a/doc/pub/week9/html/._week9-bs133.html +++ b/doc/pub/week9/html/._week9-bs133.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -645,14 +650,15 @@

     

     

     

    - -

    How to find the max value using OpenMP

    + +

    Not all computations are simple, competing threads

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    +

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    +
      +
    1. OpenMP provides several ways to coordinate access to shared values
    2. +
    @@ -660,13 +666,7 @@

    How to find the m
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -    if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -    }
    -}
    +  
    #pragma omp atomic
     
    @@ -681,6 +681,37 @@

    How to find the m

    + +
      +
    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. +
    + + +
    +
    +
    +
    +
    +
    #pragma omp critical
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +
      +
    1. Only one thread at a time can execute the following block
    2. +
    +

    Atomic may be faster than critical but depends on hardware

    @@ -707,6 +738,7 @@

    How to find the m
  • 139
  • 140
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs134.html b/doc/pub/week9/html/._week9-bs134.html index b3373106..fcf33efd 100644 --- a/doc/pub/week9/html/._week9-bs134.html +++ b/doc/pub/week9/html/._week9-bs134.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Then deal with the race conditions

    +

    How to find the max value using OpenMP

    @@ -662,14 +667,11 @@

    Then deal with the ra
    #pragma omp parallel for
      for (i=0; i<n; i++) {
    -#pragma omp critical
    -  {
    -     if (x[i] > maxval) {
    -       maxval = x[i];
    -       maxloc = i; 
    -     }
    -  }
    -} 
    +    if (x[i] > maxval) {
    +      maxval = x[i];
    +      maxloc = i; 
    +    }
    +}
     

    @@ -684,13 +686,10 @@

    Then deal with the ra

    - -

    Exercise: write a code which implements this and give an estimate on performance. Perform several runs, -with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different archictectures if you can. -

    +

    diff --git a/doc/pub/week9/html/._week9-bs135.html b/doc/pub/week9/html/._week9-bs135.html index ecd0d861..2e476cdc 100644 --- a/doc/pub/week9/html/._week9-bs135.html +++ b/doc/pub/week9/html/._week9-bs135.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code form last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can acted upon
  • +
  • Number of elements that can acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,8 +651,50 @@

     

     

     

    -

    What can slow down OpenMP performance?

    -

    Give it a thought!

    +

    Then deal with the race conditions

    +
    +
    + +

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? +The first step would be to parallelize as +

    + + +
    +
    +
    +
    +
    +
    #pragma omp parallel for
    + for (i=0; i<n; i++) {
    +#pragma omp critical
    +  {
    +     if (x[i] > maxval) {
    +       maxval = x[i];
    +       maxloc = i; 
    +     }
    +  }
    +} 
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + +

    Exercise: write a code which implements this and give an estimate of its performance. Perform several runs, +with a serial code only, with and without vectorization, and compare the serial code with the one that uses OpenMP. Run on different architectures if you can. +

    +
    +

    @@ -669,6 +716,7 @@

    What can slow down
  • 139
  • 140
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs136.html b/doc/pub/week9/html/._week9-bs136.html index 599b6c70..63b74f32 100644 --- a/doc/pub/week9/html/._week9-bs136.html +++ b/doc/pub/week9/html/._week9-bs136.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -647,19 +652,7 @@

    What can slow down OpenMP performance?

    -
    -
    - -

    Performance poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    -
      -
    • We do not care about the value during the execution of the loop, just the value at the end.
    • -
    -

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties

    - -Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread -
    -
    - +

    Give it a thought!

    @@ -680,6 +673,7 @@

    What can slow down
  • 139
  • 140
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs137.html b/doc/pub/week9/html/._week9-bs137.html index 3740456b..0a63f393 100644 --- a/doc/pub/week9/html/._week9-bs137.html +++ b/doc/pub/week9/html/._week9-bs137.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,45 +651,17 @@

     

     

     

    -

    Find the max location for each thread

    +

    What can slow down OpenMP performance?

    +

    Performance is poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    +
      +
    • We do not care about the value during the execution of the loop, just the value at the end.
    • +
    +

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties.

    - -
    -
    -
    -
    -
    -
    int maxloc[MAX_THREADS], mloc;
    -double maxval[MAX_THREADS], mval; 
    -#pragma omp parallel shared(maxval,maxloc)
    -{
    -  int id = omp_get_thread_num(); 
    -  maxval[id] = -1.0e30;
    -#pragma omp for
    -   for (int i=0; i<n; i++) {
    -       if (x[i] > maxval[id]) { 
    -           maxloc[id] = i;
    -           maxval[id] = x[i]; 
    -       }
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    +Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread
    @@ -707,6 +684,7 @@

    Find the max locat
  • 139
  • 140
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/._week9-bs138.html b/doc/pub/week9/html/._week9-bs138.html index 060ae5da..38fce0ec 100644 --- a/doc/pub/week9/html/._week9-bs138.html +++ b/doc/pub/week9/html/._week9-bs138.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,7 +651,7 @@

     

     

     

    -

    Combine the values from each thread

    +

    Find the max location for each thread

    @@ -657,19 +662,20 @@

    Combine the values f
    -
    #pragma omp flush (maxloc,maxval)
    -#pragma omp master
    -  {
    -    int nt = omp_get_num_threads(); 
    -    mloc = maxloc[0]; 
    -    mval = maxval[0]; 
    -    for (int i=1; i<nt; i++) {
    -        if (maxval[i] > mval) { 
    -           mval = maxval[i]; 
    -           mloc = maxloc[i];
    -        } 
    -     }
    -   }
    +  
    int maxloc[MAX_THREADS], mloc;
    +double maxval[MAX_THREADS], mval; 
    +#pragma omp parallel shared(maxval,maxloc)
    +{
    +  int id = omp_get_thread_num(); 
    +  maxval[id] = -1.0e30;
    +#pragma omp for
    +   for (int i=0; i<n; i++) {
    +       if (x[i] > maxval[id]) { 
    +           maxloc[id] = i;
    +           maxval[id] = x[i]; 
    +       }
    +    }
    +}
     
    @@ -684,11 +690,10 @@

    Combine the values f

    - -

    Note that we let the master process perform the last operation.

    +

    diff --git a/doc/pub/week9/html/._week9-bs139.html b/doc/pub/week9/html/._week9-bs139.html index 5214c06c..b70141f8 100644 --- a/doc/pub/week9/html/._week9-bs139.html +++ b/doc/pub/week9/html/._week9-bs139.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -646,72 +651,30 @@

     

     

     

    -

    Matrix-matrix multiplication

    -

    This code computes the norm of a vector using OpenMp

    +

    Combine the values from each thread

    +
    +
    + - +
    -
    //  OpenMP program to compute vector norm by adding two other vectors
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of vector
    -  int n = atoi(argv[1]);
    -  double *a, *b, *c;
    -  int i;
    -  int thread_num;
    -  double wtime, Norm2, s, angle;
    -  cout << "  Perform addition of two vectors and compute the norm-2." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -# pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2)
    -  // Set up values for vectors  a and b
    -  for (i = 0; i < n; i++){
    -      angle = 2.0*M_PI*i/ (( double ) n);
    -      a[i] = s*(sin(angle) + cos(angle));
    -      b[i] =  s*sin(2.0*angle);
    -      c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (i = 0; i < n; i++){
    -     c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  Norm2 = 0.0;
    -  for (i = 0; i < n; i++){
    -     Norm2  += c[i]*c[i];
    -  }
    -// end parallel region
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm-2 computation=" << wtime  << endl;
    -  cout << " Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    +  
    #pragma omp flush (maxloc,maxval)
    +#pragma omp master
    +  {
    +    int nt = omp_get_num_threads(); 
    +    mloc = maxloc[0]; 
    +    mval = maxval[0]; 
    +    for (int i=1; i<nt; i++) {
    +        if (maxval[i] > mval) { 
    +           mval = maxval[i]; 
    +           mloc = maxloc[i];
    +        } 
    +     }
    +   }
     
    @@ -727,6 +690,9 @@

    Matrix-matrix multiplication

    -

    This the matrix-matrix multiplication code with plain c++ memory allocation using OpenMP

    - +

    Matrix-matrix multiplication

    +

    This code computes the norm of a vector using OpenMP

    @@ -656,7 +660,7 @@

    -
    //  Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP
    +  
    //  OpenMP program to compute vector norm by adding two other vectors
     #include <cstdlib>
     #include <iostream>
     #include <cmath>
    @@ -667,13 +671,13 @@ 

    @@ -763,6 +748,8 @@

    139
  • 140
  • 141
  • +
  • 142
  • +
  • »
  • diff --git a/doc/src/week9/._week9-bs140.html b/doc/pub/week9/html/._week9-bs141.html similarity index 87% rename from doc/src/week9/._week9-bs140.html rename to doc/pub/week9/html/._week9-bs141.html index 7688209c..3e58ef18 100644 --- a/doc/src/week9/._week9-bs140.html +++ b/doc/pub/week9/html/._week9-bs141.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,133 +511,134 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -644,7 +649,7 @@

     

     

     

    - +

    Matrix-matrix multiplication

    This the matrix-matrix multiplication code with plain c++ memory allocation using OpenMP

    @@ -751,10 +756,9 @@

    138
  • 139
  • 140
  • -
  • 141
  • +
  • 141
  • +
  • 142
  • diff --git a/doc/pub/week9/html/week9-bs.html b/doc/pub/week9/html/week9-bs.html index eb84da37..0e3f3774 100644 --- a/doc/pub/week9/html/week9-bs.html +++ b/doc/pub/week9/html/week9-bs.html @@ -63,6 +63,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -507,134 +511,135 @@
  • The covariance term
  • Rewriting the covariance term
  • Introducing the correlation function
  • -
  • Resampling methods: Blocking
  • -
  • Why blocking?
  • -
  • Blocking Transformations
  • -
  • Blocking transformations
  • -
  • Blocking Transformations
  • -
  • Blocking Transformations, getting there
  • -
  • Blocking Transformations, final expressions
  • -
  • More on the blocking method
  • -
  • Example code form last week
  • -
  • Resampling analysis
  • -
  • Content
  • -
  • Optimization and profiling
  • -
  • More on optimization
  • -
  • Optimization and profiling
  • -
  • Optimization and debugging
  • -
  • Other hints
  • -
  • Vectorization and the basic idea behind parallel computing
  • -
  • A rough classification of hardware models
  • -
  • Shared memory and distributed memory
  • -
  • Different parallel programming paradigms
  • -
  • Different parallel programming paradigms
  • -
  • What is vectorization?
  • -
  • Number of elements that can acted upon
  • -
  • Number of elements that can acted upon, examples
  • -
  • Operation counts for scalar operation
  • -
  • Number of elements that can acted upon, examples
  • -
  • Number of operations when vectorized
  • -
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • -
  • Compiling with and without vectorization
  • -
  • Compiling with and without vectorization using clang
  • -
  • Automatic vectorization and vectorization inhibitors, criteria
  • -
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • -
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • -
  • Automatic vectorization and vectorization inhibitors, nested loops
  • -
  • Automatic vectorization and vectorization inhibitors, function calls
  • -
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • -
  • Automatic vectorization and vectorization inhibitors, memory stride
  • -
  • Memory management
  • -
  • Memory and communication
  • -
  • Measuring performance
  • -
  • Problems with measuring time
  • -
  • Problems with cold start
  • -
  • Problems with smart compilers
  • -
  • Problems with interference
  • -
  • Problems with measuring performance
  • -
  • Thomas algorithm for tridiagonal linear algebra equations
  • -
  • Thomas algorithm, forward substitution
  • -
  • Thomas algorithm, backward substitution
  • -
  • Thomas algorithm and counting of operations (floating point and memory)
  • -
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • -
  • How do we define speedup? Simplest form
  • -
  • How do we define speedup? Correct baseline
  • -
  • Parallel speedup
  • -
  • Speedup and memory
  • -
  • Upper bounds on speedup
  • -
  • Amdahl's law
  • -
  • How much is parallelizable
  • -
  • Today's situation of parallel computing
  • -
  • Overhead present in parallel computing
  • -
  • Parallelizing a sequential algorithm
  • -
  • Strategies
  • -
  • How do I run MPI on a PC/Laptop? MPI
  • -
  • Can I do it on my own PC/laptop? OpenMP installation
  • -
  • Installing MPI
  • -
  • Installing MPI and using Qt
  • -
  • What is Message Passing Interface (MPI)?
  • -
  • Going Parallel with MPI
  • -
  • MPI is a library
  • -
  • Bindings to MPI routines
  • -
  • Communicator
  • -
  • Some of the most important MPI functions
  • -
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • -
  • The Fortran program
  • -
  • Note 1
  • -
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • -
  • Note 2
  • -
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • -
  • Note 3
  • -
  • Note 4
  • -
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • -
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Integrating with MPI
  • -
  • How do I use \( MPI\_reduce \)?
  • -
  • More on \( MPI\_Reduce \)
  • -
  • Dissection of trapezoidal rule
  • -
  • Dissection of trapezoidal rule
  • -
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • -
  • What is OpenMP
  • -
  • Getting started, things to remember
  • -
  • OpenMP syntax
  • -
  • Different OpenMP styles of parallelism
  • -
  • General code structure
  • -
  • Parallel region
  • -
  • Hello world, not again, please!
  • -
  • Hello world, yet another variant
  • -
  • Important OpenMP library routines
  • -
  • Private variables
  • -
  • Master region
  • -
  • Parallel for loop
  • -
  • Parallel computations and loops
  • -
  • Scheduling of loop computations
  • -
  • Example code for loop scheduling
  • -
  • Example code for loop scheduling, guided instead of dynamic
  • -
  • More on Parallel for loop
  • -
  • What can happen with this loop?
  • -
  • Inner product
  • -
  • Different threads do different tasks
  • -
  • Single execution
  • -
  • Coordination and synchronization
  • -
  • Data scope
  • -
  • Some remarks
  • -
  • Parallelizing nested for-loops
  • -
  • Nested parallelism
  • -
  • Parallel tasks
  • -
  • Common mistakes
  • -
  • Not all computations are simple
  • -
  • Not all computations are simple, competing threads
  • -
  • How to find the max value using OpenMP
  • -
  • Then deal with the race conditions
  • -
  • What can slow down OpenMP performance?
  • -
  • What can slow down OpenMP performance?
  • -
  • Find the max location for each thread
  • -
  • Combine the values from each thread
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • -
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • +
  • Computing the correlation function
  • +
  • Resampling methods: Blocking
  • +
  • Why blocking?
  • +
  • Blocking Transformations
  • +
  • Blocking transformations
  • +
  • Blocking Transformations
  • +
  • Blocking Transformations, getting there
  • +
  • Blocking Transformations, final expressions
  • +
  • More on the blocking method
  • +
  • Example code from last week
  • +
  • Resampling analysis
  • +
  • Content
  • +
  • Optimization and profiling
  • +
  • More on optimization
  • +
  • Optimization and profiling
  • +
  • Optimization and debugging
  • +
  • Other hints
  • +
  • Vectorization and the basic idea behind parallel computing
  • +
  • A rough classification of hardware models
  • +
  • Shared memory and distributed memory
  • +
  • Different parallel programming paradigms
  • +
  • Different parallel programming paradigms
  • +
  • What is vectorization?
  • +
  • Number of elements that can be acted upon
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Operation counts for scalar operation
  • +
  • Number of elements that can be acted upon, examples
  • +
  • Number of operations when vectorized
  • +
  • "A simple test case with and without vectorization":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp"
  • +
  • Compiling with and without vectorization
  • +
  • Compiling with and without vectorization using clang
  • +
  • Automatic vectorization and vectorization inhibitors, criteria
  • +
  • Automatic vectorization and vectorization inhibitors, exit criteria
  • +
  • Automatic vectorization and vectorization inhibitors, straight-line code
  • +
  • Automatic vectorization and vectorization inhibitors, nested loops
  • +
  • Automatic vectorization and vectorization inhibitors, function calls
  • +
  • Automatic vectorization and vectorization inhibitors, data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, more data dependencies
  • +
  • Automatic vectorization and vectorization inhibitors, memory stride
  • +
  • Memory management
  • +
  • Memory and communication
  • +
  • Measuring performance
  • +
  • Problems with measuring time
  • +
  • Problems with cold start
  • +
  • Problems with smart compilers
  • +
  • Problems with interference
  • +
  • Problems with measuring performance
  • +
  • Thomas algorithm for tridiagonal linear algebra equations
  • +
  • Thomas algorithm, forward substitution
  • +
  • Thomas algorithm, backward substitution
  • +
  • Thomas algorithm and counting of operations (floating point and memory)
  • +
  • "Example: Transpose of a matrix":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp"
  • +
  • How do we define speedup? Simplest form
  • +
  • How do we define speedup? Correct baseline
  • +
  • Parallel speedup
  • +
  • Speedup and memory
  • +
  • Upper bounds on speedup
  • +
  • Amdahl's law
  • +
  • How much is parallelizable
  • +
  • Today's situation of parallel computing
  • +
  • Overhead present in parallel computing
  • +
  • Parallelizing a sequential algorithm
  • +
  • Strategies
  • +
  • How do I run MPI on a PC/Laptop? MPI
  • +
  • Can I do it on my own PC/laptop? OpenMP installation
  • +
  • Installing MPI
  • +
  • Installing MPI and using Qt
  • +
  • What is Message Passing Interface (MPI)?
  • +
  • Going Parallel with MPI
  • +
  • MPI is a library
  • +
  • Bindings to MPI routines
  • +
  • Communicator
  • +
  • Some of the most important MPI functions
  • +
  • "The first MPI C/C++ program":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp"
  • +
  • The Fortran program
  • +
  • Note 1
  • +
  • "Ordered output with MPIBarrier":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp"
  • +
  • Note 2
  • +
  • "Ordered output":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp"
  • +
  • Note 3
  • +
  • Note 4
  • +
  • "Numerical integration in parallel":"https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp"
  • +
  • Dissection of trapezoidal rule with \( MPI\_reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Integrating with MPI
  • +
  • How do I use \( MPI\_reduce \)?
  • +
  • More on \( MPI\_Reduce \)
  • +
  • Dissection of trapezoidal rule
  • +
  • Dissection of trapezoidal rule
  • +
  • "The quantum dot program for two electrons":"https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp"
  • +
  • What is OpenMP
  • +
  • Getting started, things to remember
  • +
  • OpenMP syntax
  • +
  • Different OpenMP styles of parallelism
  • +
  • General code structure
  • +
  • Parallel region
  • +
  • Hello world, not again, please!
  • +
  • Hello world, yet another variant
  • +
  • Important OpenMP library routines
  • +
  • Private variables
  • +
  • Master region
  • +
  • Parallel for loop
  • +
  • Parallel computations and loops
  • +
  • Scheduling of loop computations
  • +
  • Example code for loop scheduling
  • +
  • Example code for loop scheduling, guided instead of dynamic
  • +
  • More on Parallel for loop
  • +
  • What can happen with this loop?
  • +
  • Inner product
  • +
  • Different threads do different tasks
  • +
  • Single execution
  • +
  • Coordination and synchronization
  • +
  • Data scope
  • +
  • Some remarks
  • +
  • Parallelizing nested for-loops
  • +
  • Nested parallelism
  • +
  • Parallel tasks
  • +
  • Common mistakes
  • +
  • Not all computations are simple
  • +
  • Not all computations are simple, competing threads
  • +
  • How to find the max value using OpenMP
  • +
  • Then deal with the race conditions
  • +
  • What can slow down OpenMP performance?
  • +
  • What can slow down OpenMP performance?
  • +
  • Find the max location for each thread
  • +
  • Combine the values from each thread
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp"
  • +
  • "Matrix-matrix multiplication":"https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp"
  • @@ -689,7 +694,7 @@

    March 11-15

  • 9
  • 10
  • ...
  • -
  • 141
  • +
  • 142
  • »
  • diff --git a/doc/pub/week9/html/week9-reveal.html b/doc/pub/week9/html/week9-reveal.html index d5f52da2..ca3d5825 100644 --- a/doc/pub/week9/html/week9-reveal.html +++ b/doc/pub/week9/html/week9-reveal.html @@ -422,9 +422,82 @@

    Introducing the correlation functi $$

     
    -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    +

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) +for a series of random numbers. We see that the function \( \kappa_d \) +approaches \( 0 \) as \( d\rightarrow \infty \). +

    + +

    In this case, our data are random numbers drawn from the uniform distribution with \( x\in [0,1] \). Even for two random numbers that are far apart in the sequence, we note that the correlation function is not exactly zero.

    + -

    Note: code will be inserted here later.

    +
    +

    Computing the correlation function

    + +

    This code is best viewed in the Jupyter notebook.

    + + +
    +
    +
    +
    +
    +
    #!/usr/bin/env python
    +import numpy as np
    +import matplotlib.mlab as mlab
    +import matplotlib.pyplot as plt
    +import random
    +
    +# initialize the rng with a seed, simple uniform distribution
    +random.seed() 
    +m = 10000
    +samplefactor = 1.0/m
    +x = np.zeros(m)   
    +MeanValue = 0.
    +VarValue = 0.
    +for i in range (m):
    +    value = random.random()
    +    x[i] = value
    +    MeanValue += value
    +    VarValue += value*value
    +
    +MeanValue *= samplefactor
    +VarValue *= samplefactor
    +Variance = VarValue-MeanValue*MeanValue
    +STDev = np.sqrt(Variance)
    +print("MeanValue =", MeanValue)
    +print("Variance =", Variance)
    +print("Standard deviation =", STDev)
    +
    +# Computing the autocorrelation function
    +autocorrelation = np.zeros(m)
    +darray = np.zeros(m)
    +for j in range (m):
    +    sum = 0.0
    +    darray[j] = j
    +    for k in range (m-j):
    +        sum += (x[k]-MeanValue)*(x[k+j]-MeanValue ) 
    +    autocorrelation[j] = (sum/Variance)*samplefactor
    +# Visualize results
    +plt.plot(darray, autocorrelation,'ro')
    +plt.axis([0,m,-0.2, 1.1])
    +plt.xlabel(r'$d$')
    +plt.ylabel(r'$\kappa_d$')
    +plt.title(r'autocorrelation function for RNG with uniform distribution')
    +plt.show()
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    diff --git a/doc/pub/week9/html/week9-solarized.html b/doc/pub/week9/html/week9-solarized.html index 354c9093..b201b862 100644 --- a/doc/pub/week9/html/week9-solarized.html +++ b/doc/pub/week9/html/week9-solarized.html @@ -90,6 +90,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -725,9 +729,82 @@

    Introducing the correlation functi \end{align*} $$ -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    +

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) +for a series of random numbers. We see that the function \( \kappa_d \) +approaches \( 0 \) as \( d\rightarrow \infty \). +

    + +

    In this case, our data are random numbers drawn from the uniform distribution with \( x\in [0,1] \). Even for two random numbers that are far apart in the sequence, we note that the correlation function is not exactly zero.

    + +









    +

    Computing the correlation function

    + +

    This code is best viewed in the Jupyter notebook.

    + + +
    +
    +
    +
    +
    +
    #!/usr/bin/env python
    +import numpy as np
    +import matplotlib.mlab as mlab  # NOTE(review): mlab is not referenced anywhere in this script
    +import matplotlib.pyplot as plt
    +import random
    +
    +# Draw m uniform random numbers, accumulating the sums needed for mean and variance
    +random.seed()  # no argument: seeded from OS entropy or system time, so runs are not reproducible
    +m = 10000  # number of samples, and also the number of lags evaluated below
    +samplefactor = 1.0/m  # normalization 1/m used for all sample averages
    +x = np.zeros(m)   
    +MeanValue = 0.  # running sum of x, turned into the mean after the loop
    +VarValue = 0.  # running sum of x^2, turned into <x^2> after the loop
    +for i in range (m):
    +    value = random.random()  # uniform float in [0.0, 1.0)
    +    x[i] = value
    +    MeanValue += value
    +    VarValue += value*value
    +
    +MeanValue *= samplefactor
    +VarValue *= samplefactor
    +Variance = VarValue-MeanValue*MeanValue  # <x^2> - <x>^2, i.e. the variance with 1/m normalization
    +STDev = np.sqrt(Variance)
    +print("MeanValue =", MeanValue)
    +print("Variance =", Variance)
    +print("Standard deviation =", STDev)
    +
    +# Autocorrelation for every lag j = 0..m-1: (1/m) * sum_k (x[k]-mean)*(x[k+j]-mean) / Variance
    +autocorrelation = np.zeros(m)
    +darray = np.zeros(m)  # lag values, used as the x-axis of the plot
    +for j in range (m):
    +    sum = 0.0  # NOTE(review): shadows the builtin sum() for the rest of the script
    +    darray[j] = j
    +    for k in range (m-j):  # only the m-j pairs that stay inside the array; about m*(m+1)/2 iterations in total
    +        sum += (x[k]-MeanValue)*(x[k+j]-MeanValue ) 
    +    autocorrelation[j] = (sum/Variance)*samplefactor  # equals 1 at j = 0 up to rounding
    +# Plot the autocorrelation against the lag as red circles
    +plt.plot(darray, autocorrelation,'ro')
    +plt.axis([0,m,-0.2, 1.1])  # axis limits: x in [0, m], y in [-0.2, 1.1]
    +plt.xlabel(r'$d$')
    +plt.ylabel(r'$\kappa_d$')
    +plt.title(r'autocorrelation function for RNG with uniform distribution')
    +plt.show()
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -

    Note: code will be inserted here later.











    Resampling methods: Blocking

    diff --git a/doc/pub/week9/html/week9.html b/doc/pub/week9/html/week9.html index 06ac4661..7d0cc4a1 100644 --- a/doc/pub/week9/html/week9.html +++ b/doc/pub/week9/html/week9.html @@ -167,6 +167,10 @@ 2, None, 'introducing-the-correlation-function'), + ('Computing the correlation function', + 2, + None, + 'computing-the-correlation-function'), ('Resampling methods: Blocking', 2, None, @@ -802,9 +806,82 @@

    Introducing the correlation functi \end{align*} $$ -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    +

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) +for a series of random numbers. We see that the function \( \kappa_d \) +approaches \( 0 \) as \( d\rightarrow \infty \). +

    + +

    In this case, our data are random numbers drawn from the uniform distribution with \( x\in [0,1] \). Even for two random numbers that are far apart in the sequence, we note that the computed correlation function is not exactly zero.

    + +









    +

    Computing the correlation function

    + +

    This code is best seen with the jupyter-notebook

    + + +
    +
    +
    +
    +
    +
    #!/usr/bin/env python
    +import numpy as np
    +import matplotlib.mlab as mlab  # NOTE(review): mlab is not referenced anywhere in this script
    +import matplotlib.pyplot as plt
    +import random
    +
    +# Draw m uniform random numbers, accumulating the sums needed for mean and variance
    +random.seed()  # no argument: seeded from OS entropy or system time, so runs are not reproducible
    +m = 10000  # number of samples, and also the number of lags evaluated below
    +samplefactor = 1.0/m  # normalization 1/m used for all sample averages
    +x = np.zeros(m)   
    +MeanValue = 0.  # running sum of x, turned into the mean after the loop
    +VarValue = 0.  # running sum of x^2, turned into <x^2> after the loop
    +for i in range (m):
    +    value = random.random()  # uniform float in [0.0, 1.0)
    +    x[i] = value
    +    MeanValue += value
    +    VarValue += value*value
    +
    +MeanValue *= samplefactor
    +VarValue *= samplefactor
    +Variance = VarValue-MeanValue*MeanValue  # <x^2> - <x>^2, i.e. the variance with 1/m normalization
    +STDev = np.sqrt(Variance)
    +print("MeanValue =", MeanValue)
    +print("Variance =", Variance)
    +print("Standard deviation =", STDev)
    +
    +# Autocorrelation for every lag j = 0..m-1: (1/m) * sum_k (x[k]-mean)*(x[k+j]-mean) / Variance
    +autocorrelation = np.zeros(m)
    +darray = np.zeros(m)  # lag values, used as the x-axis of the plot
    +for j in range (m):
    +    sum = 0.0  # NOTE(review): shadows the builtin sum() for the rest of the script
    +    darray[j] = j
    +    for k in range (m-j):  # only the m-j pairs that stay inside the array; about m*(m+1)/2 iterations in total
    +        sum += (x[k]-MeanValue)*(x[k+j]-MeanValue ) 
    +    autocorrelation[j] = (sum/Variance)*samplefactor  # equals 1 at j = 0 up to rounding
    +# Plot the autocorrelation against the lag as red circles
    +plt.plot(darray, autocorrelation,'ro')
    +plt.axis([0,m,-0.2, 1.1])  # axis limits: x in [0, m], y in [-0.2, 1.1]
    +plt.xlabel(r'$d$')
    +plt.ylabel(r'$\kappa_d$')
    +plt.title(r'autocorrelation function for RNG with uniform distribution')
    +plt.show()
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    -

    Note: code will be inserted here later.











    Resampling methods: Blocking

    diff --git a/doc/pub/week9/ipynb/ipynb-week9-src.tar.gz b/doc/pub/week9/ipynb/ipynb-week9-src.tar.gz index 622f1df5d6c58769d57edb8138b62981029b6649..ed6aaa8704ca35a3bad288a5c168c4f2634f146d 100644 GIT binary patch literal 192 zcmV;x06+g9iwFSUXY*wM1MSaC3c@fD24L4c#hjodHV<41cHu%0@d7DLZLCddlA^u6 zeSoeMH${Yeo1bBZVdjvoH@h_UcfXhoLWmIpLvlvPM9ZljVN3uM$_Zl$PdFf)P>L3S z<~!-7bza#1l)5}i?WBG;*N+v&hdt9P@XSAPER}=pzVoHgppBQAF%{gTtX8pVI-Nmo uq#HV5<+W8-g3uj^JXc;BEiO@G&B=<|#nG>S34$O9zV`s2Mtgw(2mk=nN>%*; literal 192 zcmV;x06+g9iwFSLF7sso1MSaC3c@fD2H>uHia9|^(lo`TU>7a~5igL^)W+JRCMnw6 z+Xv`MaZ^OdxA_@n7-kOHdb7(ScXz>J5JHSn7&B$bl!(mr1Y-)ACMjhkjR^yU{q)UGY!JZ=-a8l=KR!M;^Ydo)<{dYP3e1IKXtPHKe@h||lLQwO zZFcB7d*7V8cAYbo?ate}xm)L)n(TC_?2FE}?HZ=KuDUNzb=&ya{kd7wZ0iPYHXoh? zw+I99NJPOqvHtgn7T5@eazU}s%p>}=W+t=lIm_yRPFr+tj9P4spDq1>#b_doFbEh& zfN$Oj8hJGuYo!5^bNHDsf8oufMj98II4gyjktnBFYwZnp-inL)Vg@S#98XqPVJ#K8 zq*zipc-$x4am-=igu58K#EQo5J`>KprKYp>D8t0Ss)a1PAq(gu0R+~ZqI_nC*SQcZ z`?Wode3z#blAQ4ZycxjEu@uFU%m|GoQ`zVgSKg4M$h5kElga8Te+4=wlLo|7-q5V8 zE?qqZ5VGl^bcs@z4dhZPFPH$%)RY%VVii_QCWwvH%n9ZNBsLh!5Xvzb3cRWUF|3N9 zXW1vohPJ{42c-m-g?s@(up{y-06}?;8DTL%u>i<{k{Q6OG<8^03j+GheaS-lFg%Bqyit zX(){%6*{uy(6||53ezEo?^BW~%#|%vD0~?Rrs-TZLJYa|>M8L_F_@%O(ok5^B2zrD zKQuHsv$9gpvX+n}@p8&+nuqXon88PIeoTQ8ekd-VS{=i{wA z8B|@L*a7+~G<~;y#Y{hUHedqMTdDtx;4fcX_-(m8RDFnEN~i?xqnE?x=ym8*lrxHp zLM}KLB6e(n28-`3O=fITG7cBI)YyWsMUmv>gVmKl4Hs1ZCSrqPTNQ{mnamfztBajMo`iOB`ME>u;F&Wvwi6_@*m~s zp!BI>c=p;~J}yH&Q;{|PJPJRAf_86V=9Ze(+GlNU-20Wgy>^c4a*G?T#+D1X&iTW_N{ z6n^ioU}r{JGHqNN%x$H4*~^Ty(yp|fs4tmlMH;7dN`L{AY3A3jjUfR-QUjS*TdBo% z?Q=eV=i+04+Pwy~PZzN9MEr^P^D7^LnrR!rH7(6>41f${kLcxXN-pTqp^t`ojnlI zaP_-rWY~I=kicgnq|+#rV1fDIuY3eWUC_wj_Mh&{tVJ;*+1IDiWlkcE5awA}SE+@^iZ7fh*z;*;?Y+DlZ+n=tpG`XJB@S231(bzJu9&g!0 z$rqzvBR60|ATXz5|Lj<*U5^XXXkQ$?|{ zip&DG(;fEOXK@g5=YL`Wor87Y&sFzeDepm5YJc+UyNI{E!_L2tIAA|^8;Kt-qIG?4 z_WJ#v=Y+^dN;soraaHvAPJ&`h7Sd&u ztYv3GQ)x{~ENb ziOunUXb`Tj1VR2&M^smz`hu*bf5)Ok6$$Qz1i(&<^y?;(>neMNWuE%7*T`C(t@@oL 
z+>xSGSayBHw0{vly!C^O{6G_99fDK#z4en9hoF@Em!-ee4MGc`d>RDEEYeSHV9ZW?xHO@{A^~1Yij*Yva)yGZF zyODZmv)cLwk)J2Olg-N<52)qT|C%rnuO-iA+^W@LaeqQX))Xnk6>uC=ro;SCj;J;w z*?O~QM;_X5ni7kGX*W+1q;pP?J4ZZ?@ilq;%y|0slZAx1ARFrE^`rEgV~BJA+rvFq zd0e@I*Q#SbW6@nCui!gy`Te`NkUwAT7Nj*L^{fidP+=V@uL}R;CQ1D5&PpNPkeFq3>8uFiqqJZ!m9%}&wsQ2V{-8ye)kBMfk6Zl zlfE<+vtuxo0e?|KhKC~vNSDGkXTFd!h)ZY5#po0T)=GAUO%|M$&?9$H5F9~SXiwnD zThXCaNU(&raA5aH7@LEYSg;`~^+qFwX>4R~v?us ze;9|H#vwY2UpN|hoZ0j=o5rS8%)gmOgT*;TU$w&hZhr}VjJBmg5i=aCW{ZI`Ar!5$ zn}?4Ct>MoPP33Tfo?FulTxs5HOVQ8J?b?nb)ex)qr732SahSi(WudL>K%rQfndQ8PNTLs+R_(n z?8?m-Z(MtX`GGWq>$z8_Y+o03vWnBx6FlTAhg8}WhTPi2|I$up&m3s2c0=YYINDWx zt|Dl$g$sU;ioYqi@$azW1AOc6llC)H0q>JaG({aB3YXt`5u86(%_o&|UsuD0`|Dqd zaFhEBrio%QlMzZ20x>wV!89HM0Wy~{v;h(+z!X?v5kJLl%p`Td{;|Tz{qK4+~VK}s3n_Xg*H*s=;qT-fH=13(h;U!)o?yx)zf$1nbC%2cq#7_I zWEcqfJOd^QaUY_jT=<16GK;!l9CCTFvnZJQ#>H(el}Q(JR3dctGVlupG{V_4Vk$3U>j_{XDVLRzn3-d8rc7gYV~3@w}j7s*bakDhV)dRve7I$X|FO zQi&m6mA=b0<5{}&b#&<~>EbV=D>09v+)00Fq_I|C_C7Xiz|h1!+jx8#zen|M zheuRr&6(4*$dsmF#G^D^M!@>}U9!zBgnoDC$JW{8)^U_M_0%ehiXU$iv!FV_d>J9n z`cWkYXQ?jyI$(t|s{J8dhx7V;;QjMIAJ}RB2L;&lXOj_169O_hlc5SRBZB^1klB;Mlfv@}b_n8%VZ$Y2nqkTmYD%7-ev zG}-0tk|tGc_segqzpte-OB$j}L?IUGqHPe(un`QUQemDc7k~9oLR3zZr$cVrs_mlw z?BcwBPa#$BZAEdW1vu{C`|NC_MjEzBZvm@Jqm*K;wX=G3R$&?jRvLL$f7RZlus_`W zR~D`N(bhu*<-aQd!HkVbJdTM5{-H{EX)w9#@CA1xzDiH{!fpfDj=-Lw#W*Hr^n;hF z5G?t$WF{#)bCfboupkNw=2(i%+1a|Y_|etCl85|Q#;On!8bL?b>u7Z7s+rI}0xZ4b zW*_YhmzZRH)*DS{z3EcLe~J&N6K4f^_?cnNVdG@=CK^vb{@KgI(U*i1m!o|pgu+xn zAIFt)tB89FL{}3my>A3tgd-3OJ#_3>tV58wkOF#qvWqjGX&x|sXr_>!l>|ld(kBtT z6qaVcuKVZFEZpQe=xJj@5^EG>do+k zH=CoIiGlHdjq`YD>}Y*x9lzU$eE7H8%#tYT-FFZ+lhhUxIfaR_iDe8cP>zsKVfpE3 z)K?)x;mC)=+dlKWSI(b9Le*}BrEIEl|7yz9jx~fCYcFGVF=GzIpxJF=2(P+qV|6Jw z2{PncO*J`@5gKM>f7ZNHp=KzQofYcV8>br1B;Rh`E{RPU<-lg6Pef%RHW`#+N#$r& z#XKp$o45H{$NXd3Mq@%vh}xjtJrqT5Ln65TAKG%pS6{>uHC{Oc6WW8f3mqek}jy%Q<{`{@eqYK z%dphhtWP0y94n<}2i!kp8&guIF!42*$_V8pR>H(6ANt~;z{;omn^Dg>k4PgBMS#e1 
zoXG({7Kxn_xYJOS0*8sd61>ym&KZMDPp87-j}_b_yAzqfof4)1DuF8sW0*6BirwPN zH-gsiC-f2vf3DDv-gF15+#O&9ZPCAQ+w5vp=4 zfYM7Evdt5?qmxFTr9N>3O0w~Y2glgiaCZ_>%ZYMPww$k^El z&Qyj4Gr@sj-Td8Y zZr6vRQ=GB5JPt91ZvxI{e0v$jvx9^4AnCWypBy$m*SpCDi)JjK-Ja-N zK{@rzu=_^4_!-%KykOORv?=%f=5Ks_{zDXm>VIh3hX9iiN)(q-W(N}lF)}nXlM%Hk zv&KJ71Aj3kBqSQ5^+{hWCQ=Q~QUfL7VJeD>WniekODAmaQ)qC)xN2=)vhXdSAumw& z2J%1z8>jzr)`{a;w_m!RpLowC52fKaub&^nPEjw0|1?`Vs>KLvdUuCxWGioKI!t__6jH&{+a>S!hatIr(N^z^ENH&-1qk)UMj!~N(l&< zd0_vFVK!kcYfgG{*GHI~i=C%kb1<#$SL?d0^I5uF`ZnXXn7O{|CR?U;HupWxrYCmq zdSYo+ZCC3tP3opeo5P?iUc}yk!C1rT?&&Xa>^x$^jFe%LSfWu+DzCbLz%AI>1ntdq zS$~y7FycVq@k9uk{Fc=}RKBUZ-T3Zcx%K_kw63#a-~X-4eb4r={oAx}jc?x`=`~mB zYQ6O1RhAahU>zoGWH~Sd2?3`pb&8Hm+v^GVXxIrPhyz=8 zG;hbiI67J^ok= z-OkqQoZ?Q%*C{GY;LbtisBty8|)bpFuSK2 zr>oiniqiN5F{`+jBg-8dR;mc^wnA4*sVrbPY}$w#MZO!>m<(_d(V5b#co_Pae-4D& z!F^8+NEc06V$U&#OO3(H10MN7kAhT4udaG+6P(sL2vm{)Ix#??`x<+I@|ag% zGH7CF>mlQY>RKUB=qO!_ll=P9mkrfZP( zM=I?PHo&_D_Ul}XW9+JwHA_;n-6q7g0}LWjLzhkqG`6^&ED~d;Vm&MGv-8qPMg9yB z3HTIx2zjh6Ur+DuW;cx&e~S%m?;U2JI7#Ehy8Lr>oql8Bl=H=`EFVTj6=lvY8p?0@ z$Lm~(<7!4*jfO`P647v|@i;U=vvjW+F0(xY*tui$(RehB6(J_J^|EE(=t4q5a3@U| ze}FvdVe)8BB9DB2@)%AdkA9dux?|+g9VL(EWb$|-ceLl=j#{b2e^t(brdHP}X!5z1 z*<+5Op^o}{rjF()b<8kFI=GIq!`y59mbKlR2hL1!gO$sE|LWsx+$||N*4rwIufFHLxBL}$)T2zKW4^ci*-`Qp%K<@3qYmcgBu2P zF-Eh1l=d&%tlE>0M|Hc`{le7AO>rfj>U}PqZ}>le1ypgqZH9NDVy00E5IUs65m2c; zB~a;QRZHNQqotJ?J045cvDyn2w=XI?xI)^OmH!B^G_Df33~Os#u^85W{C>td?oGR+ z%lnu_>ngn#x3^U`iByNAAS>Gg=$VjuR9fxu95&Es8|+?w+AuFtegZlT^YlPBIDYi{ zI^wfjYnjb!Nulg&v1xBTYrT@18>#!kaelwrKz)kUdC{0J(u$)A=S&>u6?zS6kCI>> zU5RBn@Id-SM5-MjvgR~@GfE@s(Wsj={VIVIRPZXcY0fq;D3vUQRvIW(Fn;{xnG3Gq z7p+s%1>Ynx3q7@WnrZBhn!M+YB$9qIwUG0Krb$6UrQhcY+jRpQA{S7_Rs7pru%9Nm z-Q#4DY|MSUSWisXZ$>TNP^@G2G@l7AN;;v@$;PE0l!!O!M3jdx`Z-!cr#M+(5{x%3 z$j#t;N#`21ocPfQJq&vO_=}6JethYE@NxIgP9X9B3r6pglMzZ20X3H~v;h!5Gl^j#by+8Nm^N;;E(^_MkMrob< z%|9D3XnNCjruHYfshYa$+q5@9raz_qxU>Fq7Io(PVCu|RPv73h@!UKU0y9m)HD~$Y zWrUo|2po%eln7dcLJB8}IDr^KNYIvIrIeR?TC)X~5;(X>9e-DJSz`h;3a!0R9#n~i 
z5M`r7UkD*yMA4au1PyYOp(}>A6g(jkZ8ao*;WbYdv8iOz^T%t3f+x!oYb<4gI2Md~ zsiL!DXv-3bxsJgVxP%P)cum%tv;(NUxhj9w)Za|j4{fOHO{rmH+G?$YR4c3*{jDNi z2a^yrFJhJkEPw8S4>}5tRB0Y|Y3J9jO3O0DO(+)d|03>D?xzF!wAWEHlpz6S@ie>K z+skL4iYj&7T#KAZti>4kYDmC9AuezdWXkyILYpf}aLl2mWC78oR*9veGp#hFVfaG0 z%nDQY<#PxtwNbT4gE!karqS+Xe4Z%aLjmv?d79Yl{(mjHu@Q`@aY|V9cYsD^(t0Jc zQ`OS*dGy2gIUV|(^?)o*L!YU;Lu2#aR%u@}UNeh^x-ZJLJS7Upoc|*1^e@3qPaOKc zd^)7P@t#ek1d4=+P1hITeQ<^nGPeU=fnur?AGq+ger$w@(l+?D=xH!ll$3MuxOwpB z4o0G_K7S}jvBDf0C}jzzux8cW<5z-mxPuHsekA(oBrAwgar;+^KEd2{vul~pNfp6> zC~x%hnqO8ETcB?xscH`VKqud9)m4o2qHG^T=pt<}@AS+b=c}DkG68)s5dlwZtt}JG zLFZP1!sRTXx3dIT$mpUq-%hS6zW!IF0CD0X0&yHK3hRwmBjqB9pmplzD_wNxM?T&`1tx`6d3*= zht#23lMzZ20yR04!88*CFfuWd5w$6QU0ZY8HWq%*uh3*?r*dO4yi@9IAJW)P(@7e~ zaW@ZcCPR~u&6pw?kc#c>^uI4a02B#QGI*10hn;CmNg!};I5_9vBPfq9M)K&@6Pf#~ zpuYrpAAz=}4ATbPdrL*@u%&zx;N8`sDeM zHXSLtr0J$QI-QTEk|FCO)sUr`GCFlfXXC>gdlC6yZ=xyFu@ml1RCDaokMnR(wZ?Ek zKKXW>;45o?mv)dW7vyU< zOvuHxmt2x7ZxMwsvG~n+1x0U$`!yLxBL1 z0pP_g`MB@`Z{dBnF;&IEP?rX^2zp6$YvpuX9BdB51>XmnUZe+ffyp}~quNMYcaN5~ z)~QAd?P)*YCfcVe{+hL@?5U_oaY`Z5QMqjsT1iigTmW2piS-e}h3$W{p=T#RfD0<` zim}%65iA$ctvDNYEZA*-B+_SdvXP-geuyNL_};k`-J%bPC>Mq>7m=UTV57DTA?Ncs z2}2*Tapfhkw7^_wt~ej-jq1e>h_wT8lc_aDF`1ZX@4O3Aokbt>vp}Ylc+M4oT~PbR z3xEyZgpQr0^OH{2BRo@+4+$g}Nj6xgvX31vx|Kq_U?kW(gDyCKqMp7d{}SKE^`}ss z0mAxgAs2|Cu@y_2X=-6<{CEi<4HJ@(EWWY*jeg_WD3Bn_x#xo-1=UL-bX+f{)*O8DH z0`*-GBuf}{sX?qjB$1{%{1SRWPJ{cIZ>9{U+APYF`uK2}V8`9q-E9hy!wB#~VuB}X z!5`?zB3^U+H1(7oh$Ym;(~7Q@ec|rV;xx-GPRdM`W(H!Gh~+2T54V2lk|mq0!8L6# zTi4*HO-o^aq2+20)L*eDs%%NJrJ@T{nj&iQ3->9?mVBE0<1C!5+4E)Kq}gmS4NorV z?<#6rC_ETzN(vq=OC;AJ{l#0vjdNtH=Is86f{-K33XW{UN=t+LwjW*E`{y9BH@Y>b zG@St~cRdrEx-w7ZzR-P2`NmIT1OEIow>;g^Uai%CJmvk_4g@k*b|MeXR-w8jvrz`K z=|mtqHk-`bw6&Y~E5xZU_Hn77>v^&@wW0}sU*GC26PjCH@2SaeL4itRyWhG7m7QrY z7m9K7)dfM=*3m0yzKYHI@$;%srit|KY|h)`7zav%vMw2hqTZFw$%0uFxQWDBp44Mm zMtYEcjC%K!cuk|qhH^@$yP!dr^aEA#*-*L}n{lgTqbh-6+6#~X zxV`OL%ASYVQN|ySDE-aKCA1dxaelzyhIjdxv@%t(9P1fiw>AjlJAh%#k{hu;)M^Yv 
z0XSTn4mVt$`(RB}I8>B|iqcR~8Y)VELq%z*C=C^*p`tWYl!l7ZnuLoZEDf&DdUce| z>_)E*0qCTd)xjPl7(|?(WXq)M!}iWQpHLG(mE?Vy9{Q2aqHbmY|Q9 z5eI!J;2h>5bZB~C21z8>VHt#tpV`jCBxtS(fn)nV>%dM(pB_U|u!pH>{*U^93}M?x znT7NG_k`o45Ql0GXa|V+N<`MiJ$9Ah|G8Y819)u1#B+QQ?;m&z5X89JqV$-?525oA zIuD`q5IPT`bJ;19#i8*$_~l*&scm;$VBd1+0TtUj$sPJ4G^8?au~F=HvE|=%Qg zwI+@-Oq_Od*xkfYwm)&?$2D<(+GL(-3YX7cnx~x=5Bt6h7twN+cxGv&yV_;lsU zI)ld-kK4*_Zwa2Sn=F!}F!0teeqNc7U19Jya_+JH`f|*Pu_(mj)iyZEpaV`EN8!~K zfRkh24xB4`7K*o6`gVDnolE%IzF13}~%W>@OmIf9VBSH?FpFSrPW62$dH?IS< zE@#+Hv`SMrXY2E6xqTsaWFi%hz{zG_Abvg+w1bPv@SgLhVn=l8#q%+%Ir*|>P72Gw z+S7+}LFbvb|K(HLMJ<0=u6RwiG-1Rm?&UjMB>3q;#pH-KmVT^%&M?y~$<${B>0Qsm zmchD_ZV0pEq=>fUoKm>$$vdKw_F-DSw-PCc(v%u9`SUb&apdQlFr-`7;Wq$D$jS~K zfBVw8b@)R*8$iU@%uU+8=nU0s%uwSlU&B~yUslkIr;F##U}M=R5$YoRmK|OY1|Wms zr!J4}*CVgaqXKY$%F~bhAB2}TMAobO?d4I{(uJ3Av}lxf;St%sc3dj$cSrHrNRM*KpjpFap^)>4>%&wkz2!^iyeZQ(QD<5!v# z3_R_v&11tW$D>JxW)=)1)yfXs%$9((@A2M9Q{9v{COOZ;ytKG(CTn#gw_nc7wsef& z-BDc0b0;>ds-&4p5d+zcA1fZUR_t@#b^r{|?Q_rflG_u!;ni~J27HtaNPMyD8J$<& zGDdkR&ehz1?NHaYq?CKX5`DE2wo96HfH=v&03v-m4}Bc@65!lK8B_T3HOpNb2Sj}G zf^r_vwnW7pw_kX55Ut18I62`z)uA+fp4J(zUe*}cxI0KJ3_hvQ zP!}~LauOS6X zWNidqzwZC6ayNO!6~}Jft*~cn_{Dyk>aLS*1B}~N)(TftA{AL+-ozThiS){+SUysh z?btpxK#XuF~y#c1gg4yb6dJMfF36}{qMqN^r;gK;GdoEqFWRvyw?>XNc zp7$u!L$s(lPr$x8$$}65eQvecq7z^ZTbrcJF*K8OL>XhK%IDNDxa#vMi&emG-*Sa9 zD%GV2_J4ESkETeLrBxjdlZV>jP`4F-)XwY|YS;SB?s>X8dI9ACijOVTpk2s!t&^U@sOhPEgOg>uZ*Dtj#+s2ewSvs0-1+}l-q0so+& z(aiu?L^qI?z%Q`n*!;QW8+C%a<08Ip=zJRnY=*wXHF)ULsF%)@JPWy3t5L*%UX*wX z@4H>vj@`%$TNoQ_byI+NWcxALWK>mt;4WX1CCSkS)-=UxT+% zH8*n05Uw%EB(2FqJB|88Q4rFs>AC2Vy6If)ZuIn5ob7r2hM5a*UF6N!VwhZpZZXZA zd*r}MI;|S1F~j}FE%mtY0-ULTpvj%am?R>nW#=1tzraPKDO5kMGi6CultN&)DFprP z=+y_>Ml?L!)E#B{i|sVwH-~t~W=`?sv`?(=Y=N%b@Bf;{|B7KqnylX?&@4l^NJ4*| zKy_h(X5IZFZlMa8=<8;BZMHR4n(3Ob8M$5@b!bK#t1E=tQ@AWuIR06Gl}UBXfYq(n z7WEfpw|%OpvLJXB_hu3DI^UUr4A`l_Uf@7r$1F+<(y$

    M@C3w5 zKWQ5qA?s@%9o(XLhvA|hsw*VIik^02A7Niv^%6h34UV{Zd?L0`XYs_l zyyH)wq)H#(rExRY_Y~dA#gbynH;#Q9@}uI-Zt?gvo!fid(`Z~!mxd|nR+A`%I<$XC zNTDjIpDDMk!>pl&!;9QzZbwU2~(z@qK><qeuCf@0nV$acrfJVv zJD&5GH(u|G;fmq<5oSDx1_W{HJ0v6slfWs3vwHKllTDMah&vu4FfL)DyLo@<$)DzL z-u_6rgBgFKEFjK&<#6PC%ptyqBJ9i)XW_mVRh;d%X}-RmQ5?E)9&bNYX?1-+|LM}H zJ^bJx-@yRZlYmzaju2}6iRmH8%ULvQ?qFBwmg!zo^;3~o6@XS7l_>Snmhg4@ zq(py0+>I!O)(gTUWUhW1#ZT?bMh{l$dJmeXZsLC&IFP8Z(rUG2D~@rzNc+3*=ih($>pLx| z2J=oK4sJg);JY6EA%z84|1`UfX4;L+y#nX4-`3UJA97KyKi#GIm4V)>3lF%QTnDGFkM)EYrLIigu#Urk3?eF!DwpZlXl) zRiikms1Sg-8UT%NjKP6TL=Qk9izac0pzdZm$t(`>G>LWhS&>`$*bUz=JgOBP1d(2awvT|?&G9+)7suT z&a&7PKi0!fW761i#2-JVpWtHn|KMNZmvI@V3W~z9t#!%vS}}~9+Swn zE~_X9orz{H;Mc@IyZ8o@IvrjiJY zD8lcD6k)oxO~%+p;bwnrOdlw0tp&WbpQd6p7@#(_hf0)BaouK1cOO7EA9V^P-@%Sc zU@$03k(sOL-4_U-@_kc=_3Suu$ba)3+W=0SqM6gGhH*I@E1;&Un#)EJ& z4CZ2rNEn13v%$kh9uG)JY+wY4V+=gykrgT&vCyYpXaR$G$csEeeG9e{jDoMA@HBrUEDCKjjRc^rK@?cAHADgD#6QjE#sG;BW|&ZJfe3}{H4G!nEaXcu zir|mfFoaUXz-|H?9ia?y`4$K&AJ~8i5RL$s$OcA;{2;J18_H$yz#+B3gF%$y5e;ng zok>rG0?KJ*qiKwwD2zC;3i!hiLq0C#>+t=k zRt24@K9qkVel#hP&cLY#EV@ik_tyJpPSGE~8xm3S^-88=#kecae3={7sPJ=K=3pYX z%3}V}8D-J7_{biJtMirbGg3OlH+-oZCEK@Oy-jtnSD$$OB0%$jabj`!@wBxCvPT|8 zmUYRl*+KRp$hI7bq-vBo^C?s=kY6)DcR(0db;yj5f^A42LwP)ZahV5gMdOF5peZH0B9oYJhWt@qj<^tKaH@%$j->Lmh z6f#CejoY2>zDxbU82i7^--)LI^x?(Vr+5nbV0`z# zYUgMJ?=&oodRnHXZDE}|)B5H~KVYYCZL{4?nkT}{A)cz(VscBJ?<49lV=4VJ-tJZF zMPE#i%2PgGO!qIx(+JGKKMts86M&UwDen1%e&>BeWV$Nz_CWFJaN_;Tz574R-~0v% zMxC6K5lR!2B5f%SF$ynCWo~D5Xdp2*GdYtHwJCpE+in{-5PjdTn8)^Im&5BYFaqS} zv_M+~b_%pL{Gcd{#H}G2(xyoMzK1KxSZTe;O4_yjP{1~6ikF#lX2=;*tVj`SGMd=o z2Zbk&s0PwThz)6B(vGZgqyssJLdZoSZOL2C3{wb9#*jo5mLh(Qq8I|D85kXGTEwAb zlo5X)2h5B=))Lzg$zo9#V`kCrt;S*~1&c;vQHC&-qecn&qW5I6x>AumMAHz=Lreq7 zyhRDoyn=L)%|i<(hJ1wVm?xk;U@=UggG36@TtV>wkr?v?hcXByeYA#R0)@~niZH#) z0EJ1aC)J=k3C0Q56;CE&us|inLRiu&2#0?qtVTP4u?DC{sAd6*2uk4$O*U`@R#3^H zM7xM4Kx898XT{JxX`evQ0k9}b4l<&mP{A6^s5mUj3cPqP88iBb#c(j|73K#CsQ5HI 
zfu=AVQ+NYN86SI_I%|cBq|N}yflt;L?$fA5br6?VB5-EVFG*d75eO%X05HOUkRhei zqM}ut$8Z>ecxcJ66!8vb1e1~gY?3WY^fqG$TjbihJ!~^A>R_ZcLJLqAT{1R6939wW zCm=y5GsLKfp$v#)1R?Bd(343g4M2ah$Jgsi|wfuN9c{%?|SIISv*SXZ5 zr*qc+I>UU6$!tmMl`qmNn0$d*VfytH>vbFA{PE{sm?1YdnY~F9(d^6Z?e%{EU1i-> zx4IIvy`3*E?rcKD{g}y26^TD{ey!+Ui3cfx_&bznJ3J+DpuRZw>^dOM z*tT?RVSXTCzGMfIX)p#20V zkJ=Ng+r`$7uuWUr8{3{;N7;K1#L%f0hB5+%u|J0KbQo^$2?z(B+1RirXE*f}XK%(L zmhL33EIVDG3ymXx@Tby=rgjD*dI7(V^l$hZ;lKyd&*#9}&;V8D^G?5WF>K~^`d@6h0 z)){oaUv+c$eU7$m0BL_lG)HmQy1R(S-(AGG!(HrucM;uF@vlBUwG}foW9)lg20pV` zwSw$)pF22X>0-9`wVpa@oE_5z_J*jKu39%}r;Az@j)I%H4R;(0?&8I4HpePg^$mOy zlULWfyHeHtvf|}@@v&Ug%c<+)r{cHb75-u3>m=Y5nrZEGEA@XvOs)~%7~}9iq*9>8 zS@gU7@}a_<#V`^5ygfl;HDx zzW7vpoZl3za-eJq&aDep)Q^z%K!KB7;Fa_ARNx{eOfNMikK!Le+GM?~`dfO>z9U(B z^7Ql`(LcZMx*LBOG}mEnC8y~nX!t#e!yZh9rx61jim7fTkgRzGO!Yp`H~O^JZ;7?v zBl>MdTwAAb$4NZ^Yq8B*7=tz0gEe_NtTmZx?oH_cXQLn5otn{ZgU_u9>z?yNc{8s4 zs(O5$$*TvACK;xGrrEde3DcO)aJk`ZTVys!YaV8Y(q@0#(V2;3&}R3D5$qF<5xQc5 z&O6WsS>L~d+xFpu32mjLZ0#t7x;+Rzv~Se8ufNcf3-X9Z_SruE*ffvkS0c6!$mZ2U z(r!jv+>DlCV znNRMPbryefwxOshJbVP%SUpT8|H%75P=Rmzn$M%iQ2u;5^?X!b6T!H`Jq z>}kYLuH^QJOx;N%)21tQ?y?Qn*>b^#Ywnxh9qeB1DAk2Ac3HPanCc!qOda*&$hRX( zZ^qcdRWN(uoN(7VkbriE6blCxu`*{7**RG{3uL4(VX=n z^ST2|y}tcDh~k4EdetHHRC7K2ar)^sICsFmKLJsippdXk;~*^Jl<}Ap_3|YM0va%$ z=IdoqlUc)G$^AQ>uLrYuLn!_V3$n#Y2;2mkoy!b33|O?Z-+rWRZ?dG zLh-|h@;FCm?OiG(W0IkXs?@0DfO6H8vWLy}TMTa~9ZQLVXQ89%RXYtRDB8MHmFx$4 z@Z*Skzv{>(TA65f&{&&o6i`Kfs`+HB-G@ONKOXZGiZf4=SQF1_l7hF8=A2t^w(Mf! 
zekRw;D`XqgK72sExogshEPuviBU#tT8@~N8;DP6F{I7t0{YJ)BoO|_4(VitOy3Wz3 z2}r;3c-#LPuly|E=K|1J7cG{|wpo2F8ph3tGbA2$8JjNcvDl7!-J0Tm9(!RMua{jV z3fsFkHI~?J7t}+s{4DAYyYBG_P}UoV3FM9GiDNdsf}!?mHrT-#M>NVqr)@Xc^~)$v zot|=D_gu1o@|-(W-J>z{PNf|X|`?CTPenQgr#nT6~aw9fe$zYxKnh~ z0D00=$IY<0%ZIX^D!YDvGDFBZo(9+Ww^BBnjdy0qSw73E?d*Uo9k|wbghIebe{@6E zaA`kIg&dYS4e}a7dO^rGrf1o|*r9|2?`p^P5=LcgZ+H8yp>ju~i$>>tsKn^vbaTBH zMX4Gaf@+4bp9;;KnW=GXePfU&!4_p}+P3ZP>1o@xrfu8yH*NE4P209@+qP|U=e^xn zL~K<3sHlHcnR(B>dG0ycGNEEJI(U)LGSiWG#9*v2|5z4#YAehDIA@RRr_Q2fORV2Y z)2>_|HNR)I^(3y-s4!KmIQWHV z`7cv5k`Cx}$Mtc(L8|;Wnl)T$3MyS!Bv!#qG$wIm9XDsk8F)DarUHTET1{Vglvi_{ zpns0MLbY?;r3)8l<&Wiw;EJo4oi+vu1g;XaTh2%gdi4M+?OD}4CoFmU_ujPbdH%Gb z8F2mD=r0*fzt2seh;?YPEuq`~a!C4dcZMI>I1NYRzYnCUC-;p21w>N+fe826R!w=X zA^JlQK{h?_hKlBeh0slSy@`q(YFk|FwVm4PALs`7jOQC=m5)7XqPYz-y*`DCe2hz+ zwiq?O%FUHIR=~bPM4;%lF$XE`kP{tqR9+S-D~W^rCVNRCqxRzn_y{2??u&-R5EeN9 z%riR=o$SbheQV&H1{^3l+o|&5o(*$vrQ*)IELKobDDLL86@0`vOWC+ZRx?i>Q&BZ2 zWAp!2Z|E2k)#n=bPs#DIy?fbZK0NO)Z$nvSnknVPJ@|7L)B7z|dUtm+%w{s;-9nsr zJubhL>0=Zn5H)Ij3aEh zr@+17Pf6$q_%LqOWFh|srmve#G$+Q`UK8$>VSfaN=C5G*GE}*kg|-vE^J|li{lYC% z21-x5Xk`OuG%Dthd0ACz^DVnQqs4<6&dIVrhKJ7kc->MSO4PW^<5ob9JX#6=Th9~v zh}**kAHlu>Qt|jof9+;#zJ*b*hyue)Wql2a&}iTBB&NTtyvbg0TlZfzj?RHy2wqKH zm-=%~fNcVyU*W!sDsOMD@;{bi-LC{dcbwV{|63Pn2cd&<0hway>IeRqfdWK9Rpbud ztAd96{XqPc@IA9&>1PLbN$;0UO*n!w{;Zys)}4=YIm}$?H=DAuOdNoluP1s8)JYr% zO8jnAI2rjju-P=PcrhI!5~KG2=mPY0(E@xbRa0phGuD^v9zeR_Z86M2^M|8EOFWk_ zwQ{W3v|59io;#0=Dw8%g1>pBx(HqV$PlDzSF4`bZF09Na^jkZb4jtH1&h#xCI@?5C zrg99ekTXs$TAw#b(@FdNJZ99S=|-r1skYK{jWzpWD;-yvytI{>JFil<)<`*diW|97 z%yDa_n|R`-Nkz_fi!Or52gn%MpIZ&!KQ1Xc&l8QHPIsn)C0R#w?Eu94{RfHyeS`&} z)W4c&mp{?xkVC7Cqi#wb5U-74VXs)8Pp|fm-N!b6MFnCFf=phCp>RuX#@XEDg;ATy zs*j=53ahc+Kg)ocbs-N2rsI#W`)m8rx3z65bkU3E)D&;iReKgn*UGxbCOTDP$~ks3 zp6jR@&CSy+{t9BqnE=EY#8XAqVxRa%NS!L7Hb-jIgQX_Zw|LSg{hV$&YWmZh;IhbM zlMnBq>%OPNLBK^bf=UGGn#dRhg5|S>W`?+?Tp1fb1j88VHCG^vwnA*W&@UPH!I<94 zUv%@b`p!?VRPKI!{R@E@c8BR}-g?oGRXWE9EXKDi&367S&yysqA2}2pOUb)M$iv1z!++nxF&w684D>s 
zRSol?X7@v7q_EVVCO{^~g=7c`NNCBoD|mA?BkbfynAp~Oe0FA6hjxxzOU(J5Ph0ya z%e6TbF{hK?CE+AQP9s_W8o&M8ao>j|Dd}`{A6haJ-yOzYS7soQ$#=SW`%L{$uYp&+ zPF!c`E5UA`cC~+ynJC87HEMqMCMz(Rj-#XRhu+2~eLw%SSA5&C2h)DtCt}nrm_hR$ zxq9hpjxT!WJN&vF^0_Ip((Iot=J3na{~OCYZeyiAc`2{kg*CH4z@m`*6ni#LE0ac7 z!HxLlE~WqAknvT3h;sCw-i_XcvVsrQO2Tl*2Pv9ryQKOZAn4Iox_3td9Quns%7?_v zZG4qeqSD^GjzXz|XFY!~%^=-{ii`-lXEUX%wz)K-+DgzqZfWxB zYI#zv1!7`up?LIq#@G}vpj{Fg7$@6*bj)->s01W{&4%f{ z#r$7cqJ7=@ACRj{5OH+71yjq<(rqg{oLRE#VjS^4h40+QIw<1FLGeiD7Y5P)u1^?D z@ja$u%?wojUDiG0ydRnwzP`H;!*r9-`S?$`@a;s~@|hjknYtSTIN_+cRL1gVa0#=C z6hXyN@?^|(qt83iTlJinTC9Le>Zp8rjbk!?7#BU&JmE-|lESHTE%x!<q-n51o-pll4WZv&7H|9&_$yO| z#Q-h0k*;emstk8w?6dJ(l3m7{XVQziS9K~Oc^(GGc{zX6tiqk{?X$`Uv9&}fPB;uoXDuS z6u^fEzf7GYwci*ULoSix@xZ8w+dgisUxnA_hi#X8p&I{2N~;MNZxS&#&iVGo*T?wE zt?QT1p*j5hl`_{grVY2dGM<*1>eT={d z-n@?qK;VwtuyLPXoi^=e2+hmpoV_**%XE00MOnA7-G;N}#(1G3g@wg-s4OD^=WK?2 zr8ChRJx1x#e)A44TdwPqiV8T__e4lw9$pt$?o4!Kq~epIc^*OUzuD#G*pzhJzv#$9 z^6=rB%5LYlyON=zMMz7C?VHJ;f4OT1H=IGef z58Cd`vpEEIq*4L>lNbT>jL4nW)i+E@VatwPTnsfnlK6t1pimr+Xp!U$j&p?{UPCy| za_yUS!uXu4gws*k+a0wyrO;&d_A9H{J=i@j;Pg>JFvXhKXjx9UH$e{x754mmuy5z_ zW(VC(j6BdMVrc!_|kLpEr%#ga2&6=Bj z;(KsV;&ht_m;B>fuRER@8th4E8_>IPMcM@|lP~Orp%H1vC{)_Da6|SM z)vP3E!`d{m)UD=d0?l+)#A|~UPe~HCrEfxe2{k%Do)7RwdkhJhs^KnPiF@3<1U#YS zg^w5I60Y?MB7l{+rQDpO?z|G)s+qqbJbtxP# zoR}$JlRXV)U^el`r0BVr{mk^8k3!5|brstbu?5J%&&=A8Lf0u-d(=lsGxYX+)IE zZO>gs7|iMh_B#-xcpHBg1d8?LXQNBI4PK$2TZlWX^O1xvIP<8X0tp#ZUJ|(ROWfb6 z7b`C^&2;{B1D1|r=P%}Ksr|Pi%bbv6_G?wi{pG9}1dTUnfo{ zC@5(wW>PiwMTiY&{Hn|b>v&Z@3# zU7+c(1Yfp|wYF}cpm$w}O3(o%V{xgvOx`EeQ45*HG+2@EX}~%hG_W=K&Iq%e(dh@7 zpy7d>8BbxaWlB8HL>nsweXd@?Rd;FgJZ!^OV^sD6$;+Y=zm=WLeR;WV^Z^@O<#0BVEm!)dG*EF$(OGLA;Uq-0MC zG6py4!ZW(u(^R^hKy0KyZG-saC3;gp$xV_51w^Dxx4DBJ)Rr)Oty|mMzomDn zpTEXPAW5A7M|-!&uNH69`a^C=_m0R@6C&(cA1>I4C-xEFv7t*vo(`h!`S9`z_D70< zxNW~Jx=}}YL(x>T#?DcOfzWSleJ=2nZWt{9eg;3f7U)b?%b`KS{KV12c44Q1t*1Yw z**f#{R2klULCt*S!M!DTWWZtR>CM4mzV@0cRl#wCHA!KyCUH&RHt7Yv`d30`^3oVs zeK%5m3(u_yq 
zMGM9IwW1@3VdDP#zU{T*N58P&okK-UNJUV`-<|OS+gUzPVq-@MK+YVh`reEPaG+s6 zIylF_Dcs8h`p+CM2N@LLX$3P0BzH6TA*K9}jB_FZIj^(xd70^F6>jg&iUyx1q_Dt* zWw_zOQ1QkjB9xVi3eoR&6&Q&Vuf!T`3iDzp)x+-aFibA5?Ym#26Dm`-q$hpdsDQRC;sMYKp zunsm>eJEk*&|?BLocAK{=UnO{>+X}>lHeYF>`j?i+XCX+ z?)hH*-VXURM5?t=gB&Mr8&oQ}wc6mMl-#WWike8tGOvlY>Affz+JK*Jn5U|#+@AcG=c3j1)}k`+^U zYSvO$Rr^3JbIZbt!sa{4Lz{&s#}L&jH!xea-gNT*!6XDWt()25oqH?s>&3{1Dp4KO zI*E#HD_brf(u?eih=DKt!bm(+2{K-j_1Gbnxr`OU0R)t(Y~_)z(%s=H&y ze9&7t6U`vTLAE;v2rO>zBD1z7{{E0&w&)wf4PuzAiSm|IOPcj)zkgCIvaC<1#DLxJ zESea3qvcVNW=9cwQ%O@xi%V(JAbfOzE)=)MaAYnS?Hdok_a65k!G^o5cp4k#{Yyk) z%q89I&s{!RvWYC5lTT?hR}SqonI2}Uj}RVGP(@bcn}E{*I9Qg8t^muLI2LOwiw$zu zGU>LMdJeORq_m@4)BX3{1v)vvmBv-Lo;DhoHY?saK<@h;rhZE8tdcp&CRe9d^H8~) z*!e*|h*4uF@06(?>fZeUyka7zX_|Nwk~yr3c?%NvI;z#-J}7d=S?nFsiU&DJ5fkvF zJs!Xq>3+jKTz?n4KFzr$O_QG948OhIhRELU1?1y4T>;lhsKD5nQ*x7`XaR@naNpZkBk=eS2eG=MSnh+DhR7%_$W#B30g zvobpfET8a(s3RiPGT%$El{$q=w9E+DmZY(x#ZYk(!#U=?wLi;JFak-wREy1EBt~AumluDp<)p~FjpigvIt545<5#8(L zwET2j%_N`8;^1H0A^>2htosrA{Y52e%s$$gcdC*9{4%kDDtKnvlt!}Y;A?}2dk}u? z=4s|wUB$SR2)A$h>2=hxU5LNX??-@?AV$6i)*9Uuop>m@-OY-#2C*LxUVR-rvzZZT z2R4wIgJ!jYw0uESlDZGk-xlEcm+vWexH|n0fF%E0wCB_afdN3L=67jyxXhsTz4b00;Z=8)Q{&&W9gB}|ICy=eW+#xOJRW8 z>_9#(=hEjxJbS*%5l}9GXFk#nx6=QxBbL$6B`B)c=zwK2#R*ZwSLgq^pU8u!$ZxB` z=Nx99P;Qb@*=RPT%m9Z;E3zHJI{nyZVuXG-_eX7xnxs;9{nNvU9!o_U2NPn-j|!B@ z2(WEtM9?*R=g>c(s37#_;*J#20Ci_ik

    a$bHAH+^bn&zLsw<(IZS*>zddSqs^}- zo2v822j%-!8EojKZ$S5mi87D0kappo@SM6jIeb^n4WvYb@=hrcdhRaoQFU+a)cQ~8 zMf4Cp142@N&2$mvNs~z*xvH?`0F7F27r-m^V3!CFkYTA#p&{{gyqj%UB)X9XpI3|47l2e-Gu!?C_urd3W#W^sBXDOU{ zU;8%=Ps5h!a}iW>c0NA;?-2MVudd>4#oZ_6TLP#eIbTfK8uDE!-92k(6#O+KJ3vz| zZNn#;3@t`IM9xIi_Hp{9y?XY6+B6)V%j-zP(5%3~g7&Kh`>=)|>rlw{%c;j7*` zvsLb00+(fy{%j(CKY=JJTXI{p#-Xmy2zDd)TVM0;1LWET)vVI5ArP&c_|<(KH%z47 zU+%jYqfXG~3~NS&(Z1QXFTAXKaezRI`_o`84VxCOc366GQ18^Irubam2&0yv_&?vJ zAZrs6kV>{_sU+F@1HFZQf+vjiKJWD3@YFc%5;3btIR|cNmF@@K$0HCTy@7_K>KzFE z(}j}RN1Gye+tuEoPljeUN&eMfk1?CQr|q+fOKlIAw^N@1T4*XQAEznfL;!mte!WfJ z4Lx1CbekT`)2z?%56-~dZOGl#)BgwOu3TJS)5EvhMYK=H`CTiSp=^@iSk`&LmRRBve23 zg@e&kBenLr?a4m?el2sqq;rJ|(3wYcYy2~CZKP}A`pWleN6-x|=s znV48oNbEpqfUY(808-F7LXAkh`bE0D#Ldav?oJU$@^K{2rbt#e6D*BL{I0IQyAwWj zd`=>~cY9;&p3EN19)2L<5+7mn14uBjT&BFF(d6aj9Nv)(ese({i6ZTwy(@o-@LnAq zT|YMzpy5i0L^J5s$a8bCkO{q5O-r=H(*K^<16Ep9FV`ASB@$P~Ra`5R z{1V5T(r#BQ7pP{-$}&D{UtY1>2>3u0C9(0ZaX{$$l+z$GT5fK9BTp`)sA7WlTaM|) zET`l$O_9(Fp?|u9Ja4PHl$%Z9I0zFtQsQNFw#=7U!U98CoH&EHI99yx21+0dbdTm4 z8&YpxhTwGth!u<(O>o8hXNe1xY;K^6)~AC%?Plm}71)V7ZJWq{g9B@7aRC-O{_!W9 zg*-h*o>URsDK47JON{GXu9cQ83Vb0=wORM84%0b}fPAmpYrT``da-007ILTc>N7PA zPThW4w<`UZ^|e}egT3@$m&58`DJ_h>Uqbg<7QSgHek;1^>Z-nLbDhTCRDpqqA~A4B zKT7YvhX?W$L>fU276+ESECDrUI${2>xN49wzA7-<3ob{HpzeBC zRpJfvQCJ0-OIqxlr%R$#P0M>^O=(pd2&PAxNyg$vX+v3)H;R4+3fV;@SNKHCvS4h$ ztU4@!Vh&F8p{IMEbqlk+$k2;L%i*ZLkm*Ct|Vjx4fsC@7sk-Y#-l! 
zK&r^fmFJNMG=g7b*J~I}c{K*htnq|TWvahX|DNx3hNvw%=7H7&dbjF00~Q}uFDWEI zrQJTtUc+~BUW5Oz$D5ukEe`M2Tp+*BB?z3W@zcvgq-LPB_8K}AdMczD`d#B+1UHc` z`Lm>0(Od0Jx==x1N;}jSlLOL3eQ@yM;ruV!!NtZ=5C-=ICeUb0Bh8H%=D~GPJS5l- z1>sAcF%WT=H6Sah>W1M*`-ZvqY2N{yay*qy^7G1zXl=@o%XoWCU^O)euH!qSH27W0 z;x(raS9%UPb-v~@yUNCGS`hdO;DzAVEoh<_@85e31xwN-P#Bu+7BgUvvcF;WR`<^IU8VAW|6ln1 zpJtE$z&FQ#7-CSigloEduEzh+ZC5%6?`7b{r9^**xuP4IW}1GhIeec335!zy`31Mt zowJ)Fki*ArnMl-kY9hTnHOvgGJH~=qhn^QWE^x~0Ee>DyjMV*tOD6YbRZekM&4=@P zR&i#{a!S}7950%%m}6}^Y9}~dErZ)cE2(}f+6;ljt>4$T&wjp>{bT`H8yUx}2c^6> znd)+w+jlw7qVPEuk=`})eT6|TL&nc9u#!1WAS=$jf+epPcI&=Kor*A6nZ3i!QWtP%k4>-C@uzvi1eqhS% z_zyI2Vh*nV=H>Ne&E=o~3X}|ga(M9h-vKk&UJ*{#@RVDhyidPnXqRd zq2Vyo!LB+;a`%GHr^bPsm-@ITn@8sG%l+}%>^;X&`(C|wVDom<3ODVY^B$8QKl}g} zT?Z284R{p%{^idF`r;ORZWbHe?>v2uW)6N6;dR4QnH##@Q!yC;qIRVog^`SuDADaM zx%PN%Jr(ZCWv_-=ZP<#rLf}!dxc7aJSec7c6K(qQ^!>sD)g|3#!jteQ zA(aT%M%i7n7Wk4A4W6>PXQQ8}O^{7CyXP)uM%rIU&$4;j9A-;>WaepNO=B`uVW8g=Pm2@^29P!p@4^DrdR|>csEt3an2utrV&d)NY zB^EsS5k(Z_RdJg4OHe;iak-0@ zwG(Dotrg71JU3n3WcohpsD#=^IS{n&qtBLO^fm#7NE?lA>#IMpF{=Y%&=QA2N;Rz; z;eO3Gq_Se_f-yHP?5+;bmLVn|Zq(>HVYl^W)sCbjY37X5 z6SZT4R4U=|YLQybGUc;uG2lDB!*1|&XkPIYN04FcU~@Y*(PerO=>F!t_ykw~(Hrug z5dpUJk^SRk|Jh6ZlULOPxDTUEh74Ib~4%}b@@P=WdoH0!$%H^3DM2g5^X6GA* zwRZCbWmfMNtMAfftj$~+EiTn(brjER8&6`@xfimgtQ_So6u2gxphe+?GS%al4J}Yw ze$S=4%d8pcT9VDgNMZlk?DkU=Pnhi?9=Q-)U2i4K=2UqXn&7%5Q~E^zdm2ss6aAzUcL$(*2zkHyZd!AX4@`{S0@KYR+ww8c=L>AE#)cyrKz^9qHX4`Zl7a{ zU2hwCI!zaubwjM}V;RlcLr#bz=rw05V0G|X1`cr^1aT(;8jI?A7aH99@OK%ZZ>KWC zrxzT@8WcFySUIrcYs5#YN-1X{87XhUx0dX?;I)sxDVmp6ZF$Q!AnNF@` zl2N&K{qARKsbg`D1Y5SzKb(|ecQ-e?<04WGPvnU+TX329U4g{F8(Ro5G8e@@twbo) zDd#QdYOpcjjXc@4Sl9=Fn6g`*2)MOur$8yW#0);=?yoDY|#5aVW>+*+^rT!5q_9br(Uj#6AOTtOnS1Y8hiI2U;XYDj1VVz3YqHLRqETQ9V+mVO4Ke7^Z* z{Ee4zTF|YHDxhC}hhb(;lv5mnCtOz$ydlK&!5m{q>}IY`l{6M&NQ~GBfkzAlXt)^! 
zZ#L+;!lb1U>lX%wis%M^AzBsnWkbqoW z4}$8w2LX==*dq;wKw`}C48n#{j*c*g5)QwB6M~o(w*W_t1_`1ggke2oLjiveoE?I0 zfG{1x2DFBW+W1T8!F9wjW`X}Psj{QJ=2b96oOSkT zshHnUy_Wlg!#DV_#}^NFpjJIEyw^c-T9R?B`*Y z0k!z{EYR3v+os4G#1>OM;GW7!_nM$bCr^vDarTi4(J#z#>7G2Jzan8ibl3*VN>Qq` z1U%}j95S(7@yOO?y3w&BKg3?88s@EeO>H}12EYZU|jUs$k?PC zDt>xCS9l9+uUL5nedtKAQFB{%ZCGKu=Ms~QK!OuFZgamvZyuES1r~dj? zK6n=J4D@~%g$FMZ;JrOnC}R1Pimfd{eJoz9ziLL*xx?mAZP*r%ZyW#ls^o#B2ypaL z#IU~}c&Ed4Pk)BFcb@ACIvpziY4u^Sl;W`6mXEfDV|6BskS`bwo-;({J1@EBmScqr zUdH~J`&996kyO?$y_ip}ilkWPr&#FM8G2c*eRh^CKKm14=I7sUjJ>J|OLUBR5RBhG zBQrN_d*BVl4q zNAtOZ9d`40vIw|`T}6D{B_sl`rPwZkqsN%$hUYS?Cru_yy%#V+mo`81xypF4A)L?1gV0pGX=9 z6>#owai>YV2-dOeUajX~i<9u~*ed!&*av?7Slr)%0Zxol)vxy%g93(7ROk@iua@nLLFGu{KSidmc`Ejzv;_}T3vdm%h*^AOES01XKvIc z;OQ2%E;6QztGYQ00^DqhO6TgPE=GladLKc+7ykyI=K}@?K?v3&R2Zvyn;j;r7CwT) zl&f@(5`eQXYz5OSabN$$7F6SW(gYfgUf1rWHcwlPV~C```Sw06cf6@Gzkz-{#vlP@ zrm0fY_CV?Di6+iK0hSdE;j9s<;k=>LN}a(>vc4Wis8GbTJIn_`Q0if1<_?Lt z9(|X+mri;AvZ@SN%4{Osl8DLAs2w*hM=MPS#|B*m#cA*brE1dKqgA_(f^S|AZ!5hq zARo7lsY6lAX$6tEs@IFghQPEy7+PwH5u%*T>end4;p2D#;9zed5q(j35T0x^+lfj! z7UE4+U?k#~7KVfg*G9CKP5zVn$r94%r)bMx>!<_w)&w!LDsfj$%7J@C@yz-tp$+96 zdN|Dp66n05NY+OuzOt)HB#tyPLaih0G@hwpyC_n~P=#k1$Qni-xV7N@f!Xko# zZasaYLj1v4{#gjtGGg5cr@^=H-5H!gA6YNj7&dfI-ifs8<{wXAC z7JEutfSXZK*kHYG7{thM5yGg~TKcOO5N?wIRFQg5?`IkopJ>T?2b0F2QRaKu0w2~N z-FD0*2t}sld?+MvY0a24A6ARI72jWYyb#7@n577;}2)EKYxQ2-FfYUV=&=D zXGs%D(_E-JwxB8VTt@mqPM+{noRz_XaBA_DooC{skY78)=(eAanTzdzVr6jzKY3M| z*DH|j^p0|FShj98ME`Kt)Rs!8KqDD3FcvmLg8e25h_MF|RhqC2Cyo#4-N{wo=>u z3~AIh(rnVg{kx#drw)lti4w3{91$*OWtq`im0WXpgG3t;A6CyLBZ7|l7T~rQcu79I z-dfL<&6tR9ViA_-kgi63)OtWgojVJ+vIa;;#ka90sxwusqT;d7m)CHl8dt+$!p8~9 zEwKKXziDtR+MaUfW5(cp=g|Y)_iqWbT*!jPYb0u{tB@P4iRInme1n-mT&QbG_cN>{ zW6as+1MHA-b4QM21ynW8h8&5>S)B@{{}eo zB1qjVImv#3bH!w(7{4t!;eWI{b?^+Pb@5~vv03z}C`6^=WFMx$0y_Ri?mn-2xBVju@j0! 
zUzl=bAYW1if4z@K<3a{gPptc8728gw`%~NwpfXb2 zETi>F5Z=@RK~6kgeTWTEuHM=zM?uv4P-E#+Q&m*GCk$aMz2izNORRE*;X>uw75;?|}b+Nb%9<;|G=|ERjp?{d;^p)zUT5hX4Ba z<@^CyYAg3VR@v3r1=1S~*5tOGPC)d#ZQq509ROQYN2At=&-K`>KE3*^U5B17_;)kS ztOr_9pZpROZ+?+vX3+~1JYcoGHKO;$OGCRHZyF}Xoa4|nvwhh0enl3_at6@~FlzB* zbGo?!q6E(OIuVjQZqaInhyoR4-%Yzslmvl=g_iNjn4BV!YIUbpjlk1b@^n!=5ab1Q zFl67#VN=Cs2jrNMCcy%0kr4if%hYI(AZHgK1f*brS*a>#gbGy_zAlvp>pbv(r8SE7{{K6ho=Q?fY`9{L< z9GdiD%VQlH@3u|-(4Oy3kNE-_bL$8N-mO#rvvmGnJip9;c>b<`J(IK04#vloZb-I^ zl?&P^%1ACqt2^{X;UbweiE8iL=jy_Jc|YNFRc(|{DVxGK5~$NfQ!BTKpCoPYA{4nr(Xp*e6ob^hm#L`gGqRx7bMGY(5Ys` z!d-xU&^;dNAp>qGmKQJG6fxy`7e+Tn=xYnD6mV3*-Yyx-(In1*Q4kS+j`ysH#OnrZ z{TjR?Klt1d@xb#_sBn`g5%l(f7CH;VE4%3KGE5u!q$6YW=KvDTR}dM*7^8Sl{QMCz zL0nOk+o=3<(C{1~QodH~E&{|!v%=uRN=ZOOK*SidrmecS{9PGqd$jVeBsQgAQ74h8 zN^M3TOIv$pKT{DCHWnjAwaFQGZSi#uyleeaECNZDlADfTf1OkA5fDTEoGd|ZsMTfX zFLAZ@So^JoSx(;Qvy_yS{MWx8HwuGj=sdZ-!5!K#ZLTalr0Ex{MP%|(LrzE4jKKxK z6G9$~;F`Xe{lu~l@0slo(*MBnbB?#!q+aA-uaz|O+TanMg&c?tMMxRysh+HIDcS54 z4Y%mke1MjLbJ~F|A;&~Ps>>Z2&babYUu9Kk<7_47A$Rt3x!6`a-Cv9}@jyF1%o*h=fI>?}NGd@t;s!%0PQPb1<| zHEVg3Z3N=o6oa(e3K07nD&YQ}mJ#-gmBw=-^67P|TwEY_lj*pYeXYxO6}XSkxSNgq zH9Tvg<-xn5)~S_wkwD%VpN4;LYR&jOJejM=Cf;XG|E9!yyqe%nk8X!nPffxJq|feC?<#(U zVqb+SB@^;5UdL`Pn1D#!eFy0x4YkdFO;u@q9qTrJ9Jg6{s;G}9zxDI|2=kF{Ygy(S zjnbV?UY-U5cMmO&XH0f2=j;3B_|wGDQvA+ zT{9yuef#rr%`+v{+$0$9AsOR)w;4m`>?FbIDX=4yqW#PJ1! 
zh_V@A+heI_xY?!`bFP|FvE8Dhlug&C^V)h$M{&|DHb+`wUr6p13Mn~N;0!88ce%R$%#wT>_VIH9^!|g>> zsq*&sW<7U(S=CdY=g=6_b%&-DDUN;5&Wtpb9sw^tIQn`b-onT2=ZeeilHLt|{ou0_ zSW~YS_;HQ6XQ78&{>5znD@ir|m06~I;4KTv?~Imo%qviqD`D&oy!YtSZu@6)LSE2Xf2b6taws&Q*D(h0`G6B3 zL`$g8$T_Z-R=S|7U7LOyKtidpJI5Ut#_wxu18pK|mS|VQwKBX7n2JSN0f*?an^_Ed z410W$kU-8s@y}ouGy-D^|AR`r{ozr6dWGWjhj`Yx5PW`oT3f%1JUx_Z3>1+Bfeb7G z-(f@%8d}65Qm77mddq#WBRP-z4RjE&!86{(Sie#(>UEJ=@%$u0N4NVe-L(nUsvS^d z0%A>RQsSg5f<24^tpQIaO-~i_^ip21!({;N89ww|9DGpk4DE`>P%liKqcVEz?Z<3Q zawWBQGs8G{Qj^hNcMN3#aD1U6IBt;mbiwTLxvYewXd;W8i=CT@2XgTl!S%HEJCib8VVcnDN^tN)Bz&k!c$;hApj68svwN$&@T09EpXFXvUUw)MTvDX0 zy&gv9+8pAj{$uh=y7b9}Qz8JMTQWMVuut+krR7rjqSdIII3z}nA_}vaU1nVQ!c026 zUZ}#B5hRa7o31`^D)76UY?mj57w@UptaG1tAv!$s=bopNvVERhaTP(w)p9M-^QZI$ z<*PAY>f_jDCl|mdW7YgL+UJV@xUCL{fc&gz9ZT(NN(Zt|^V>Q^d|Hk#2&HZ1#p5DY8;6SNYw0hA8 zh#x?~PxN}7*F*4>x$z&U^}j#M!2nrsD(m&OrOveExf9|H6k|TlwEKJ^okEh{JNb8BY9B6AJ0?JjAE{6%6G;)3VcPjJr8>M-B}Oo6<5B5;vclbfhT zYW?f<@E@u*nmAw@V;`P2wSz7&Sg9kJl?DVJy1YI~z3k)&6QDp^s>PZM0zlU9KqPLq zeKSW4FQv3cxA3Dw)TT=i7u#x=sU)S1O)HgSjci3o?npWX)3Gsy(#CeStgXfmUOs|9 z_7mG|$6uiQWX=+Kbd!Dp=q)BU5ix>(ep^E|ZaY3r^WE9i(B{rNq|udd>sGhSuO1w% zM98={FIW*%xW)=1)@R9{)&Tow`ZgNB`6W2WmI~E%ltW8xcdj1WSX;4lf)^L-hqrIs z&Fa~c@)D);ExB^(&tmsd^~;xWUYxYb-|M8a_--Sml~&vzhlwAdgG<4{<|)ug%r?`L z>(3npU&xVILNM{8#RoZuGo^Ax#BFU|>$q%g&+l9T^vsL-c#t^VR*NpvOu}UWLqd#9 z)>v4#o=$8mZorZ!{xgO3r{??)2vjzBS4Z;OWqe>S{DLLpj0B*wG3YDz&i2dW{bd`6 zx60H1NV=)FfuV)>D>MuM3JMQr|GzWx|7ol6k7;M&WCiZBqoq87g8l-eGydnuXu-`$ z5xfBZxS8>S0xK$8U{j;#c;48LViGdJUe=_h&u0Q-XX{bZ%myj%I_H-uuI<*?0){S&LH3`V568|&IQllT zL#Ngsryfo-?aZODO{{?VIs{W@^$;!nX7v1OYeY55EM~D31rrRYhl2S=cr-a~a3~6L z*?sxt8l}WJ`Q-r-^vOZ#7iK}aKGg40mh=t)#irRvC!IOS0{(gvr__ao$hcw~xJF38 zA)bIs-~>3kZ+`65Mxv+xcJIm6PbyIHHe?(-sCT6?R9J($HE_VCuuWuVMU~_T%|Lr^ z=!CwKE$B`^SR_=M-5OX%p`XXEI1|m+Yd;vY4kLw;5RAs8?{n0m>Af@&CkA`-4OAX6=@EGT<3D zZ+KJzo7mo7&_I88M^`^NvRZ75I%!-m32r8x z$%AML!kH18LRvw7;b&89VS}n*h4g)`7Qy1scN*Y_o}iP6Uhb~n7V1i+BrT54y@UAc 
z^p9Rbc@o#7?wi~C>x-WbTDmDI4zNx*E4;n8vM8hbuSSdQbTL%O(IT>KP6OSyEq(i(8=>>RXBj&M;_~fk!jR*-wFO-4Pjv%k|J8E!Defl;x3a0P%!#S z8PgcKD=i*ot5B<*pSHU4NPYl&6ox_&6+D0nv_ax+<{txlz?YI$zJ0Eu^H$!T1hQ2v~y)M0)u0T$VGcts`9dc6?5oyy1l1c)GO0mxTU7 z)bZgJSQObMiY4mcdE;{-&ys`spR6V}CSk|J1wfh-V#z}&3~t034%^23#F9z+W@-v;#BXUdtt&{1Z#b2>E(#>fvX+w zrJX35(nyMkIGopWnEl0Gg{D&y)lJ~87e411_Cjr?_uNlHt=G))Jk7B(&m=?ATY=sK zdHQ84(pY72y&tJ_i=ZtzFy+Ggon5Xwfc$!T2{RFFy$t|&=l=)+HW$~uvZjz@K zMd;p;_hfp`Ct7AU9s&9JkaM^LIcJA({5BO*{E7y@nOVKNef z$a`N@8fMQ#C`nTQ#+-i(v)!RDh#rkJ$Q^9^@`^WP_7&d^X*2%w3QXk>Mikuz$1&{lCwUq_(Y}o%5Vp%Wgo^8>M;%;?y@?nX-r5t$hd=n4ks| z0_*tY(0y?QKn;|UrPt^EAV|S?0Z5eMIB^WQNg6*eP>-Z$^!vPBK3cV@e8Q>ks2RN= zqW()Hp76W8g0kQepM`(rgjSJ`N<^!ON?}lm!{Lo{&`(od*>UJ$Bz)g0X6NzyG6B~Q zhcaOLu@OI{+zDnqn*?!ge1M$=bW1kw8WwIDxEQ*f@!F#(J+>M?Y-g1gnQL?RjlrP8 zc;@*rgP*Fl`TM?qMc6`YF)3Sl09TBE420_pm1Ryc^$zD)tG||3^tjIwgclJp2Nn$N zL+oFdov>W8I^%3902ic{7u*M-{T6-=Lxt*?yk$OiWI#Mk!6Fdg*-EC;%E2Zd$w zkydkmel#O(l_q0y$_x5Pa^Cu3W9z<2%mvGX>N0NfhP~_~ch4)obN3R5msO7nO+tW=gXekdRlPiQBm}74frmY^@Jq6m{d|3qPukJl2QgIj6MKF;-+MlKQK{cgxe*<*-aZ?9!?jCYuUa>f2*VCLdu+F%Oq(Q&eYP_4zn;>f{tmEoNSl| zt_H?()1BA4kqR}(_DbyCbPl`_)}mi+(fuX`dxGDJdRn>voOvFK6%jG)rxy{q&huq4 zjZmXor%z88kcNd@9>>WW&%pc=ctyS;JGBh;Dl+J&iJ#`%#z%p}6hTG8i_nTheDRmf z=7Wn-xzE%EF3^QOs1AL**?y5PT7%2ezsqCZKwRP4dUS_ItbFefZQOiOgH%DUc345n zDBd*wrhg!H^N|Oh+naqysGBW;Kur!-{2O84<>IE~blZt}_RdXOwy9P@DPg>q{Maqn zPH(#zX>z#1bI*k}MazZUv)D7_{k@}Cw6aS0dO>p$_;hL+W*#HE$sIu;`LN|mZWQ8# z)ZPUNZ696zh^2JiB6dL41VeiLjQ;bJ*ny~orLH%})*qWT-l22iyu_XsVdK07&&6zO zsrUE!w;<%B8pZOSot^V1<&QytkCavp@guHe0Hr=U+=R(B z*MY??KDOZG>HnL-vN17(=05=Ft;dLfNpMgUIsl_}7zHp44obrM`piKGJb{B6ald{j z#R9-XfMAKfPDgM7!jK?U5&$PSD`!$a8Z~hH3vszAj^cZwF=g^z&^^8Lja-PqRfmKI zfflfV=NFGj#%wJUCX+gyGDPaCnEJj=zL6B*llMFodG+tqv%NIV1JUa)g5!P{KHcyRiM{y+~k{!NxKq zZ)k|9Vr7Xkf-KsvQ~z3Y3WI`%pB-BWC28EFBI7*)UmqG9{SJW4$*z)Ab_EX@B~Cp= z>(Hb$OsUBxsWgO3QaQp2=oUK>V*rLDOEFMP8McK>qJJDF>cXXQ8?*i*f>LtxY!IO%SeTcv==HUM;mI)jsDA{CA z2tF{Xei(UC1(E_!5BJR(h0O)BiHQNXsZMgG0bzzIvG0aPm=`nXE*QmsSLxec7U>v9 
z9R~CfjR*@upi1apoP&nYo<1NZ!rmx~8wU%a@{F=J!o2x|hPQW$*Sn`$5>J|M8SSHU zyXE=M#(*81BM;5;L9fOx&Dje%ohZA-%(suT%i%k8LMybuD+=K_ls8BySKa;tua>pB z9inoYa#63Rs=#;lzasnXj{%f$ACa-PI?rK6|K8q-U(&+0?Oc9GVqpU>3=rfM_wf}uox!-I`XJ=1u$ya{Y^vb*!VEb0LVf3zhR4n4rvVHw6N69Drw99PBzf$FIi9nF_l|H>fbNc$ zT-SOty1;HxbaO=TwzHYu)kb{2`YK=Ny{sdJvqMJv`RMV$uhmp%VkMSLXH&Fg<&z7n zQ@P&_42ri0uI?h=ur&9@w|^LXnK277*7`KS+Cx?~%tP2jr*8u2g`m0a64rdMrewH{ zJEok=7b(ylCcw?dPvToHDHuuSi~Q5;{(vvnr(9JqQrQ=|_>HP;N%03eLf+Dz130c1 zH9KFMvQ{jl!}_easK(9o>T>k0hV=;j-MNpI&1WYoX*mT&w20)Tu%yqD&M0B<9_qy+ zeejkatW$QGL+}kyTGa(gX+Bz5wP`!zoa+e?-cc!XP3E|kRH&)0L{J=Q9QPyg*4q^rMjkh8?QhZVL4O}1+(@)XpL$KQDW&&e@w=`B`Avgz?8(P>ukDff(R265 zQm+Vj(PK@POcN=m->zhne7FANro0hY8HjvFhXp0rREPw?iq+C6;PQQU_e!^yfP*{; zo?Yz*=`$WE=xdv?e2iTzki|jFC{KBR#GS`wKyhi**T^jJU+hGnN%ASHzOhQHr4gwa4LpksWN%(xNxw)h z)ljT8pj}d@81k0A3zkq0?t?Ta0aUF7L;x;ukk}6ZW~(11U<@La9)J$c!3;9m0?-40 zZIYmLpJ+IW!JrC>eCa@CxNOLj5+ z5f@knQNMYJTBHV;84~x4DkVCRp$EJKqK~jHS*mXN@bf;H%TN>q4|V`X_YXF05}qr+ zSRP_zsfiTRT-unDXu>LAo2N3N>Nda!2;lmKdF3)sBs%3S!*BSlQ}s*+u8D9X)*;m) z)~&J%@Z6d}*4D?G8|lUfzZ!wwP-nN?PRu_i9wy;3Fvw0BDB`TitD~^R$%+hElV>co z^F|VUx1){xprH1wOOtIdo}9cV7x-}S_N959K1GxnDC0!*A08xsRSd1w@1B4;O3eHd zXbrMTQ?Lp@xn8zUyWP6Su-Y1WOzON8Q&`&Fr1IBGX~6KYX%3%Hc7Hl$INMg)*bUU# z6dAba5Mt%Ob42zZcGy*a+KV!2m~TO~j&{(H&^f7eE`YgEV?j7FlU(cQJ%}dkGH7t! 
z+LQq!gAQt{*tyM4hQ+7SV#mZtyt@S;B3+T`Re%21J*z?!q(Ja()p?`Gb;QG7+xkQpt?f7HU^ z4aGA+P^#Gt`~_{dARP&*z;HwBLP%>!5J}EmM1(?bHSgYH2!vAj3+|?Rb!|{kxDh44}fp%2Q`poR;28b&*g76OyF(M`qHUHfuQktwyn$jBQjEjdXLR`$}b|*zU9wS@`wdtp2@Mv1~nmV}c=K z2V&G~4vIxH5xkOWs~&tKg+#{^8(-2@wOremJMOOLXM|xDXI1Qi!xA<3r&g3;^L&M7 zit(M&GCa-dtiQ3u@;%3B)-E$+w|$EmyG^31xWc=HwV7mb(k$ zXhPSr0waEE9GO3^Mds`Pb*lz6f2@1*xEU0-ZsVWyK)KC{`EVxuJ>kQ0&Q=GC9Lv$x z-oAk`V>uqkHA>7guH2QUkU;6*``uvQ8#@@!0)*&x)*?=P;`erbdK^7kQ4 zMw@8MKJOjafV4UrTbh@nS}}GqgbE7^9NnvBn0Rw#!@Ze~fr>O(8EVY-a-;ZrdH_WP z%vv1m*85jV9VaM`P=RxC|HpUJ0yVT8wm4Ay zo@@6CvFh9S#0Z5Gz}e5qeg<`13qVS~<)yT>C`zfizPvmWMjqA_ZByVE3f~(db5n%f z`~FgtnIj}S1;IDOV^Bc7B3r`#CLLtP4*!Xuf>oeiusuvH5vEV~A7f#%7p~9U^0PlG zogRM67!8L`gCP~oB#f*Fk^qbY_ks)LVg)=jth(>GW4cAVWno6|LB#VatQ zx+E&Q+O~PrU@Aa6zYl_BsMB0dp$1A2 z8I}z~cs&@5YF-sj6^aOx5t($!V3o)_;Xkn(#;2^Css0S7;oKLXaT-=Qc&!*~=O(W# zWQox0*yARl;z0Rx6}M_oIv>_{H}2zvo-n|3@6 zF={OPhpJ(!qs%B!DoKW{f04pJC=F^LtCPsddUOMhyxxf7c;^TlCeJ2vL0#K@w@Rj9C4OX8kQUyw3f#WKm!=jg8-JoJ8VACOz-^S+D z(8YxkdpFX{!`Vh}7i&|PbxSWty1Awd4+b}S7A4>U*-EKo(;3e3d|^le zCYaDa?c-fp>i%ysw)eNit2C~2VIo$m^EyJ7D05ZUg?4SXiEP|S=2rMS8mRCEuLkyF z>q*#I^naJYjaxlLpQPWfi(}94WjJ{0?t%-pZ_tMYSL8Lk*;RQ$hW+FkXlC_<@RI3o z=_GM^6HlMwFRas77)ogUjVZxKhGqU_W(gx0U~HS3t<^qF4wJ8`^DqWX%)|GOqYyDl z+4jghx)kPf5JNBL84K>^+&0qclYQDBt!l0##|YiPJ|jps8hKqki3;e$*EnFBZv#?h6g zZ*A>1i{C8d?t2e{)P-a3i*%nt4;%z?_8|3-J};AQyc_P$W6{r?0u4vqv=bkGOaS9Z1Q@P$hA+cnr9Rn^wrjJQ9*jL0V?HB<$+`mb#i%>euQIo*tg+n6J7^O?1M@YVYVS*9A>uke8^vDWFtU0d72E_ z`D*uSJo8`cmJ5fVCHt@NiZlOG3tjrM53X%8+mT-!FU;Ibk52&YsA)>wN@#hpIC{+c`RrtCt3TxH&|Vc9F@CM5)j_!B zLZ^k558q`wl1bCse0TX8G<@Iby>EzLQT$aqB#qsUi9DsD?okW2kA2;bym(Ao>@JhC9m5W4A|2WYMhdT-s!Mt+Z$L{vWYSQgoRFQMAp@> z%Z@EirSp)%C0=xsd`2bczcXoLY6Abf$Z z*61b^!WU`&)UvSo$M|lk5&TL%#5b3p{)8)p7!vZ)8_w42)NeNuS`8+=R-6dDmtqlQ z<)aeHWR_vDZ_dJ4iGf8Vxx%9H#LQVQq{hsH$v?|bse51Cem|_DQH2{qUqE9V9Pf80 zDO^Nd;0p?~_4%PNH_a#oilUKVp=BvI?EnjiXP)^T^kz#VRH*4qPUNYF-3O>|^ssn! 
z%SEoEf2+3?jC)c>-;%?PZ-3CkzV&~!|9c~v`zf8fDwJOmOO|RG9G-wvsub98?jZu( z9~|Puqi2qZN8c5(qqFvxJI#a*VmQ48xKQ1HVIO~$hHiUhz+XmbqmNg zL=m#rEvL|+Kxi+fWy{u&=&tpft=9fGuf|(2cZ2;&s_r!pHN0CliQ!B&zZ%SH5EvOP zQ3NrpS=MGZ(`cfGh+ewu$fPm|l|;mgpE~Z13qct|F5*OfOcnw5XoU_hzoVf!;bM#W zUj9pPS;j{9`6VXeoa3YBdP4+hegn8MhS(=Z;3P(IN$;tm!OzAb(cGc0rymO%Iz~I+ zI;H(8OSG{~1hu=b{*l(g8>JJMZa~|3r5KlPdhZx*Aw;?8=YlBvt#BO={;Ok(G>PkHl z*oATBcitXC3lp}QF4{p_ zGu(xnIm01OZ`Yc9grcsXhIo_XYOMHd3%s<*zQENr8Dd^Y(^evsR)X^o&=E){n1+^( z{O*FR!Z2h0SBc?GCTTZa@kuf8bun?7Nq5~*c67Ws`VnrRu5y3c-$RWm3W01x!gxn_ zv_b7SNO6tt#X;ai$$yaT#l80R6XWO#$P+>gE%8@M_^eH1L59mB3Q+kSWi}{pMYPs)s>#3r#p&UKu!>c7Fq(IY$Pup z?iBbdvgFqTO`BD+eyEac;~s;l6^TeHWOdmOEiF4dTUDSzps({Y!(SHPPqb93vf7f+ z?~_PB=^9B;3JK6-Bi{^3f(-{eGLYkb#ykPfNdtA(kiV{$i)DWkPl9sEV{f@+O@fOWX?C4VwyQlMP((V7pYJw3(}WH4qQA) zZmc1%zNi(*@ts__5iAO@ zbN-ht9n_oRmBeVgiibEeMf2T|tIQOYZV46o8U-4_m}@=}nJm#sI-m85f%!wBXXkYu z3q{xy+I}rR?8l14?Vfpbt~X^@39$B2ixOgKm(D2$3;IrZI6 z36QOCn7Jh%Z}Do=bcY6BUVR=$+#N<6xVD2C4CmtMYtQkEv5z0oG)9@B(jpF_-&QDr z6|@b|*(tm9*mEq&)RYQz%r(uTlJ(oCuWzue+$R47S$pYjX-J||Xk`0~VN&uzr>n!0 z7t(|2g}j0*h%V0u?-weII6JzBbwwDYTl?3eCFHaycYJ*V`Nw5gOm)ZckT21Sd3=NO z9tYO3J)x~VM2tx5C`AlYaXWcLlBv)_*!Usi-P6_J)zKHn>i7C{{3w^gS+6(pXDQt* zaMJl+{9s661y1%z(dqNOc3%Pf-{2BxRUAOwsv`!70|%K%0Wd+I;s7?-4@`j)?cW>n zAYKUo9&n+l4qk+y&1K7J=k{XM1ieWVvO7J#Qc|vn(vKZCuprn7)oDmMltxCebk6PW zmTKNWf)1sBDw;P5=O14W+b@uud6Xmx-T}2d&P0o(p}g@BSASxG?H9hp-K>!&Gdm{~ zRW8WHrVuujcxNEbMq#u;$?NUA_j6(dZRc}(KhUIJaVS`iP>IF~O-gm?1Xl!>GP=~6 zkItIS2Q~I?f0+|#f2@mG6->_97fx;+jxuS=i-QFzAq)0>4|P6o3FQ~QrXp?iefBrx z_LWewDYYZ734{)xiG{p(r18@3^W;@^OoUkPTwLnTg7?3f4P7y3@#1NtXtEg9w3{@( zX~2wb$ZB5hGN#;V2m1kbf)c?hHjxuzm@_g~GIpJ@rH4@2#5J@$v{I3@VVcw|WJvY< zmYRF>M(eu`+%8J&Cy^1oT<&~S|KX5ffDV_NCu^E(QZ5`-4mianUXH{yn}7iaiUN#( zUI*FgIVQzwDLF!9*X@RMt4XRKr_G#xDzGZ6oytbV)t9h3n*Cd6cQA&DbaBv-6ieWP zUwfCYN4Hm--p(gpA~v36u(#2Q`#{Ok@c4uBW^_06=8p|vL8Wt{55E1hFS}bzQP?tk z*ba;D{yvxk!uIXuPdOD?vA3J+vrpBd*HyKQRtTymRH?m+X!N*VU@j2sP}Z2_8%RkC 
zfDVF}{i+VK{||%#DwG1SU@cdL`kGxQN^lScjdiddIINOj29PEq|&f{-OGEtJwu#m_YJ0 z{pCg#)(M3zDMJaQcoi7LDU@yV<8n#Lx>V;1YGc*@C6IRib943Q+zgm2RVKU^N<=eD zv141j3mh`Vk-q=HcAj!!A}B~2fCF@yY-%3J*x5ETw2SJikj9_Wim0*CGx^(UrG7eW z8OgQU_8iboM-wIfY1Mc1fU>1TI3ku!uMdh;>`Jt}Z7MtY#K<#{=pj5 zcX%)7m@FVJrpMO>Lo9sXAD`;Vw3JP=c!uvW*T`1>51B`^q*e6Xa$Ml+m8m1;@c z%6~*ha0T>N;cFnp;M>NmX0@+)LAxIX*6<1m;qSPDf=b+^)@`&c>l5>}}J5IiHsCt48 z?Jpu^^Kv<%`5&5)AS|*z0`sjHnLfj5j_;YJ0P;B!X-7<85$6{$h`TSdLYNhyAzG^3 zIXl#ogwmE6rcsORZPnJfKv_e|qR@`CvkyzYq=1;JT{BlR_&?K zvrIv-I<15Fr8gLlQY#1AuPJopK)5tDSO;nKUcLb64*AA3lPgF}MqM(ZU5upO{}aj1 z5$REimU!Y#l4`xC={b4MZe+w2xzw*Ca8_(Mu2viA^~aHkj(ZyC$K<)ay40FrTz+%U zAvcb`*gYFA2T3|DQ_87%{M&75e;3g*e@)az`CiLsF&@!T}TVhnC01 zi4hvmf*o{ILpF|_^$~=wxi!_Q-5?l`N371dk!E*}nB?9_A8R{JBtxI7>AgJVoXSQ( zp~N;eu$gC-g^R5_uhq)?;_3FqRBhI!Rd$)?_a5d?4g1XXZt%apw}<6S0|__u?*6tt z6d@y?Y!5NDGJD7QCcO1+{SQyT=3dR|_ZM^o((R4PAaEcLkwji|oLFhGu{gXC9*+-_v&0mWh0R?s>74E;B?Y3p-&u<&wc#Mm01qcZS5tT*#7RC@>b zh~a;&4HT&M6`;jc01QBfuOU<4+1k207L@=CeA_$+EfE%V5uWpM1;zZalq|>^p{0Bow&B?iIDn(@Fq>^00Oy z24R8PI%*jN&kN!lE`E8c=wYpRKUU0+wa{+wS!v+)=dvn)>fp> z_=xCPy&NbdY9bW|{(iOUie6kZ*6a>yH1C^0;#-PusydFwzJI-wy=Nek+ghH%)?%rD zj$T#=dxzOrhy8Olp0u&e`REvTb5Ez19RN9S}cQijZ_lv6!tek@Ck z))%7;qw1E?0JxAue-Z?8PVk_rG+|g_u`ZGy?l8ETHIC9Qr7g!t6C|e+XneoRn}_Ru zDJ2$TxJqY8=N0-F;=c-Da&=;mT|y8j10`XVBuigq0j_Q zt!$PY!`gUp5{S@o-Qr4k1@PRJ+GR=^`Tva!x62yi>oi#m=+BpgUYa0!r<(!d0I#2` z9$p@K+2;a80!~W*spZVbvQvljL5F#&2xp?V3En4L9WPv%NlmLC!iAwrIxEp&tfOCzXc5Gh*JFk8I{i$`-^sr;43&g`3i2D?za{SVjxNc{!_5H~mHJV4KxX?Ftc+q@f-kF2dEviJHc~2vsaQIv*$n<-J<|n2NcCyGs zzu5qxGfVo0^9z$%(tg6ZLAg1>r?f2$Ii6O-X}MjeY*CD@!InGGwDbe~qA_)gn-32| z0IDl(jayzGHc}}MxlU;RPVR7l<)vPhC6bf?2D78BB3Tx!vaGrQ>g*z44i1q1!UnoU zx_Ey*EWR(K2jBLF&@{J=@ACl@)#(T`j;#m0N%qv%kMA9xC>0m)Kb{Z+z?`JSgwe{Z z1K@)MJ$@x3Ab$e@{}*s#I1(Z*q^1RjzV$+?KsQQgm9(s0G7^`<%OQ<2vQj#R1T)V^Wl;7YA7ZGOgQP~6|U3C z=i~vb)%?1}ZfJpj{u=*AHJ^_-a{MhZRLG$jyeR}l{x8Fa8F)O*LpSg^mc=wX=DwMU z%IdZ`&-r;5k?rDezasHusu1m{&?3wQd+sExe)bO%jM{M%^Xllwce&v8yUb@}qTMMX 
zxn|=QuktZ81{Ml}(L~)d5=yRS!fC9OTnZQ`S&Agj31W-Q1gl@0V{T`13AGdTTiW7p z-dd3BarHdWi9o~q-;z(42QHr7h%KGU!zs=&JjuTOi*zfGJ%NaWbkrQ*M=n@I+lL`m zK>v6Y7R_*XL%gGLOD2xXI~yg==VCpYsY7R_`H_;hEz0*ah{D#2wnE+s&$DmAuVY4ZK8e1{+@uP^vtG$k?z zV1RRTfec3g^sSu601E(+FSX4BBgEQ0SBYS>K^`ov(?6<$HOH%AlqLFKt{YD}E-ILw z6iD{y@;Hiah27|oN->}I&y#CHw}(!k=;6PneWtEU;qLS{P{7W$dMVcSuNE&JwC-iv zBBpA0wPzm>YH$R&JJ%R*@ZB!->{FuayOVcTOP2chFo{l&$K(Vc+=hmvUTW($P+c=6 z3;TWql}tDxa2YL+MbWAD+F}r0gwInL80v#zHP% zKz9Kp*Y*A+WOgIAsof7AbqKAvL)*zD zXQf{Ci<35=Erto$kP9iV*6G#2`M4t2-I-~>6Y*pdh3$v4un)a~ABUGPju&4Z9!4NK zXVbluqE!xZ#u%9?!#R?MI$I1kNtG4NzN}*2t%QrZ=&|HHm@Dfcd8a)@twV9l0(0eR znwjcH1d~Xf?O)EeZtd24`6hes@o`B8uN?-*JjT|?CXWs*L7Yz%*8p_IRqjY!%KhMD zZzZX?Ga86twsvZ-+;pbMv!;5j1 z9V}qIj$_fjxfjo&2z%ENn4u-SjD4|VbC^@ zQ~7rIqIU#5CW7Cnvo-v~b`(nh6CQx0AG)91n>Mi;b#%l|>Jr;sa#594x;Lk?kUMH% z-VJ8n2hCVSrIzvOpGtP+){ueBDzf%IA8dPj$iwY78pa$;;@55Q_j7#mfLJrDf>zVl zMC@1}UqScZ3;GqY?FZxG|J5Y|{V@VSg0pdcIXBE!c5^^E02oFMmPxkz{Ls%(OiInf z-=Bc80wyU7!@ia3UpUx{^eD+vLGmz{%zEb=(^V{EjX^b zPqc<%d(T=;8wj$)G}zTNzI>=Pw;tNIsQVR->R@ON70 zJbQQHtzodL{g2^%In_o;Xt0-JfUHZApmAbszkgMQS|>ohlK!Kr*o^8vn6^=}K_U2; z8sZ^H{itMDX@$QYQ@j&?=Oe81mS!SNVw@TCr_dbH4Uii$r$UO+t2q|s>Q}FU#qGwN zqym4cQJTtF%uxmlKWT3EEp5_ld&*^i_tit+4p_tN-Bx(61;b_i8vCDgH)>jSm{Yi( zmSl%?c|qp;ag-NmkEz({|I`Ck4mf0z%xK8O`Qc z%V=qkJ-Kwbojq=VfmY`|=}p5r$`&VQ)4W18^7{H}6lWuJF5s<|>uWPwoow0=-}NHc zPP&x&G(LJiwb<<0{<68pQ^f1Vc448Z+Ezu-I-qr19KHD;+cA@O#^vVk4nCentpoBF zF3Qj2-x$A8^>30cK7;Qb_q_N)L5 zqpcw_#3hhoc;Yk#=R^pFEHwO7i+460$MAq`a27{mWVL{wMV#rl4dT0IY=p0|nHwkt z{ldu~t~mn2lmH!HMmqjCml zy9~d0bJ&XoT^Yy{?RAq3puviX$#ka4-?O*TG8!bW0^3;SEOrbAK&4*@*Y7YV@DV!1YI#z|wB6pi0_e%AnOGYXa{sf*(<=6doDQDf~{o;fVH&n_t67g zc_DFTQDhM7o}t;aM=)J>94Cy1onb{FvzMsv1?|ywiB?^oarI znB}}&hb3ihflakFKkxQDMu^N|L5BlMYs9r;B22g^tqa~Ay?WZ|_YkNh8B^9C65s6J z?IJ&`u^arQI2w&3ZZyuBogq3-s-hmflkjLxSbqFhfC!;|{?v@SVJI5uN@WNoGrrn?ONuH7ZkSL(;6+?T+D-F&FE8 z+;&Q4cimASS6*DSrH(6eYUW?{jJTOVRpi}kB#w*sMDUFV-&W@L{Bg8%lWVA6i1>dx z5bKX6TpUyes=j@A?t2zXqu@3`DtKL6WzHv&?KTM4Lc!S4hn~C=N+%VVA-M;Fr6vfc 
zB!OECCu{wj@4N67yt+BXj&v4>gQ%2dx#Uxvd)wqw8rOEH_LZCH1DVgmFoN@v#gu*d ztp2$zNljz}J)7h4xA@2hx+AsUT!tdr{X7r9Fzk3rA5*h%vR zsl=6DpIlirKVrJFv09ydfE+L`@SYmcIHykBKAL0hf3E7MdXrCV_FE@gom~X8$k%^V zRmyQ>%(ikC5U6_Z43ldyw$2>CvnX&f;JoUoBCF`M_8Qt&Zjugd)=^>%7}ar5H6eBP zbwK=jv@)Pgw9=)$vzZYE3Ku_4ww9_bq%mlWAF^c1+a@#_Xi><$zfZPwd`#{6zBcz5 z>>O&GM;qXuYAzFvr*@S7X$Y}BP?So2OY56l?Ix7l3pVC!){A|gfNb%D*?2|I>Z@_; zIaW~iO6#lnc3L@}<`R^wd+cJvC}o%3l68Qw!>b-ttaQFFDDPTjO3`6 zoa*ej!3ypiUmqKntVN9oHnWzcuhm5%fB9A?M&|wbS>DQspWWq~C7eC+rZ{xDz+{|k zVxBn`$DsgAn#P3A#3?2yViV(J;=47^O@P41v{w!^n(USCZgzH^|3keWi|t!O!i1Y zI1xeyk6o{-=4vbUv#z{Aooo-rUmb!(_`sShKcLxTo+FT+(C%MFa+A8Y)z&eJo`#SK zBicGJ8z!rYdtnx$NPh#y$b^3~C{-!ktRij9N~^Qu?IK+^q?;A9c3{l_vmaFxnFKxW zB(}Oc=31o7XVs}0-ve>k3s7d1cvSwUfMElI4$Rux2!?0mn81V-vC|$B!-- zTIGK((17t6t(EAGxHE0Mj3gMi7NFJL2q|mpn|?n&At;!?^Jk<;n?-zTZg&+{B5oBu zXX^C#A*>JL+}<+*o5-?6fq*{TQg9fGkGMs14sO%DHx0Wj+3#Wc4*A4Teni6}W+k4$ zryTpbxLu2FHyOFfB?iD&2hGa1D!XDm?8Qh2UFRKhcf&kFtkL9*yrS#V{zO-=qzmlG zsD8(oyqGueteDVemdrlRal*33;YUYM@yy5B?3@wLT3*$Wad;$OFP`s@ujL=mHqd$KS+@`6~RUnO&5YGB?zdI(j$LlZD^9oV&@Ncb~Q#EIktk|5b&@ zih*It|3neM5TLMs_uITp6f?w7~A?IObJTB=+xc$gGuK9N$45VJ|TGm>eB{__oniuvmsnXK{2H zlYl|Mv#0tZ%XcQRNJosd1&HaSS?%iUy4>AvZ;;c2fq6#JKHK;&)#Zhq$2nuCTbd_P z5ba$)urL~JgVNJ?39nzlA`EHePIa9D}o42r?Jc6#C z_9gm|T8ZsdwUKjO;^o+JU|S}dPLwf=M?WjG*COoHaMp9xEj{8G17yB_i@}L!&bkf) zx46OhQ@Du4rMbO&5H#gk(eaQ$s8ZEz5;T@DecZBgPRD4)n5gfIoyc_9h<&D(#;-1? 
zT2`bLn~sLTX7nhAKYIQ%3!ck*!~uer*I64+pM7~mD|cl3K?s7^gP>-!xY5_@%iL4#f*BsO)^1# zyEBLuy#^Q6^SYTfJ5j{kr_sqjNAFrc74{Mqj+~pI^ zNzSKgC@u}J`nI6}WdDFqA5Rqd?_C3Mel-g*K>OZbmAYe-3;hm`#S-!D236lvN-^}r ze5g}cOYPE+8(tGhk~0kw)iY7swyatA-uFL7f#%`=hoo~1uj~2Ra8A@1jjhIZ(zvm0 z+cr;Z>%_Kgvr%K)b{aOxoB#X!u&@1j&zf0#&CIjzClC3XiERm!avBg2LcD3PG6!Wu zF_?@)^*HmeGci)I#@h0wdCV~;rqd5aes|{mdGgm4UNP3yj)^UU6dry66%BG7J%-|> z;+|QiOY;Iq;m37u^^={a_x(Wp>Fq?hRvwD6wi${e0+T}t|UJhjNNjp?O)G7izG^MwK9D%6W9jcJ7P&U;2ARke6|`5 zaGcfiS3gmRRjTHQ;5%#wfwulhH(v4Z^az%DzI z8JAGo8uju1UR<$;$sf{)IQAjnUEMvlDR!(C@6xrU;JfHrReKW!9HiDy6ejy`1=nF$ z8@+$e2yaEV;b*(g{TSSZF32tM`JI=O#8(5GrtA(h!2_LS%2hYOF#)?6fPAZO*N`+=6H%p7dbhQX{rkV|Mr@+du7=&A?hom^qWt#)EhFTL(V@S~P7K|CLZFL0{K` zVZbk4v2@)3_S)CAJBkcy2$Bh;t~4{^YxD|5h(6Mh{n^V{7nR>iNAz!k8 zdjmrckGF-)>lV&hJxVymShO6LrcII%sNHZNY#?i73zQeuyNKUGzE#t+cv=lot32EfRm({cKw%pvd>hMSUa$_k?jyuaz~{gO(_eO z`S~*F_#YqYk9Zn1whwM&Gg{>+hYU?)CNw<!@!}=F zO8K891}X!ZiuVE0^^}r@r*Gk|VU%;!IZ{lAP4?ivk+s>A+!T>di5)acI~Jj=45M4~ zp$A$}DjG<(QrgA7ux5(BK?V%RZR#N~)+tcP26eGG`*$!NXz6{BN=TW}f z&uef3Zvvp1D@I;*i0vmwT}c!;8)D^OyT?>+IMqC+a4tD4;%mu)Ob&c=q$3L-;=lmb zOD`YYkl6tCtN76vDblV8Zu8mAveA8&IRxZ!`=nXpPV{O77usGmZ`u(#4vUR5V*%Pv z=G_(BJPr$jjJ4<>j8;k~FIvaHR#W>ZojBPdqhp|;KmZ+(JoPb^`CiPl28~=$;9|nn zS~b$t%~w7%Q;M0y$5CFp&madO{W!9L^$K#ghO{w*nYv^!c3>sGa*`!O7tw)g7k4l; zO}hiN&41k$JZw?|FnI5~!=9&&=Y^ql zIS4Qh4qV;P@c~2nq2rC9pKCCVXc!rUw14i#rz}6EIub^{*82)Kn|ZoLR>wvOb@4g? 
zR})4YQlrz~=A1dI9JOI}*#HjN^s*RuzH0@w+jZ8@`S!$CH82P?Ou%k9aZJTSa3@*d zs-HPzDgVfqWnMUs;(}`{rf;-z_X>YYlV}TGQatUDn?hr1>Civf1ke*WEWwQT3*z%* z4c@eAbV>ty5uubO6wy%7KoY(-547T7g(Fj|{-`sOjM#+s}wSP+qwxpFf> zd1q`xrdlCaJx>J1S3_lyMa73VW9U$9Hl^E7RV-MC-GSX_ey~74;y^p^A~hu{>SO_- zU-9S6_-)|KxrXDs$-`8qzP402ATB{j)-rSA6^^UezdqQH(7Bsnim|#^%xp3W{$DvA z78ocT8H(m&@0-N{y5-7cP+E5^x2ONE39XhBlVcm^srIiSO-@W+Dx*5b=v-ks^qDnGL zX5OP5_r4N?Vz$l}8CJGnP0j{g{eiLC zvneyO^Ii0gmd7(xf9olO8$@c+q!%=3#Z=oIZfVgcN?0m5UA>d9V4&eyXVgl{WwieY z72|XBbT0ZmRa#34Pf~Y+=Z_~4rS?r)ebiVl9KeO6L5MHQrHO=A3~1q5Gu%JFXbo9e zTjM}t!wOrwagVGWr{M8TbJ{;1MgmWGbxUh_);#tsH~V{B84;j&sJ!S1BU3(smE}iIw#3N&afiXo>Pjlx=^igy^6nW;hXb zZ7Et!5?meI(se_i)^Z?Y9L?g#VuNTWPu^wEl`qmy3%zONs#dPArLjOnD8MMPyiT;V zrBFvpGwkF)(>F1EaUWiJC+>K$B-bERPusMn>?6*9?;QY@`G)Rk{XYAk!rv;lf%AiU znO#Z}(|bNC#hcr8$vAbXW)6Qk)h>4wVAq=KNi3T z06P0XJ{>&Y%!qWdvW}ub)L3%hO(P^-Oh;kKi{GF6+#u7|RsVj}fmd1}xz6^ogZw>l zUEO#4d!iXD9!u?@-@KLN<#HsJVT{)M+*`<`UHWx(X6+TpXU>gN_%+s8)IYWX9Cfu9 zZXy&pcBTRIlszkmiL^G=0)ynYjK|M`Ng$eFO@A+Np`CR<~$_&QU zFFgKpJ1R#(6!2xJ!h1ZPXm#hud*t**eB9@*nfF-WzM7+i+AY?)?$1t)-#&iKJQ5#+ z#zkA)l_N~wfYC^CanSNDJ$yYrN}d%_ucfb6?ac)f_8o$Y>H&u}UACM@xMBd_c;L26 zAe0UAA50B`{EH6hq~`AG!! 
z7elT}EyXygdK9~L53X;rt&o;lgn^7IQF`Hcqn&kSH4ZTkVepjQlf1j!98V|Q`K%1` zC{jAeaxhAmRC-P>V+)vNZ`cYv!@XR6n@sP}aGn(>BFQMb=0&0!00W>yZGv08qQJ>- z3Kf}DLOf$A@Hbo`Jh>kgf}ff|vXtS9!?GV4$Izeph8 z`e<@8ybb4f{<367`3rgnVTru)=onMF_b?(EKU0=r{I1marY9t1*8|`0hYX2k9BN-@ zlv}opf;U@m#M`2ebA~)sPQdoGrwr24HFy@X)#y_r-T*qjF?==(cH?3$ zLwT~8nKhkxgHqOS(I0>7XS=42QZb|-M0C9$rGmf%N}_5Mc$blFx`8;c?^r$!(FnhD zy`Ps9{5P*ZFs;h38#+So3g4C|wW7nCumhTB5pH zAh$J^iRq10vJF>lx4a{P;xrrV9@=dBUVifRV=B`n%?Ztu|LB^XQupa+7X>}x+K5+t zEcuAKg`Y0woVE^1*MW_SWHt|;VX+($=?dgOjwLKI;~Iy>-l3wO!8dfn{xXny2AxcqCxDs|HIClrW@sR|mO3mrMeAaU4~vpj~TE z4oLQVJ>pL4dxYVsN{6NY#MQ9O6wqZxnwfKd6o<5FKCHZ#z5>r835G+tIid@b4Uhzz z;B{8X1GRJ4?>pt+43bT;M2L&}li!{jFGe8}rZ?ZXiq`ZZAnlc66Cb!f)gO&=)KXQm z1h51~@G&iMQc(@e82gA3<&5U2F-)I(EQ;AO1-LBTcsXLqr3}@i6J_mc8k-MG z&L(FvA-iPToMMM_*RB6-e%YxovvhuHC=uU3waK5892oi#f2&AR?1~7iQ?3t|e?wWC zd_XU)_wW$fnc(MJ+FWS=c-cv*NaMb9lVrTg(f)k7Ck2uf@EeZMNO7V_k;1v90nvPU~WFaYTL^bH-K^GZzQj3u${)aD;=0&8qavs^3 z{etI`FTR*kmkn$fpt`X;nOEDpL9a_f?zE>=cFICGk7X|q5wNv($cl9WkXClDHC2zP zSNxi_X9OWXt9KIAtuw^M80~-5p(L*2l2J8PVQ;!K@3w4> zdCQM1{#b@f_`&>PleNu)+d}9sU*xB2jkQcjoU}ynBv)&0^o_Hp``6V79{&gWp?gM- zP_}d96m9Z)%Lxl7yAf6XnM`NB=_zq@m)tsqb{xosOVkhgsiVksyopVmD#6@%Hg(-$ zZ8a;pjNXMvbZ*x$QNCD1ZNlVT^RV~xAj3%OM%;IW zSFx>XeB!vp*3b<8yvqai)Lz{Ap( z&b*1v%Dc%XXVAhsOL=o!|1|XIRnby70slL0pVg)P<-kxMFwdpeO?s^Z|Da~%-akcb z71oY)+LP(m?^wYZEk5a+??GmvcjQyZqesAjuIo^_^ZN#HwAosYfIgm84A#J8rj*_= zHOr0y4YAreGsNJ@6iu6{xE1Q1+Ht0aJlL(XEXWF~;9LmWl_9Bqy{a;!J_2ax;leqe z=f4Qd7*ahWADL;YvjTUnwYLzC|B^g7^^4DD}v`83^HxeK&Vf{YbMIy$beiuE1;bx-XQ3)^UYK zJF5FsnMx&HkepGqh%Mjul{2S?B@2iGC<(=DT5uU-V64v5>gZ!V1iWE;o}z3ZTX(tuYKzYBRI~_j@?(Xh!gZ0)8K8Kic|<&eSSQ|p z{BvYUn!jeAL&=F2oFSQ_bExJ2fN7Ok~kOPF?lZOrseomTPu zg@+B+5{7{N#l6ov#)o60-40lztzLFC8WluH$O`;kJbXTEOP;EaGHyMQK8z3Mei&W^ zeGAJDzq>(f7=J$4ghsk*+La3}M$_wlzVkPZ_w>y@4hhFYyJ4PL?d&_SBH^~rDMcN; zS+ASnw#+rc-YV3?PA7%9nooqD;|Hn(R-UbPO#B^|o8I(WBQ==auQlQ;goejP%)ihyy<6G%> zP~tzp5F%DVYK{OYNsAzGZSaRUrISLN&ejqt%^iOtIcdv*JgQtStSEtUWO83ChiEs) 
zK7s6$yWur|sd8AO+yt1XXHxlUe-=>;{dSla2Rw|jXW4;CK?$?LMgQ*X$@80n{(uC< zK9C~wwzk4`N_0oFvKT`Q=aQ##thDaaxuPqF;3(^IdzX5ND9<0L6YaI>z?n`V(3Fo9 zgdO7S$4_)N_LN1HZN9fo(5Y&P+vs87KgTe z^ykfE>F@>Uh2_;)!WABPbJ32aiPY|(upyBv`7Q=nLb_pOO7SELAVYpBE253v*{tclLsxp-NJ_k+~LeGs0MOE*K)$ zOJc=|IPl6E2s`pHJjfb=y4l)b&mSn3?gfE{@4RhXEaHH({neIHyJKw8oh@8rFLFZ> zH7%=T9JvE6os@bb@=dx4LjfxWbm16;vDsqI|x;7 z(la7nbs4<8Hi12kw-GBosBV!nhBMeiOcv=}BLM3uF?tb}vLTBNh2Q=DVwqBJYRZF7 zd!zeSXaN%yA*4TyEf$V&Au-qJvwOTr*T}POOa>KH#HA7=4uhP(YbHtS zR-b&`05nm24FNsRv*^VMW}b#?TixPm_UuVg(c!-4`<51ZvOw6fU+$ zh-W|n3)U!X_dAJLA;`{06>gg8NPdKL*ZX4H#k{L=I5o=OMV%C>meTK)RrLx-f)O{C zPzfsGk)qtO6QmKkn0whdkSa~-o{*(pS$`oLi{PHe#Tneat;1l%Lz`@nz#~vBmoBHx z`nJos-s@d4tfQkGPzD|cnvHE8et`Edj7kCZf!U^kH*=l0eA?<(H=mME|AYv2cNZ^x zh#)0G)IsR$b~R;lm!^99CQ@T}|G?-jQKX*kta&S}BT53vukI_yA(M&;MEItkc(?U( zI2Z1G7>~VkmZ$HBIL_Ofj^~!7%`6HZaYd#}ZI9+x-V2wX=J81*Bh4gf#M32%8W(_9 zq(1vKe=cpc-iQb&qq%rVJOk3htmNaU?-Db z&rSNy{C6!xicn_xw7S_jRn#j>-LF40ah4_w{!!9OE>8AU>POC!5mLwSprb=(j+={FjbZR5 zSL`Q|Mg-?klP?}*;_6&e87S#siPMd=Hp7 z)>bu@=|(e`=2w?@$Yz$RkpVjFolTbT-tHJpog3O<3o7cE&RuEvN1YAC1OI+|)%(k! 
zn#1(b1RZoX;Zxo(Q0t^4ZeTlqhoe+&rOS4KLvkd~IQiZu>aiY2>tM?mU}30Ur{sS^ z)fH@Of=@J!NrWsr`(h8Hr3LwOnCGEQ@WsXSHc|zHG*_vGlBq=|g68d)A_0 z3L<2Ox3>&pG&XC1{}(4f(X`k1tQuH9s){MBg^;D_=1MpGIu5#NTHFd)Lwn}r@7hn$+?H!Q0O*}sX16rhLV$z zbu|)k*t3sOjeo9>X$HazuNPSTh6`Iy+R5cV#?n=-IhXRexsCL_7z@8B=1pHf)Ibrv&mInyGuXNpF-|eq*-Rg*T z%NBJMz8jjk5id5Uu%=$lf^nq)QH zRJdDwiC$q}ETH%_8+!@%haxRubs$&xrYvkFg5Tm2)UQek#o*4jVvtAN=fTT;XacVm z>50u+b6vA9Zxhd3=>Qz%AsEV%P;zlhS?<^7| zb82--KchR`FnCL;)J^mztkTj=IM^%E=Pazo492&UE>1dkLRUZPb%1r7Md)c1k7S&( zT6XWwB+cSB@A5ppL3(^n-?Tfcfr&W}Gu6O;9ycwI(Yta)3Rzo&F;tn_$Ci)SZpmwt zr2EmNrawTY8kfBHZyb;8S6%N-#O0!{WQ#JBU7K5RuR84a{m!21h)$IjH%mpj>_tHm zW3x`jjK<){F8AzL;{vnva@WlaZlM$qZ-H z5bQ=g`84IJMiwC1i|vKb9Ai{6T{7B?5e=_*sDij0U_$YkibU zQvhkQZCZ^_D{Mm6oJ&cL^E5blx$HYJ62$4^VmoWXyHJXx#I~nRPYaD{az*3EK0z8h zO2nDS#)p3uf=}SPlT#P@%<$1HIweUblhk0ZATT>Hb~G zgxRDH+()@)sIC*o{B$NYqx?xfb}+OiN#LQU2N%X(=37KIl}Blvr`r{Ew=j^tfr_kIxA?lq&DsLNC(3=;`;$qf}x&l2o*YWo+q*g(`06t@JSS z{aln{rl;puS~evHVXI`0=cVs6o4|@rT9xx6_{_)!t>36W@7Gk~zp$wv4f#ico&{36 z0?m%-0uvxaQPYK?^kj&H%=$1M{o#@0Aj?qH!kt5?ah~HI&TlXZ(Sf#LNHwKV{MNn> zbZ~zrc|^5Fg%(%c-p zf1B=3RW+gn$w(#yPXR-Ayt(9J2!T(OznWE|nbxa{E6eZY*Ah{~isE0Du&JpgG$%_s zoEe>mebh+139;0Yo#>~jY`_NvtmdMK7-*#^1(nehwxGN>f1>VsjgpTHyufp`{)zDE zA-{rYGFqt~`ze(OOpTRxXxgLRLZ6H()fBg5xV@yvWLpuHnWV#p7 zuE<(w+YM$CI&W-xVt_@LRMdQhOf~U6;))&Q6LIE9&}VTs`!qm(g~GZ(#sb&LP9^_B zx9&#Dild}Mqlo;v`|f)%>{)|Ltpf%OqSle1R+6Qas1icE407_L=3K|3va7pI!Vn7( zPcU3kbAo?_3gr7?0|b#6N*|qJk4w%Ak3A|hkoqXnQ~&X}$#?K6QtTe_w#+KMI=Vb- zsK_ZF&*2dkgS$=0UMUN~iuj1s-fOv`q%~3Pp>k49R=-oIRvEFY%K1%Qo`UCoW2GAM z58;~l@vzAV?FBdm-Cw*Zh z$HziB`0jG~9d3N|U;#KSCkBWjhnVDl!BO=D%(aN~wjoP;lz0AOJ)~{YXytvHfZ>Yg z48;C%DvcfB0VLp{7wT?$M9)={H>HT$l)}#!)tiAzk5d2D)!v7T5B;8SgD zk;ZR7-E{Wvj9~a1?0M)lkdPh`G>|r)0p_$%3=0=#wUUr}K8X0l(OFMhA^7+AX*X8& zQvshI;~L>NPFj7RroSiM>Q(gJ(=}(dW(qr-QYwg@!@xsCiobD9!-K_OrIaN97H90J zcOA2}VufFkR4_b`fDy2-q3CmGQWt(Jx~kW#5K3l#_1vF;)u;$^yf<*ATX&PqM|zOg zVE*;*R|iD@matg?zPs@od1RcMX4;+QsUoJZ&g$7IcrgL|ZDBX_6L}UOuN|z*TcLxv 
z`Nnqw0C27MEa34eAoS$>x-%4cmq0!owasq!Ome8TgHo{s z(S_%vxlpeAISc36vNG|}6x=bx%8Z-TEU+H1AEC?$;h^jJk1sZtDt5}xLJV+u8cMKM zFG$?dbg)hN^2)b^1#lal3@M|6vmGqO4WVS5B;Yt$`55+TH>mrmLEhVntYZiW00IHA{^r zK3zVV0X1E7LFusjS<_w ze7TP5X$LGV!D>p5JgRyE8WicrV;y-3h=9{mbX(RZlv;=;=&1_tPx^^_<`rjcgp~u$ zg*N%jmHmGQiZ8j!qR=NM#JFj4-TQRs0)$I+>-S9dfhrisu_Z~T9?+WAZW5P9qIc_y zG@^vr9r|%F%augF3Z>$~=cv1{X>xHer3Ge>LS+@9Uf15Ejss_m2;?zITDXr4cR*rw zMC>9txE2x>?@1)pz1D>HGQ`14?;cG}8!FA3L!XF?BG=VwB7Y_s|3c%H7im6U+BOXR zuPXrvd^{_aaF<_em34z2Hm@a`{-cjL*S7HW`$uo}`*3^vvV)_Wn;c9rgsm+QOSj$_ zrASXL9`;dB`TkoaR}+J8cQ@kNgut4cM{*(R=<3LEd!?HSmh5DF+Lid;40L4n3%hSS zGx9TL+UB%dnrfn%PbUnOR#c2Icx?E=@nc%g2X7xP9KOe;C|V#snn7Wy)poA~FRyh) z{P?(cN3Ip`hp?JLfs+nGuZJ+gcOjlvh-~QFY9}u%O2t?jkQIj7jl*%f6|gT%gSuKI z!nP{&=gXS!)PiIq9NMVJp8s#%?geDT23Qq#liqAc$OG-NPgAI+7U{5mxBtFQ8WXQG zCv?lWcc=3PNYaP#UhlZnf1}?%e#<$adCI|$ zsW(j04#IFOxVfO>bITuk`NPs%68KBJBp!tsI~#5t$Cj(~rRQ>Wk(EK%1nzZ)`!&Nt zGB-D)lS#nNcQvc0R8=DAD$Z0*1-3-Me1b|&T<@^NTaVhXTq;?}BNFx~~IEolV&VUO_ zt_E-b_s+WlHC3aT-L*oP#okm2FuMJHi>19*WBq7;!*1v$RX#aIc(;4qHtF-`KZos* z=xJkdMZ_mLd@t*>&o?MggTY&okKsWN+A9%RDv~H0yFKI(x=nu* z&kQ^%bG;XqHC37mL_BP6rf>JF#}X8lNm4BUN!9?!P+0_b;I4@8O(I7|+lM;N2uMJ_ zH2`_kC3MQ&L+)Z%OZjq@PTKaR2he5>zziS+5~&490;)iHP$G-qSEvwOFaSZha2mEoCRVX9PhD0HM*SD4W{;K zq6W~MA%^{A+YOog8>qgsaw3HVC^``%{N|wV)BSkd)Q;?kb|{b?=9=}G5uK>df_>~K zIB|Yz@f9Nfhv-kv31IX(=dN5TKwwP*s$0ay_&381iY@(|iWS`6;t0mjg^-b0kJG`$ zFS*w^!>)$TM>>b71C_S_@^Ru5U)X?<+k*MYPSmSDBd>{t+dKyyur%XTb&2v(Tuw;R5yp4Lugv+U4*!dPpjIz*>$K{{WqOw97)*E z`i0MORn`WnX#c4~D7O;KV@eH5%K9a^y3p#VIP`^=ANb zLEcba;WKe$(ArQESzGK61WnCNrMX`SvOOhBIP|^JdObGU0=Zjsm7{c>Db>Cby5fPh z{S!nY#jl5a+o}APFDs&rmj`y**9v=X&9={S2Wws{#aOEG?wCE?ri4OkQAatao7tL_gm7F9opFe;XDQwz^sa6`ag@+Vpr6*Bi8_{}>%lP2GCeJ}1}P zY3*fczq-_JM?xT4eDVbN$q(eQ1nulUCnf>+mUh+~9{ zK}D2hTU4tAD9^Ofio_@m zZC?d-8t*iIAM(Z4Ms=mckSjRF2%nzHN^M;8C58r)l|SRQo#&~BAk+AtM;Na=S@3x- z5WU_{y{JQr7N!%%m>=!-Te!KjVNX+xew(npYaN568JmCEOQQw0Zx9a2N4GtRd*@gV zQfY$!2JnOR zr51%&bx}J}7I#J6Yto=FEn6==S 
zUz;>H+nqlyKv?x-dsY70#HsLbP;!+(8FDquy~uWr|F3~|XvnDhD4JK0D+&_X#t<3l ztpw$6OYtBOn=?kM>5%sIY9QcJYSA=d;+OU?ja_5O-Z{Va!ulPKBYrr)W-eoIAB%tk zzi%(w5=vCIJN0nFa&zly*y-u|)Ak3sAzs+kYqEo<&#MTp?hmX@>dcyk@1|N6-5U@z z9Se?T!+U1IjWXW*xIdkZQz|SLbk-tPDrmr9uPw49O<5Yznsv$=xU9GnY=6;QcBQPX zrJ_jtR%S5dIYH?+&uL-uTz|fos)IpNJ(_!a>d!I6x%Y>s`&mbhxBg{UxwvKAMQ$`= z-~2Dym)!t#2zFMk|3w%y&OXa?ld^`$X>>rcZ_h~83d z*|wFMAfQUtt~CCqqCw$;q$W%nL(Si$d)r2pxF8{%DT;@ckt*Abuvvi+EN|f>Xdemd`XJXZ6G}<;Mme7dfTI4g)RYpuI5P7SHx$Zq^kaO?+E>s)FIT zvRnWzgZl2N8FoV$=@=OpK9r24pK0$6<0DN`K1I+SY*(MF{;B_MZ^4e-etq*f6ndD^ z;f*P*d(z@1*nw5JOXN8uu(mkiYMfc-=q%=IkD1WWu`dN0#&6>iWo+3nTG*Ai(L=A? z4TJn^2-ziE`Db*j5vL)RF85C{h`S_T;CkS?1F2^{5w??|8EZup3!@*-nuCQ9Z%VF~a1+6Jl?9EOw1ne~{Yp+|bJsQLUXQEgsyo1qudw#ov z4Am8Z`rn{8^nK}hc}wV8>3aa7UsfuE|3f5lf)bVhw4nPwz%J034$-q7CBHZ!1P1M% zflDk}O8h`r&n$DZz|=yY#mJ$J-Sk@yzl?}5yhMjK8qZCdh6j=eU3Z*nz391okP@`f zLQpt~1;V!)iTeiI%|m|~X1Vq!2xl7xqh)tXGSH)Yrt(lKzlf*w#a2^%n8aN}PewlG zjCIA>ysJIh+A#3pREuWP=rLc{r$>`)7Qr)(|5bJztJt~oovx*-foIx_sFW;`A%3Ke zUDq#oW{P?={#<3*PAlEn@U)~Huce$@mcweFu3F7gwWHM=V&~ZoAH#3u%tf^cD*8dU z$FR8ikN&)tX8lj!Cd5WcY}{QimwO2kN{G!p%><0#ZfBQu_mnb(8d7kU@)05*2M|AN!KmGGh z>WN!mb()syN)F6&QIE0Thy5kvygp6Be=(36`J%XRaQ{y!+Zr?o2m=7;e~d(>P?6&+ zinjO74O@d?c2NYhRlRDrCM%&M!4oYksraKoL9($G!Nd`JKq~7=3^EyltNb9A`ae6Hy@br<{d@W z+KrRA5p#&9|9B3;s=)$#5N*lSbc{6ee(}$VaOn+giVeZ9xS`ZZbnJ~!iPTOUu_jgJ zSoT)$;G1s{dbkrIx9*>#O9e0LUbZ}JHUu$cNs+&~2Ejxf4m_9(WV^+ST%KbaKnL$F zf4}}YZ^g$DHCaFt#bGYhKBtt>>i7%YODE$Mx^KM>^QPK^pa_xSF^}Y=45C_V=7R0y zYYmn&{pCNR!#bA910N?3#`s0U1l zkL!!%=IZir+KEPbW;FU22X{&_oNtfx2o+zEEhE4FZ&0bFAu>L$)~5}oM#z+pbWtN3 zHGEYU)nAjY@T3hGb?Ug~f7!m=ukFYh6Cw6wc`{0(sPN>ZLl(h<{`rHm`ucM)5; zmEu&WBjDZ%0j;O@`%x}}eqE?ZpU=K%4km_jTkNQ0iIJe-y!6mgyXweLJ^$L-;FC}C z$~u^Tuo__(ZXW9s$`niAOk^`OI+7C1w_8}FhpiIb4L|Nqmn@H}6{#UV%0FhS-4kC}=Ey*C1fy3LJ8ZIJ1Z+ystvo>uOai+z<2^ zG8yGRo=M}M_-0Ix8Fd=L{FLmxjx1ZA!(eYpfgGi+AU?G#ax_RE5Xbn2azg~R7ljIx|?e!y&Z3sCjFyHx{_`UBj|^^I6Ul1=#=7QpBgbCSJbF}I)Y9qz3ueRR7T^+GT0W0 
zW2<>1GP_*E@@E(Y!KK57mxyGx<;AXRCJ_!T0i_tM{ziX+@7bOC23EGQMu0{n-?wPk z^JZCJWuN-t4gLDYH{%LG641*(`FvMtWRUvsE+^Jy#TnGd&t#V{)uC%>=oAzwGL5`W z##ywS`dtHuItpnd3M;Dgo3k`1wK-v%LkZM}Y`+n;iYclGwzk0Q=|%hliV0v7T=jRI zlQHkV8vNwl&_1HKTz#jBNsDHFk>6ha#{qSW+-rRe78)$6_22ZqoBB%BcZ77HAlgcbJ*#?fD4ocm$0BE}G zLpdJk6w78FF3Kmh=Wrr#l& zYmQ0@XTXjQgf&k;I1E)wJ7FXZk9qV38pH(`0URjHk57xUFWL20;$+rS84_P%1e_NH zv>eKiSpP8WU@V_Td2?5|xv*YR+ih|9BECSvqsoCkfM4 zcRcc%>FW~I8=i?-=?B?tvrOJWUm4d~m(!+K@T)msG!A#^w!H^3^IxU96IU{rRAeN# z=(g)|#MsFEo$};-CG!9vCf_`RlXf#B(N#YLy{}3IYd6-#8}8B?wGSTGor93V}ptQKH(>2nTJ%GH@0c%lfIWk5_*pe?k~3(;8CoMhNLxr*N(Vk ze1IuIaHwPpf^35v#2sCVx~3Fjv}-pFgjhrT8JkT665BG5I*e(M8POSr)@U(HwMJ!~ z2`o)Cg?al$%=2Auty~1F~g0`!X*V`ssio zV<}Xh>eypRwRU%F6u;vOvZiC(+4AmCC^eFUIF*1WmSrluG0XyC8p;N%_R*4oYxZfJ z95qiXc|R}4=56snSoE%TA(+oeenQFpblr#0HxpD(Jkbg)7CP=06{$ZrmpmbS9=(%) zeQygDJ7Y(Tt~B(ka`_z&y*!$@-r#_qtb8Az6zay)iifqC?D{u(d=)VbEmlk}Ll};c z+$IM|z7fK*5}HuUA>Nqg*c<&*Om)0c;n;XPh}=8ocn%sdrqidHz)^XL&^=4(%5nqN z`($;UbNs_y_jQ0B;bzvt`xzH{5|`p`slhy!#|^XdG3<{_eb5cJcE?mW_m)9W7yTF> zL3fuaOVU&P74>xn6VmMhcT1JlftQ)f{jFk!tkIKNEBAX z(+6!|ct`w1FqvQjQrZl9-Y4czIqg8rSFkjE*WSlHGw-=`-zP4E4aftVk_;fLL|&s& zGm{V#iHEoSP~q<%hZUZLOWxkdCAJ|8NSU44CdP-0v*ZUf&(JKYJSU4}(E^O^=CT|g zkiS(Dj%@Wv>~A<5U+&zs->R!EpA_P&S)y>5Rr!XZ#q;jwn`uV>YGU=Kofj1MHnN|u z>u@O0HFnU}(NyNy)tL4)gfs)`4AexGb+0-Jk0-gGv)SLs#5Yp{mxWN%hClYXd}dRY z*uwWtumyiTNVYl`x+BO)EPSwVHhF1`w%Vg4qcV(n#8`iZZzkzY=eMAKOCpp~eu z&-|-tK8eprFERDAm&4d>t!69}Ov!?ZtNp$E71<=hPWitGdM|x71aHj$uOZm}%W|dX zSRH0_m|G*kWUQs`ip7B7XC5}@!4hJ??r%vj0Tyu_HH7!;Lb{g(mx;9^fLk5)Ec(kW z8F^G5IqAuMIKBj2V&Fd{+`z#2T`rXWjKH+=Z5|HC3X;fUzt}1x>{K0N4=bRj#@Pdk zaMVMt9TMS2gkcSo*fb!qh7y~0p7%Aws+DToyZ?w@b?SKqJ}}ewC+0QSY8VcaA$DH) zFa=5h%LuOl75yvY|2lia4;<1k%NlRKd42XBy%>1eV@3+}8J(hp|GFD0%60W8gJpLP z-moGj%iHp@Hchcq%^2b+9LzADcMNfsmjYq4i_9)WM~wT|K_dDVHA zW##uKt=J5chb{l4@q`uzVYRCyjPSj8geVR6XP4C~W?FXx!r-L^gi2lG%z< zSrV_>(fr=d;Bb7mN}X^M4^1yb27t_$5$zz?!$MFIuzGcCZVT zn@njDyGmnS8xA+oH=w7~I*|G&K=N4@iX!+9`awCFMln*JG}o=KO*BX1zHu5liO?Vs 
zOr@`2OJumz#p7Z!YjyS2LX((Mni4~*k%uz4xr!J>8 z8<^AMeiutwe>UTZ!3IoB>mMl@iocMcHu26#L+Dc!rn(|&(S`ouvX{Rh*bbiLQ&AaE zEJMVSxu>6N8uEV*^(576U;YFa$T4VJ>W+;u;X%x6^$^JP1e|}6dZsuux;9^$R=Vae zKzp&pFvY~B*fF{{A}Fh#gg0I}aQwDGw%MpPn2TqVnA>GBfRj#(GQbQ6vxs;p zdMWk5OrC{2DAkmOFkw~;0fh*iSQHgc*Q%VN#|x)ump8U5ASJf;s$BxK`fN)nawEc0 zMkMYfa4J4^n*-?=%Rh5Ir~@bs zN;S;KdCYs0^xr)A)L7N|G@EQiPiqzkAFEZ$47fGk3-A^OplL4 zzZ33TYYF?+x>|Od*{CHiRYNMGs!cDw#zXG4%NB(BY##xyD?@TwgfIVfS4bWd;X`*|;^XsEYRabmHU(w;1$=ln@@f?7>8>hcMt*>T) zLX?68j+3d|u>e0nJuUq6Yo}&K5<|W$0;> zywubG@z+a9ShLfz5aR)Yq_y3oXYdh;6SY=r;q;-qm?`b=!+0BW^>xgOw1nYRGcW%^ zC^yvE1bf%)a%`)dW?BNFgmSFa4sPZl0AIC;&V6LLkn&NBOP!~+Xfxm#ldp{#aykt~ z9|0TdP3~;9v|*7yhALWQB5#5J?3%LbYFY+k$lKRb?(67++2XlueLpsFV||ljs9#3v zM!oa$i&c-uSEo8NBAr(E&z0?;@H{`GIM4-w2ZRKw{EStu6 zE!mG(OgXTsnmq}P4A>kyGf@rv891F(+rf z8|WZBmyKd)R3V?Z0+_TYu|FtWfMvbMi%z8r{yVvqyGBtE_%CxqJgovI2N5$#o8Mol zMq78|oP*<)lT2H^cjLE$szz$oU(2A^<3!!f%KM^`uWA44yywzLC?>!#9STc@bwcL3 zr_H+ob6C(G<^N1H>;MkwoW%&cTVM8nDH+ZI8toxg5dY*-H(JgHCdY+3p}xSUwh;*a zTixeKiXWs#=3#+jlDBZQaP4dlZt|}?u1li)I@Y={dq;7tW$XNnN}Se# z7!M(t4dwtYG{R~EpCF-8oLOwN&6b?9VuBc+cp?W1i+?qRU^SK96>6NUNn&jw*2nh8 z_&|m>zJH!}4ne2#gg;laq{ks|c1@T>e$$5Np=vy1?OzsNGJ@n*fJ#;3-IfS59vBF*!WcCAtZ52WBBOSM7;Bv^ z0UkK$3d%!Js;#y;STSou_*|i;IZC#K$OtJ$oWJM>iMI_XX(FRec~~$JypTMKeiUk- zvqK9^zigsCn`i-QiF<-}-$n#HY(EbR2z_EVwp*O$Xcm7^5i#$Lgm5x{$2z?-z?B^Y zaUob0jjl{Ps!%DiIuu0C6OAO%eAtu_(^rPsd$5liEF)dnR#YGzasotXLXa$AN=7$c zZ^4>Und2-3xp0oUGXz&%d>4rR3}@#733Q0L)m{wF_Ix_XijH?{qlkngH2z9)uDh+wi zF=Zv-#FT2qv~cHWmJPlRCcU2gt%_>2)yHS3xF0-eU^FCaIV}8m6EuQ%iZ#UCkDC2U zgPplNnH+a~Pn0J(d9w}{wBs7%@KjL#3IOY*<>5WdRW>V9RWt>{1}wd|C=eX)X{r$EVn_c znAIV)56sa1K3a*?4HL4)?Z1zWKS@~+Jfae;=6rTzI%@my4xD`%fDT>8A&bo#b%*(^ zJA2ljhQBl!#_?DANgc;EclbtkTv##}8Nh}s+Tcu@0*u-m8ruwE=d!dM#5RG-ZmE*3 zsTRpqsEVxa)?DH3;4HsIh)W4k=5xgS;Hw4;U>j;@`2r_uz*)0Mge^OHMt+=Kp_*Ud zS*+g~Q5*FG?Bdn10p^4d2P@zQ-g!xbj~O3O8{4IK4zscUf<2qW@(FzD|I7+tG&$+f zJu{mPGfwXl^T!Ez6JBQ@w-$6=L$Z2AM3~cx3{6rCG4{H#1grz&ZT4F5e>h0hS 
zUkH~*`anA#O@&U$vf_}Vq=G$~nzBtR!aMBnkrinuI&I2U1bo>j9M}haZ>Q~dPgRuq zQYMVYDP%O4Ux`_2e&p&j-L+X+j$XzO$Y}LCjWg%DimC8S9qNv+rc6ZH$% zjmxkZC41zEjf^*1xaevSotx??Ssp7@`0#XY?9Mob-%*D!w-0hELIn@oOt)`53N(=| zDCG$rR)zzs2$wGbi*{&>*BTn%bsL^Bj$OqAj+~?*eZnh1bHe&q3@GPQ0f?VjO%K+B z3bYT8B}9J9I=QK8D-ypnfoUA?0JIKtLiBpfb4%lR;`kNlO-t5w1Lq=vt=c>0d$q`A z;>wkxscy;}l(K-d{l3|@aobGbVmpbkhrgRP*Fx7lpnZ2u|8Lym>^b9;3(AEW^2NMa z8F0CB4O!G?J9utOC|hsm9HRT*)}>h{l7Zs82=}G0^IF+^wvwah2jWLH$<-YnLf`dF z@OJO^@zb7X&GnS^87uEa;Oqgf^RuzLG&1T{HAyMbfEHJqZ)keo5sK5F_Z2P@`;L_| zlDcC;8f>qsZ>5JL#x5U}pKi!M-P(l0TOnCJ+sA^PIi7ZG{m9cMzQJOM2zUMuZDFKg zTmTL4xmj3~P=C?_wB?l6g;D<9(`lX!!rnG?`oph@p+f4?sM`Fr$qc?F(|$?73)0p2 zyw2I=h~4L=bU}*%4>LE~Uv>JIlY^U;PeXcg7rvki$ME|Jdjh3617STgFny5Of^rNT zgYcJu&mXra2jCpX9|`?%r_|x_e~8PHtinuz3m5PA$PfT(7L^UglN#^Ux=R1UGf4;k zLETa>GEKye)}IBTkuchoC;U*CGpQf?n(^k?C0P-gdDnW(o$xh~3A^ZFm&JJyBp2qe z_?yZjY2;i%GH-pN9M_)(tdOS{gC%P{5XWN09shtL3KQtJgK@`R z7oT_22sQp0e7D83CC68D=pkl?{he*^!2E?&_5;9Hq^yl3LZS9*>g$vtUfDCsY|4OlM`tPN#W1a)rKE5_R{RV5)5N# z6af&80jEEiWw#JG#eOA6DgZ7M4}hYoadeAS>wyze8cmnbCZQLO)zwuE3!x%!&~nQ9 z6#zN-@W$DMBOFJfiMpe*fOO*Smwv~efb}cyvG+AYrtqYWxLnT?_A_S%t~PP2PFF7e z%b!y}SpEIz1+^NU12C6F{fAX)-D6{aXZEM;_n}kvxuo3|M@pN!zOTc0ZIn) zj6~1^IwG+(EGV`0#1|{Zb?Z6DH*NOD5&>{$f;#Gx8yhkE#HsVh@lD`)VPalDGzJAk zW1Gd!gaYkxfj~4Sr%miw(|We*mAB$^kXCo77Y;&cBuqP!rkKxC&_oK5V{$sR$XYvG zEX95{K0Wj%%BZDb{XP$PKJ?nwEq19>e2x0Ia$C$qCL8}IQ3so~XHF^d+N* zqh9g`Rk`H9%1mDc-q-bO#alsl39#adqXhGjEbn8#7?G7G6!QrUc*OPiuU@&n1~$t5 zDM6Xjv@SsD!2~+feqR9Hv!=5hzvhC9llwcM2kAe)=B8TTJHvB9G^2@#MC!y{T zr=-zmU*jp3As8ln<~}w_AH&2mzfGC>@BD7lZvGJ5yL5H;;wM_ zxX7pQ?Q*Q&m4oD&D$lm;)db2l$<3GHc7On(x@MW`-Z%RyTuHTGm&9OI5X#a-Uns$1 zwTs)&QxW~He;=L*0ZKDO6DMh9p7#Cqf;bO~=e(y~e4@*QIRx_6w^b~~N4oHZT1Lp-oxN80r^!$K01FH zs+~}jYR}~kGa&}O;MA1EBx~J%@Y-jL{W&Tk5lWJj8OMj(OJlwQC6`X1gnOU&3#K?C zH*QiW8+cIA4Vw!lzXRIH%7c2M~dnYpRFV5d2nttl%p<#-?#S_n3Qy6Yp@Aia)< z%}s^B!EKVT{c9*K`wFxk!2Ad9WL4j|^*9;y0m6{1x z%%QArzh7po_b<@dSe1JEY1lKyRj*ro&JP-}fkr6uFF?cCe{gA|&wrzzd6)}nLsgo| 
zeU)Bd8jFlU50J2jpkuGPL3z`cR9@V^^SvE)D#v>`@5J^Z2EAl}9CfH2Sh9y%+xZPr zqFnd?NIBK_!0-dV0Y;nw(C{7O*LE%Q9!1GOV-B}WAZ#E63^_!!EPu2>TSTYsK~eG5 z({uf=Bu<6a7CNFm6J&>r^K;)=r3+uPpc)jBYeng6naQNgVTn!IuvqO0W((8` zip*TU)lByXbf5vYuH)!?P`bVpZKry@fAu$fZ@w&n_>^n&1sgYt@ z3=qNB&QftU0a(U1A#Bx6u4EY@jy3)WRYjL9%9!fW2xRyz6IuddpAVPREVa%U5(OoF zE*NBas{DKS;rzMFdle|)d2Cl2gBzTpPK>q*$E?uzO%K&FL=6bZVIOlJDC*rdqfYXSiKYMMKWV3TmCtk-B$n_u)svmMB{Ihb3m@ZAk`rV8B&#GnBeub>Sa+oGgJqNd_2|Ij z1YMAo2e`gs#8b+3g}NXnSjbjZgyh<4C#SUBPM#w3P=-b*flnyRxd#5OflCp@dodaL z`~1_bf|giJ14%!sL81z;=UmpJrxeSk-_*2pAa?=$9l5B@#3RI%fJwpurLW3@Km5H&!H)7#O?b8+nM0e^Z zoTDea)D5svKLUC283Wf~Q1zv|#Upr$kFn)vxuIc-#JHLD_%PFOiC5X`AO~FJezjy1 zt-1e3)e!jk=L}inb6eA+(GHpJg754t_B@0pk_7&1!(k5)>H3W5GGISfuayJSbb&YZ zyv+gtg_e^^W1$6Ff{i}`Thja@pbTkO_rOs|s{_LiXh|fSCcX+dzdzLA0qS+N4YDj&^s&l!vU(PXyE=|(S8#aAN%9Y2YWvn(>`*Ta-NJS z8H7mLKFr@C2{w{7L2(utS|;S$_*G(#LAIEZ+~%F}kc|%BY@ZM5*STZQY7??Kh?3p_ zeLjk8IB9cv%W=yvRk&ojq^7b)3LfIZRDm-SMeO-H9vT(7D1{i-0bx_>O1Azch)^|4 z1emHK1x1}aZk(BPR*_`6EaW<`vPS!g0VwSolFVV4y$F z0t|G7Js1oEQs;W%pwVUEL%XDqk(gouG)f-t9sIUXnajU|{D9h3b6!`wPsXuX zLgQ=oO>1HL5|Nep-flrS*^-)3Zduiytx+F9hD2wH*RMAfxWo$p)(SmUu676DpLb#d z`iX{cL4VxqN?E*~^~Kl4YwVx~hoi<~*`k)4AE7SvscC&?$$H^mcn5x)jP{s9`WA@} zYX4V;5KWCYw|(Q|eZ!66!-umDY!DDf*8GJDih<`5e~#jx%{6nK7H8I)Ns4VvzCS_{xVQI>~ySpl!qV{M8}E~L#htK-A;Q|Q7VM2`r^&h!o9W#t-M;Xqow&;*hGF8Wv*rM8R7`^Rw0r61#N>LY{*0bR1Q0X=@yBdQ zQvoh9r-{6SVx^(Jg7N@R`;ik8m!KR1>^+(>3Ctr}AhZ+d#-)P`zq^WK8!)L{f{DdS z-**z8uLKeiID2+Hd#C7IQ}y1*Epen7tVuD$wUrs!AE1_^oRm1bR_7paFqf_!_HgK# zs{N+0mW?dH_zPL*z#2Hn)$3tGOxX|PQ*MI&YS$;1APNpxSh@iUE;wg3i2jydX}Y>Qu zcn9&grckEltDldkP>_wa<&ZBe$&#PX*GzLM5QfN7&vEb_i<@85xQ_~1QhJ^S$Cbf5pLh=SvK1*dn7;yp;Lpq6mw^0AX6r}E%? 
zAlxZnPUoxP=yWhx0GF)XFu8bNd-ylOgpjgrwKiwemR4fThYsztpIs_zlvq0Y9DM%H zIJYr@;Zmn0+r~nWrwB`PuE|cN5-V^L-KL>*i?o@@fmy<5t+f6 z29dm*A8EyZdl&=5oT4>?T{vafG)*nC@&VEtLTf+wb2yBOp6~BIOF|S?0i6))pr~T{ zq4Frl6bU?19$pX1vapARRY$g)1!Cg7dP+*B{zAC2a4Yc#?=!*8Z8d7O z7!=vmLr^hUzx-rBDIQ48+SVV%7BpwLBRtrfsyC;Q*3K=j4=MbkJJuYFSndC6Di9%} zuVuSbQRZnHEL{D`XGTVgqY>+%84n++Oi%Ry8nI-R?(k2c8`cXCNmE41HDx=QVwyba zVW>^mo}V$PdG|U0SX|@ih&y{0vH;jqVvTi}fHUC5EJ1qu7w)c6D+vXGR|pIn3SBf2 zL}waHg(%DCOC+*S^B6il+#NOJcEGb+1qexhU+wopiys=sPfHc`PB?;~NSJ zc;0#XyZ8*ciJMLACl|cJ9deVfm;zvX*&>hR9z5>aYYy1=*z?#M(9}{DGWFO#IlphT zM^M|m+=%-Vgi*&0=m)feV8@kJ1hP>?jhRL*KP^-Wf+ZH#<0Lt!q3`{U2={)@s33cc z8-SJdV7Ta43!5N7f#mM%3<>xy=a#ET8w;HJs@{aFh_ghYUq4%ranwzniUVxvev~YK zDYQXn*^9uSh$olOe~OSTS1i?jMr}%CKi@?rnq;#NhICx45wDlKseAQ7-ILJf?xxR9 zwNVdFjUXkhax_$fQ9%r|9E0TTllP<*uDC1X(WLr!p?Z;^a;hx7Os-ipAdlL%KM#8e zicnx#gqJtfi(=31*kBCXvjVn9poySEydiEU#7?%2KT7DGGcLF4*{Aw_;f%#EL zpQfys)39u!l_A0rC+4Jg^4Ni@L6M~;V%BE0ZmD8dgjJ!wk^!z>R6TPE`U z;-HQC%(kHEVszDslM8@(mYS-ZTNy~;;cnH{Mm^>_!78{=OypE+pDt)>bq+Vy4kK1} zeoTYkPWlaCryiUK8*uix(}IxR^-V{l{KEbEm0#$nNBJ-+iqY!tA%@mqdp4#=u-C5N zU*YIG(X7jdx}&!8L($(m=S-Ol-U{k@{&oVk-PfpDQ#X0I3zy?+Tc;@1bt1Ht@9k+X z>5b_7?FN@J|AFZJ{x!{!=Q|E^TQEp%S+)GS=4*)U3n*GqSk_ecUm^~N2Ezd5PBPA? 
zOH+UVL%`UxjE(y2kV2;O6Ct!Y~aICq1%e7KxOm zMM+C8MnD|04&R36AJXYT*SBcQ$UPLg+oOdcD|JR?t~&os_a*P&HRQKOPlHXX9^N$5C!2t@XimcAqzNY!- zs)bbVp!9?UmkqhK)_(CGuJlT+&dMUiV6w5t#Qw-)=if2wJ#N?PS9rUm&f&Oy!6wt( zTjz>|gw1gh_sFq`Ao870{ItSr$S)y)8UgJ^yI;^xq;IWPdj`onihHI%j9PR{Nz~E&%}@BDf8sMyGQU zvl&8Po%)Nmv0ZS=ZEvb!YDw50PJyk1bbo!ExE0GjT&NB)jqjZv`-ZrA%$nE$QmTE) z)dR0eT}oW`@@V6v*Dj}^)-zt$=bk7?nz7JjJV;~d#Q|;Lgq&QA)nV=bFheKdF(lBw+*_&MF%F-f*@ZDz=yVMSb4?xkvA6`f zK`Jt+I{*Lvv}piTFvzquSm2u@sRlG^&`^k42R=DKcHvK#RNYi_xzyUm*9eSZR)!G~?`oi7g*c;v3s6uc$p1->2x0T*mILwI+W@>Njs@URpXG8;w0L2T);brxHh`1)&!RkfPF;=)$S4-@}?k zvy$AKwpB6kNDP1j_#@hLCSEy<(U9Bn855nA1APcm1x#w#XdCk<%9?G2(~_kyGaWDG z4@+bvnsAAH4&9ZJz_WE6)=V`IWF(<3r(AAZEm((WkGwwd`C1h)bWy=obOWv zXIC6UPP|P|>Cfm~9L(nv;i_i-#7R@?SO%A_+9MGYVA$J7#G2-3OPPWb9})4@#-BNwm`8wOp^GIu{8`yZE4a&6l4)N-TAVW4J6hU2U?Y5ABk|EWRKh1pI zBf^b_w%3kGPj=L&3B7*(VPvj2P{O0o)XH(&2jMq#@faCpCAV(mym$JEh|AP}Dci+Z zG5p&)gCr_i4FI6f7AJcKO*^pg8!F4jSzsdsw-G9MnG?D7Bj}*W4hc0c_ka221(L^B)`cjP5cWJZ-bN3b-BVWu^lgUgQuHU9yuy=V z_>Ib(K=)u$Ma%tB)sFr5{W!am7$OWi{PTC9nAyhSJquyi=S5f##O?0`6ebdKP|O~1 zSyBMhEg0n#SDGA^OS@}@d7}(h&2i(5kF+PF+Y+A_9^kaEMcY481+y9ooX}R;IDuGJ zHvM>*5STytl@oMwj-7FKab2eI=JOCJ(5T2wg&t<`wsEBq6{dG|#<70?(I79Gsdx=i z4z*K>R7U0K=%T4KPob*8zUiU4Q`HJtasX^vF9k#V>%uFx8)99(jzYTjr_|_p5dqho z%AMoU)Sg$-Y%^L_yNf@Ys=8wK*@;Z9FT~Q}7@5B0%qz2noeO(mT(9A@K}Nd^fiQ() z2yU^ZUc58FN}*`D+kL56%@arP>M1VOsa#;x>-j40=I7+N8)$E?GsIC;3N_m?fCspd zNbNoxJa0jL@S?DsM0vZJi4lH!@|@fc5*2a*OErG$>YvuOL8z$FdERy^{9Ag4_`*$i z!cBUkKiAfmZ_&J6oUkPX*9DKBZAxOQz4l#^-HEz1N9}!LlC+ErlkDG^ATX!Gw45F| zE;im-9}&7M-~o)Et2$mxM-b7!m)J=%J{c<>XFPtqM|KyM@Afv1iiAhNHgxDZ-Uv;) z3MNFgJq0W3o0C=H;&f-jeSszoaAN-#Lpexb7+~x?Y0B`xyrSS#^uM$c-Gt4(6N|{H zy=eGUM0e>x=IDUg9#Z@Vv;oOi_p!S+23C2E@^n~MfcyzzDvRq{0ih2M#ec%NLA6n} z@rx5TM434;1Ydgs#sbV8sdBYeO6cdKEShlNgkhFA(FC%{+J!VD zqzXTmm{erQjAzS|;(;niMKC>^Cubty=nn)Lqap^jph=vjlBr3a0!-Pia4;9ff z3y)^{%s%S_4Ai+o^dW*65_a(^Mss`iR8euPbaYgJm@rL3Hs4C2Exx>y9e&@hrU%uf|tD{+0T1^{#hLf8m9GA zSrU(SW%>vLMVId%5sp_5j=V~N_nQ;=!&A>rP}XYimc(5bS0zINghgaW1YFYmCdyWL 
zBnJR_&CqaX)q(;{K!&LveknzX8VdysV1)7Q4G(TOZs)83 z9nXy@#zMn%W!ECo0f}qs!{8p<2gCDk}Lzi`U| zMU>fubo{wYB4#Msn~9L--RyHOG7(lZ z$TZWq7rae(vo>a)me%j>K`>%~Rc=Q)?*b*eLdGro-hLz~5oE`lBbH)cjPR+(yQN)) z9&$<;Hq#VT^D1g)USkH}K@Srl_|w$m{%c;ConFsPFNODH=2UO9qPgs5%7O0re&#it zBhYPLZ~dVnMWd){3!6q7HL+7exwQ-)bXzr&=Gw53#xpx;m6@nUux959TdO@JbaFnK zkCXrsLw0axis$&>L8U@vYIEjbP{f~WiC*vGs`AP`_C+OCe{(0m1W14#WG>Y-T0ZWC zJzm!~WFS8ud+>Gz6f}0lE&8w8sOm-Z%6V&(r5)Y)Q%Cz?q%GBsP$=F)eeeA3aN-zP z!-njbZcXpzu^=dIIj+0h(hp*-qA|T0hAQJ>SHCVQB zLir5W**YKkO2&kMzube_Gl1vYpE4T$P6OFI=X}tmThZ@Lk;8DjTz!88r0R-=w#})dmhD`~vX8aUVK9%C9A7I+;jVeF z@U?B;w)16N+Jr`S^$)_(qJqEv!D4oVVS|iMmYl1_(rS~((j^PqcPY5ksA13?hfVTH zoSd`%$E;|=3!pi3n4jPd(SS5g6xA>;e{q~pu@0QFz+jqx7!s~uwTouOm*@bC51quH ztHZ;5<<=pW5Tr_FfT*Rdtr+VsO%ZI=F7+7s(FI)7WPzl#$w8QzHa!1CfL5UF&>N#n zcqPqrh*js4@~=#^WtOtcM*W;X~Fd2r0bW#(rr0F!(NqFf(T(NK)}Z?fo*Dc$qjD#uE=L7UgS?6$LuAo+RvAA3x>adTbhtSd8!N1Jd2X33M-<{F>jUpwDs zR^(9!55L9&hr^E1C>Q#>1ajwSzWoi~^a1LR{YWDmrq^Jf`^XDIf&Tg({{?Bp4`4Fp zVfn9-HmSAlxWJ6^>sm{Dj{H^)xvUF)kfKIQ%5&BjtY9)ck`!RTKgp0t%(4RNh~VTx%_ZvgR}Rb;@3ff zAsmZ7!pekr3JB4y-!43qU1u}HAYYm5zZQ0$to;H@^MQ8N_>ra2bfGfh2> zZ8lk81Ocr0NGn}9CZ|{?fIpYsxpiA)%>2!>BI6#Gu3vMlV;qjDAQWtiAYUITg42)j zJD*Myj}kuOCv( zgDxoE9W;W7qS8G!7w_pivlX(6_XD1OyLuM?=HQX%OlaAe#^BH&k+n_|qW}(b2^Q=E z(Qi;0I;2(3Amc?O!GUw3C?=bf6g-Y905d_gP~tduz~yMvWrAlb5>tbe;7uT@!qUAW7P0tE%SB^ao>W;9YUt9K-C)g=OsB z>bS6V`tMhbGhy72KEsvi*mT_D=oN8oMgZsA_L4gr$k6-i$kQ7n?v zdf&ZSHL~K9cj2madj3e2JdrgZ!8YzLdpzc&Q!07ajSGP-k_gFBz9c$0Phq#!w(il_ zjj>XF@eUU*RDsHNr^vnuMmDLv-Z7QdV6Q=gtDQcxV_u+w@D$G^r=pQD6IF^hOg)!f zk3-bPPm`LAw#etMSh@t2J@4`h=?@H9k*yD9rDngaLlff}6%^grY7G_ug|R_3$4F1h z`V1|;Q%(1>oZ;kWeJR11M{pKf1}BW^^!s0H*M8E-|_q72Q*;w?6 zO=?7MeN73;cTz#^%gbu4epKP3wSoKZLX#Td`SymOFAsd12DK9>aE@xaXsUnJ|3`BEX95B}b~-_*Dl$NLC$NMHlIK*}CIGCJWr zwIXB&lZ+xd7J34+t}|=x$nuUHLzpwxEkCuz2zX3`jz@uXaNbiYBmmQpeM_*qKVt-} zM98qHZ~L91%|c{b)}Uf}_xUyKSeaHFoH}C+CX|WG8R0g5-Pa> z1aPXcLq0$ss!(A&K#_tOl*VrL{E5pk|3b%Xt2NF~oh_d{iZ_JjhU$Qymi&b>`o#Y< 
z1qDYiJ7h2SYj~k+lEs2)yp>}<8Gl|TluK#E@)5~5H#_aS+eNC~?CJb#lV3?*jXr8g z;4dh=889S3^jSfpZVulSqum>reoa*{IbPje*bD%R7S#520;H})IpQOoK@CZIBx3t# zrzJ70G`PoJ2}!cyLu8Y?>zxIguo_&>+YKilZQP~IK|F0)WGu(rvK=Tz1di1>a7in0 zJ!OGzx}!s5N^x)h*d0n&$#6p`vZ&PT_H2v8(4|p(ntVIx3D4IMi=o@4LQT!Tdweql z-FW2wBLnOE0AQ~+qA!|YL1?JJgBPyhUoTp!P#=sz+R=7k@ATqlE99+}l33fPlP%Tj`)a&P>5^}*qelaV^pk>a!QV`!sw;XU0NlPJ z2SW~27cVAs$OJ+HJ7~6R%DMc2*dap3QtZS`A0#D860m+_bC5;^;pe4ltgpjpaG8pR zD~SNxu%)=zxt}ItP(+90M`W=}PwgZJt`m3J2dVMNB3(0XgSD_(Aqso0a@g>r zx%C?D@CWCn7UQ8iL+)2Y@r~j=;HCdgNP_3{yC8rowe+c8;dV2J*755D`G8BKi!AoP zE?t@zDi~tg4>I8O5F-X72L%vB9WSvcP&QPtg+#y(kVtz0T~n>wQ%`9=b$Hc{+Q&mn zvB;v?gYkmWxN~Cv8J3H;XL;vso>wYu52r~i#c>VCAVy7g*14&S*UJ!#yxou?`S#qc z$MK*bl_Aq_Xh-}``n?i*=Q_mYl6irrprG%hnIWX)$I+TbNk_tLaR{*T#}BJl|JHrc z!PzhaAAez@pumS(PLNd8rBHckD6TCla=9uP5sAh&BTz(s#DXN~Gp~C+fUhjqTl)pj zHI}bq%LqG8(EZmk92M0vUNKF0-6XdBt3@<$W#kA_Rfu4=DlX9vU!r!wOE7!Qt#ataiN>0| z({^qzZU&e0lvHOH;D?NfB2n%sTrS||`Q^^i_>1AdG81VcG?*Mak6`cAQs=XacWQKO z99cFj@?4QWLO$cNqi^$@{1cvQng;2AISq(C5>U~BBvqhr6m6Ow8Q3i#5P}_idLK_K z%}i84931D@Hu1Y@LYNxnTC8#3*n%kfM&$gWtED#(|I_N1tsYPyfbcN?>Ai=6-o3Pj zM$%Q=Z3w?hRos^7)GBZ7EeC~Ed26NypP*sW6fk=%(EZxi5Jd?L#bQ$#Y|1&yZ-Q+# zA$94SJA z{&tMjL7JB+FyuV3%kUc7QJ%rwD>7f6tF$VBw&2mFFPN~c~=Q@xYZJTV+BnF$N|UEsfl zfQKPgrc(c^?^2=C$viWmyCN^DWY&MVl_@@6k`N%*s2M+;p_yEn8iav?L#vkzko3_e zU&ZIr^yuzKRc;As(+_r2(EZw~g_6%Z>v+GDCtSe&9Om2FK6Mmi|9?N4E2#h%y;Y17 z47ib6h1D}<3*6+X%iu7A(cbbaHCCV$o3FRxya;Ac5_3EH8O$#Cgc~t1ZVv)Iit0iO z3rnm9?~+(>@}$I(NF47SV(vfi37a0ux8TTl1Ny5pq-@8p-&BYmtm5FxFE_X`DBfXv z`8Re>AN?+IwpFtDHIp&{D#@=l;baQb@WgaL$*~G|ODo<6sDfszW=T!yIyeqblr_yI zeN_q)&O^C*x_ELKSZ!RoH{nJF)voa2Z~SGr*fEKjg_iy;HD&s#6lHPks@$&t=YXOy z%*4x2FC1{_#F(+w>?UV`ioieusl{8pDtD0o3Q_8)L|AnXJB2gPuy1_G*6yF-Y8 zXIaEibjlk!_t+4x^*|=+NQZFum=V5auOm-k56jPi=^RW4&AIpAQW6+n|`|^a(vs@8An`gn#9&kkl88#{fh=Y zML)GV(LKdF^QZDQHWe%U58mFCe5Id_Ei1~~I_mB&@03?|0Ae{z)$luT^S0r`AbMSpe@%*hdK z{+xdit~0KNoUEJ7wh_-qUywBcSV0kYkT|xLH4<@W2m*(YoEVU}Fc4|G%W!=ud?v#h 
zDohtGQt<6*f%5aeCtf6%_U~NJyGQ)X(h(#gV^Cq?$MjI-xW@TOsuMdejT<*;cj_cL znr8L5E9a%AV~4?7Is$S^n%%7sV@qBUg*sDr93Eco9_GUp@GQm!AN@4|6F=D^?N{9P zAq3Gn+Ae5hvDFvi9w+;MME$a9sjf^5xkJ^8>`hi_?{4&NE?7;iKA7OuqW(OTPZX=~ zk4#Zj@vpFJP1&gKd=YYMw&Izp82`ZQHil*hypTplR$TX>8kS zY}>YNpT5uYp6`77-&$kcbL@W$@7?1i2(6UpUlZ*rV5ai`ntss(^o~@`-hUA9y3^g;<)A=wRzq8HSryrLMD4uTf?aZs)K* z3g^`IP5EMNhAD?*w#qyRWPNkanrGSvPwyXY_2I&(2b|>bd2d_Y6$qnUW_MiKKsHmhrF`suOW8?ttfd%+Lu>UzI#IPA@RX}@-b5H zg0Pkr2~fB`tQ82}+?;iV3szKrf(qQ#JEfS&fdAV}5=B1Ho3H7>)Ib4w$()rDCvM?f zeOGSLc&KNrl?U%5gEbe1S&ov%xB<6y0v)HRNI!jeT`ud}?p@*|7a_%2>8b<@71=gg zF&dsKK|6HG-^e5}cZ}5s(m}2;50O8f zheo3qxGK8D`5rt}hok5}@Fy=LaQks|^qxtp{Gn3<$|JZ8+jRAZ4@@r#FHE0a9uciB zpj5uleYS0y@-OIWjLD3w#Y`VW5LlSr>4B0vU$C%fYHJLtbDbMu-% z&-OV4c2HwD&67SMnLh^?hr zLOAL6)d*n@sMrTW6}&;kpEAr#Vtq<+jmKteg8d$8IZAZ!cq#+b_Wei{|K1-H80MR` zrB%x)IZc7fxeY=1Qs3XJ4l{Ya4YoT5mL4oSV`3V3P5JL>Kkscg_F=gm=LWH59V->x1ld1wMHs zzA#TJmD}LC3cZcHrjEqvZTNeBcPnYv->RBz9sk>C48=G?S7k?ZDXnfyQwwBI8WQc% z3RzWlgCO@1u&X(IS=Lk?Fj$)$_uAzc0^E3;Ljtv^GBd^7=)RMHpOQ`Y&XX=Sl%JQm z^AV`)>xclaZDGwl1_-FaDkTsFyrjH5elIc!=8r*%8wSGwSAdG(HJKh13l2`lh+OWF zS#DVZosA2Ho|L1iI_a%uycFR#{BD|q&cQO~(5Ym?dpGqxo_W{9l+7-gU`wWQ&pK)r z^}u`Dqo%A;*KTqn=^L25u|AJ1!~9_uh_NmvVQfJ5VPxf+A) z^$S`q@fQ7&#ppNHmxp25ji25A+w$;}%574?TN||r`UD99TIGsA@O5LrXC?I5kY}nv z&?v8P8{=t|+$-GfH~*2PW~M&%L*JHQY*RcT#bGv=k#&bE{z`5@ty=>@MnwbW>Oir@ zN(InHn3}n{{;4XFV2}MeTZiq++7WgVjHY~AKX-|+wQ(o8wRLAK*nCk7<)e8Ao9jK+ z5R)8GEJ>{tt(+xG%_!n*40E~mmir^D_ByDz{Sn zWig|`T6a?eyXjf??*hh~&~4@h2z}vMoF`x`5{BNO=JP0!?;#aC{j%c$feyEobdFC%-ur$7ZVMIj)p8_Xs3Vki0+PZIWC1zlm56t}-)o9Kj zaaQK3lnNDrm$T?i$!$gz!>%yuIz0Q{E)((Xk;#SXoNL>@6Eqx?(6V?+`!-8q6cYd~ zZt7{0J;$hH=yb*Xm9KRt`kuj-EodH(goJxnm-YK%F@4oe1Q#PJ0tf!skbjrmp8(B{ zCd&pZg|Ix@-+gbrnE#&AT;4J3(QkhSP4Z?zlu)(xQ2G#r8Cs7pTLh*?Z*(Y6!SPo$ znj#*Il)}$?+MU|-_2HdY2=*Zv3k7Orwd4b9ed=3fRK9CJzg_HPp8lzMv6q3iwwse@ ztk>*A%P$bC3z(k&@r+6Td#z^C1EmEHbpL-|ebGxl3pg@iMmOCYYXmybRapNxjDNi; z<$}ba6}vhurB_uZAdu}xQ|3WZ@;fpzd0dmRT*;sdHNLA3@@D9yN^|>|N2{>l9Ppne 
zstHPFbihJ=Bx;g)X2`@Va#IaVS`;*zCitsRl+Rcs&Ga+#Q%*wxk^wv5@ZiB{@Z%RO zfgq_?d8FgUroVSoaNs9~K|%$b;GIxGhae#Nx4RX}!JXiHEV(5qY0lt5b^_f%8Nueg zQ#W+YgX@`*jp0ei;P9QZ>79nhs5O&CV0bDselhKL2T8Vk|0WrPH%VhD;H0lSDbcb+ z`bQK3;>S_&*l?{2fKreEjp)^X5Gb~r3y#PKVk(&OlZbgY+B3|56Glk*N%C1_d9#NP zR@dv@9337$S*5_2Kr!w;z!SEcaK?!JT@+9J~A2Hn|ku-5o>c;M1h-3%ofr-eB04S+VSeQ7xrkW&#h7PwP z1kVZtevrf8f^d=skk2p%_~e}mQ}GBGWEJJ`z&>R%64VluKArC}*R6^?>cTR&v=tG5 zDr!dE1eF;k!vkn!>=GzPNF(ijRur&^`pO#rCei(6#fHhD$2_IXXy=3FjpD#-*R|H(kc01f*DaR>gKI>oJ6B{0jDd7P zF(Va4^Q5V3 zv>FDay&lD;ES#xul*%eoV(2)BP1|N8)vD%F1%V)*@re+CJB2wgqZE(AZ~C3k#kyJSZ8|aiI#>Hnonq zufwg)1Htxfj!n%tpJ_RfVx^pun;}lKmy|m$1h`pFt%+B%&sBb*znj+!b@8~S=JN;= z`0@1@@{tTEV6Z;Q*&$jN{Ah$cjF1Xi)3aJq?v-Qj`9~j|^db~IApdk!fXtaeV!!1) zO-%W2r7cuBO&I>P^@drRMzdUSZ?w6p(x|(k1j?I2H7ot-k;tHyuB|TJd1^}KSS~_L zFGW~{a3i2S51l&2YU|%}mRmPyHfyg0l;0aI!vcq|PS-9ZwyHuq2>>}I^v-g}E-b${ z!q>;k@UHO0E83+iiPp|gmybSLkL{)CX$=Q#;noS;IZpfBP?umI@_)F{LP>kYt)aF8 zAfT;)G;;cwX@`6;IK9*IF?G<5FV)32#P{e&+RSk}%6%y@SQ{E;_F6=JS@!2K&-66c z)LR&1D_Q!|Ch0Lx<1|;pTe;Gq%K`p|w8x}% zk@9Tvn+Ij82qyeCM@Q%CZu_s@_u*h-b*K2FGbO)O^ux~v%BY!J>*}SzffXS49 z$q?Uz_c5+g)JR_K1bDWjTM}RXW=wsSLVq;TVmZb{9Nwe&Gc83Pyz*yPX>O67+oKE+ z21P2prDnHpEqDf8*%(BR5C(sx3{Fxlc)PD8W8IaNvtQm+#$SEu?J*=hwknrtXMQy5 zW@nb&G@=cZ-|pGz5_b7JlPd3Z11cjuBm21w!Af31yQYMCe5rrl7CBd47~g8x%Q6sK zeOHS!R_jjsN%pJk?E`SztvlpaOc%}B#@un_MP()r9=i&gygC@5D`({5RGMzt?cFNt z4G*mbrKsX-#Q(soI=(d=U}~BI18*650vYF>srCW$w2>C zBxnOHodAsEe??f|z`_~8IQ~~;ni{;27?k6`RpBz=_9Xu+fy%^+dfnMpA<^_7l((4&;dYO&`a9NGZ&>xM>mZq)m|s;tsML zc9FJwN|+L2f)g+>(t}vSDWLOiqkXZ1&X(fJ|1foh%_nPR1QsqoMliNQU>&mpB9z4I z*(=Z##7n_X&7T+q2I2-R5X_UF9txQp{hcdj$Q0sHSa_QmsQoe?_6e|$qn7W13MaMc zH38vJq7EQ}*v7)#!$JWE0|gV#Io=X)3B?xf8H6)Mn%;*j=`k^eAdKW+pI8FlJ^m?v zarZ{qqd45oFN&E&K=C z-rn8g<6)ySAaUI*fM3Yn0=kJ@fF_I-1>)lwcmdia3uU#^67=?ZK=izu8Dew*%>wBWB*z{4xZDP$VWYdIYZwm+*QVj3ETXYq?~nk%{t91x$TPXnR|bi8RD!^*cH-p)ucl>)Y*z zbrl@^CBog8&jCo_3g0_z-{xHO6f%_a6GS=f`%Vt=Uw~PP8I>3U9Rm|{7dl`A)R>5m 
zs?!L-!+vfH1n_{q5?rv4jco@IXmy-z;V^LQhrpxQnj0|iIKr%L;jb_Dcb|}G1>k_n!Ul**Ng-g~J@5(ehO_xU`?N3)L7>AvlNe7k zJ{4EaVxgA5;9;P*w`v=PwE}%I! z@DSYH0@wZ=n!+&MmV1YXpiiLffKOgp0PE+V2r>HQ4(QWoc{I@XE^GplT^T(DfPhhV z_YES#L-M^-A>BiNHVc)(3!>jGy?^#$h?rbvAv57oWB*c}+G@$64nP7EQ(%Umi0Bdl zRR^Iy75>a*fc+8D&5!amt^5y`{pK9}xi^Zm?J8CX-Ym7DQ^J@(e>ZjuRL?cs-Cqmi z0Aqx1^FTGQWoBH1w2bIzg6Kd+c=^uycQ4N{c`)754BpnOkd1PrK}Z*7x?-_m{i&^}tX0Y3F!v$4n;$ zavF@(Y+SQX=}}?~FPExSTL|3pjOkQg32#(1jc{A?vsnY%_`+;}O#=L4B$#;0LWx^gia=Y5;Zb20GlFWQxSekX3R)N1(x@85-yr z^wnx%R`+DH&mv94LNcL?e^leJMcNyk;T0* zC6FQ2j-0vikv92A1q7Nu5MGVxKN=YZq*!9xdvh5v~e&Alen7cM5>%jxr?MK@50;4jm z8*RZ{ULpkL-=Go)*7GFw^v#YkT@3Qj_^*{BV)gZ6?(AtKx^GGY{MlIRT`oaW6{1pEfID~3z)E2) zCU~Y{jOnWLiNCw;F8u3>(qcm+J$3Lb!@wRf0_qO|s!v_-LfyaTsz&*oBmnl@o6Fa4^KUjZa!|&H%w`BLuj|ycR~68``gGM6 z$eDvs=yJSqKba^ch`qqRJTNzmj$yWPo0X;k+(-^euic#_ zD!?A0@ZNLDCf!^FZKEjn&NT}+T={d+5htc=Q_Wkjd(`XTfe3r|k}6KbhhA4KWLJ_8 ztR8Py3mUTg%8BU~!e*;Hqh3E+yE|3PU}m!W;8EJj>&Y{qu1~K-#Z2`!GL;Es5V;r_ z9wM&Z*SQ2eq@<+kV^uhr8#FjcQsu)~=^IJB_(-s}sw5_p!zyMg zQY=Qk*Vk(u%u2Z*f-!9&((jW&;4kkh`KD-|RYKWX+L+Uy9}^XFc9%%3uk@C1UX@50 zeQUFlQ@z|$9~s91Qymo&X%#x_#>i$=acIlwnmQy$?{V(3?Hmqk^zeW2_VxDx4q5&R z%-@3F*)Fz3Cz`<}NKN1OKP-i5E?!?dbQUaV$i?7G21j=q;{~DJC!Gd|j0|E##BsgK zhwO&0ooHnxTfMS))`K!=vQ=Ls~RRC(ZC&Gw2xv_K{x< z_3JyV1#3&z?4dBsmA=;UN>qCf@Jq0YW5PG}p_+WU3`TA`$ z>6o%yukqyYW9GV{vlpNW7?herGJZ)$#%`RJK;!xfN8NHG&zn4tXsebEzMkn{n+x%5 zk54@O#=QdO?RL(Fkd*KQ*q>%ADj6QQe)k)4pE`QIo|&BI>HsP_3bt1W_*Lm9W?F1* z2~#EQw*AN|Y;l|B*28~|)Wa6|zbLcBcibOAp}D}TJ~GeMgmt=NG3;5)HNVN;p{}_E zM&lS6;0isOGM!!2GfgUL!;aS@gU^^(S3T+Xt-TK~Y*&x85T!r3R)=kS z;r#BT!Bx?)I*|=rEiV_k&`5 z=d_qAgBd8zcZA<(1%klOgM1?`K1Fp#3!0s+^+Eq7XP#9`)^8=7C5`>dmC8LuWFP$Q zFGmQe`IaD*l={8`&_YU)DzJkVo*N)}&*70GmA*deYXZ(Z9bJo1Km6a9m>Y5WP|erT2|9dWE& zw7!*leE2G9dN=9|@jG{&C~sjPw|9ypNG%J-BqIA<0Nxycnw{AC$*%E$DB|RC;2A$a(^{39L1CnS-kxcX?Ryf zHX^!}(Q^Sb$t`d)VqAsP*d4mfOf?^@N!D8@os2%B=7(BqqOnr&Y|n$nlDqsb54he9 z7c7+_qs0J&`u3c-J1t&*=4b1n8$8Z%nK`a8ZR0Ef|4z8x7O?m@ZbgaQ(9}njt%#j( 
zIG>oeNvh*q9!X9g_&8MgJVxDX(-zE`W?OewAMh(gbU5XKL_)IXT2G5^@Ay~AVaVW?SQl{C25uVI$W$NwRSG^MO;M*C<=L_>m`o%vtq z-a;3K4cK6x@w0w|bXc>EYI^@2Hl%sOrpa`4HLQn#xU$af&PlAl*lb8DF__@zf^1QKB4EE7- z0#qjWt2Vz^wH0i+LPR|`R;G{_A2V!J|lQm zt~r1IE6I2C&NF6n?DoAXG*nN24gz8?yp-TqfjP1Qf}FX?#^_;`4^7Ezqb8CDu7~^#&d66DQ_#$4^mAJx!LWjYJMKA4D%;D7C5n*g`Z*1}dQ%IVdj0L< zzWbZ#AK~5g`FSxZN}-iTU;-#y(}nj^b=p<=z=YfNRg>g7L}_Loo3{}}ZY{0IHfGUq z5mXsl$e`psrNnl)HCeSG@qwmFmtwriFj1dF%h?!+uX~xnk>x}_c@#Hrn5UTPdiJ+| zG`L!g7p`Od05yv45PpMz5e!$+IU|u)zc#e*sG&w21jG`>J8i$#jR2Mq-QjcM< z1zFB-Ol++)qseF-tWV=pAu9tRE22)#G5CAKlevZ0buldlgnis%ho1ZURMc1WbJ3t> zdQbcs9&CJ2eeQ)B8B-VgigzQfy20vCiiM?8*qU7Rb>uV#H3q61Ps_~`Rj1!REBtp+ z>kVGCNveiy50T?%z5x+M8|zqG%iJ_o8wx=Pm)$Y(sOi92YXo$s? zZf_@aG*nB@uTwekz!pyT5)j}CsX{%0kVv9o~#zGH=OC_Z2 z-Bvv}u*OrynJ7j|x_jlu3vcY&*SX<6$a|0aU8dg4~^>AVntVU^_kV9LX%dmd)*~F+8i3A>p8_5m{Rtd2!VgtyQz{`V*U*I<#7|I5M0zaeWEGyMBVv$nZMs?% z-ag^b!om$n&d)jwEqAJ=R(|J|qnAy%$Q1|%cTv%{1mG+~Mo;f}6{D3J(#G|e)k7=Y z^MVhYW8kUZlZyzPnw=;tSZ8;uL@%}B@;Z|+UXJFMd8@Pp+4P`KcT7=2Oqrkr7?CJ- z7Eqm>sq1(GeagZEZiB)+(qcL^-P;s{=Y??Jj%+6B0}F)62eqsS_1=-$icqP@ZcL>k zh$nm50UeAX_!Kkgo@g325{3N*9TkieMq_sB7^v$_pXi+Bb{`9?T0^{KQXX?uAJ;2h ziINk0yBet)O4U)VI>rQ7hJMXYha)t(Ag*D5T%FioqSR~Uk)N~o<%xW;N>fL0zpOY! 
zr#<>$-BfiX;-pFM*_6zMoZX~)1`y>FJX0DO5Pya6VmlVZzz&kh`(wpYfl0+lv7 zhKfukiEx?axMo7?#*h&I(FYj=4@!LL0d{^h4vJR1TZ9`oYoT%IDvR3N+yv+OH(BjB zHplI`cjNs_={V#OmH3%cX$dkd4w;d&)3j!-AEdR>zrv<7R$C)Lw<@D|I@RrhgSfqN z0XlP}Aq}w#LDO87!~T+S?!|!D{=+Km462jtB)X*|q65oIWb{Q7Yj1n{dOkJMvEIdG zP>3zc2;;5CY@H~*`?I`QT%oY(ETy6tcZ&UJpLk{0pXHXDGCZk}O9@W?E#aFwyR{a|SsVE6%lzE6fXKgie3$5Xdc8P9fmIgR{5+t&rPwqwOr4T( zDTfzy*lH6G<&649%nev!<^6?vVEe&fq`8McO&g`FsjwJ#PT*Aln#8)6zPc2m20_8ufHgiq+?N;vMM`GJQm7#JGnpmi zJDW^171n_&x;6K#?ojVYQ>@SamZLNN(Q4WdmV}$*HlDNpTPVaWs*ep*S#i;Eymk?e zB^Ugr6W8oq^-9!AgdeXLsqE}E0FCB1$t#{$KExqE)kdIpV0yPZ_=fcZ5mj;EfZh$Q zkxo@asEzyJG7z_q-{it0)rhGa5&3c*Q`_1s(6M4`kL5c{mq8OE!afSc(!m+s%CsE> z^pV_@2NL3i{1VqzF4d;1ZkH{Jo{i6Zh|fo(U-77ol(6ZkDZN<$T(Bn+AhV{X<0u%v zhOAJi>8X~>pf;YDT7lbkp9G?_%6}Dtfywjsr!8s9-M;Kedrmss7n}MA-{e7F8;LPJ z?Li!h_6@Euzt_yuDf-VSZwP2QEoScTih}tlXX*i2#@^X~ zkH#s_ZkjyAoqD6Z1g6{q#kwX!j#!?iD|Pqk1$oa~ z5r{Opr`g|$g0ZzLAL703dp1bRhD+H<|19yq?c%?KdqbdR!~nCA6&%A^MX1k{uEHQS zm>JGOiZDwenS3&ynh^emqj(?!Vg&g4u7i96S0AD~(Ld`xHQA#ja3JL_wrJ|y2>7lw zb_zwQ9VRCnY}M&k(aO_g=lU}MDO2r&Q%Ugcr*k!*K8Ly++4pLf_OVPIW;_f_p!N4;W)yDs^*`^6W(R!P_;Y1%L;YQ>Yd?dR5g&AIa5s;antK$P5)@F{6&|--SwqUYGVJB9s9siry2lYZ3W1j3U)FEy=Xtj2HdI0QGgvf2QRU%$ft zS>1@Hch-0WFhYhsjQXU7`EJ@@?J|p>J?>?AkMSr{R=ew;88sgXnY}81^Rw{zaY(hf z8drGA_ojUVZt*m%7?m#78cF$Si$kAE$b7bTlygsSb>#%F`sG_68^ZWK+bn%b!SUME zddHq(4m{98=`X#6Q^Yi+)|RM1-uU^SXxz|>+{SYsU@?VZ&1xfMzF6wvb@~l*^20KK zoYqFjj)|6iw{UxbivDlGsef$aFsxZ$bJgM`4Z257>r%HQ*c^F6+HWUSMAj^1j&_lJ zrUBEr`Co8gY7DAk?=wW3g{&xUC%DZ5NOPT^f=Tqlr58{2(Mwea{VR->qrV?)UgqB4 zA*^>dftdZO;ScsX=oJM^tg9IJ7V{9}zOpgd+7sW_S*`rq6*>q^PMA$fhEBYf755*a z?nIisTSKa zfA+xU*Uw@<8QoPBoT^iOMaJm~BHxWxW4QR&-?l{t`jTJ#{G1D&6Nc7JW0Q!nsim}M zhTSNoXdP))STXo`4|{*xqs_k4=l2SnF^R0X+2AkN<;MJGYK8ZXF{9lg{MSFqO&oE^ zryEEPUu1t&ZLDu^Oql(ihVjv)&i{w*d-0!AnlvdHMT9X4r;<#15DTIRUTSochI-Y%G^pusOw^R72TBNe8}J^K z<63f1^Ou$op}yBm%Sb=jty?tstnPF@Iog4?tKGrU=&uHut&h?+y?i27(APFv*`~FG z3uV#9@BBmaifJXwlM+hi*NIM={o$gC70=+oYivsAuHnd|?lMmEHzweeXfm#i*T3W! 
z1>Jutr(f|UJ=M0Q3ZaHTD2=3*sJ2b~X6i?QBlk9)Fc`{P&%4sYRN(MKs`gd$9`6IR zI8iOgMtS7I$57e(8Nt%-=OCgB-Qp2POuj8HsrSw`dXT0`U+QTYs!GMQy1Y&vFz?ru zj8B1!jckOxTGzWk7(H_{AX?cTG(yQ2D&zB2$!{$4i1LrK;3Hv3HVS*FVG)raE2ue_jmtV=BN zW7kJ#g-pJeL>zAB+FJ{~0 zYhMtLaCB7MEr&*jlE=v^pvvjdu1^oG#ju0n($}wC{iUZ5DV-9kg}KGj{1*loxFA$B zaPUGUEO--Pbh^}J(cK~`ndI~=tP6VD?GVZ6A)ud?xT=l`O~(zt^RfH0HmQeGv9#mU zo#bJOt-Ze)swVK+;j3k|J9yPigAxmNSLg7II1TBEV(ZK*t33gUjYNnWzJrSQJkWo} znylU&R&9LN#R-qGrEZ%q$omUmFy`{nS3GL&EEdfK)x=Mgdpi z-+I2R69c;l&yL^1ZpFXMlSS-%Zrti1+`V~MQzKB zUT&-#rJ8wH?UuG}Ew{nNnhQLrZFKjp5{VWZmEclb;eIxlOz^8<=c_OEuhtt1L;Z*5 zGKBf@NS5PKHY5<1+NF~3wB@!}hl6UJ76T20X1_4yGP9ea+~$k=u(79xNVGFevycB= z#ZGA|&04Ci?=c|HRcj9d27+nU&$%en9r`{wXm9q+%7Kj33RrT#8^51-6o-lvm@b=H zrSKhdN>h%+zwWapAN&k=Q!I8#95*lV8*YxL=X?8$GdGcCfc9n)|(JIszf$z!eLPIPw^q9MCx(U6#A-VW(u9 z*75r+8%LFdTv5O84Vk>QeNYDumlizjXZuGCI`4?OkwF^LXL(u%NiSu2`Oh z^YXHqM{Y|FCJXVZq0mppO9;DM7kNwF)zSBRnx`SqPVIU}=feAubLIBQdZX6b$|Esg z%;B$aAsrgitZRhX&S-^kYzP?2^*O@&(n`M7i;pLvJ+tHkWshlj1{z#1wb_`Ds@8_# zDeRr;6;LjE`}w!}f}9hDf#oRX+8|Q#)Xhgv=g<6on$yeL3&Pzgz5sN|u&lhXOMrhn zZDO@L*U1Q8BR^m5K@_`zR_J<$5|@l(?|Ll!U#3YTW`eZEBdku-6rj6b(5*A2PTL6H zpYB49*wz&QdB|hbr7@{8b$Os70L?YJFE6XmywwiAd@gX*&nz;U+@Lcgi>4V2oQJd@ zi(nA&kx|rn&)>^=^Vj$C;ba&h!TNwJ#)XS0s##ZICU%k|p%&{l3%e1^$Y7~*+q9|m z{VrvWcz?u&jZ*$?)9NrXJD&Su=TdeNjvJ{934{yCx3A2_e{!gHGu6s)aHCecM9%u% za4{4;PB~@mQv1)pk?3F4IJMHZ1F3tx8Ha>Y$xz%qa%3tG4uDbkYdiay{wP8kd!L>8 z2ix|r_GPmS_xU{z8dHwctVK$??T2!|M4gn3Gs4#qNTja^@&7>~rP;2yB5oScHjX2X zIko@-^JspAd=sL$^Ow(Df;*wQrI^f|g+`6czHf!FMIMOdt%5Iwn~9j=hiPXUOH9_C z`TCt?%APW8Zx;FDLYf$OVS=o5QO7Uu$!1s!R-fuO-*~dN4GHDL90u1<;6Y70;CASW z%sA}5^QfTz?CtWrho!mfoaf6!L-CzQ|x)|EWe#>1KvRXm)Ibz@-D@ z`j5narjVMewjq4k{#WOJ7Qr{mp+g$u{}-b{Z~ixP!j`fN3W}Oy)ChsxJoX)uSqhB% z|B1pk6I4Jp5P@<3KT(9{`!UEC=KqRfq;yI`!8K33L$1mESM)zK2w>dIDKqbo7?7+i zY}_n~B1KeyU5mLg!FnA7EQH4r2pmFH&&EdPc1?hUlgIz}HRpg{Ha~T~t@BnJP6$;l z_EvI+$IQoLazrFS4{S-i0a&E4N~0bFKybgRQ+oLoI$D_$!7O&sIfDikM3n z+zLz?bQSE64hX%x{P;CG2nwDamrq=3D?k=7`sZ~^GN7$6_uDY+cO>QJ+6HGRoOE!{ 
z%*;&4Oo9yT^UP=MxfAK$K;M%R$Kl7KMGp%<0*!* ziQ!nGbooF^L5nyaLB_tQLCo-M(WbkYd6?hv0vg$-H*biHiyk>OL3mg+NSqB*4nkNtA}F&oxIc#iC_ZsC!pnoR5X6V4Fb$lbY42P!)3D!99>3CG0kQ&o zgs)u5;ni~@*3?fkFrKhr{XohH;vry}i=*K^v=Lb22>8NT=z{&AddpS~03_{9&FoJe z0l3e(rLHj+S~**}Z)3BcQNlwW?*%k;3eFjVpX{vBVX8`7n%S&#D`_(C)`2T z8Km(i(3X*#FEEU89T~o3^wxV{{bVHb^d<2PZmq?8y1vE<^jJqn2V@-o+zy2CFx&78 zuBtY%LtuLKW()1PIOXf~Ym=UM8p8^{tI5u69L*|z7GM96L6cbzQG$=pK1Z^B9KMrv z!S45AY26hk!(6p9M*C?^*UY`?Qj|ZHe1pgNyTL{fQ#~wqP&_(2&w`-BoRR ztBv|p!Y#D_QXi^kO%DD0t=&~#&*)C8AoV>p-p`Q1UaL2hvIpNZ4x?b?b# zr1A}1z~BuHrA0|Egx8kvH(*bGUDphSl7Lqe=_wXi?3iVAXbJdW=RI^wE{rz%^8|BD zA<0Payzkc>j^Y3OUDweU4|-OeO~hlKx{rRu&;{DP{f8sj_}jfE*N__xn@X6#JXIP- zEG|oE;nyO6j2c;of=+^1h^>lMtPY~XQ>Sxhy+lrbql>F>TcNnC)RIE_1WEUcLL#e+z_^Blh` zRpnN!Lgp~~z(QnV0He(EIQ@RJg@UaprrWyQ`GJEQN(an-!FJF(<7V3#o(1B(EOfsV zDm$n-^oenZMFqP4ojO;I4cXoft#gvpXTUnkpp+oq*0C$pO>$+Dc7%6jnfeYQ%$!X9 zZoK}F!U>S(w)!MWPNu>~Wg+2nadp4PAB{N~1C`@K>GtH(TZlYl^6xS1!oy9VpNd?5 zKQ`Y#*uZqq-5It)eHjt439G z+X}!C&(exymPL9cA>FGR;8O_>MfzjQLy&HM$ehN!vUUkctnHbrOmbXby{tp2 zm#mLa{5c#ev!Q{GdmyA~vGMV=H+^AxlA9A4Qx2#Vg+^fCLomM5Sgw>`xhs8&ssvQ8 zL5=fa9ANVVrRy_m2DAHgP2XT7ry+==&?aHJc+1MGH8@-?$I29gNFYe0U#;r$Tt_)skEq1ZL@o3vO(70GVpby*{LN~F%v3xxaeKB~ar1b-<%*2z zDaU`%^3dGVzOX3#@$Yv93#P#1$pw^$_;42my-grVbuR7Le54vd(S)=xmr=~75ad#& z6Mt@fxO55L`q?AWZk_+kkHme&!J}_ZofPx;t*&G)EY9cQf#=UV%Kg8abcU!JF&{N0@e z1(xlevM=$A85M_{1^5)U0~K-Kl0~R4oVKo(-j{30fI^A2BJTZ#)o#mc7B0F0~*Hd(_afgGf{2Os@9ny9Tq& zTnsM6VX=W6AFp_;tM|Ys3cI5cb?b~nfIRA>HMyB@{1dsyhA=zQ`V0One^M6W8p#iP z-H>io)&8X*IRSjclpE+zYHr!)IVitIUPHH>x^@u!P%92Z2W-+BdC-9;K0r?UmOXQj zxi7+5#*Bg@3KoNRHG3zwZ(g#HVS1Mui}8eCT^DXn*iIfVG9WR9b^v_iCV*q*I@c!_0#aE<4bscJQGK$Fm>4S-c|zGh+5u6m%+Ocjd41c@7n zao!CZWSv&*sRlN6GH`6>-_KnYEmqN>{w0uJv9=jhDc*RVCt)|K8p1fJHsJK>^iTG~ zah7Joto{^I(nLJ&_DA1Le!}*NRfkRpzrOCfWx(#kUEYkq@!=QHMNp#2kTNR;t;GXB z_VGpv0%T_Pcnhp6MU;EPX(@*iS#`GWc^InfkP@%0hhz~RdfWRc)XkDT#U>%hdimaQq{unuQF)Xi-!>?rQ|rR|v~=I?n!PCD6n9d24N= z>(GvD?$|W7OmQ_2;e(>;AoY;3jST8yfJ?>up`rbE^H(HS!==qcq%g0 z5^`tTMjq3yu)Tx_8B! 
zbWQ;WQh?73;;$g9G9q?*&TFjvO4gQ;3P0yf_#67T$c%_Tq9*LI6UgyfCmt865VzYF zt2%h^G9&ub4YtLc>ZiytZ@lKIz+bMD){wNy7JrXJ{;>jU0X+$N-mtxIHxJD4eozT_v+-|ifJi$XH2m|;#sN{L14f-Rumj&_lnB}3p1uBi5vsubS2 z8VrpZ$7ZzsGj7yro?gA=_)_j-t?L;4gdq zCU$j&^OFk^i<~SkDkSpC0K`}q53}A>9HgHYO@&PCwDlI@xS({{1uDI}-e#DWGu4aY zVQyP*2UAd#=dT#9@J@{2?13kiv?rkXgi`h>neMweLMoHW~7T^j;5ILt^(lnlb z$q;mcipSWPdU_KgI+|H`kO|0{^TMVknB-E|typ2g7je{37_#NOv}R$2&JxmPS^n$#@}llQEKiC`U#`GC0A8aa3H~oBI5XfF~W@L^8t1ftZ|a z6ia<{?t0ZZx4>ug6Ogd^ZsqppaFJ@6L7&YyRrGe?jCaBKrHKA1L0Gc3J_!plVx_=> zCtU!>QZoq)b#0aYLuO<8J##YULvz&S@{qBucs8<(F0^vG6ge}1KwX#q+vH+hz=M?2 zN3L1)&Vtuwn#nGMYnvIxOdpR6j751s6BmLNp%GY9hvq&(~>Y3oBdXY4PZ7F-&<^^ppSA0>Yn4PoGwR zUL4G9`p=$oj&OtBdeR4W{ykTAFb&{Q+#KgPJG^LqJnDScHm%NoIMk6VCo@L9EM{%I z#{=HPJb+)AZJOI&t`R#~2UUe=AUKMMl+=sk-7Adm%~DPLkuFqWOtebVG1yEmd(Do| zEFh>o&x&n%!N{MMZ;k4o+XL~hJ|p_&s(%iKHTfh12y4+j%=;WEHe0504jdb^re09{ z>wr6VU^%>_0$aR)GGaPmm_nJ2d?i*mTOHY|Kc*=hOpokW_J!=?N%YopPPMtt$qN{g z9VYz#px(;fSeeV}W(_{gEa`jn-t*K7kj=oJGsmNo5|ui3*R0WhS?0HX!R@|8jgS=w zu48&FNvg>!O}SI(>InlLM}qsSCU2{Xq2TnGNn6MTRRO4f(I95UWa$GW(xuc$v|T|> zuIF^ClqAP7WYWe`LV5x3Y-yhPi}QdR3z9DC0CV(68mw-HMn(V2;KqgiRj!NCZXzTF z5ULqiKQ#}HZEmThYpV=XqnnAZ`z8YqhG3DcwNh{Xp2DdpDZl5~$j0&C5 z<$Ki*=Ym8``tLj`S?bHpmlHDa_Z@=Z#6!~^M#r1V9y}n zZidnr&rqO2F_Csey6M}IGHh%>G%{l;c#*l_t@j;&Z0~m#*va?>7Lp|Zmlx6al<$vQ z#LjQhADY{0o2l{dfAXR=d#bQ#H`z_BW7ZXfudQD>JdD+>LR30uvb@L4c}QYY(*tlj zR7brS{Wv3#ZuVBa)sAkSb~IFuu6a&Ah6zR3uPdSym+UCddnN(8aly+*{pLPzJ>kdf?HsohFFc zT@2VbBIEvjqrTH_aXtlSW!;XDkbILEgnFeSWP#Ch1_97QojeuwY|Y0GoevH&hJk z?&g_==;nd3kP4wqMAmNL-Z`?jS$fC4xq6sreSdem%s6BvB7J8m@wmbR_9CUX(Y4b4h1g{(dM#d#*F+>_>5vh3ero+s7V;46M@n$$~|f z!gtRR&uDC${d(i6mI(3-Br>snPd?tnYb4kFLju}*6*jz`iNhI6Lj0;58*+M zgAy$e`->1kFkA}EL;ixLGdB2thSMc)@jo==_p67m0!ZNbG0lxFE6QA#{nycg@r%sP ztsNa@g&TjKpHp8*uAC|@wQBzn!JIZv5l%=c$Xux!ZLINDb4ez8=Z>1b_YCla3Vnju zNN>H#q1;&bq>I_RErz3u^IG<~re2oP{$QC0%fhHqe_WQYv&GbR<;wgO<#@w+jvLhlq6^MX!22dw;(0pHGF++n9Fq?ln*{nY1Yz)U1eBlLiK9>m zkCh+Om8BM#6^bo?^^-fG;@V0nZsy18qW|_oJ6IugQ)b>{KlbI`_Kl7kKC9~2aqqu= 
zzih^=7-r(DR9Ie{|77&vI39(q{H58c+O55PgNDI+lh2Gd)@bW*#3kHS@yJaIvL z$b3J6)%VC!NYQ2aNn7|hQvCo?5Qug; zfue66qjNofq_te&d|9Pfd}SI}yZi0#X&{d451V7!<3>U(|6G5Sd7f5eUs8|0>-dP} zIjO;{0H^M6<^2iq_rC03+*??}+@$~uN$z1@|7V*4C>6%TM1ST7lDiMnB+d*2?R|a} zylNxXanB>CP7z7;QDW}+voJF{`Mp*?-^_cPjhf&jpk@=}w z?felyE8mPX&P~}2al14-ZuDEgw3_nOQesTf`1-pD$E%RCo%zg4m5a<_azrtNcye7} zVgo9F=FGV67=>BS@z5`7*wYpfPy;E`d=aW9O6nz- zjSU_ljL3^IOsSjB_s#hXvV5BXa*W&2rOR4>0}y+tj_Owa42wIWh~X>|Cp2$#*>2~( z^!adWL!Eup_hJ+;b9Jn3*EX~vLP}s55SB~IUb0V45KC9#wqN`mw0jI388crz-@Vwn zSSZWQ6u@P@3k&0?=NAlj_ZnG9_Mav@tt;Im90EtUw7Uy%QchtIv6ZafajK*EqvEW8 z7$&ervcNe+s3=7oX8o;VO9S0s2g&4zc_I^vuhH)Vd$&)Al5ZOxI4$Xzy``-0r@L92=Z1Un(K)fW9rl*~wolCU(0bvT$dI0Ni>A|dq%`8(k9=Q$ z;Dpppg#V=08jO4$a+^WRJqy!ERci--t0BhK$m_GDZ5@wD)D8dY<^r5bY@+Xlbhf97 zD8-z-Icaev4R67OnZf0d{(||2b39}g5iAcKT&&XmvP%BYtNtTVLH+)>h2|8FXOR`S zeu5~p3GH4@o5M5q$-!+iaS)b{&H(bji#NKwy(Gnc)CO^X(WX73%GE*%DY`s=sEer8 zN1@M(@JdLZS{|G1!(DEeFm*{Ety=r~tblUTt`fs%Tj=Fj^EA@$Z&RN_`ftm(J8v9~ zddac%-x3{2$`S*8mvq%oAsf7Qo8OVpH;vobSm{7&nzB7~k`ed8slj~(Uvbsv7Hf_bg;}m`OngZ zNUZ}FsJ{oRl9ILq4Z#P%7-9&nZ$duZJI@Qcp;zV9i22bh4dV8MGaVTNw#~Z;Gt2ef zRJfovc3aQCa_#<=k-`zILf?Zv~J&jKG3*899vY` z20;8%Hft)J2wc9ad zpiJS6i8sI0Q4PjRi*L~wFg#4}kSFme%5a=e2hnt4zgr{VgV2-li3Ces8TGPh9iX9S zN*oN&A)0?>Yglemr@mDkOm=L~PIpgrJ>X-79JNZh4u#%n-8hGTqVBIieU~oOWkmR! 
zF>%LYfH!_LZo8M&?_iC{;5#rjpPZ00cJMHI5@~@kWoH!!EZ_;8vXrd5qzx0LdUL;l zH0C?PSlA9Dm(WjK(k)b-y$A^qwwij{d?Z~VV{k%oxsE`o4g%YG9RhHVeY!*Z0G&A7 zKe+m)JFEm_Eir9>VtSbm)PSlWXxFO7*m&M}denI7_oufr>LQ>DMN8*5dj)U>QtU!D zTUoEsy)ky~x^4J99j@Yk3UXzTlX%FSxkGbWi|E~o+A2xx$(Syp6(?NFCn zibmif>`Ac&a_rT#JNJX?-07|BSt2tj=5>wn%WR`xpE*l^$TniWi*xbwL6^(KZkZZy z|DK%cVqn}k*pF*R1Zh?TiPE1AO8w%af7~#m`YlEKmPj1g~Q(9*#2QR0%_1TQ#*l>8r2RIWF= z>aiksU2x8S{ILeS_!|b-6k@v7L=-DO9(dYusl3QB>!Vz5Q{V`v6YUFm^p~rZ9yQI= zT2c6fcMY`TP|<$Zq~LFUq?tg~bBr}f4EH^fC`mxJvO3?UqUTU$n6yP@ZC_pPkF7&$ zyaZo2Gq(eGn(Trbk=BvY$`Ipzni({>jJ}Ebjt2C9t|QC4x&vD?S4Kz>Ak?G$0;qUC zhh4K4{HOcv5g2(_Ux|gJ@N{HR3U%~a2PX_rX8jnI8x`eA`{o_S++>-R zzJKpj6AncD!R2`mX|M=~gL@mM#_m{zcs$+a`c_C|5DGz)vq>*4l9IEj@IWvq-rI#$ z3gLC8!o;s}(B>vu=a5%TT`HY>ovdg>qVQ{fGG5rrvD258ps->HBlOwoL6n`9JL-)C zhekrXs_|DeaxY7k%!PxM>t}zOg?nlT4FMEG?rK%kO)#7RoMab zBL6vAh_$Fyy`tdCa9$14!j-oZGe!&sXdR(f`~qpW>AvqJi=-2pqx(dZk#otS5r4ve zaW4RLM;fM%n)an(?&D9BbLz9s(Z9GDJri&#^E~z)q60peaWbDSNjVqTzsUsJWOYK4 z6q%9ko=#SE?%LN)6&d4CLzk7k2(KJ<3?x7WA@!(^CZehIuDD3YOr5w=N*-D#g*CnE zYKoCZ52eP+`-oUSL;e{frqBbN0=DsggnASaV}hS-jdHR_3a!PXPA;@1kV4Xp!)u^N zV83b=&15up)dSB;a`xVSN$v|7%;DV=`fQ2K5~_oQz9CWS?dFN$jtprROZ4Sr+R$Si z`wMqxfp7$}(EQvS!yUVy#x4k)PRuj0(P$>%Il1qHzv*ysq%^RH<@|y9CCAZ!5|F^i zSVeB9>F4Ub{KUHDF;quQKORbrbB;t`^pcoJU^0~jCvO)hYXuebGGPwOPlTJ2F>0AA z!gY0JE9Kdu{+>lY@IWU1DpOZ+3x+u9F9WkD2zrtN!p`iwA~Z7A18 zN9Y7yZ(aC0iK^C3zhAOTD5N`obHk;)3Q0^W$4Tv|qp#0(^U24%v+6fNh9qCMZzcAF z1ZcnE*+$U?BE(O>z?fN*@NDc~K1Ph>9e+Io_8WTbZ+KH-zdhG++MLw2Ik$}_FQ{q! zPVF(bwOepoS=xOOzKw2odP%NAgdP`&Fkicb^5%&8)2h}clmLeRJHhpTaE9w~1|wCg z=wQvX85_ms))*&koD-;>Ox2!)4W@#ocP1#AqlRlz{OBZ%WRm@|?_>0ar) zVw+R7R8+){Qe>_$W|Ib;Y~0Gs))x+9jFNVUSPY<0HL{mK;QXG-ej!jv2ho!JS@q1G)5;&! 
zp=aj@(Q1k%)6*0PV_lS?-p|1B%cN`v9qr76vNUcrUIXEP^ILdh3UH}Le8M{YQ>5?P zg>h5(=(Zh%tD`Z0z*q(=f%=oMPFVS17G=!qGWqh4wswI>-rRI9S-TsdU3O4ucjiJs zK5X#MU)px8%GSH@|@}AEtxWwOJC=EjBw$u=z(nvo>!2V$S7%hM8e-y?pt_NfjZVURUSW zkMAXBYCRoQkdP7T@ge!T3Sw+0UsVRsw7{FJ6^THXU=Jd(DrGgm6B$wN8=Uc?bk@47 zvYM225B-7kyJK9Kin}1;j#ouh9iaf;r@+$0=FiEfkZYG{aE;E&$OYh%z=5; z^)>5>ZAnOf+|A-!;hPct(23G^=XEFyPf7}HHCd}-w#hWpMx=n!Cuu{{oY0WfUK*2X zs<0BsQZfu+ES>S)XP^(7P^=t4%)(5eMsP8CZV-%;QyuPtA~1z7xO-LkR4oqLjL#Fk zW?i-Z=o+IHB@fW`$lRX`#;TsF?W7*C0H~NC&bTOl0-UZvUC%FVd<^wp7eZlg;ou^y_sqR|tK@9&t_Q*v)bZo>mwG)Yf152N5% z_-pwoK)ZhzYPDBy-Z^w}Fgr z+CqSi0`(}!)y&iy>L({J1MSSOQsF8MJ&_Pm`(cB=M8A*8*-6DoL zTSNQV&H<^GT%g2jziOT$D+W}-B(1=|5=_;9?y`bc#2|j~3Q(v~=uRQc`~&rqk`a=*maykD|^fX3^gT+a7J&$y%d3F4o1)@CmopaJEFRDF#H0ho{d7qvxOPZL^cco!o|K&qS!K3L=1ubHa=UfSC7koHIyLX4R4T-O z$tBhEK_S5s(k}BexFg$-PF?Qdu4H$z z!G+6a>AyhwzQ;J4*~q?MCSs@<%&bN?F5)a}6EMIyg6k|K{}^PZ*6wodNdR5eDZ1iG z17v7x!Olb!{CtOId9yeg)mY@p!4CMTr9r8L+(7OIJu!(pmgpvA4%p=%bQj)#DBo0W z%q`*WO=7n~;UKP^jGjeirP%L~^&^?q5~pm;_*o2wE9R=C=31LA;5^ z$)6-NM-704D7Wm-eU?SM9~9ZM_!Y-gg=6b?Gt=`+n)IRwk7tqc`L1te(6b4(@V6LB ztuuT!vQqDvl1L7&P_unAJc7%A*2Zf!H`GBePb80qu7ezw`W7B`8Ppo!GBqrn-N}ml zkBp7u<@SGQsSeMJwf5gmQyaKmjl$cR&KN~s<9NG2R;9dG!6M?U5j7oh9iqE#g_LVi z_;l)fP*}>royy(mNSp0_v}||A-S7RVfhJ8AKRU+tc$Ixa@QX0>PhC@gDvs}?quppS zvDi&o;wMKgZ?ojM=@SFFI6DH1Y#TF@G}fDSmt)^4=mrNVtln;o$Rv2>fHuFeSNq-g z$X~t-&drUL@wdsT@hL96UT88(U7NPjeYWf9N2ZRh4({?VL`|Or(k2i1Erd<&#e7B zK0e^;1d6%LkyGeLnmZ7_`ksD+J}m;K3z(@^cYZ2-+83>S=fyjnn`8!;aBeue%XT-# zo13^XdZ9;9Y*|VENwMY}=4K{!H{jAl3SP<6_w7bDwAnUdwS}dB>};W2hBI1GSzG0> z{gJ?L>C@?IqKS9$mW;@IRRG-Kf(qK>XVscDwdD$Q!)7N(O+dq1Eu15QjabBsd?~EY z!}c*v*O3Qt2+yx4qBwFFYZ}CCP81StLILI=(=%#PBe64ou?G}KBgv~B<3AEs<9tTE zbB1)r2R~3Z06s6K7~CX9%RcXv?8hW3Fb>-=L$t&<+g7*eDZ)YxJZ9y_CE3kU*MJw5 zSOF(3v@)>IS@uLc;cU_CWyA!gae0nSVCVL#N@@>&`!1EEj_P+P^_NQI$ut> z=A#_I@AJn1cjg^Zd}hIlt4SV03PkE6ls-;gVoqVk{x+QJCr1$Fa(Qmo? 
zc4k8%55Yj3dPLWqw9Q#OKJ0*U)K3wC^T{GxpL(q4)65)4@|Lv=M2O$4wF~*!Ru!7` zY?zuBP&wU!kb>+4w>>PJjy{+gDj2nvyi8YiAd0c^!4Sm(X7 zb*oEC#<5qJo$)qxEH|FPzbS2wP=#!1E-YjS0`k@SdS+PTgMLqpdc-^pdffkl8WQT4 zql7&4m$U94^;ybz%ugfjs_rDP%&^(5EE|n~31y&uK!u=39iVJ!lEZNB|7A^qL~p`E zQPvrQB1e&bb;=z^o5EyzQG7m}za<2HD#U)eq5-kGb#nyPS;mho{pVOOO{`CpUa1jI zPqof+&S6TAAlH#r{BrB%%HwWYx0&8)Lf%%`*;tyvhPk_i6*U^%zXsw+ic|PbkEd#X z3;bM&u!4N-p3LjlRtmG}%LeJdog|MWV_l%i7j&bo)@f$VRRY!4;}`f*XIT& z7jaIV)*y&{&2J>fkE1zm!l)FHl&;7-l@WilOYwqeiyp78A_5WH!AV&ySsY!X-fwa% zgoWs%jI*4A?Ud@iZ@J#33xpOL>Khmwg8m>r znhu~a#(*`^A-wGAJ-K|#v89^)U)$)(DnsiBo3S3ivAVxwZRG;qt z2lV#`rfrZI2mGw6{Q$4BK*Cm2_3(bZyk;kF^d!#Hxqo4>yI-MyMjNIOIP&ki z1)Z_b9RZXH5^Rz7DRMjByclLRVh{J?khw*_4$&EZY^aL)?$W2Pv;0H`E+T7WFXKQg z>PFl&0svg&7FD6asL|HXy+Z!!ebUq=@Al;3PHGTl+Rf|js?=$^s@Cm>sUlWHi>I*^flYj`6lXf*zsefiVbh!vL+ zzmURQ(nC8ErM2U{1{4Jel-DJUrY;cYFsDKGfcYKZK=IXo+W|hc^hulNc&yIlQIQMV zw||g=n2lhm+Hzc&esPy8&R`F1iU^(fB@&2m^w0TY?5MxG+~H>RF?$G}dA8oyev!Eb zl|%7{=6Nu1F$6HZq8jZ*g=wcsJ3woV+3b)&gmiO3F>|M1i$Z6tx|dO*(-VKG_vl=c z<_7iU7&XCvRdRGsISzGxLE;h##|3%!x^!5Z^vg%k0lmEAvo*vJQh?8>97f0vH3yR3 zzV1|zxw8|t9$SP(5I6Ik?05u-BNxv5R!yR~L}C3Z$R?wBC7aEd07Im9wrm;p3FI9!75Nn4uMn%{%t8b}ww>v?)t9*--NQ8$N9<6;7JYfHWw18n_WdW# zUE8#E9=qe~&Pv~9x8L!EPYijNv3)yg%uGJHlX#nbq|MKI6za1hrW#pQ%JU6=^;UA@ z?4zN72wo{oc?YY8h9S* z1!o~PEP(V!i8bB$bj%nJ**51aFMa3t zn;PZQYE@FgdHhyl zl>I(~?$(Sh)33so!Zi=~;(tXm;<1|kQ-a<(K*>rhILPxCLV-5{M69o;TC9c~+2tZQ6H-K7@*8oX5SQ@F*Bo>0CS) z$Mhya->-TftJp>T?qMlDKqG@rUGClp8~Avuqi3P-l_>O(kM(mlpdhSA8HkUho4O12 z_b32K8Ipr=B3dR*5nU#grZzm^)++K?HGTr+H;G3;$HcG-{I>em8u)P#2AU?VZ5>yv z60$nkTgrWVwNHo$4)NgM$V|bB`xt$~t%C0s1^(mxg-FQHONeQ68KTsxcr2Q&YVnS2 z4BLiCl94^A1u0tVIv>ZToIfRfCvAd?50#oXr#wpd$FmCL;oRYfS#HKGI+RSp4%UMB z+KBn5W?u_UdgvuoA~L;y&()PpCl2f*EP!+>I)<}ic@7wInoBb^-Pjb!x1h?Vbpi2y|5xJ z3mdaU%MO6_QD>1-c-$!>lD$B^LgVx+L5JR$Jyn3@S=+y#=P4gJ+!>k;J5^JGefr|Z zaaHP;eyTXCp>~tY`kcB+$+k;(3kDj*uyZuB29XQsr(25!LLs|BLyXW>1}Tt!gS^ZS z;Gm<*vvtkSdD|O*q*Ji;qFMX_RX^Y@p@+jy7PxFdwLA%+J>=D7MIAWlwIROqli 
zs}WcPrwnbSkretKgMHd&-Ty{dYAka{*vbzHdK2LBfU=x_Ev{4(MN z`L>G++7p6}bOCz;CpC8JN+V|^IC0szP6=`TZ3#;p9 zF1#LpJxC#)^kQHF#w9?!AsMhViArQPBG~1k;a;-rHOgsyhq=CghZ2X=*xy3Lb ztpu4OB_b4mJ;3aoOINSx!1*=~U5LMPCD1bW=?&9mF1jE$%Q4$A62<6wEdXoDt z@ihIlkgBQO5hRj>>3$ifI?mu&z5%0xq5ZwU&-`?M_bfbG&4Jjnn-n9ibX$58G=9y| zla>%W$)9=LbPgVfY$RbputYMw)fBB(%d|1-3MnBRhrpD0VI zZzju}YvB5}jdBWI@^Ts0M2_M>^h>;>4-pQ3SD7H2p1gTleGMi&F36SN?9<}d-kj0y zZ?TnK<6PLQn^dBOdXk5=_PbG4;!0PDnDQkx!OaDYmB1N{Cn}SJMulm3xJvN9v6#_1 zJT!Y0WG@ySCf@>mN4LJdVoc2GLP{C23TWioX+5tph=k?n@dz6zZa;bjzeD}xagJku zxpueWT*QE@Y#52W6C*JHIc$dRrE`$V$UE$HsAkHgdSuM#!1&`Vl^*veNm3jE%x~vh zC(JqiU$EY&&RjwUXXwsR2{~2vcwk_8iB}SWpEA1o$hfp?L4DCx@U(o-)q8JkMMyuc z&01tq1=GSWkM6=?lT3)j@Lb~T8zi8AUNK*wJVI~X?4xZnZeStcPyyKc!~}pvgMK$eAs9CXS7~=sZ<4DV7GYu}Dt)ikt$XF4BN=wucyq=_hQe4Z>5EP*+gl zD+1L?(BsoX5EC65N4rgB#i}w;>+Wr#5%DA`?YzNy?^l(BAjtc`NXw+PWu&eAcy`?58%!HlSX2N#m5Eg+=ztfIXu3}pQ}4n~stU+|cE2<8Ql!XsyP zL-S*il>^J9iN4RP*50)QKXz;*)@#TJQQpcl1=jqR0^=rnKith7#PZ^Oo-Z>$KQB8; znX!)lga10#s%!DN|NdwvU;mGM7eq{wcdPoZ(i3}bQC`U+vtdH)6f0$a^D>+wb52>h z>U{CG!5JlMui%!>i0>EiSAR4#R~*|aL6TbKe(dx145j;yKP344*6~h~<_~h%?e#z% z`(VTY%T!RrZSOCm?LGvOpm<=|tPXxc-dl@q#K}zpNO|9Zv>;~n^KzLkam|-n`!e%6rK<#+AR;E|ty)AE4%xiMAO{azxCoXbW}SGdmmt?B8;_{X)(0Wr!*VCI2# z znk2Ib1K8|Fr+xi@A)me;S97VluddRU4x@$gy$V4FyNK;)oG@4mCZNW*Nm{SXXw@$}84bYJxgzVx#cr%z~7Ml$!Ht({WPM0CM^NeZ2 ze=}x(A`=_l6*gGCvFW=Q0U{K^4?W~Ju(P?a#Yhloj=MdJPavF1p0}9Z2#+nMZlqc zh^eG1i3N?&idBB~v=v$r)E57Ok= zWS!RS+xFwRSJRj9Oz)>+#s^_)_n{IYNjGE!GIa6ma4tZ?eYUJSrXVDmDNaA{xnZ1t zLQ z9wN~N_P6>D?>V<2s1B**cm(yh06l0kfyXI%-aYWZy+AJ%8r)imNa&s(0%^E)z()7^|L0H# z(0f$!jaqyg+DiSr>oIxkL?ymhkuiB)HJY}&TmN1?Ax3o@wcgvr) zEcLX(h4RL5NeHpn0ZlfkIeLNdp2$~^=IS*`h zBin+2X@V+0!@;yJ>h;wxJIjfGxyPUqyvzh{Y0UBW6tCo&k*^wt_>O;^x9UTB7jO6a z>HDL4rm1J4I1m5)JnhH19q3U*|S_L>!;<$P(1|N3-4)IZ>DmwGR7FmX`m2!(#Luk-yp0 zJ(cVqbcc+@J3b1hI=-f5ZAb-LWJlr~XDstwywFb|OgDU%OkX@nDMBRY*zi8N{2m|* zT~m%Q!Lfw=B<(-`n zD0vqwRDwd6aXP+)Ec>*pQsmI?dqf)KuJm*4qr_XhKB?S>K9Pm>m_d+Qk%W{udTPNQ 
zrZ#V!{3gfTW}0bQij-{Lc}f8ntXS5i5U%*kqTG9w|C6Q~6``|#nG8`S3q<(Ct3m?5 zr!(_H@9fizbU+Pt zzr8FS%@L$^M9-b)j&yPtbjI7BglvCseuHqM^8mnYTz+5R++}6#nh0n(Aw**;)aWY- zVl)9rCvGB$Q^VQn$0?3-FHVx3R+Nb%$J3K$;tDx974x8fbHEFWTRnBjE(e>3~GU{)f?(7)`B)_Fvo(`s`gH=Jd z1!w9Ssu<#bGAs*jIIu3M6nNJl-I4 zD1_xhx6q-=JUp8e3~y(8%9!gU8b}TP4U+01hJy%y^T55m7`{ZC2782jYW|tb2=#@1 zmC+l9WBsw+>Z-brIky0|Ju?RYhJH;4Eqz#g4AyFk-+3AJtk?Tb04RUVm6GhD#c^Ov#;Bc^Q+j#F|*dD8N?;$^3d&8tMM9{2Ecc1BSS_1K@QY@pn#q9 zmkW@8^G_x!P@@iZ4q!WoX^!kTO8%fv%=ukF?7p`r1IH#$4hofm6kLg>SGjFQ{fYsA zC!m^?42{i#m8u!IG!`30m2@5>|8M6T_({ws>XzVWsUkCF=*aA`cww598pk%7-p)0{S9Zy#9 zdmzZ#m8H$W4m_0@)w?w!YH4tHUT&y$l>P#c&&gB_XY8XY@LFGNLDoy=;Xp9y)@6Ku zxc<+FYj}%+HULd4c1n1?UjwZz|F@>M8O}a*EKnCbiv%c897unS}9eI6 z6i0_uI+}9FeCsg0EXTzqYR|!CujGDo?cJ!jvynpCX!~?#y_H$*Y{txj2}FeK&b5TIlO{AiCI4J5LxO>_)HhSG>*oneG;pWK1O3T6l}&Oxq!)*Ne@ zqgeOm?g{rW?$w!Fvk_ThDApwg)a@gbGsn5fD zyeF!TtQ#j2oo7*7`9kRBN|nu`2)p?*(LMC!!%>1$LZ%Bt*Ryp!Jw9@KmE^X6XIk40 zj;selyM1OAyIGf#heEi250SavELeYX3ZLid1gqlqGbW0O;fDaMoSPR%(lb|@ zSWk4#STAjjKjy)J9AX?M@mzMzQL43m2_m!-GURvy zX`XpNAG6eWS+K5y0adZc)we%_DqKKrg^MP)*6%+kwHdR;XuR2}S#a7am`Zn7@edhf z_j=|;nq(4{NVf6wESgSKrGHZL+4xII%`&1dnc?jj=ZDxeG$PQ*))j_0R|{goH+HXq z8DzFIdp#IZ{i*p zx=pKaxZamNmv7fZ%kg6kOLzD|6^lJ0N+Skg3!!=C5KBLZ?Pp^ydQiK z87YRKfwaGX_rV}IS)HtG5^;Xr1;v558L)adY@GK?r1|&orIu&;k$+!jBRvK;^4QnpF(53FjLZZ%)*P!(P$>g`IEm)y$ z1fV}kJgg~yori^aNzrRu*H537fTa~sVoQhcX6XbbIYyM)$Iu#(pA`t(^~B#;;}*J! 
zR+Xz&y$66&auOl9tY2d4G((Vs9u+g>&~aT!!s-5An+9+~pHL-VOkmp;sGPfEa^go= z`c}$bB*8Ja;F_k9`5CF(z%h5q7mL8a^GXhyquJ_z)km`E0?-7x#8fUKu&MJn>@bqd zEcuES_>kjE4r?2I{mzWBP!@ByA77ljq((g?q3J*I2-ugxK)7)KWego%rdC5>w-H9y z#<$k&w_1~3GRO~Yi6NGoY8C<-Lt<@-7YK>t5X1Z4+yDt^fte?YwW;0}I!&?T z)tW1R(1|m;9l6P#HDUawYh&g_ecfBxkmoGxx^g$eELTsw1j-F+60CDJI>)$XxdK!u zwm(t&$I#i%$<{BRK4@Chl*W`tQhJkrau{Zc((Zx(O=bp zopVk05EVPvhTrv8y=yre83AM&czLYkm!C?y5GB8guvwX;N*DnVgenDt8%Ve?U^ow1 zryu7q3B~hmp$?Q=OMn0(MQg5Lo=<##3uS!7>Cz0E?G;1-3X3;ZoaiafZFgL1{aA?@ zm0|&(x>_Ab%{x9jCaQt+ZE&ilw-zxb317xhvXXqTkscVSb3$5q0ZxJNz;4!KxF;Y5 z5eg#uo$ejnxErGAa?v}ol;=+6>>{BA99Mn3o~!eu23LFA8PbN6mMa)@q{9Y(K~0C4 zm_SKN!Zr{OZVQ8b4*Sirmsz;j9A-r-69{F0Yc{dYLv7@gM?Wjyn{v>Fon=&8QPc17 z;Khm+E$;5_#kIJ*yIY{ey;yLkxJz(~7k77xJH@p(JkR^C`{}OxA(P*k`OoaVlby4& zj&1mDJgd|@*~;=EQdIf$;sy}^BHn>F)1krC7F-7|j?5p<=&D#sK_$_0@_9M|G>hD> zMQgMjiS&7_O`$*hvoq6w>~7j)jx}&_zxWhgjpG5oyeh9R=TfqL{rA=3L&KrRw0VJg zr0T=Q?C%W1N)1w?zVNS|AHEg`kfv`dFeLUrw&JZm-ai$Ga<}?6H-@95qAu8U&};DT zG-F%3Y@TU&`AAW_}Jxe2nay6 zPSMzly6jh|tn4dyPs_45f;9@=2eqsiv#TdN3}-pk{@IJ@yaRUA`S?Q;U`b)it!_0G z(a_@$>-)4BwIX7&Q%Ygsxgd6YkMPXgpXLlqB+&<2O|9VQ^xwA#IUTk1Jf56fhRSKd zK&>A`JV*$dC!^)vmenNcupgogTJ2O>_%j`( zeB(2shoS--|Dv6Iet2?F_nE+3e4yile zkyz_P^kDV3a`aBi%W^pRb6Z9Y8)*S~>_eK%v)%O*;ZN`n=2DJ@I$p)#E5X(b8o)k= z3;W$uO85M1<4_)wWiR(Hn%~8zKG@LozJVEh!Nx5(o6psWEL{caD>=ionZtj1Ux80N zvMEeS{S>jyBa@cuU*&9>tzKHve8g z_z7kIDzXiSPkWmu_krPIJ^-ba^8rZHxUi;=sZonWvBd~_+Cg}5X7w`XEj}M8#%q~y zR;-USbl!d>g(>Cm8&>tj7!q@-+KH>fciJRXOh@=iU7|6LglQVrJR*zaH6gcFY1aMw zpPZ+!aODy1^zMk(y#p1U^Y^qIGU$S-Y>vgu!wpTxH-Akrcg!MJ*C>Sno=1lJV(R;& zV&zW3ow`@O&Jxwuzrn#xi2fg?uhyMit~fJ|h`Ji)^Je+OTf|sOts( z1K2$ifeaIbqyxYPmsBEP7J;OV065?=6#xSa$j=mj1IodMW(0|90wTcO>Hu*V&AJiDvva)C7)*035=AuCoCaAg2(31(-hua0CPTnGg8@a#I14AXY~J5-8UL zU=2Qve{XNU=)aK`aADGWCUq$Q2Mkf>!GbIx&k=u50)vq=-`i`dfebj;1)vDl$N{Ls zf)?r_()N4+BP{5?34+f>5QOIafr$+kCFXwIHlNCUf9DXy ziO+06p}=uRa|s2M`F*{Q!A~jApr==t`qo^Jpj9oGqnli#rgB3}Z&5%+iIvDnUZyrY zjj^t=LPu6UV_4Ez%0-?kZKr;O0Yl!*4y4YDudttmx=KMq2aE42(Fv2tRO 
zcl9_I(qAYpMN4btM@H(Rk|TlbKXyzuTGfcXj9Ekr8q<|DEC!W zYZMZ*%BRg*vgj&nPgzrt*dUnBaHXiNn0_YAx6f;jlP=wwmZs2wQ_VSP6S%<`?#hVuqap2m7 zG;DFrAc7dj#oJU3dBzQ6ID7qQDQ6IKzf`kLN909jekoTE9^#&3;l?nKD5pfojJgp+ zRD=pNC94x~jH;BF@hZF}X;Eqv2g1)JsIG8+k+!Mndl19iVHwm=Yf+{FVcd zHtV>S6eHuuDx+AVNQzWF*93|^`}GH+a;A<+@vVg(6^nN6IXXw`cA{v@IA4@z&xtjJ zBcQWXK>y4GRdCFuMJ7venv>vzJoEJ)>V}OqXE7+%&$d+@QsKafOBdvx1R0^pF*+@w zJLKClC8sP?yrmz^6jbhUY>?`mQ+}nLV(|mcX|WiuB&D<2KIo{06asHi2@>ZqZ#a@k zaJ*SF&IvxWLMI6r!TJb4{XV5Cnu0pRM`>!V?M)c zoBtx3fz}l)NGC^G!OPB*UfJhM||177G zY+eec5NTiwrtHnrl>xq*$m!r*N<>-dKCx0{(R@q995sAMGqzExAV-yu&c2AyW=zA- z&!h6BAdn)Pw|#0x8d*hCGljN5g>};vKM@OXH0eXb(Sl9K*GH}I;Z?>_L$SaP>=FW}bTC27SYz|n@KkyxBa zzBB>O3*m=bgcD5JrIcZk$@iYoy$AHa2|HOmwWmzG_)4eWKpf`?7CC|q+gKE{A*N9I z0?v2a`YW8U_SnEu!sGM5R@Ww<6h@2#?AkZViw`^6?&R|OEn1o@(3$UVXcgdRl+dZj z`@-H#u(u%J(^$E5eujxyQ6+8>>jXS8-kC;`Zguo$b{C9?d9GTF^rlq1 zAYuDrCv8a>4`y(dcuKk-HspFK&b%3FMCjtgJvl*>6GXfS1yWEL_ww4ORf&wqE^^LB zRpn@yIjVgXv&9|abKh~Z&VzRP#m`YgpK5#-;YyAPpkNJJf*DS2lj7e{@Oqg;k@;jC z&<8Paib#PLHRhj@1?-IJF~(wx zL0IHoq|*|VncCP1h@R4D7kivNYF;o}YIXJoN`*zu8D53b2iZuJXnslcIZ>yTGJOd4 z!;VF{2c7wQ-*GlMI|`dl`NNo^?UYJd{KGi#KSTrFf<1b@)5L)W4nZkS^KS{d9< zTscPafNo=^ly zYeh-c%F{`hG7=?eaemJK{^}GQ#>a8G&v!bE9OH*17h`2@pHo~JpwdrX;9PklhK`4C zMa~DIVZW$WEW%lXW-$05_Mx`C~w6$&mrdJGl4zv=z}e~(Dy38XZvU1n=N z$)i6L>Lez}y~M`!yOzNcP5c z`fS~T)4}SEkoFoe`KuhFpH?O2VkmXg+maE^bM)~278(~HSUM3Pl#4Npcb30cy_}TF7sHx;^{NB5g@Q5 zSq8Tgp78rwR^*Zr!B6IUoC;K|KWoSO-{axX^khabT)s1kmk~)2k+iv;8gBQnhc<41 z>CpYH@&s4&aG7~wJ4sTL$^k@6_JJFrr)nfooe;2J>A3B86c)OGw;XA_c9=v6SHXJddUIP|v!AN$-BBQob&ituru3)|QFwCfAJ`avSi{fD9z4RaI|N);P}vRM`&|GQ-3^A&U3F7p-xRZVn67IPJEeGdy{?3Q zm@)>O7n}Xda3KNpMl=WmDB!E6xl74VaFEUa)p?1oPW-$0S@Ur?k6=vdHPmD~pNsN< zjJ_*W{{^FSRo^(leel^Y=jJ3c*j!NYE4qmM7$>Vn@OLVaZfKeq*$S21;vQwWPW&uQ zB1*V}1F^Uyp;(Y6oNv)mn6evACZ8@A9-hx`Jy?{&Cc#4d*v4-UsFK_A%lsZ7>C+&{ z{&nET(PB^i?tn?zx#%KU<(|2YDBLVmkJ}>-uR?PqPlBhJd~OU+aBgdo`gA4TeUMa( zCv^|sd_kg?{&1h7dQQs80|_Q>;GE{gH`z>#?(#py$DXZ(v@Ua3T^|v_g^u7Vt3PKL 
zUDCuCuJMpw@Od@>vltS90qtRuYL>+jK@p4ORi1FJ^cx{m$yvt9e`xAm>_ z9=8j|RnGq<&Hu}_y2af;pedDV=LjX69O3Id2%n9c%z9|$Lr(!|Z-D`g;4B7=EVzA= zREeOqulhCY%wF9HkUv8fDOfx9L)X>68+M`cKQ8Db^AP6&Y08$|ly{}#DIWVLsq$Ec z{6N3&#rrfDomug+KICQZ`~Gqb^s*=Ku-tuJvJL1+rLLEQ$MgT&e^y*snbi5=z}KOL z&}f^#teCsVz^++hqiUOXc|UdQ#nyh>C~rDzl`rW}i%yf*p7)$f)u4A+f6bG}h0(v2 z3ZsD549(j@;Ar%~_oMNn+f!Q?pU$C05rv7PiS5xpJVP>@#Dabgnq2N?!jInSI!ATg zz{l37J--o|&65y3bSA?Dq^HHsr?>H=8&k1h#~R@@ePSgmQV}NlwqKuHt~#9#kFD9# zCzURY{Iir@uFctm{zWdWR&4HLny+y7lnmbvyQ{ux0bk#IA?wXFrWs@#y!o)@25P(O zRkK;nyZgT z;N}-74AdE8W?^sr$Fu_Eq^OemP7};>|Mq$jTX{4MS%aH}C3O9P;;_Lt)-RpWa}%`E?7}x zc{_eevEG@k>?k}RtvowW^k(KO7vr&OJkRa=0K9t49FqAUiS8?jUY_#(Zu1NJM(oJ8 z?ZV!mecyOASSCI`m!0P-4x@*Ktzut~jw_CxBX?shqN`1z)gjH-!}(#h2ix>`*L+MP zKvPxDJ0pECy?2jSeOx!S@=T_SD2!jMShz~rYOw@-K)S!%JnQ@Pd}eN_ERs7xb8uVP z1uSJbsrc$m`$Bk!=6hW6$b?j3IJS?IChP@|+l;x?P%^pMDQprPs~zlBCF{2V4{Z8n zivKYU(NyT5-vu;zX1cWf!3o`nBezhDF*J1+qic|}OUC_o2X|q|M$P@n_b0K}+CUA` z`Jvl}r|>7ASF_i8;WDH7nv>jPp4(MlHDEW59Ule7qYCiz>O<}m*w--E>QieN+8p?n z^!DIM{xvtwe@(OQ>|y>STzSLtF$NBdLrxkP6S+bi2>ZCUEhs~Y`9-277jts))&bO4 zwX^>A*7MdbvF+SHXM?>WE%^2wCS zr}12V{$Br%Za^>gV>HgT7s_!n0orkLat(_}w4QQx4a>i=fwf&-`i`$vG(4r-L|3uW zV{d6=C3q#J?ZRWsmeDL(i97kp5}0jzdPNFYabk~cdv||Bm8{6J3U=N%{A_g%<&*V@m&O5 z?>Vrvq*OvMpL$X|A0u@$bW1K(OLIH>xxV{w;LdCJUy>X9M&|HhEXJi?MPy8ec z^W){0bs!HyJX~~aoeV3BfuMuN<(dgQHu{@e>IkXb(klHEsUW}q{Z7P^L?p)u$=O|` zP)AbH)sTh-9LKmL-$bky-{?v)S6#C^ox@Kiv$oL6O?r2cK;O&jOf8`C8=oNqfr+r{ z12+QD7aw)v9A&@h31O^VoBdz&=%nWM)A;ZoQj%JBWx~xEk{$d@Y=)1YACUB9ns23T z4S1@FRTKn@ti!(CVX4~nSd{gr&yH5GU;!lOi46J0q>vY)38WH;WF#L8ysqw9Bu+l) zS$wk+5dwPum4?HWf&U5A_jJU&e3SX{=i5yCspT4EX4)n09FeI2^J-&sooQ={c=&WD zgzrtMV0QT*9X=8nU5NO)y@>AeWXO6XrA||5mLV0xwmLC(fA4tKku77${Tx$E<&h2< z;l?J=|7M?nLxe==vZT3JkOa9%I%VwN<|kzv{tFp5p-0l!+$NxG*JYokH8A`4^1|}7pOTO7SA@yZUNY%T8i zW<15e-x2Ps(h6|ITo{qgfoleQ)$*0!_ix{9I6wsXc_FTjR6cQ4UT_zf z%h-4PMA~rq<3Ww^)&SvkRXX1?8woG9JwL^htMO~^^VP%QP;P6lub<2qPjwZ(&B?!y zAI%!XI=yq7+<1AB^=m#Xe8YG0p>EM!srcS260WvNuigdp|9fuzk5}G-)ALXAF#d~f 
z#E>43rx}?0lPpt;8g@GI9%}24{)PUHSVgU(RoXN?o8)Ab97r}k>bCp`M&YVqFE4km-yj$1)Y$@EIR>(~ zE10tL`bB2otDuE zgA84jKjfsW{EsPESgcMtfj}#rR1K%Or1T!YOD5qHeiLy@0?7>Iuyx5lvN~mzj@i)+ zHBrkiDl0j88%eDsLT`>WAK>{EbBXA+J;|&tmw;u;KVDHj(RV8B)}iLSocO}h&!c+^ zdv8paxNiA!r)rdIs~qv_SR?=Yft2tjlE|Eb=m$;#qj=N1pHx-|KQ+2gyD3GOJ5M>+V`&jHC6mE)Sx_U9U2G zKPSUG*?KQ}Esye5PD-2bq8Z@VdgbX~9ui<%#!#FvoU;VcCzhI92FZYu80WeeZ{IH} zPk!7`7S+E7W>&&&yh>%(Uzqmt{$|JQqp-)WhbY|Mhs3GsgegYoC(#E2v;4C*PUit2 zu=liGe07ZrT=vTA$?Wp7;%zi#x?Efw{j$AAq9g;KR@rXnjgeQnfnh-9?m&rLvbrp^ zmaJyY&UwS=FxuOKZr8vmhUl(dk@%*|0afx6Vql^$@PNx8M-LtSclGJHU5FrfZZjn+ zJvZ&~iw_fD__7V!y15WgzOAyc$l+;Xk>ALPhGA8wZ`}m|-JdD^Q+pLT z3&xHW%3tvBFGC%kV=upZ4jk=%yP&^);S!Ka>b`E8Slrr7@m&1EC;dAeI{@WXQ_lWnB-dY_1%KTgO{?03YH#NV z_~zKB-qrlLvb?^Spf&l3hx#b63K|!-X;5BinHl;AgNREHVxxz~hUMd91=UOeuHo6) z*;qM1CDQzXvWid&Xwe`2a*#idQuQ>U?>Rp04bAizhkuQ@RQ90H936D_3)|Jr1Z_XKiK=6(q* zpiLAA-mz>x9I-?bQC}j;N#>cH++D}LR0xdru6N(D^0XQC9_;uiD8KwW4VJH473ov! zual2Mq&t=lCgr1<9w`Im{=mqWVD3+!ftQvIbhWkMOs3#aohpu$9iD@4A_-P~;AuPWRZX$oU^!5^{I16;>;cmnDkRET>4-SF}qu84YkFQm-b+_6id(HTFa3o@)PuA*aQpLuq0g4<74k=t;wa? 
zF{jjC2CKjZM=v(ST*wk%3eAI9Cswj{M7uuERabl@WOj-{AR?#*$_SpG(7AQgrUM0QQBqsd;-W2wKy=g3l1^#p0RYv4$c>Ux248HFc zb`$~BKa6}zJK-Q)%qeiFlWv#Juh67vK0w;~18-Vd|d;Drf0=G@B z_Y__ta`%NV&?2Q(!G-HNXbt8DKyHKKS7wpFr4+4QVIN*)&j_CGr@DtOU5V>zewh00 zpDUkl%KtOm8TGhnkrS$Rjjvc^8zpYAzmE2}d~~vNAFIEAUG5Hl6G{f1Lz892J=k?ncOg?`_2+lE5LHX}YVcv6$NT&KeO`dD&(fAs zh1w>}@llOc((HMzmQTRHtKU1D{`Z%6%K!Y^{{3|aHZ)1p1zbP9f$uNQb2mi%>lI?Y zSvxidYC`V)DwGE2F;wvW1TXoywYkF4nDa5vp zAUmnx73Bth<9*hjVxi^4NK9@xB`<|)Dw}9aRyCy~c^K zNwA*=5C{6c2p?fBDMkcktzqrmeE#qdrN;|1TD_so*BkVMa}oOf^Sr>;#>Gh_`kHK5 zrrBDWi zLapZ95>`6l4W=i^_DX-l-a=j;RJ=k{@<5wy0Asta*%6}gWYp(hIeTSSL7lXJgA8O* zPQ$s_kzp?j=eL7Qw45qm5TUx(8?Nopc|uT02d8y?(y)KCBNCui^plyHbn5UzSM1|y z3S*>VW7NYc5jkb~-YF)y$Rp65Ir|_#yX-pXE{j;{f(5a^;Bd=c2RABY@}6@?zd|mL z0890wptm!b8aXd8g$4lDvP{<5d)NsLtp{D|*$sBG1PC;F_a~yv;?&R0Z@)>LufhC# z?7AE6j~d&`n(N^eNT+t`$R11NT~{((;9XPNn70qq))BFqE57{=U`8I`6;aw)=bdAY zOhXRR*;s>SwW@1?4%!@m%@n<{$_S; zWTlyE8O!|cF;yE2kRb7Dig zq@Ywq1?Z{2YRo`X_hgVO9q$6nKXEdar#*JO4;DItc`qL`klYZL%drPB zl+4|Gam4p!dj)$?tRt$TO@jZSiZIm}MQJM>i;5?&BSBmFyka>kxngel_wGOBc6S6k z+^k-Ww*M&s%ivT*qu^^AbdzU2`2Rb9uuASEp1dP=BNxCt{$fo%Z!-ma(Iti(SM$iC zh&z%!ld>JE=f1m|O-vKaf~ikBuBA~b;y*}tD5J<(-r+B*Yx9{3__4}{@h3}=d6}Az zqYT|;O|twtQ>Q%6(Xx!+f4I#D@438kuy8ETp4T+_zfOWi9A=v4ScAsf}mGrNR zIwLr?4l(SZgw(W26%L5?*nU&~kZ#n(%3CsdsIf24gi$tVWXCI*K+Y+vi#Bf$PP{$I zKRh0sfh-8NzyX!&gU^ z+(w&zb17oUsV`+55iS?U55dxKtGgzRI4QnURs1xGo$@e%VMU~ZalD_mC)OsC1&SXB z&888MlN7R%55XXy2n|LzNMz;4Shp4(ox0e=k~Jrsn;AbIw!GaXW#Rk;Fq?}lvo!k2 zfsvK26g$|(XqDACrkWdof8P`bH4@=x_s657m(L`m(|>y9b$E8=DBin?8rY8_b$+N| zcP2Bd=XdVWFyZt0yf|`UP#I+bJ+Qxm>mo_vY?UT`r9K{LRot}hyEb&~65}&jyOIM@ zO!%)+&gD_)R*{l!+EhKpQ*Vi54aINL` z5M3>Ss&KMg=AebqIP#^HCCuuDp16bp^y$$7*RGJeTHEpCmcSGBp)+fy<1{Q?FaCmg z!rAHe1%qSj=|`-BPWe)g{G`k#=!JU^RM6m&42r|YV+pt8Mbg`e zthQ%Lr|0ZV6z}{3<>XRQg5z=ATjpH;oW_i5qks8MFDIf}vy-H^qv^i4QUPwI@#VBd z|NJ6E(4m!e+panj3v(vQlsiguoukgvb=D0_Hg9iSm~KadeY3v^v6mj8MeU^<2H49e z0qwjppPdd%wX)^}P><`R=>!+$V~RD5UT#o|-oR|fLdds*l0x=22VHG*5`UVe7SDmL 
z#cu%a<6@@U80lB6!mOO(vk|dFhYz>Z8NNzIfoN*EKNDVJ`@AvkRE`38Q!Zl`qbvdj z73)>7Glsz30>bqeP0|i;NVDL3<$F>R;0E!EQ#!LqylrHJ+&|>=P0~Q2jklAlhrY#< z4;LcDqzqTj&p=WU!7g8q=ckjQp!G%ALg7Me|^u%$ff~3vbPJP&5hzT1p`x{|=Q9V0aHj zMvH9k#7@96ZyXD5?PO0>$n>8;;5)=4{O(D5hj&jj1Pphulv>~luY=@kA-QlR*Fiw} zOa2<_ggl5V0^$lOg23;0D1bmhOFuUA^r7GuRuv?9`UnD@5MVp($S^E?hj&i{1nS;3 zRLn;q^6ZdY$_hxX>pMXDEsqR=qSEW*y(GwxWOWXpK^pIX1@iBu`jia>=-1Et{Gt-b zhQJ~O01$AUg8)soR?R+SnDWe!0_KtOeA{XLQ)6GcG%tpI`DJ@N ztdFyu$B$~_7xPmfao?K6uH;$2+neIjjf!%X;#)(90J_?H`cwII2QOA3yw!#DP|1P0 zuBi&k+A;H zmL!>;pqPXt4=0Bd7aKb(yEHooJ0}~fgqS3+m^2@U6q`7Ym@wJ@UxEs0NWsF<%H5ib zofE{S2TczO+ybBg1v|kf3)N@7-IbswgVozMd9TeCkCQe%o1wP2a|Ce=cv$Exq|XR& z6o|ZVQj;nLIi#@T@ew-_K?tpslyINTk70f*MoiPXVj@mlzgQj3;6M}W zZmk}d0~jZ`lg`p>J65yE93fBXtwRZzWDL2(`F5JPjj~Kl@LL8DK80|zJA|R_hZ)T= z`zqKK72~sk_XFSyYeP$p2v$uhTG474=6fqRu@%~lfxT;o_bKO4#4D(?e6_1<59)IF zzbFsUl+ss)Dn&0OLT3rj!uitJz+P(YL_ycaxN*P4+@5RHeG$=1DDA1s?^WIKHcJ(| zpk>X^kA@m$qmvljhLD@m(fEisfIRsFvN^3a{-w!IqH0j?Jap{$^U63XZ$~y?w_PLoxWNgCgAWu>dzsF)o#b5PoWpeMcW%F(FQ8|BmY-_>8gj zK$RoYyh>St#*FqtX|YdJvmw;f#QWzj-@hs4709x_Z`YK9{tqZab zRdo{q%Jksol??N$G?-_$=CLS=OJN_UP1Ty0v>M&gQh)HF_Jf7k&iaaK5ZL4w+-5 zLv~TKmdr&|{`qQ|%XwH6Y#+sSd-1v}I7yL>g^Q?OdpKXqU>1c!1wBaoF z4fU$>i0JbW=*K&Kq{TjJNWjiz_&tAx_fVP4CpNbYlXe5vsI9<7n;~LUrzM0G-sP0I#($2;%`uRmO;^Zr0&xntgI^q(G%3 zdi=24+QGGDJDkc;k6HrmG>dADT1@U9TKiDUh7Kt?@GtzBKJ+C^&4mTx+|P`=ZA4J%eSKp6!cp0CubOY6qpC!)1Rb~ev+;kO-B5>}D>b$vT3%5eW$@2%$q1W)1om#Yc zC0&TLSuEV2rbfJdkMW>FuZt}u4j3SAR9Z-SSFinmO>v4Sc7PD%Q6w9cdAP^C!kr(G z4?c}8fao0HG7j~EsGlqw6qA5*HU%Y-In+;{4L<_mIj3hOm9Yar%S*{H8HewYu0{J7 zd1UMVYQ!3d_x{6m+%^S9Hq9rv4l@wFkm8Nq7&YMI)H}~#QzR_7JRuZwPOGabW51Rz zj|IYO3_$8``|I{DCm}=n7a2%1CE49)U;Tv+sCdK4_VW{@1C5d4GRZTNBJhn97vYVO zcx(B|SiUPqd}5C}4bjIqD&oolof&RZr-@9+?~_P1KKMH$iGZ!=GM$5Tg2~tL@eSdE zp}%CSkSl5(_C=Y8Dzl@>HcvBjis4m$qh$3pLhYvqoKX}Hu;#i0)CrQ)41nS`bd10d zIa;3fqkCj}W6U}6l;!bUVn?nyQHr_r^{0++zFRc>6Rb>On%3+BU6Spb5-5zpp4!L` zmL+#KAoRl!i-ZNp(lzkwQ+>}%6P!>sG(ZsT+4n$gXd9Yb?NZ2e>a+cF@~~?IUtyfq 
zH*Tu?0pf4{a-^#b$Rw}&(_o}g3xv!k{34pc0}@qMWIfR#@ce1~9&&RJ@98(B1`vr_ z-t7{SE_mG%=quAhvh|mI!jd2F=h(!H$`R*jsLqnr-H0;I*ss8?r_zI^7Ok) z@uMhBpFb`8Gp}-wU|w8sZz#NFZZjP%bOt`HJ}rlU0OU7kZJOttKl8a=PweAV{}fac zu0@$iKKes;oW~drGpBCy_0JjVytLrAP~2!_W(`qgiBBp+Q;yRe-Sm)x{0PVRZH&58 zH9`DgBzckfiv=@A40*uFU}}U6d;}(XKV@h|RDOZKXXtZmm5tWZxhZNxtf-u9m35_0 zKx9bM5M>w|?tDOA(0hipDSln)Q4GvG3~sXaw)+BwLHX|qRkyBCxDcALn;HTC(h zFK(C&CL8@$J`3$BH{sd{RNa1^59PivJAWrgi;#Wd?qdK`T#GRR*%cXM9{RQ+mow@9 zb0C(EmBQO7q&u6BCC)fzY`eEnyu}r|TMk6S?c69Xn`S?c`AI!`Lp0mC#NLj;$pzHg zuQ#%_tsC>Io}aT#H#(h~vrM-F7TQEw#CGPFt=CrvXsCx)XDLBOK2p?a1#m09-DKxcjR8jJ3!(y~d{f_Sc5y@`H61_I+j&Od9y#wHcgYNS9OcB(j@JFJM|75mn?%o^a86>9A|B3$$Uk(t))+`WHoF7j5QiLeEPSp(Tm zle7?EIfW?J60}5qbQ56M=av>kSlfvLL-VlgoTWa`z#ajf7ufHCe>Mu4kS#!$M*t!e Q9&R2!WGX5tWohL91HwLPVE_OC delta 128070 zcmX`SV{|1<(}o*oV%wS6wr0YKZQHhXZ132%ZQHhOo9Fr7bJkhCR{yBKRn=Ej-PI*~ zL@A3zwQ6)=-0UfH_MkL?CRLr91ukTtn;NDL#xhl$Pxj_OXFXLg2qX!iB2h5HdK*dF zCTg;-&7K)9*L0p&-D}b-B~-6{ZYR^@DDIDbF0Z!ThLYTp+!IdCJ!u>mSrVXXKwO9m z!Un_k_v3<{U>`GL!ZxpFt9PfD`N_@F{u$g+>TGUfymgZaVw@F#B3_t$)p9@pvu54N zO*45OJ!9jk`>UQV$$rRV($;RHh-!JD|77VRw0EkXm~Rxq z<7E3MIUF`Kxl#m9RaE4Mu86kNi#*at#Eie)K*rtLX0>V{VB{1vs!;-)5hSO3mh|p? zG^;Mi-2OFOieM5j%L12l6qp2h7MBE8+0FzG)xcxUW_Hp3Dw1xjWA*gDm! 
zlzX}x)q&0%fKt{Wr(LFRF|_3I7tGDsGFXKv5yd9~g#T7oH?6OJQ4`Zn_5^O{i956~ zv7FQ{hKW{4XKGA()}19fcA5$e-Xlgj2g{K~W9v|;#5e*F=3b7c;5?<%k^>gD^w>oR zYogbpEY;Q?m(Iniq+=Y;u1{tfCR4BBD35``fnx-Jl!_3nD>FovN^{(B-Czu?l&GRo z!maHW2O;B)=>d^QYawov20h0S5>3Ee6VC2GYMz1dK`_4xQq=5QBxIIQd61m zNOsSp(K_5`dC7RhLpftb$rOvZO``JEN`2+Z z7k&l0O6vWsL;jv@b0N-@M*E~o=~i^)WJ|LfSEyC(IA%$bRLqe1++^4BzHno!v36LY zJSU4d{y4MeMW|ii*f2{jPZTtDPU{VoLQ8~@_kYY{)>uMO;?Kwn^o;SF|E|YY|9v5Wmd+oMx-X8wcg8+Q-1OdudI*McGek)RzBJ;-#Fdyd4C@?@OV@z|c@VH$1i$zjq zQ@Ei}OAnAIwM!sF0BM#W*#cq7*Kn$#-yVb(+0t`@J1Y+`80G;jg7|-J4Z#NPyNeQY zGGtcM--B&kWT5x*J#Ma2P3fTz?6N7e|JGn_f4%$4fS*DGHyYu9b3$k-yPnYK4W1lH zpa4-*ca8u*N9s$*WjN}DtP##=eNzK@uq|Rl=Ulr4{#l_AGR)v@O{qQ8CPhx{laiE&an$~K^TVFg)|tE= zMwx_k*$bbXDlVg)>m!q`eGn{1pbu{%IHKa=?dE3ah$S;cGtG;+{Sv_MT9lFaM<4<> zBG?TkkboNx)D6hZtthG-w0LDkJE2 z&3IMxl07E~@$%R2Ly|3M=31Y}vc4Rj!m`u$3VQPZ&T?Zyn{KWT{LCW)ueh;bs9Mi% zYWKY_YJdnlN}x4B10GR=9hj6H50p$n)hVnbM;ExNU2z%fMoL{*i>i@Y3Xv%@! zaLxk{2FA<*^#4HvW8nZYQ&Bf?@N<9y^u4EHpmT>w!%2>MNX+{(a;39=pWC9PMDZfZ zIe6(f;kqe0Fw6ltwq{q3uelugjO%`NVn|Uzh2>4xGK1Y@4zrhG@`(IFtSuA+NfV9s zyxm{v=55{@DAe8{KmY4c`f_)D$f@R4xzpcUMfA!M{#^R4NOnz*%^eqDS)~XIaO@{g zv{5?hH3#wxgd+E5>@=`ZuGC$<_+JLK3FGI&L zk<+0*Q?9zTD1loMCi2xreK&*}kW88CI97ws^`3cDFHi zy@<(tTkKG6-Ag*y;%*0r3@MZOVEm+CSzYj-e?g;f*%OBECj2*E9U!aOqBBJIR?=jG z?`eJ)7LUt<+W#A%o|#!-viZuyy_FBn%G3Jl9xi}J2E2k85~ygSh9W8@05-4Ghy+;b z`_x|l;3e@!`+Mb|jvmK+MwbBX|4Q&GsE>jrDN-PSe@H6^;`#*ZIUT)12wM5@^VIh1 zfH(=tU6PgL29$LwhoAPnIF-5U$}_8sXhAdr_KPEZfrOYLziYr_fS_saoS1@`JWw7g;?`PZabw}+Kc(x4uE^j!Vw9qt`RASm! 
zz-9QY|MkVNc8vMAhxftg!rJkIYdq7_GD&nOwk2Fcow4Iy16dy7Mg=0I|7__K%TOVl@Xe1FDJoi{+CsfQ$m!-m4L}?^#4w5 ztuV40JRSqe0=!bOyi)$u{=u3Q;BycPWm12uclh;Sp$V{EiOIO*5Lnu)O%|3DntUuuxob?fGo?1kfjG z%p>cN!q!P(*#w%0MC(%}!9Ga0y;g*x<~yeI5jt+D+F(7YVVxZDN~*s$=W{K9;lJE` zU04^2;Wq!X1FeuQNHnZ!YIwh{EVjT_w?J1{0eGeKf7`!Q(P_RERQW()bkl{u;&Mlbda9hg=SPDQNQ;XlU8&+rZP#_|AW3b zQHP$^(BNWr*Fft}j`UU+g+W60=#N;vGIu(w%o^YWq3k&_C9samz&!R^$(}x~xlj~= z0eHzpGlTR9N;8-;I52*q!IcyGUVG@DI|)MDH*Ea*mB)lWJ7Q3@Nwc2~Hna3g>ISb5 zbMIH<-;bED7?)oxSc*cia;^^jtW%UhFH`6&wF$%$f8NOgQu^nkGK=o=_6TT7Bt&gC zI_qPWw>sei;guR0iI9qH2;|BzcSU7AU;&fX_aE~}x?Oj2?hV}SX}3{7Uf3hTM$i|+ z7q43ahu=GvtWx=B%{WVm=s-<#=t3omZ?gUVEQM zXyvZpbS6qkA6Q1&uB8U)b%-pcSgglF@abE{!Nji&8|pf+hyn1yszx%h#z7%CX#n;c z<^bY=LJ<*Aeyt`8i77pXb$A~M-X z(`oYkjx0PN$#bdltfEs^Zb+i>8`LP;KS(y*2M)gG7uh4CKF;oHZS><CbnG)wkx&Ipca!bjn*NNaJV5b@ZsNlO^yJ!wfFecul);bKGbTr0yxYmb)gRMxwZgD+*E^?aMwH9`VZH> z@()v`>j(PH*T+XUs$WY#k=Y@W|1l69loObTgAU3DoI|H>*btQk2h_2X%ph0EwHq?W zTiPeWx&(=rQBfrOXMGc9)pm*Jb-^5zvwMuyxA+Bj3eDk-f}vPvd{1yw#++>W?s@vI z^8p5n!GSU-oY{WF=MiNrYoLnHsl zobd9Z)i-{%f>MVA08lesvpQxr8cUsaHY2&pKp0BNcJ+=OzG$J=f|?4s&&G6!NE{hl zvt6_L8r$`%F6yVv&K%FYqBabV)2lPA)0WpZIr%a|R=cdiid33(sz%t#G7xPL1MA&U zyr7|s5X3u5dIswT!eG!8L%0pY#rVrQ&%wZ2^tI3)ITlP*0Em5I;Qk|MB#l^Ra3gnj%g~tW--|Q2VxY zCJDCrRS^CzsWtIKy~tz`nYD}5EG#xTl==k*SaHON+H3KXE}fZAeo`}cP_8>5F}~$~ zp1*qorUYMn5OW3Zjz32b{r2rrvzlf8uTBAF{eR)GDJcz@h)2~DGhqjPf4%ui?x3Y7 zwcY`MSoy|WYI$h<^U}F*yDlFcQIB1az|``^5Gv|b-jjE8dbEJ-HbPrF z8hWSEpIgzT?M>JS7Ipezs50j&Ku?r&scEf&GkcD&425o2bRIyq==bpI@#qSQ6T1$3 zg8N#Fevum@bi!_uVUqT@K>$rc>4ikUrv-LNev&^RptZZ_^;RJq&-(>2PyJx9h0G(? z+EXc~9Zj%ck$UU${g_9^zlhQ{>U;flNY@>wE5Dd-o7gsC`$OzpYRoH{XMac)78JSs zMA&6*RBL_A3?9mLwd2~4Il8bU+lMmC)WG;~b@VI%Xhe?i7}K@r@2Pvd21gdn`iom4 zQWFti94k?eD0~*k0KJ)jr1)EK?67_pM+&-&+Q)#{N5PWiX$Mhjr%ACBJbpKKGf0=_ z9{V5Unf#1WK+wKHlyzbjYSB(C>Mk?QFPm8q(-*b_XB{sg1>78BQw%fi`ppJ|qKI2t zTIZrf8nBRo;^l=F4J$^L8@1X$-TrC4w&D?I@Bb zdf{JBmo2M|-A0B#xycZ-;h%s} zEi&1hls4$N3hE92GN_|7JUvJFY}6uJt=B?MraBB`suoqTpef)ksrR_P2?4O-&? 
zL(sW{&CH)FrC0`Ar&OcdgU08cOM2?nD>P8VAUoGT$~l{`4mv4Ud&Y+|g0QOieqo*b z6R=`gQPU@?-wchP*w>|=7h+>uP@=(Q^39barokkE6t2xUEViZ9?Ds)2^DbP}HH^_| zLO)mU^H4&Rfb1_7+Q?MPpJ%WQ9jYh5$QR2~s0G)o?;C}4g7h*Va>FnS|Ge?4D#evA zg;(H({=!{FwF!f37&v=HVjT=EWQyib2$g0llpL$PPGs%p27S3wggvsVLT!~?XxbqY zb_Inkk{`m5g=v0i&!0xwFTLd1+|D>HMTzmoDnz!cG>Uw`s@|Qwy)|X4$#@JPu$rMj z{}B17Nx59Hld^J#1%=41H>Fw;8n3WaZjAYL$oY8SLf!AcpI;@`Qve-q&Nc6jpQf+D za}hjYI052vjqSxo_+=;>7H-{Tz0%Si7l*w04}6$tH^wuSHyzmeJ12)XWTCZy=s4yY z+s#?Lt$;g?H(P?>io3Kxl|})Os-~yZOU1steW@MgL@GZzH?v%)l8TOlCQ*21% zn%#GN{d));E2On=*rW$==fJPSQSj*SE#LFKjIeyx?o5QLUcX^|h@~4EH%H7&$0Zp! zB2_KSQGPzWRH7Z~+~KNlPpBkCUY~pE&z@64uJtc*|o!0NPh~ zTp6g4_4v3JVedyKQ?2V{%elz<*Q>tO?)h8wh0u)H;6}}V>buJM-`o{jymT0)2{zHy z_RBU;=HEiTeUydJPc~HDv8YUO6G761QSm_&*OkB$b!D3#qj5Af!|Mj>j0q}ygd9Es z>I10@eKek9{Ui@)KqqQzhan>k0DU(l6$jgmc^>YMLHD$jACYqAhI+xMLt1&a4#~Uzd%i7}G z8SzeDDi8`w)U&Xn-$OwVhLOkC4*`l}EapZ2(vSxua#?M=u)cE~>;$(mq;<1$UL|S6&!uC$+oL3B zl&o;na^|V%k4o=8)ipYtwIq(97}`TUQ%CO>3G4ilm}AA8BT`R;&R=4S`v^lkrg95u zv)P3YI6Bta?|(epfz!es%%wR6M@|vIC&)hcfq+f;?iA;0l!1(vp~$raT;=}1?+6*QD6#+Bg1cdx_t#wa}-$WOsahdLwk}xS`kT# zy5|;CnCg6!@ReyYkttwmQ6AFIsban! 
z-WM$b^0VjGEAitj!h8Lq6lWaSuiiaf=lbfX0ZBB8$F|yInrzw7*?WmxKNmU05!E>2 z_3Up3t)%@VNr|ySzEl;el&G1e#jk6kSHK@zEoPXM#Obo(^DXp%R6TwZ$<;c*aG)mdW!%T@1uT>=i& z{vXR3KnITY>Vwjn^C`Lbwa$cN5aEU!RmVL30#9wd)p+0iNx8&KjAnN5R9{Z?#o|E< z8v@zO3<|-Kfw}Xrwy##6qZJElz}-~|WBt4|#a6*^GI`7vkivTJPat8|r0{tyrm#6E zQ7(?N%~A71?>wIWODSx2W@08{J3~uYK0a7RX%kyB zXLDjUCKi^IyKZP|fR2q*Ml0s`w%*=Z7Vm|%(Kzq(^cK<<#hGi#R0Z0%($02C@6oCA74+iryaB&kqdW-?3~; zNyX{Riu5Xq%1-EkbDv`$<1$r0k_+*4Ww$&bdWS@^9AEbvfHUBU__}l9X+m+v`Khx& zx@1UYtg>*j>GSnki)?v6^u}$5Nv|{WURNE$n zJy_gU8ul0V{1@lfxeMmB;fJ+qAHY>7>HL$F+g(2a$2QIKkB_%kZ}2fC^BzR|Ao3Rn z0vh8}KXKpAs(&QoHW*bf)nk`&K-wpPB`jh>WAQ2-;J>M@3n&gOL>xS_-sXNR9OEtK zwi}*;3L(6!1u~F0MWjvhPutPlu|umquRj{R8z>{PkD0qVUiroKUi~tUf<+ns8T6*e zhscQD17;I{68o5(1$>gS%|?joCK@>wNQyRTaL`@y8K4W!fl(QN^7^XAVOMnVkYsyU4Ns!a|ad3Ndb^>&}krgEB z6d=h{6*GJWh~1wbjK6%3Gg(^H`v3hNyQcAJK%HAai=+Eu^UzcUXp})S%IbOh`t(Wx zV&;6xDgbOJTP9p41I(RfiP_ zYuLK5La1~TC;$^7lbR5pvTu;=MAS+a<+$YRO+_x9t&Ux-O!$u^MJSvyKQQEn4m5tL zzH*K&H6V%J0>}xxgcWszI!q@I;iJ3&pO{s3+%Y%>P{7w^8+Xfv1Qo3RYc+P^1lOrz zpoG6Xsg27k`C~u;)Qi)JH1=epMl!j>{T_>Gc1Kgk20B?Nihi&NWb9IaKwP{>=kcq> zF*SdjL~UdVyAJKg=2qU)7*(b0_9asy4JnQTpQFPK`!tYQA%2Np4#MGOoN7A&jaTK2M$$!EhQ_&)6Zs5~ZLeT>WNmF&@dx5L$THN=SX?Ri%3nH(y zKyBCiDC6ni#3fG}?`&%=vuT7GHj&FjhZ$b9i?#&F?L=*lcGtjXFY)%$Fx6pNe}`$fu+!>lDG&7xooLtFUX?4hez>Hdq7g0v!X-p!kdjB-Px zH|QaeLbVWH^)c5e=}8s9R!A?r&QeJKB4|@LxPV!$g>DgB#l-)8SWLl*?h?rmk;3hw zQe;zPN_hHLW_d`ol`fO*smQ#DXG8S9;c^=as7BngfvkCZ;-S&}P?-lEe^_PG%P#H6 zgQYOjk5{v3HeT-cz2}X|{KvMR5-dpYg4Me8o^02&z)>60 z@?fyAj72Fm0(%i4ub6EyF(iFnUJ2Jy@_*K2wmLj1!t+Fk9ITNED?`Kf1I;a1c3Vagw)*zPHm=rdrjUegaxoe zhxwCs8>WSGjaEB;3Z_IlAroyloO26EJR@~N{z6r@SMZcR4BhqkmHq!+ZRhBB`?H>e zpN=*C%5yp?t#;gxe$iK1hloBX_=-4i)|eeS)*mff*e0Dy1>ptoXyVd{9T{BX&y+Er zfL4kaiR-i@`=L)!;=4+Z(-)K0u;hwGRyne*C0Q#A%2c*lYKd;|PgUOzs-SL*_AsPj zp^bslJ`-uT=&_yA;LuksiF)GEdhA%Wj6JSML~MWjU^O-3Z2S+`UG4Y=h0dY*mWqrk z>2)L=8k28)=#p7*NzZ74wFqq1S88c%fM6l<`O?@#g0-_ls?3vlJC&e*jNbOhVYXYh zmz@78$`+SdOt#W;V&B(W)TN0UXIxyfM3zy 
z4B@EqdnT)t_3_Iit~*yLaD-SAlF%!;!h?v0!)oEo6;SPTCwxlnQkX`i&rMC!$S}ep z35Y?5?KG_099)czK2e+08F%IGz>dfJwzQ{A?==?6qG0_i3z6X7hslkf`+v?Fw1oK+ zdRtY}IN$4DPqO~Xg1o)hM4MtjK-|)pX*zk@dnH-dg$EUOE#BHO6TdF3l@tS2@LFl- zZw3^k#fUBy3I9aX#oaZ%!ZKii&`uD^S~p^kKXz!6%6q0A$?TbG|q5 z_7Q3(-KB@0e^RIY_OS{x=Db_GTR#{0M0lC5(4yanJiHT8Z1gmy#I@5T+S;YY z?AMT3-?-J9b7A}ly*CK~2!6X<(czDol+^>xc_Zw)DKy^Iup>o6t-+6_z)9C&iIS$} zwP)1=TkaO$AhU5%S}Qi@LwcJ3Y)`9Pil*z%RI@q5?pi76Ou(9UR8{%Mn#4j{k0+=q z;9IqM(r)9=#n&>!zawQQH@ls(J4^psii`YXxWcMh)9w%Nq?A|#Xxqq>ZLL*y|3dY+ zLuHXpaP=dj@5U$fazXMCdPB{<6v08`dSIW@#fKKLeg|tHB-So9jM-=GQV&}^!pkha z&3WHVXMQR+8~GU}FXb6&^{~5u$6&^Tx85Y6NQTN;G%t-DM{yQ79m}{-@fa^v?LdNr z_CVuVLqTixozcqzXsUETo?lg%z0C1MR2)C9eX$o3hgU}TWvu+LA3w39zu zBlL_qp>4U;`}o;!j%)7SOBPZf(WR?vxyS2%XitdmHxg`!V}{)_EJlOB7x&mT_gFvv z%`JZS(5?7(T&rd!KWiGm;TvFQzbN)@+87KR&u$viTSDvyykpXo4sS<8Xchey4o7Y& zbM`+TxEJsE)Zvp}23deUaNaG19Ms@=7&jX_jCOM;yf$#@MD<0Akj3v;^JzKp+cS#C zK|&lbL&WZ8m{SA~slgMr4&7^jg5SY>h9qPQk=x)7nZalB#=CWV0UKBeCb}SGHs!}1 zOyy3z@I@a4q`Dv>8yy?{iL%i86O|^2Y&hAT62|N#6m^Ac%uGB?NM$&Qk8HFV#(Y6Y zIy?!=%t1U1HMr`BeF4R6dIif|d=1NNiV#*e5{bp%m@}d z!4D`(4`>G_GHmUp#He&EZjn2SuM9f%#z5DX8}1FF^f#u>z{OHXJ)s`%F>vJPF1sn5 z-%k1nMi~%O^=v?A2|Sas=^Hkt_ID+^iKSlKozP%;+9$|g?>!)x5=vwf^XDx|ZV5HD8rmO%K@r1$mLnGZe&v`PLolKa?kF1&Cr(#IDXk{Ww2s#& zC*|DT1RI;R=>4opO_xnkxwf09Q!^FUPBm&*(8v%2;+&xd23LcP_`d#8Bc?N4JCzuV zL}(u_B4-;+xet%z<)b44;eKPr)GP|_7o2hCU%r-{X$B;)xlGATEDCep?N{wiN<_7FIEIg8_ z_3Q$MvW)4yYY8O)`5VKkU0BEMaS8I2N6VOcu;8vy<_q|2pvgZEgRambn!eyIm;Ef(f{w^x$~^^XL{k-b z!CKj8xSw^$Ksa%D;E_uYL_SUH*WDfLl{G)aG=uGdN)RJDDfg`jDaE`}Ss&otWB+9_ zC03^nGk!M!;hYr-kuA*pcQJrPboMMNI}Ju#>ywYs4kg%O{KZ**ke943-ExEdtDT-> z=D>aqX^9;*PgH@oDK@U7(hDV*K7bQ5Ooa>UlCR!f?eTjpP;KpZ4_TtAS5CiI+G&Cl zj`*FuRpMwx+R`w|T)N%a*GzHTY(rTMt^k9-BPy-)ncG$=J~ zhluVk9K(haa#kcIwZtr-Kw1iP9iS*J;(T?qB`x)EAB(bPdFS6ZVYa}V;GbZfwf|{F z9Kd))Gzb=EHYQ-6CH;RLkq-d!$m^G6rN%Khwswm0ku6YJ@hJ3?dMt3dDR@YN6f+m{ z(EWzy=RX4Vk_H*;FMFNPL>i=?o!!``yH-XYsS9>G@~FY9!LPh99jGt<4pUko75J0t 
zuF6pPRFNacA=6E2%N^Fs6&ikgd{spGqu%NH#ealp8paZ<9tmg~ioalrY(<2vZ3?R_ z`cg$VsNWenGvz2*Z5d$O^r6}}bZvP(n=d2y+$CP=2U(8yTfsy`HXBSI2v&5NH>BPD z6>+7^mt?7&Xvio&7y@3A=18l^Kfd@RA9ftCiMKs-JlN78{zVKsA5uMm zy`~dfhc|C5QhIq#eIpe}<9ew`S;}&d-M%k1eJ1xxt40p%tHg;fUL~E3H3Zrz^@q*WP1!x z?@uf(mkyJDZ91d6lPF<^OH77P7J6nZm}HFkN7cSMd*b;eswvi<5r4x{pi3vW-+fsI z6C|OCWhDSzi+(NKh>TI`=XhhN<;K^$AXsH@n@aTJ`8QnmwS8|%EyzWV<;t9NSNxjV zt`K5Jz5ACA&HYs923Hp@aYu;WI;V&szqE1!`OULI+f^xD^UqZ)BA*YII8#4o6a6DW z4EO$^3jBlR0VC2|I6-650S?1@X~WufD62&^nj7FJd#H@;pVWQ(+%FmZBZgykavR;@ zvc=@F{2Q~hV@UZ%EFF%eO0|G^&i7jqNn<^g2((Br(69=^*;h{{E(>(q2`Z)&{Wkrk z2v$7_A)Nt(QMXP{QQaE*nX1?Qd<;VRsv+l?5~qD+kw4(EgDuky#;Np$p~gQ3rMK!i z;)?)3*R>FH$dB03hQG0BBTaKO4y0iP;2h4)dL5Rd#3;|G3Zvm;2<56g8ZVvXY3d(k}amxus1a zvmr*}X^LsDGAg|ZjAkP!BUohn6{!~N=1XtoO&?UCd}$CDRQS`xEH(J~1(q&Oqr?D) zSx2}T_K=OXW<*uBlGk`8kZ_=Dtw7^gNeio%q@ylG(FQE!%KcnNq^gZ((`>L(9|u}n zL0|Txc+n^12a#q+S$|eUdzD?T0;$%QzVjkmBX{m+#Lt|#?3TINyx3_g$hNJRPZ3#=$ zF-CJJ%NfL7#v<~SrtGllg--3dII%-}mnA?xb%Ta0+E(`dOk#V2C56tROrGo^+FSA_V=@hX3%RO9B zW=xXluZ`_X@F>wh{dBtRj=!tGsj_5u3_;4Ee%ui$PF`V44sqDHn;gy-@Zgo?C)DiV z`GR31a?IbOFlu20)H`mos>h6$GM0Jd+4h7AuY=hF|iBLY|mgI+&}%tq1j zIdqWST!^xTj{`9XfjNQ-lONUh5$v55>P9aB`3{2-rV@p}9A=$E8AndB3(URGhQUVK z1F?sZ)_)y+;$v;d(-uRzR08M29;7CepAp$fdlVTh$b_OuAN~dV>4)}P89P7sJ}FM7 zw{_HL^*}81iFh@^4+Q`Y!-{wpf@YatMJL4Y!p+Hyzw(CLOBPIGC7<`c0-34aQH`GJ25keSbcx-NDb>&1z(%s?8?2}8RM5nPY@FeZ4ufUkA30SkzaovZuzrCS2VRL zbF&}Nn9{y&5BuHZ{+sYa!jDrwW&aRlt0UPQ7{y|weeM!qk8$*6vCt~tq&E9n`4~wh_qzC^InuyDbw>3g9>E^+%CkKwY zRr7uf;MDa5TBNv|CGrKmW*yBg=jP_e_U@dqGCjZYe*zx66Ez~s|F1JJbFg##zdD2F zhV6zJvd@Xyq|wLU>r0nrCa}ZfMj2gt8paFhjb3Tf^6|z1sS6}B(BE6V^H@DaWd#&xOs8Gl_jTb_R`eM z+~Nsy?&GgcA&!!TsMz40vZ%8x>rsFjG_nK1n{c1;LuQN7wp(|?t|4P{NT8`9bIdJ` zStREB5%!pos}YN6scD{iP|Xe?&ADEL*bRuIM?{2}-)63RV~eq&?WE`MJq>X=C<9n)AXa`7Y7xB&px?he1# z9a@=H`T;}~qRQT1T)+WJl?jIUPa{?*e3*ALNKqYuYMlYBEH%q6$W9&_)nKU+Y4D)X zs=9LkBR0|BkSTc6g1Nq3=K6&`Slmei)x}48CIbFdqf-KYIwPpMJ{lEiW@>0Z0m*tu z+7k_(L0sqjd0c$m-NB#y?P=f!}1%r{Jy)-Jm^x#sBuXlW0*5z4Cle 
zMQ5CV$a3vgoI1nWBLidpi0_`j=W!3{VY7jKh5b=bXb}?+e-5CPl`RGEl$gde_DPOxwx4pZw6rVireKnP5P8vLUDpFDvpfQ)$oVkBmThkfX7t<+w zq<3`WvZn;+f`^MEn-eJ>tzjw|;YPKjO{Fxayo(!zUcnUiKBB{xI_X_}e-7j%sIt_0 z+-Sl~iR^3{keHNJg>3+4RDJ6iZ~LFFj_l+wM@cKd_G|{z#)R8@rLXhO>_w&h<39>n zV7bH1N;~1s6tTpe3b(m~lQ1L^b=iZtNkjLH*l1mw9CF$4EuQcTH zfI!p>BsrJ>L#k=;P(oxgI(`l}rX3VHGtN8b<6Czg!l;wpyx)L59A~7r2*C?N)R%s4 zH+$LNyL1B%$)Uq-SX!4gUR5vx)`l12>cw3|QCGcr1(jXAhS<{3!Z@GmJ*Y+CDY>Zd-8y z3OyU$=Mhz&)nNcit;|53%*ZC%#`jLBN6t@+k#VECge|aUF!Y2LVWtZj(I-s(w6Ak& z1y`K@I>Q%U!F@dTP@t0FQADyYgd3Fos&z_8-Q!yuytHYD3R{r ztvZ;sgnSh_!WG#^c51f_+`p)7^lM?ljBA73I_QELPo9h^<|$F9h)U0MlKN&mOZJ0^ z^EK}~+jjF~)|92#rJ5xtZ~c9#rCZoy4Lm8vDbYU`bzeMKw>XwK$OL@DSpaV>VbiCF zr^}o3voXLwR^g||^6u=J@`v-5hMdQn4kQ&& z)zDLeX>E999zpD~c5+ax?Dm(ZT8q=UuFnXjg0O&6133BEc9+xniOiPK>Gh1hb&9!$ z_)EErEBv`ycB5HSk4P;NU%tksGD}9epQ|Oj2_;?}tGTx3pB<%cR+};@E$ratR!Ez> zITuA z!^O~Hxh)OOBJt7%QcjB50zfxkk|x@1`W4_qTZ_J1?T#;;2K*h*@_JBO6W=)0MEh}& zjnzz==N4`wI;{!OOzJc-Z@Y2|yQMgF!o0j%D_Vw0*r~kWM9_UAS>f4jCI#po@hS$^ zIq6UQ8CDB9U?P?-RFDQ>cC7r z{)eY3O!>L2Fqvo)s2EP*-ZBhZ#J5>}f6}8$Lv=N&WH)Ax{DGh-F z9_t^xdzP_|mAfHo|68*oeNIYYoZs7uf6~*nsaD#dX;8>kd4b^J!c?I!yS~F(W{H`1gsh3UQbPgpmR6w?U ztbzY{kF5MpR7g)z%z|X-m_9HA8SQoeECHU>yfJT-@~RwY&<`h@HH9T`$E{-0+FYa{pPhDh71_i|jtGqRu z(^O6GZj;i6_*jEZ9lXzrijyoe6kYTD(QN&d`q@i~=6?f~$f22Z)?pWV$>1q9x~t~3 zhDO=~O}Ybaly+%%Tq{7l64kbJ;AUq4T58H5rwfunP0-o8nlDI$G=IP!R+FT zCFwI8AqKz&De}N*jwQD;>kh8gz)GYe+q2XgDxQv`(QOR7p}%?`6)L~p z{LqK1Dj}A;Lzp6BzHi#ez_N&sT2EGD5t|%3QY-#lHobt&ed@U6-ANWPooHR{PDtA|JO>ubs^6>vPAqUA4;T`u}qI)eF4=*^dMaAo}lD`wWnb zMVxQ=A8Z))^ua3WvFt8NIsjPytTc-el$Ep5dh}OPxDg;Ng-B<`E&$}sC8N-_k0YYd z4Mkj|RgKVa)(rb`$ii{@=*2*u`tTKi3pslftZR<*q^{fVaQ_3l69!sOMH@8GAy6x+ zf47KQfLcZA;I~-0bUBpJS18J zd0-?c08SErG;Z`o$Hfcj0~9-->z>bsin-2h?)Cz7haS{%)z!27CAtic%lu`9XbmV0 z{uE}Lff}p+ob6kyj->GxmCScch}-V`9Yk_7D}O)X3S~dOAySA>>B4^)0c&w97y#f7 z*mwIqYWM0uvuQ3*^4PImW7+JxI(gBSD;N0ykVX_eQN<|C^Q%0Frbd-fn~G2bBS##U z8o%L688mHqr8)5Sb?m1NCQ0=8H&G$|Sk!F7$R2|^AR==aTD2n^5 
zl7AMjiPdHHl@;aIx!5r|L}pi{0PAhLFEVVOUIu$Kxgn!nnU1enqJhKI>22$MzzM?$ zD|+auVRJ90d_=`0fYc?*ERdCqB$28vMpn;me-Tm@%A+9{&(^4O4tH@{Tj$iqLIDgJ zQMBvx+9qnYwVmbsJG0#OC8c?N)A{?9qON9LQm<#aZ1??c`-_qa&{6ikh(!(jZ@ZD{ zf7^`>!y%@i0Qr5PHIeQcNpHSOXk?tB_>;0xR2>2a7J!bVtFQ!YYF0K)M|Uvd{Te17PUHdSrJylXb6r6uoT&q=iDW%I80;|NT#MZ&4tUM z+%ykP6vZVpoJ}>)i;bZ)Xy&<;#sCY(OJbFmKRFm^3KZV7y_m8KGumT{zFg@>R9eW& z!x89fQZy4gRu7fIb`mX{Ix!)Ms13K^lNutAH|iwpc1vgXWq9Lebr(8hsa@j`8Fa!# z0ymEU6tE#}dA>pXqlgN!)+$vWIE5sLIwRP73P%LnBj^yDX4eZ_BQrON9eUW%j1i#e zTal#BaqYxwyP_xxlc@}e)aXHU=EpZ`V3Xb?G0>dc_BLQGpXyF79XZ|_p}qQYJ!-lS zk}8I2*M!tZy430Gz6Q%}G=qg^O67XLbcE6Xd%dJ`m`3!oAL#M(1^BjDbrg`u^cwaL z>NHx}4l*{NcI0Q}zgNX@ZriUL3v79873IkV;Lox3ut8~FD^Diy6N@y74x-TXF`qL3 z1sB(Vj+=*~5WnegIQdBLoi)68@RY0FOz-I7@}YMdy_T$$f0;WAy+L4qNV9JgABjGY zA_tQoLc?Bau)JOq{Xsdz_(+KA-aoS6Ki|ty5CnB3!gBau+=PUngEKR+|L=EBW&{)% zAfL?7K5@vw-1T?A1DKRj=9AS9)p)@61|_7 zYn;!5k6j*(aJ*}|;^|ilWsITQi1%YnRt3KSvKsY!Y2M^jQ}=W2DLo$t;yG)1Ec`a~ zzPMA=nbmJ}JfyC0{VBIy$s4+EgMy@6Zs`D<^)23DAG~J`2rMP6XFv_bNK}Wl1?{Vr z_?+Y`P_=Y!5{0&OA?5bL7zvCWz$v3E3;epX_}89JJ|$$aGP-ytc<+_UK=x%w;HrFE-PaPu@L)1uSz!K0W3= zWI*ncI##W2bFWZ`o_hB61|*7}HR_4byyheke%NXb>q6)sU)SY%EZ5D^fi~rg;Vxy)Q*}U0#RIE?@*>75UyAU>ph|2aN^+JDjOHll;TfuqrAb+LkwaW;DQ_mjAmyHgYGjj& z6e4?X94}AH-MwLE^gUx0Jr};pMYZe>daMcN8U|&6BVvF)%KD>z*PTFyal40(vC$A^ zAu>xOWhm*S9V39+T|V^+#OI&J@g6kJzrqGa|08GoU)1JcjOPg zI8)#zU&mLQxvH+g_@76u3S+f5!NutmJm+PkL2I^<{uQ`#$IT1zc>7)BrHjC{$R zh*+JfZ7OBm*MhYMHX%aj3xrB&brZFS3V#2LIWl#x<1K*p+s9N?Ih*Sk=#QmP^G9qI z0hCR_7Q^Lw_`0joDAkO1EOdQI_iI6yj%zx4@9H0LthF1gCiE(13^?-#wJE|H%hGlV zt9B-k)fr<{Zy9hwoLho_k}0fuG7&<~(5LfnR<*?co3B-2Fo9aPXSO>A?(H_1nr zO8B5eS(^a(v+J0rzeJ*9PCu2GcZjFsZv(zEYV{I3MC~tCCg{qzLThua%PqU1aZ^U2 zIL;0l1+BVfa?4SwwGx>Gm(T@If5<_Rcv2U+?F2{SP-x5kvZa81Je8kRg)#EhAIAOZ zLRqefOG2eAzjswe8=~wc%pq6hN`FBpC#pB&1nULJS#@3V|BOzh;@*EPewq0_sT%_u zD2TaHKqs%kVSDsPzRVZ8CXQA5@`1oefEe?kGpn zip1z25~cwb?`E0_hPXZ!?y5I5LL%9_p`GyeYYXIrA2K7{2FEJ~-p>m%9JIvi{BOsf zatefB?R~(378Z_Ww5;`y-jRCMmjNEbc-Be1jM%=i? 
ztX=)#t-r0f@L%p1YjChUx1Q76>JhG$Wy7l|CG-RfN!htML|{-v6WiRVu4D9L^t+)@ z$A&?Ztz#hPIbhj_!698)_x44ux6P8gf}OApYobczQj%qhWJ`OYxhUZH+7k0-J=x~O3)tcR3)7GEj1L8anm`q1(0 zj+Sy!z zUQx@E&SQ1&=UZ-|G+94?*5A9 zIxsm3=m8Gai9yhwR*PNE-q}9NTX2wsNfn~y%kg)H6{1R)C>06a_q@TRsTlvRdErG( z`)0RnZByD8uldqyrgOV)pHIFVj~*zR;?H~H>L5(!Z0B^Rrz}>wSYrkb*0}BUjJ0&z z%n9S@+t7$BYX%>{n^`?-lLVB|J33WNkM5wFhBXwXXWri-L-PSh{vYX7{4|DT{Bo?)Dn>?zYRI-yyT9zCTYpXqK z8Ung~z^AS?BCQaTS$hxGq^GCgED2I;m-eQc(7iroqxa%O&ZGJw5HVrwd=;rm#y);8 zj!17e0)$XYj$F%0O?kD8ugP3ETm$_ZZf_%R8o zRqPEU9qKDXTXMbT@gIF#M}X1Wd;TQh7?kOLHmLxgIu@S2qId9tT)O1{vconF9h4J@ z;ztcc&N>3Ht&HmlF|3%|4=&)%XiHYan)YZg7yIg&n+5WEbR_+8z9xjvX;9MvLQ~uf z;<^u_*ZCeg%;2M;da+;ik$O8lzT!w2&IsCzCAM262ucrS^N&12xAd2};LmMm zlq8DJFTYt*q{hr~36R6*_y2PAiJuyU!?8b5;Wo41y03^ApH4`qWd&`>unWyKKZ|A! zNP67XEY4spnd5`pc48Vi_e-K%&w~?B&S=RKG!g6xHJ}Nf__OAH_`&!vo?>u2ilf%H zsucvFw*`%)*Ea#Ga)Su>kTR2N`*MNzps%_zc27?f)vxBD+3OK{gI|X$UMAqPutp%m zQd#nyZ9>Kl<+eWC<@t}EyMOKZ>h&rrh=lTn}Yn^juKaC z=$LzI&k#L+o{~R0H}1_kHq4mrpI775+u zgepP`Rt2f2u;otPXQ|`-aI;`k*V4@nUuhigs5>3x_aB5MY|K4VI-O*idw{(-Yrs{``RFHa8-3+9Wsl z=y%Up2z5e>S4E@~0`)5N(B)xkS8Ar_%>RdnkjTlL2+H;2SxG~TiBA5Qa1lLIhI0<< zYt88ZZQKmBk3QJ;T^ryWOjc>v?z8k{F5|bzZ+oCVBozCD&hz2oDT59v|L;!j<$?K5 z(JGvx3SMljluB}I68%S8l1>{XwVgI+#cf$E9^&3%cA z9#%tws4+5Q7TK;YrVD^9Eh`iA6vc0eawqpy273q=M-5%Vy#>4~(%TGVy)Pvb#2&(zlUgXvD`<;F zcWHWrN_fyusijjO%+RK(Z@b&~^wXa&WF*gwz)ORV&giOFJ#cZ5|5*yXA=5nggjRw>(A03G^RPs zvAjaWoO$z#U$Q?~J=`^7jqGsfA&dkw7ffH0Dem%&X~a+19t8&`ORR96Oxs8r`MNBfLY!j zqyvc>{N^R=3kq4&5>q^hQlab4ba`CM>eQ%y@3qGiatGQJpLyF(JkLmBr%kB;1xvpKGl{DIn~Ha!HWt0gfWC2uq05E4`-*au)0p-rqf|$k z%5Z4>7qVG31dL3te8Xwi6S*rkG)7;ZGP5CQW7S2P(-bcX{$E05(Jaq;W+rT$$gQ73 zao3zM!6H1{K@LP1umv4<+)1*8l3|oP4D`g+y6}$|-q??u>wl3MW1y#w%#(9tB~Af9 z>3)Y}PbMDvfmVpXSI^+;{8bkbiXmA-f<{1CpJL_exgOmCRW`Yg2D7-bX_~r3d-#qJ~`l!`iGf^gWH?9&j@GE05X4D>L zUvexz1JQC58)JHH_^Bh6UGg0Sgb`J81&r$aLsiyaKu%A_sPPxxh9HTWWB3)IIDeJx z{(3Clz4(I#Dl0M2<@AZdh?}@0^hDKDfA3<(L_Hg;^lPJ~V`Q?@od1vaoXrgLQ3u^Y 
zHsRx$dn^s<&!l0SdYgRwnEMYYV2%s}A?Nj{F7(Dk%Q3mYIt*7%$cO&Kr~hA0S%z}z z7uI@K>gt>M8EovQDJ;X##n`R?ifRi#e(?v){AX!Xt2=x66w zrCTfCIBQ72*dQ<4k*$@Rp$_769_dU}8lx+jt#$kZorbPfWr7B&hiM;|)f5}fpA6pd zbIj)N;zQ>!gdnc}GhEM0atpl!2b<@rh#%1l0OIK*>j`UOYwYCgXkq~5s`=iEDo3yT ztP}$SV`cl_(s`Xm4FD9NwA<4-S)@c{K|BnSqjd3F#sLSU0$cA!IIj}c5QDM3zfI$k zrG(}>Q;BxcfL#|%>=fdb)@bp-upDKA&*7-_)*Srp#+L29I_-IgJVmND!l7j~5m}&( zNuwOniC~d=7Ch2VxnBTUk*^@6+t&DY)2~in9M4n^o&F&UX4nttoapEB&O6u%3D3z4 zxIeihM7KcN-}=Mm=6&k2?~)%-Coxqoz7_2kENys2=|IrrU7pO)0`l_f^R56Q@+LUE zZQMv&MD&kJ@e%K$f-WsuU)ni{e5mw(9wkctf#_LTf5Y4o2+|5(6sI;WRj5IhjLfvn zNRVV{&}^7ed4Lb#fnlVNpD;341aI#fVNtVw?-5IT8@w_P!2^OwzI4dM*Y0(NYCkp_ z!d&d6l;Gbn8VuuQL|U2@JntZ=M!i!@J1^0a$82`^gTSpE&1A5tP|?-AW9(7}iH2XC z)4LEJ$NIrrt}CbaqF|RBQ9A9FCRtyy7uedrbV$xoKd=t47FIETqexWN=F6)wh(QJD zD)rVDPkIt{F>Y8G9y4F`x*t6XHeR z_%=1T9^~oIw!k^Jyqt2EleN8!H^>I^V*8gl`Xqk9CZBhEt4DK3t2O-Fn@C48MtF!Q z226alHkow=F2|D@id5;Wo9bF+YWe0@rz<$@EAyO+G%eZJ= z;Tce7k#)6R(0*QJ!)Gbr?$X8ixCQ$rDCvE&o}Zh)w3erG_6-xYy-P6~CEM!c1{xA-WatR?}dSae?^U>j7Bv%EW;IS_ zs|^-p(Uf&j#iyFoACyTpta~+pxnj9o6&21mqnkZOXKqiB>{telO=b|)paK>c4u_qc zH_VwT=1dOM=)KH3^a4(HmqcFa_+Rz&pz8pwnTk|Kvu+rl{uFEb$6eXH!r#(_lfyg6 zhwJ)RE6jPQel96;eba`$dw9&wAOAe~)S04kyNi)$DcN8l3fJU95bUPm=xXHHh&mh^ zNtNJ$7{Y% zxXI5k*Y3{c0qYyLu?jW$ zZ&-Bw$R9q4wIzSn3KAUf)hql-7nbn%O1(BlcLjV_NLxYWS4P)u^M$ByOYHEf0KU&b z`kS9Zq4tqsAKl0}ZB9x~M8gpw;PYo*9eBc9nV4r^+UZn`=8FJ9#(xEg2kc3M1iT2R z5B~Meg4J2^|B}tV8e<^3@Z|cjUrJ+>O4lt?GGg#ae{P~$PqLTTsjz-6B$+IrCs`V+ zt1ih}9B4qE_M8v~dmTbDhBj~90mg5zao;h09#E82^{7m9i;`ay8NnAq2Hr9`!`CRw zhZ~M1eajMc=sXnzRN$Lkx3KkUUu%nMd`JDC)&H;rM_#@WQoh(&073&P9$0^9 zGNsmd!u_~D5v(w&W3sh)7|Gaq9X@Aei=Wx0t;n3XZ^$=6SO*8)clpC&qG~cK%c?(> zJ^V(|Ypc07Nqlf>ypNSh>N3Y`{Kp-d-wU{Xw31fr@jNAP0aWFQz*2b;$jQR=DT)v{65f$aA8zSnRnJI=^#Q;M5Gcue0v!#<-`~# z`o!5C*ge(rNyyv#E*a=R=0-GdRu-23!8`L<+du*Kx@&+AlOZjY}#1egU zs>xUK;#$+oaCN5Jic<4#;(_OMx@;z5z``2QEgQ)xRh{EZL7xTo4>srNWWq1+ul6J} z%WQxK8lKZSdHZ@3?O7|Q*HcDB%yBp~MqHQ8t+;bDpZ(!rK}kk@A;8khjdZf=gF_iBmj_BgtnrELWy-x!c93~Oc8J-#4F7|vhKM}TGAl(n!sTpvw^NVf 
z_idnc+XYYmplo6=qlV|t1q}&S>&IZlDQV)8!WEnD8P?yu)IT2D+CuCy?5ItV^y)Xq zDY`%#?y(a7ceVy&XO2Tb`xa#k3aRP}I?6x+jHi7+C3+bcWXPbSa!RmCVCK6)Qn5_T zgdf9UKZu*Vw@R`R9g>ruwbh3nrk#z)P=vKwVuFBn7RLK})UJd5Q3bbT6o`T&sW>z|1 zBTyQdnw`)_s58xDSg(x+X|e%gb=oQbI@y9b&kAw8j)QJ09WD!7k9u|_<6bqvR3Ibd`>w1R_S z@pXQFh|9~}%T^vDHH+1Hz<-M^U3Hz(;(zm?o~{O!sY9AT4>uHKzaXl`ETS{a7#*+^i^AQ+s7dUYLFYo z-mQpSl1HpeDq51MM*40XmtIZfn0i2= zhl1%!avn`#Fm7f|-I7%<3$S#@$g{%)MyBb}x-fl7#;S>CoZbsFzs-9thjpje_u{)E z>wI?W*fmvb1Q^_KB{K+GSQn=@sJ2}*O+-i<&6p4?jS*zZl;@u6YfSR;mTD>%I@Kz_ z^qLs-X1%ke23X%Uiq|!mqdG?P9gFT`8X*U_=7pwtUuY}>L*N&Iba8@dP;L>KX7kk$ z!rDe|vWpmjUuf@BeVhC)Cq1SA3k#671_?OVjSj)c@C}#BvHV@<(JulD*tN%oAd6+L zOiZ!fz+jq)+T>XF6J(hNhNI&tNCsx3OPcl!w_Dj5H^h-1DlR10QR6%$XeY8Y*IouH z)|`O6GX(u4-(y(K{Zrwp>MbBhoLb=&I?yTuEG@mrGr6KJnO!f`-}y#Ujn#@fswrEw z*{@-Cgw!%8Os|*E5>VIzMEBm|Em`3ejWP53>|ps6prkoPD4k*GRFy=ho8o=)po1#} zY7{!M2fkzdZxJJRiry4|aGX3DM=34;LrQp))*`~CAWDFn)Bwcw>s1m*Qfx1PxNgtb z?#R=f0e0RM*-7?IUYY+JTWuI>sKP$D=8D9%N!@&dW3UO;A1ewN-`nIjFx|O4(zYrC zO*rCuyT4x-fL+(CYJTHvGA((U8)=SanBrk!s$#l19WmfBdmn$hS#MP~6L-cgcoOOe z);%v$XdvWUxay)XzSRc|#h&ukXR$MDYi!U;IoMiW(d#)zntO{)Y3W;RoUZWK`thH) zc!>}Qv#4CCVXXr29=J{Af7d-_wZU!jPuYLI^RgFFgvxf^p`Yf_4c7eDGapS__kMlm zY2mk$cx7zkH{lt7IeRIL(6(%yw=k4P17KKPT$(akD2YFKX;4%*CkS=$>Xta+g4VgvkMrz)|;^UF3Ao&|XRAn<)0; zUsYU~P)wwBRF2XE(HW`;nr}wi+U2$F#)Z6o={ws2Yn34S;-H4EQYTSU^2INf%gA01d$qxPiw&cMo8a$ zh^Iy5%#j5cRgBOev6f_1i*djUwbz;EGOMIlpQA-%3?Yc2m!s5Ll}eU_MoPZ>Jv79$ z?7*XrXZ2Fft@<`B|9cU!c2*qqaVG_WXcE>z;r6yvdaL^6&4^5gN#E` zgN$itN_A%AB`*8)*EDk*43Hhr8rIi*HxhfWi}BPd z_Qw4^oX>ItESzSYl`cr?K+-;hdcitmL1(R8hFd@7GxKIu9L6XCM;1&(VXhkxt|DjM z-}E<>Za(0d`8^!Cm_ySC&$^zqLCl==-t&*hPi=D+qum?E3ti($>(1s!2xwboOj5NF z>MWE^*rDFd#LZHvXwSwoY36H-WCa|!SQ$2yI3*+mkk`7oEE~*JZ7ozZiK?r=&Ee|` z412pe$%Ykon*pH4!FgG%IEn9sw9y`K<&DOS^(9+a(g2xqJ+eHay}W4V|12Ke!)%}o z|1)?1Hd1Ar*4U6kuHG>?8lh17cdg=XjLC^y)>7($HNcccLL%c77nTH31jzwOfX|l6 zxd^KlGHp$qQ!IRJ#K2`BRyOo9L=miskbDTu=ipk5ig6@Ujr`(>o+4r*x#j`f;75Kt z#e5(vj137j+izyZ-(p&59e*I-8LVN`7=jRXq;-P{So$N_b#+Oo3M2{TX 
z7Vg2Mg?$bW3x@Sh=BoPpPrS!Z$R8atoHeL$cmkG%NmT>2Y7p+XvpE=Sk7<~w29mH4 zpqY(h{nvB2fFuOJbw0+sy@0dd6)X%a0XIp9lE6<1>gpulFm=u|Be6#LNqWpkTjlmKteHn6=#0-5PNb!@X-rPM8}IeUyoDjXA4SshzHQ>=VxGx zqceyE{Y5%XiTxLoiyoO`29F|$%BNug3E+nzk?6*PcMvgx%khEC;n}cCIUL@O2E8hhk81Hx9ZOrQI)L9!aBtjIT>BKwZ`(KzkDiy z=jl)PRoOMgluMz)m^e?YJI4CZl-U#lFVbVXG1Ws?wM(YbvK5P!Qs!r0kMjGpTj4T3 z@wYAcyc1!WHsPpCzIk#MWQkEUPCXfK6FX$U(vk%Gcf@i@BjgPM93TA zn%WBklREbD*A+*NoV2>j7aE66xT3bKlOq)$jYS>)ftF9nuTs720JEzW5jNpJLQ6&? zQ*b1SqjuDY}jCB9y_b>5v`OO0g|np^AZ z`&UH~-_G*(@v_%bFOho2LV)yI%vWM#D9`6Q(HFyD{VoTH6;FSH6p>Nwv~8LCsa%us zV0XrkVh$e_3_HH?r<~(?HD_GW84JH$B_8;Gq@X{da!t3Gb=1mKy}ZS@tM#<=Ha18P zY9tsXNUYId2~YwDLlw$$pw%d+QC@qGPmyQn~7> z45YOt2%~MLh4>uz03L1as=KVOM_$J0@Vpt5ocN2mpCnRI52E*x2M_4r*LJNk&3#zy zwkqgvHCzzTm&%r|_}K0=<`R~$aamI7Z0SE8Usd&JO5uEXZ5Se7Yg1v4%RD_dj@80! zbds{p%G!D(2I?(F#imrt+9DXm54O_6otqeJ^15klklYQS0CP*4xJHJd7cqj34Ig@h za*n&|=ONAf8GVF>cNZ25a7M|m&*NpQxeaP_ za|%5s!!Ssd$l{Z`l&sG|Yk_MAB2s6%oEXx4{A}_nX>T|^7mmC0wcESqZJF7bP{nz= zqPr7y8}5={9@mfa?eP>|&da0Q(s6Sd{E?;i*EP+6OaL+kgTerQYGUmNP-pZkNfIzJ zQ1bhCY4Behu~z;A*?dBn`!e5H41;>A=wZiZyoP15i>v7dqSv=@d*)^Akt;{Ci1GEP z(?;nqicFSG-cjs+TFBAb*aB=JV#`(uCHkej)6IALJkH%5ljxk5z4j^5XoSevg8XE` zpjkS+a|Gfg954dW>F-}lopbQme29}VcL zcrRIynQiy@`5x6uIbOoHe^{W7Pv~n|Yugw-yrn-m2tL3S=&K)rKV~WF$`+o#z0;52 zlwYLqC8d}l@9HXW2r>=ECSE|ij*jt<2ks<768L}{@4gbFFZ8{~BMY)q5$TwiB*aN~T32qq#T6u>K(&Ir4#Arh2@tsLYM11LY zN1r}g#!s}yf{$c!Ix^2z&VkVG_DqOwnw9J}Ml(8#s!*xj z=|@^}GR$|x{PB5Xy1!a*h8KzIi4}=rZzYWOCP&U5xuyEv_gnFl!tP8rR!XVKJsA^dw|9^WC?$oIfQ-k zLXTw$?0yIdZE8l}0KPagLrC4NILW{K@x`ac7=5fb5kGY}Hwlbnwg{qYa9QjvzF3AoI7p%zbj~zGbYB27V~m12h{w*aD%~H1CbkDs!$H zB&4FUermG_{*0h-z?;eud#uHe4tvZK`hq+ZlL-_;KlI_Bnqxbf=>F$!2!n$j_K(BY zAr>xk!~+5rjH)IWa@Zo@*7CTWwnO)lPdwPO2p;Ar>(lpXV_-y23|U$A z|LYT?76|LO*5bY`5|BUF!@gwfG_s@b-HsgDH!UzGjBa)}3k_e(Uf7fa{- z-FN>y{NWACZTk`M#?k^}T0MCmT-wI<7UZlh)A z{73mK9oE>Non@OpESnHH)p-w~Q7pvOt#R*Ip@MArty0LC;Lcz5u21z@*vmSt-PctndEo@=8Q(D*!ptU zE<3$J#AZ;$=VhlyRT?##r7Oi<5O!ipXCHw<2m@3QLBZ9x_v6_SYFPikuywfK=7_)| 
zUk~<+j#g)3Ki+*$P|!K_pkaU;kAOFnD7N&G{*SH7vR^UOiTC1kgne46%A#2i^3_C& zn)YOf2J1tyQxlzezYe%*2eHmmONfT#aXNd$03e%?gpjq$#-2!R{o2A2%%s9*6>j-KGDXh1M`p z^NOykWoV01q>pD+pp86ELse)sd>`E0HJwO7{I{+Wp{_;Ba43VXz3&z0k7X73OEt0a z5SG4_YnPVz2dzvAa($!DJYTVkmHVoa;(i%NjrU?f0%nPS-DwOu(O_9fTfG~!!N2L+ z=#WNw7W0TiQ&oE%8U}yu7AlQq1!d1_RX908 zY$wieOZr-tY|HUop>a1F_^P!u#moGMPIO#|55$Z;*kwMO(X}KIVaj!qWWwv68nbgV zosF>0JMWv%B)FIXQ-Ylt;jY{YXvo}^>4yr)?w(+;!!QRB{n(WTt%ocNeYh=HMC(xN(3Ed^qO) z4K&Y|R3-@X|Di)lIH+9EQoge4Nm}yi_k6`YDcRI05a%ut?Q^>!O9Rw8!2`f4B6fa? zyP-o9L5Mn{Rt4jyiQW>2_mqSY~pBf;j<*(C}+Mj8=hH-jm#9q&ntfAug)$oAYKUyTq{-rMT z_A#a%i2Q43majjg8qN@VjF0afzUrL9Q&4Zae|NNW@rJR74%#Cc7gsU48}I>QR%0H| zc28V%X!Acdu?dkb8MQG8xkmy!P|Fy%Q1PW147emeO^_F(E?y9CpV8HyPW@{QTH{D@Z)F$b%M7&skDlW~iY0EsM4u|iAM^WA*CCDY;MVFZ;3QjeF9#T3ZK7WW&( zu7HtX-^TG>y09WU^Fd|;{MuGt}dXG zMnE}&fi{Xp6(gV!ynK$x-{y>D3@PcEQw1ksfGhUXT*ZP-YjjdrXxGPNEd?%YFCBPP z)yZZab--F={*;urhUEgANfXE_8_oEqW`m5I?R|7JdQMC+?XY|bC_kYVp2$(n2qpsS z*`}b#rv5n9WG%h4dSYtqn;0!mGo7U#3!FB#qah-Ohe5$}4%s!onAWl5r{PI=qS>!o z`$#S>SiC5QoSzW!g}Xe_Oeok>CIJ)O|9@djpnHeFP>IVIR^x@a@rqdMS~y*{dN@H$v{QF@+@_dZ6X$b0(cQt9nj zdQcAfi&NJwR5P}`M2H9fSuAJ?Yj>zG6;@N$V909MB>5w0PsE&@619zH4g#UhDC!0; z>cD_?7ie{n*PviP_iOaJoZA2}VDv3o9oxe*DDc+L>n+HkptR-@l2%(Nz?@GBV48p70)PO|q8x1NDr^nV)YQ^TPU5{ zYlZEkobY@TxpPEV3jt|uRP#Iy*6 zyjpeo0?CSNC7DSu?IMEJW^xx5C+6q3Gg9Yv#6y>nHjI{$An!GSK(7Pnqyuw;P8Vb0-kJTUk9n08y7q z=JzV|#VS{YDg~c&ZS!TW?6N@D_aps8J`}3--1fPz7KKl&MghtmW{7sQI9Pgie zuZ#E~_r+sCjhL-o$<-f0uPv^D-8^`Ds?IP+~?riBKj2h1^``)*Z*4MwER z%*)Q!Z(u8O+er99So=^f7^Aw$NCIgbR7tnHobb7uXge54W8G`LZZn)Zo$7STH!Re> zoPvldAXLH5cqOV$ISU!Q*U_v-<<(X@j`Rh)fAOQIb*F-kf!eyv`#reaMs`58-z#^ zja=?51#5ZIq&W-F*U`pKu4_Z@yuO@l6Nju5~EtD#p z1ZM|;Rk);M6dAFmzPOOv)_!%|qG@=TIMJl~5yyTA+MG4TmmAb6Zxb&omA=kUR|-1V zpzO1rL;Hl6<)YV)7DTl+RH7DVf@rsY+233qKJFbnTudB@mMs72k77!ASEK6TL_?ek z6cxDI_Y*lev1pBCb5Kq^6<4viVT~|KY-RzNbe&PiCZ9#D$#MK@{#ze#6ZLGGxOINJ zc7v{{w)vKcea1)UIfhxDi;9#g2~Vp1`ok z6WGKn_&HeZL8G5OaX0||T(P8p#%d8g{Hb&q2L$J-7epPbBckK{wBexK 
zwYeBJbJel$_}UsgxIf0ygrpn#|H)Vmpg{>*5;8m}FBnKok{mp!03h0eda!>wmQfY8 zynM>n&gS*9?2xk6wA%+PV0Y?b2BY~nbzpxNGr#U&#oT}kww7fM7jM+Df+Mv;>#xDt zD*A6JJ&lsA4Vs@@AwS#hL&zQH8IYK2;UthsGSlKXPB-WQpR(Ey%in8r#P*d+PgamJ ztV2xkKeeTSdm6S9umA?>d{ew}yWSrnbCkre-qI5hUcyGXugPWSl8rlXZQW#wW(>$| z4%klu+%Lc%P}?EH^VVrb_JMw@jv~*RcIFxqAMuj>J9Ja_qZ`c{;Iz^2A;vxU12OP~ z(GqlxVt(BUK>0jmF}VCR`K%Ln>zVoGRd*pYqnR%Bc_h-?0Q7V*GrY#v87b!C7^2uA zA`=MbyZoda1W+;nXO_9r-1ex)pInMcoR*)mZsTS^9iEwWsU|uG(96pkPg`!X0^!(5{c~ zP=;oyoDhCNWScTgP7!@eT3EBXDbB$fB3!lM7e>>mDPs;`hoL$wr{jG2V(7()m|2V2 zkHsgxlN4u6GLE2f%J4{5D}=(P@8k0Ul701RiZ0)JGg!Rvgw6@=yRfv$N#M+=J_RhV z@frWasTSwZ1cv3O(dk=Qy!?sBBy`wbN*r{fltW!&y$3gk3@}aCv^F510J{{EsP&GJPo`kJbfo!aJBr4ZB7oWi-IVjK1WZj z=zboI^qash22{rI1wZ-0UTwb<6sb`n2!0dPxvuF7o$ttQnbTVMvk6?fM~S93{>nYI zvsEy9?gz!b#{f2I=G1W2Lc{YHmh7%0)`i%XvC0}Cu0wpF)m#14i8g0;$SD+QngD(@ z+M!rb6)yyj7qS^MDCgv33Q3Re;ai4Q?){yHW*sU33pd2292{PD#KovMQ%E%gnzSSv zMm{*ZBH$d)!7KS8f0qtgNo5viD?_#kD-(s7+8Nh1hJYD&!9a$G&TV~PGdNB*mG`>* z(3Jx)O$~iOX30?NpCx~N4uZGlX0E;%?`|9xz#w*H8LB=D)ubzBLw41u6FIf?+Ug3) z`p3R7q4Qo`KmOPUs)AHNASFCp;?EJ~7X|JzDQ=z_Pr1oH+~jAVF+Sow5AI1w#D)n) zeINav-j2jCN*SU0lH=_crCQAoa9T^ouhaybNIHU%i(HHBHfWPa|qn2Ni`1Aco$#400rX|KQs<`)kY3s0So!7gi#izL~h3I(DaX8YcCpspV z>vy4GU)emDeU_r?I>*QL&yD^!9c}v)B0)4RonFh%K+Ln2-@av8Isv>wddS5j`AKmA z)%i!~3ChKC!wYX159-ZmZ_mW#R?@f)ceO7dI2KWLmn$<2Bo9m3~+Cqg|0R z&*m1H8^l?A?OM4%#c~4f)`ohd*h}K|!t_B`4u&rr-8j18gPs#@&PNd%G7gJ594CtD zY8Z0X0|?Sp+$kN1?wclLJsO!JqQ7E@So2A?`U^vHt&CnREXKQ-R3MwG04i-7nIBM{ z#7ALCCvcz?Nx>+fJb*h6mD2fp2dlux*++qHgA%ZOHs=Mv7Nc-2>6`q#P6?+64`EE# z%b}5e{;x=H^=e>3->AU{B%zowsrfGql$va9k$&@94Q~(olPtAVWHHF)ANZjYOGmPs zCK3OSt+xz{E83!UY24l2-Q8UioZ#*d+?~eVf;9w(;O-8=-QC^YojlGtuimR$b*q2v zpIv*bwda~^%x|0(%EO|RB2tLeA<31PkfBA*Bqvo+Zgs?Gv5;R)qENmh84%(#u&SEgbsD?altE@F$h07R?JyLjeu4iNGSVKgHxfJRJ`Be;LsPc#m{SAGx!x?pAd#SfzDT3vjl_ z>W1=g&9qnyOjmR;6>W26-%p~@JHD*ZZS^eh^#`znDbvfNDK>wWMsE`1+)+9d_d~wD zVq%s0;<52i*qWYIHEJ=ZP6%c2sHIEsBx;`Wv~-uh={Ahw?=v^f*a73?+^rUe8`6`8 z10{#_dC)biY<`V>H!>rb@!B5Cn$8H 
zrVJjNNlE@=Xx(YBdzZy1Y^)b#fCivxCd34cga0=HwfPMjFbEDh!UkZ0s>zNsKQJ>8(}Z~MR?g9&r9o>MATlQKaH#jtS8FQRJtb_0W4Cw%RISNs|K zx=}9JptcE*wDUCz9+c1*FVm%2NqH^@DikJ}RoVGd5QpIJn0adG4i-j_m$` zbsKJ^rV=S>Nk77Fg!+`07ZMs+2u@~Pg+T-ch8|QloZDM~*kS;{DGJz&9@0`$IC>ejSKG? zgPG=ckaSxPNu7tv>4b>~z8~OACuzsyqBDI-|z_HR3P_7i2KYLuSjER!T zp@yhE7K>TGq^6a`b^)v}*24Yel4qu_IH2O(hKr+E@v@I7(Ml^Wmzf=vkPZ5FrbJc> zD#j#34b)NVjnNv%jDh6*13o{XgWH0fTUgPEK$)Sw1ny-{rI7nYYpuLz=L-M_Jn@m! z1(LdeyMjl!sCv*+|7^KoN#`|~ptnTf7PaCA5XBc5=m03Hn| zX(jRTaX4m#a5@?XtCs~8gvl2hNY{B!z`x&gg);p9va}V!HrvbRg(OuUJvUc?w1|Gn zo5eDClw_@-$qc-rR#0QH=m+)XbaQscWXP!{+0f9}$Iq^1KIOp~Ei>#p_t1x$F9zomNnI68|!wiRX|YbvAcF_?-a! zycCZeBs$FJ(N5aL6GH$x4sNY}y*t~AfYyDwL1!hrs25m{&AdSL;}D-UD_E*BW0 z9c&f)2W?`>XWq0+LgmiE!UK9Mp70p{n19z7DhVW;*Cm>5yD0+N;Sa?y$AvJ*cXa<5 zU+Zt&V!NIlFjsVi+ke!bKHkJz3A>4Qo`+NhgfuKm=R zI6gnqBx4}hR)CAtHy>i66$on&=_ekIyVM5q6O%%Ege6>&?SD3-Hb=h3U&ls{>gVs0 zN){4J7V49o<{^7U{JrSn|JEg;=qv$DNb>Y^UetSo!3i z-j0e%3HPXMLNuNsArGw8X7>0e8_Uz<>hR;&$X7zbOD$gpqge9`Zw~!*4t7#~p;z=e zGxo#8^ZTl&AHO#9Sz&s1j-k>a8nj6MtNOzM@wW-3%6YqEhVZzqIudci^yboPGntE} zIR{z817bgOZ9_wws*N2Sz5#b&V`_Ly&;BgLBF#TW*R!I!1rUGX+PFEwgx;n9?R{am zef0SzHoo`bKk}A$+E%)C6M;44T6eIP2Zr}H$LjV+Jd(6epgnIj;Di>A zwP-A7jX*=H{Ts>_d(9THw%&(h_-2lb8yKF|@+?P*`9VrofgYq;xGEVEnwFMm2T5Y` z{Y8uSuuKKG9ed4dQvDtq+7H8=tEo{Cn*aa)WiGHs$LsrNbONdrhx;IecsI4E=iGo&wk?S9FFDlpb5@y5*i+Ipa> z;qBf%C)X;N_SI7YCpFOAh@?kCCE1ajBG7XbZV&D!7rlK~Hj#Hd6}n2V@kY?z_02`& z@Y}+~g&!qav>YZ4VI@3eWG>roP+|-)Zvp?~ui<0c#?MCN-Uzu41kWhl4eL1{89Y|o z!8CZ!AYFC!?YR_>LxoO)*IC=3obsvaJiYZJd#HEqP2AntUWrWUkQvB>2R3m!s^nW4xwE1hsW^!VfaD2n-bfm0wlf{=E@L5ZY2+14 zcrY4$*HPa5IMHeokp0$KSyPj>0 zA_;TK)?eHUaH$|(x8-?rl5jDbmY(ol6`8Q8S~EzQ+h!;Z_s0yOSf8}1=E?FW++JW* zKY$eLMBg1ZRb3cF0j@aM7$QTuP3I4BwL^swi zA&PSVD8WH29A9;Y7Z<=7{685RN&IV^z#|6OdvY=?1=VE6EO<_@ueOd0ybwj)`1t{eoivG5RFp$WF!(4?A3&G^C#(xh_TMuU=0Gw4nEdonaKz3=Vni z2%W&X0tbPNM`EWCSPP?ORNTz5Vv9|dIE!Nc5ZWPO6g(X>+Frm;1Qg^DCX?ztaPH+t zJEN<;{#3CGP2~bEpXH$4+vu|)%Vdm-rUfo zaL*s_D*fb87;L@Q>nJmIgqE;mX*EW~N)zUhVOX1#mv)t2d}(YMfx8Mdxa<&;f&BTy 
zW84AO`=*oN5=XtUzB0APYBbemKoDlAb6TOYR3YBlAKl2sW%6GhgR_u*x9Yof!`5@U zTmeWRvh%L4vfI;DUcZ!TODTf~n>`xU8x$$e`5{0&Qc0Ur9;M9RUww*RGy*E4&4Xj7 z`6UY~&4-@pAO&MGz<6zTlcBqp{tA9w6Z4SN>mgQXnoU_HoTLK;e{(xFyl6 zdBn5|65Dhd+j7#w*qAc30dnl~TU3U@kF1AiL9E%Tf3@f-a9eb*r<=Ux`|+L;1?+MI z)MsI*8?kdfSfx&ONaLGXo6uRaYwe-LbguCootn#cu|fh!!$0g@2=dbB(Ic%u>9_5>=!=WVYE57Rb}BfRDikaO{}nl%$sVQdLt4g7-~=Oz>?b zumJvKj^GSZ|f{C0-o;Hw9#ox8hcfl-OoD(~bp;m`LqI=RVppWv$)RES(x@r`^ z&FU+DHA;dhoaQE{59WvS!^#*6%!Ud&>YYu=Us_1FpIBCHGTPc3_!1Qu&)%nAmy*=b z(;)G0WU*`CKgcwwt-hNV#hnV&VvkAK@Dg$gS^(X&HL43ntsI;gmEY^QvoGfBDRyN@ zSg*nMGl?%hZbi(!aNfP}ITn7L9x-qxD?)&#L>&KtyHkV;{cq{4@BlEudHMd|jazfg zew`cD?@9AoC0a|*KvF-59O~!1R1~z^h97Lgy%>X&U0!0z>F(H;2W?_|qI;5U4D>{> zg0n306d#AgPq;P-$PZ^RY^Kmu@i!DL*j=(dHk=44L^bSujr@%P5}6Ex^<7-F@!YK- zW097R^>L+)=m)NNG$d9O@i1-yAf~Apct2zhoKyy(-*wfj_kjz(mxCLIPb<6K3_*R_ zM2k0frB3azPs3nFxm!9n!|6lWi_-xNIIJoiS7e$<_#ywxw7BjNdqjei_{yV<` zVxdJ+&yZ&3oV2ILh)^`9_*r1#7!kTz-E?s(5DxxV?8a@rOV8LS8R&(U(~Mw9C{UIJRo-@RQPAuqwr0sN744LMh*`)9L}+r#NXjDe4Al3YwYkm_u$A=}}cHQD~} zW3xEt_q5hebw2TiGWpL`a1Xz$|sb>hw>rj{E2ZUO8Jc#w{IG z)GYbR?{AIjAxAI%K=+g9D#$Lmi+AhQr?(PZymSwdQQIn{VJV;3ma&r`04Q(-;?^kI z=nE~8(d4NRPe`eze>`7sf=-P>2hVY|L{SHPHdL1X$Y?tANVdE2eYK5 z-M+kD1X7nO8lv#)QCc{{MBL196<(>jxYKAeKsY>XEzaZR09{_e!_jz)c%4GCw6_7y zK#J48YYmBq+++j%AeI+m^?S?99npt{plr8Elrfi6%r0U2m)YYnnslTO&!f!3y$lD2 zJveWcqs0dJhE|(7cuR%5?z;lXggrqzP;=i64~dc^_|z@%apdWXmvevQ4;~Ldeke!9 z7b*^0WBF$U{frYtv{C<`g-HXV69F&-bsg47G5x<#Wi!8if-gVc@>^%|Tdyukt^Tn` zStg6*g0cK+j{>`c$>Ne+?S5Cy{t=@58|bpLmpK)l%SV2^0mX+)7lS!yNJ>Toml1JnA~GJ_J}0tdY2$zE!yX(!KdxfyEda-?Lq{ez5I3UObBNL2Xt;nt5D00nLj7(147V zgL>H7jfp>7=_e8W=&!P`2on1QCEkH1pX`6@#qnmy0MBc-799nH;dtA7I0F6xj6m)R zhf{Ou+EknN(?07$P0&ty<*hAlDFx&- zrwOd|lDPIiFR0^`S)vLHdp);o0iczD_c99Dc_{^X!DicZbolNmz>{s{4=?m$6D0=U z-Vv_*$X)(ALU^l3rWo_QF|^->ak*~ctL2t(==yNS0y)D0vsxQw1R2!zU`OzG7G;mJ ziE>mPz0nrWs*~c=>c009Xu}Z{B9-{_7k&z{YwM0LAcUGhI*vfP&LV<#yS|KJMqSF< z?r-eY=?_i)<2?@jEz)G+=i74kbhq?+!jPwg8|X;$O!K 
z8A$*uF!R6XhiBMtjxjI7>KA3Ds&uOOmGcsF!5}UK_%tMFsh1}r(?0O;vt_4w#kE5m zdpr&nwn4}dxhyzlWcmrr!vobIkS zM~W{79%6AQmWHxPR2wFgV<2`Z@KG~ctlEJxA{AGBHg-cUrsWCWq!)=mi9{_Ln`@kP zY006yRs)q*UP-mdHhVJY3GHvXsFyHrM~`PdqBwMZo8X~Q{5UgOWRl5}yl|$;8pRmm zu+*Fpi6`up(^fn3_mjPcQ$3ma*0ht5-=|E`$#AqNqMB{N&Ij3$gJn73IBB5+zGm($ z@Qo5h&o_&XmR*>Nq_od5yT2x#Y`Lo4&$TsALv-@(k5c6c`AguNh_;FsEBR7n)&ju< znTR8gTuQUnGS5tV=$WhU`OVEs%V1H-+KH3*ym_xNkQ8o@;-ZsLUWmTnjb-u|@1J1OR?`&XWS41d-PALP14+K3 zFSV&?T1l<3=B3E^UF8|4aB{|J$0Yxua=p_|#B#W68)_m!EpOEV{HUezkqy+@a6&WS z66?k_uAX1UbQ0ht@_|6|b@u5#rF4k7Eu_=gQ;DE!5|z+RFgfkwvvlYm^gaOo{p!(2 z*OIbGWhe`feE&g#LkA>3x?-&TMHU?DuuiD~} z{OXDlzXBLRsWJc(;G@x4k28L6wahGiB*APiD`AUSil6_RwI;PyBy1ZWg-jy8T8Z`6=eAJ;WKbI2kyJIjB)q z8j6129zLl#zE{lkh7_GJ75VyYW;1J2@l?pH5*uw@JAB(RU>D@u{-yr2`RgYYqfAM4 zQ5e-2vJ_)IDQW>BMqJdP5ow4~ujg-+c&Xq=;u%pAqL+8RR0(cTbfR3|{98;F3542y zMt4W+Nhkax(FM+NZPI(qK4_(&6j;;K^fFh|bT->N<3!FOK}7|1oKOB3X(#miFCqsn z5;xgc(4zAua3whg@&cR@yuZ%gG9P+w^rf+0hdY-x1iV_?iXWcR>S)1~3*=Jsqs)Pc z5z~tlEtZYs#oiWeEAyuavl<^Te<_=&qEA3`gjveL@4)I%F9}^6gmP0#yG?C%Q03L)>PW(zLuhy&NId z*28sV(!(ljuwKnuVX5lv8-No#c~>PHTvPNcYu=Hg^7oZZ+*fPyXr=-j>)aZ@pf6rG zf#!D)FjY@a4L#$COeygfk^b*J&M|qns5<}@$`>#>;18jUOmG{d=3*QG#sgd;Fh>)n&d;+hN}T3E&{-Z(n+5PylcO-xz%5 z8~Yz*GM9%o>ZC~B0@dvU$~fKDWM62V#-f1n7fy4w#yBCojwda99eYdhL)2E$u--0G z%;b%~R-YFvV0Y>I5)t~@!|1-NRL>lq?kVMW$7zuVr4v1Iv%PzHS|4(E|DJ4>Awec!i+`!C|E1ql`f#8TJ?W`rb_BWbkQ1Fx(6yu%bukkoU8# zAcj%(WXk123i8I%L)1D3O2u*)EuVTC;+oR+>giWfkLNQ`Ge*B&CHSrbPYht}^!v7J z5YhwdSlSr6tcTWq$9>iQDn|L|zAFY9AN`k9r|%S~&1%)>A(%%tt( z59%8vrL;TEOPvFL*XN*Oz>Y9 z{hRmpybuMQUaHgX&J)W$3ePVStN&>STLFOeU!Da6Xj%!t24v?L^WAFZV5IooJA4F3 zppBdw{GU6;KQvaP`sBW1t=F=Ja{MokwsQli&><>-GkC&w&`RmBI;Yo@9CQc+eF}|i za$-Q6JlYOBzB4-0u*X^3_??1rVhf$!j&8~a#jA7KD(un_tgF?F3wclzBAUDk8Km5; zUkICYy5ol=iWx9MYyAdRXMCdPAXfsVCHHKnClmz|>xyp{&zbIc_}WnH)qI$PNdw)pCQZP{lok z#qIC)vn&*Yj%!4jD7nS42O}I1Zs~|Dy)5=glIrCGH3m0hAB$IYEZ~}dQJ7KHgH4A5YF>| zurN5Y;2l1q^;DS~UYcnwAMLPh`j<$!!xT2L$^FAFRZ8T$E_WIjQNi2E$an{VHR4yO 
zO57%rFzUhWKYb*J6NVucvNU(;&c|dS!Rx%e7@KpJSN1$t+Qw4}XfOfoL zFhJ0@FpQufbpVkBKpCsinxImyjS{No^L>&!0ztO1Bc#;eT zpZG3=S12l2jANk)H&dfs+fAuI7HF3!2{WbZ#J?MgBPB5^L5S#xo!ykEeeY!oe=^Gk zhFrlu8)u3Dr6p@}@NH7~m#LZStb#p`gBou0*2zwI*HRv{(c34MTP}P0l|a|&CC@kN z(PV~&zP4w`cZa)WI@9!cZ%1qmiy%RmzB{W_>R@O}7pNlijYl<+Pxa)~aGr_Z=oA>; zb1GaWe5tdcb;F7d(CjgNg7?KcSvlcAxy%J$gZg+%2}i(j&fimu&4t;I!2RbMJzGU68)&oW>UfKMToKO$3Iav^w2J?OKZ4r=G~)QyVJ& zJ@2Tvdzh*;LS?A;y!7QLeS$ASAPwn(T-Cq23l7>}hSQ`ZR!qOE>f!r=8VI)51h6*^ zT^*Hgn*A&3>c#c--nHQP;V^D;tTKSF~d2yI&ctVm^0!S*rH3QFJ zDjav|jRG0qp8rD9s^XY&dEQ{0RBK|RPJ%sgVLygfTzw%4JO$k#$OR9*;bptX0ZHo= zqDZ`P_V&aC$<)&d2yqR^ zXE=mbHWM>-Gqb&WG_@{cx^2vcPPLRH0tLUJkD=)~^NU1-obYvn4jry5@LHlMP`D_W z%K7#Ne7k&0y>Na}U)mf+Go~-Y4B|6Hd_Y{-hCJWD1g~!JJXpnF-7!uQMAJM*h9Tx~ zp8$CpSkyZ^H{k|{Y0Rk0Qji5DkKRBqVnK>)mYcx+8RdMLeWGMB7|{5UUEB~r8!~QrFitkgf7lI z1q#sqBC2AePU8i^zD586yA-&l(|7?@E(0)hN-$M2PWv&%Ndh#wG=fe_@<)drIe(weQATaIUo*LvWN(h!3BE}m@gb{=p|Ey zS1UU#bS-%Nv$q|eZH}gywNz(R5xiAQNdONrICWqVA~_)#AsbK@c1GE9K$3z72Uk`3 zlP;a>n}QtOdL~OMNAB56zsD^lLq#h-@z$^S@${-7FjprEf|v&K9iFFf=}iVyt%8i* zjc$eKAb-Sh|DT$CKiV^!6()Wyx>iGlXgQ;EWZ#1O?JH$)C-tC0aiL}K{<8KT8uE*l zP(e&f5_$AgeXjwoUkR+toO$5qO2h`qDtC^NXDt6zh706EzAFlAB83HF^UX~bYL2P? 
zW*SLCrrDSvuFvg1|KOeSd0M%ee>|pgjG9p|UwCt0_#GMTF%R95&5Xr&xYYIIF4^ z#5}SZQ4te{t?trvh3AHUddXx8YRkh=Ooj0YPd^Lo?UM{k&y4#`WUz!f?J^=8eqGeR zO%~`!`Wu_z4@{;ig`?wg=(sfcA$7X)Zs4`iBf5VwT1c55?e;s$MhC_OQbeQg3?vxW z-A8(+VvP*H+N|ht)Tq%RO?8EYU_x`QuS*OwJ9&LxFi_u)G8kHeBUzGbl%Yv zudhKkY<_+-&a-!wL?gNgIou9duSaB(c*779W$Y=n;R{jXHP=fZr!Ma)?)*cedy1Y* zxCF)jdt!GV4nNoZVCnmP*DcHx3nPd%L#JuG-w+MNZ-j9P_JDgsaXXL(#8J`fpBZjQ zv)OOIn25k@zc|hSzVs`;HhY~>_|(xzw=k@c5(=I=$A?(^fP-PSaNM+wu~I37tlVTg znGkQuM;Ga^33UE2W2;FZSAx||EG$SHDy87m|KXoV1j+tO;gJ3x^;(>KxPWxicZVoJ z#vx}Lf|DzAG&7OGQ^yoY;291~oh3=}*qL3;MhbmZvJGawAhzHHciX0>qfr{vKwH13 zz6EWw=ij)(?#Vec>sJ73t26?oJpjfMNiKJeuAeM?>t zZS2K|BbbA6!lnWi{<6=9=3IfRtwQUh&H%2_45$S_>d|~hQb4JLryqID} zw^JN`&Rf%MDmY1HsF?AzD)j515|o7dmh5wf12d zs_ymN3?@)3>}O5&gKO&S;<4~dtyP2g`?D~Oj6dd1h+L<|?a$McWjaNMf#wGAvh@gh zj#J9fI)mx{b_W!*N5zIpb=&LHP?pf8GIevT7aJ9AfJ;yp&bOK`LPlzsjbeg?J=j5K z1HxRg#Q?uYJ^HsfSYW}M+_qU`IgO8+!^V7j`^jOJRdx zGK@s|IN>UoqvQ*r&PJ*G+FEFJ4QIUO6mwmgTQX|!IJkV@*U-i)ER(q_(%-k1Q@{Sr zKV51r2>J9R)Z@4u?kuDF{y#O`Ac}b&s>av9_jt4`qg`M7XyfVu-{Xk1`#f1Kfnp;KsDt})rnV5vqdGyky68*?C%YPbm zH_-EZ#MND$114|cF^+X=flFT?g53u|9k*r=s%lEr@I%6~-z&|M+ zVn=j=Z>`l+E27!2=dd@w4^!fuBap8GT#I0X~xSxUWS znmfU<5XJ&*4b9{DfsIdOQ|bn4b0%C2zkIWE!7HCbr7W|r6L*gdf>ULa>RWXLp+^bj zkO0@J9pV|$1zm*V#YXPMgyEXFbFCQ=K0PfJMG)GB<<*m7Cm_<&k67vx1Y9zHX_MHm zdKj?U*rdom(5pm1^j5Fm5mtTTsfI|f_zI|gxcvc>VA5)Q(Pm2xRWN6?bKPr<5?N#X z!|jH4|6uG$IHU6OwW&V7tYIbi+}>rOWFJUNwfkp*am`|ldoQCOOpt)J&F7aE#}s@4 zv3;Ikx)j$M^Hx#0nPr`d7sW_-;2cQBFP&bxQ^?&O3KQnM#oc{r8HcvSM zydl6jd6Rn3X@Ltm-yOc32wi8I*UQ9=!r}3~un967(6R^=ih)gGC})i|2Cki|jm}B$ zZ~hc(BZH%2x8ItcYNitiku8P&34hJ=?}eaGZXaF4ip#`XqH9p?RG_Cajv6duDmMio zipY72kQuJEj$#SRev7 z9ObOw8FMwK$y^`>*hIr~lQ?;#U@=wpLT2#YjmB^hs_8@{aKWnx#y(Zi2c2Sx4@!?!=cfn1||%=uR71c?NPsxVker>x)o%s5!iTR1r9N((a>yL-X@$9VCkbA5T-0kIqRE3z&x*8o z2oC8zruN@V)C`vHOy-JWn@*c|P56A&{n3jBLmsGMb|dB;|5)NNF4jL3xjjk_>*82& zos&B?+^i#Q1Yvsza!Yzh21J5@Fz4YxKlNdL*c5>+tbXP`&S$~?_*0ua2*K+HKu!XJ z5yF5G=D^sfOj+B~1n7f$?9rjy0f_GI-E-ij%q32o$_HZ)ZT+mp7tk>u9Y>05*&~|f 
zek$`9yr=S?^gvLx%3sBc7@-Ub%0cX�ew^NVUsr{`qWY47hJJOGikEqaI@qBDV)S z#x@3BnLK08OiKGCS6jDN;bTRm+#*1^mz%wzj*6&<9`(lYF>f+ph~`@TZ)jFhp@R&M z45Vce2(qWbkPo&Q`}166fnmkk{mYI^2v4OA88F{BD0b-eu6A7Okbz@6m`lKW5zWC9 zZA}<*MYeRX8^e%SvszpX3donMs`tmLtml~NsT1~20n*@*sp{lKY9#wIXkR+^rn zF$re?I8auRdIm+)9jvWhnRZ>(TC^}|k-Kxl*iG3IY zx-Y3;c6~t`X2UN!8@!-9^~N|}_U0UV!t6e!@n~}wFTd=mF$Pg5b)qi@0OL`HMwk8S z#h$I3uJ!&)nII^7EEs;;ZeNPeG8gCJ{3pd$PxD#ZvuYLaGGr@mYxL!yqSvh4QPb|$ zOWm&>nZfY55@d>Yp{h`wW=K$j0zW_5kq?qD%=f!;Cj%| zeRuYGc{>U)M{n5u@$9|v>n}EI)W}g(ZHJhwH&@2JO~N;QD6#pu<*dLr<$Y)UZ33k$ z0yat`Wa*Hj(Y0}GyG71Nb37J{zQ55VQq?}0_1gr09ZXu_W4(B%$I+wthFSiX#s(78 z^FIkw`viI7#LfEm)2oA}K-7c@^|~OXnZSPmvV1`7-*exNB)tvUtTa~wk_<>T{sD3o zLRLY_$6savZjf;-3+SOL&RA?39L?29TIWC3&Swk_Sa!a?IBN=Si!}B))xbl`En>S< zK@%9XaZyEMW1-g7r~Qw6TUUGj2%Y5yUWqwA=NbjZe%m1Zs3Jfcw{VCy^}8TYez#}( zk10j^*bLk4d^-Um9#8)jC1OX4jzV;b@~mGPN=Yr@x_o!cq`roSxIT`6DUN6|F>|-S zhK>SljA?lNH|wSa)Sh*{@;~IXzGM%N<^?Iu@oFVW6(~~UzrxT82swtN{TsZ*zsxl& z0j<=pXgW5CHfMdQlRHM+flx|7CYg_v)7X^q=-)C>9$J06*vaAmqH|~a5zKUK>ST%0 zyt;LjNc`Ab$9}h1x_L)^;f$3wU)9@PQTS9eoL{mM-LtTJdW@<{H)70~+AR?&Wz1$w zj6rrpP1s~$k<%4qlvY}bnB1&5EWb6bmnAd0+38|aq{*ATB$udWm!l7y5*Ui<#7pf1}<^@ElfKM$|ba+S3 zERdhwyQ38;I?|l`lG_jTEp3)DC_x$1|NVC-a#h>?dlS2Ev6s+>zs@L_JeFtzW^*}4 z4PM-ft$`>b{WN!dH%7*f{Y{8Z&j9?d44NbfLT+uZvh#iOt&sENN*4|H37pAkf{SxL z8ha5|H+XL_%Pe@pgnJRi4E>{o&@#hTCk}N&@6J$v%x5o5gvNB>>xxfXpS@Q>BLQ>c zP?Hnf@uFSrX>nUZ@B|6k}FFGEVzs21o|Wlj|;S9k}MyZN4vu{8mrSj}Fk{#S-&0gv^@` zixrH?oi#~4F!=3(j9(Fv9ua&AJ=$Tznk4-rUPOBtQSJ2pX<*l|uI?=uG)76WzYtuW zyoghk{b}}yBlkz}^#4-5akG(v(!LfUI436vEeOB>%Jv651BWs%!NRhSU|)#{_(KFJ z`=^oh4+1pC8d#lwIXztTHt6l*s^F)f#TMXU+0$We5#W+GtYKZiYSk6oJG*TlRLQm1 zztnA~7P!We5Y}fi$o&%Gb6T;Bo*B%*LTDi^d>QVSc6ztnP&!E*Uc7g5g9?vU%62&8 zx6G-hJi(rc0oq9sYS`!->bp5qt$~VJq@?)eF7GP3sfhez<1&jpsuSb(bJk|E>r?y+1cV zAZzeEK9kmGm$7tOz+l;2Y?IcR)wfnQejAHk9i!CR3Z+DRxt9vUa*tQXz@oQHH7$aK zQqU4w7_9Y1lYv0axMAqIjLS+L)KqA4S}berBWNZ3*LU>l`p^wu4wf_B-dar4 z>R4222TYuh*_ab`i#2!ZIrNB6j7^J+7UPVC_u|0pRN*w|PqSd0YLZn^%6JX112Gb% 
z@YitIuPOHxf84MKc-_O>W->M7hvB|WyKKC0dzBAWu;PouB)!0jNeLHv+g$tH7$7yV z-jMCS6|^ZFx___UZ&4^Cs8#jv6k@ z{kT7x>&}Wz3j4o_@9Qh@^)UdU1by}JsB7F<^w+G_3hcGZgk3T6;YOyxT_HIu=RTjZ z>|{w#G)aG4NZ7VyPI+{{T^k2kMC79QnAsJvs-*z|p`>#r%iK^VeqnU=%DV+8o1j>! zMoU|YAUr`DQLQe-tG6S^h9vmQ0!cL+MIy}$sD$X0l%fI1V5pjH5u8Z=1&-IHDa86JcvKFmqOO!kiU?-%#dunGBYr#V^{E}xi<|RBf*U^`g>+e zBp<{)5!hNCmyUI&w~NQGP(BM_{8pw>TvU#UUMDzC8|T+ml825h(i$;Ywg=emD74Y^ zS|w6ok20yXun?)n`Gre?9im zBNm^FY5XE8+@ShRFhbSix6~eI1VupbVg#U|LkXxYx1J|5G6=t?clZHh-kew11K$>x zylBQyCGcTRYAZxKc@1kUkQPT=Cd|qr$tGS(ApdLaXAp@@cYF4dOEougDG$>ww(w*b z9O&c3Ehtn*(AsR20G)UBI_Hp-KhoWbP6?RfgPn-DsA7%{k}(S010!V_WiCrFdR%7v8lRNf?}MkEbnEN&Fews>VrY11|uoC_=b;N(#7=_{R6F*(X~h zU-yvf$$+_?vV_;+nx$fA`X)H|C?$C<4H(&6x+=9kwOUrJ=yfz#V&OXEL=A%oW8Ip&3(`p;=v*kM?8j+-n>=Q*!uBGt*vdnn?cO%~r`sGQMi(li$?3$|H zR$-P#)QsfEI!<(l$FzpnN+jQwb*=18)suTPdOdqkVhwl$-sPgzUjQO!xRIO z2l=;=MgQuRHGU`1fZc`bi~8QL`epf1Hl4mad@5I9A+8mN&C)wT2XpEIXiYG8X`YE-c!K~lga*08n zBeD?ol06o1dK4W?eGXegB`A$!G>=d75?&lHY(9lhSgT=LxwoO_7 zbXM1gxU|0ReFHER0V$_InYWIOiPcNk)o{9^?o?QPqC;47<8DDB#&m>?!M3Hy)b$f= z&^?~!&R4WNE+VSt-=jI$EZxoF{lo3XJa>1SQr*?ttEcbDC*h;`O>W-Lsh^tT?Bfwk zZnF=BM++Ci$)%InEG0(tWgop#YbizZPcRW~;WSBf*}K3r_*IWU;ejPZY;X)JZ#jJ| z8%NH*blIW9xv+g5s2{vUugOi{-tfosUq4kBj~*A-9J?d!{+e{&OB zA|G1$k_O2Ja$I=(>W9t*a$P12#qOeQ_KVjX+t2Sg(OZYX95ffsn6_h9A~rB|Yxpn> zYH(YvpPGs={A2s#g648t5&d3`3C3!^bM`LsOO@(y ziuGsFtLs0a;LxI@SHYnXgw3)UMkiY^LfWIoe-2WnAq&8pN>s6rS-Erl%?t5CBrvf* zk=_QcS|UVF{{Dsvyn+_4% z&%$IY^T(p{w!f97Sdmg|LMO?@Ij*^zmYpk1Ab-1@cQ)TZ?tZ4l8REv>YG{=Hyys$f z70arsQL2BJ&ErzY|HIZfMQ7G@?K-y6vC*+@+qP{xd16~nY}+@A(xVPmPIPyAwF|_sdHUEjFdnBq4j0*#$DqoKR1qWEG zQ{RJdCpz6Shx!LobH-95@ub#oLCOoWd*TqST76yeiaD0~CJij&V>VpF({>OBV|2Md z9?jYlD$=`Dw@$&KESze>*F{ughCWt7@7e7yHNYW%g^8Q|DjL^zVA>8-IvNoHPL+H_ zaEvN7FdkCvXSGdN?Oc%Dn35j+&`~bG;$c@DSicM?^n`A48_*`JfChJd(?b|OGPx&x zKH`(lFP|GMw$>ps+WBQCNK()N3IFmM{Fbjo7@#z?g`l#*=UC!y;d%IQtGFmw*=3Xp zY={E=`Rx=y;zQg183&pP3JCiHH?HD>{<9z%3>pj~+VmjYl;aUx1|n!NW(*nP$n?4a 
z-JtBzq!O?D;(eoynV!wI(eBwkx8)`Z&jAc4n|e%6tK?{P5Mj*-4I@L|?7&?8wKpd` zCNS=xKB1#mUuUNM z)Vm0(G*D3G=VP3Up%-K|IE!e9W*h1)&apzXi?^!Xsx!zl3z*$W7NsXf?3%0Srv-a>73b?EyPBH}!~kLc2L8}&s!c|;V-%=(&S9oRykt*IyQ zSNa{x%vO{6fAPZr_QZWZ=AA%k0Xtgqj@eAueP^0)FjG!;rjLcSKx^I79gUM!d37lx z`Q^m;Kn(XyKz>}V5HalF0MB*edcgHsz@NK-b%}jwp?!}~8j4ej(-TDo;Qg|`*!rJ(PFCCcm+wpj@&a3g0~njjBu2QP$SmKi9%GmfY&op` z-M5Zyhhguo?U6Eb)3tT6qNiB4O#bX2&arpq&3e;$C2SLUB9EU>Qxtrd$N6PB)PpEc zBj`dPBpfBv;E(PLAt=F-OlypqXSPn*#%aeI=F`tcUeG7K%PD~OJFd_n#rkvXHvN$kE#8G` z1q~$!*ERH5SdG!22FP(N7!qSNz{2r9Bu{rJ5~wVjUX<;33vtt}4l%mlZ;WgLH6|E~ znY{jv(1|hVtb+Rk|4kH;)$jwaTvU6O;?IsSjZ|~HP3w;>TOrDRfFAOB1u8!b5zBN~u%MjJPn)#T5u4{JqoG5B_O|2;k@vql7 zb>G;uIa&^%T*uuUz$E6NdzXf~XWQ$D#m+>fyDk*?4b{)1pgQjnkxtgvg&Ocj0xp0gOiGO5~{C(6YX$`qc#eCb>Vp zgGNY0V0TIZQym-2hPhKGPH+E(Uvw{v|C6HlaP-lArQ5Ol4q9mRuy1)uO3eblRo0p4?e&aAF?Z&UIF>1y-Pu2UZ{2Y z$ltw&5}d?%3#XYWjpqwE)kx-B2Etm1Atr+iNVWLKlj-W&cPxSE!kshly?(Z-cPyR! zNZsIqt)xFdLP*aU#~uJkB6dsD|EKN&I-`POKyY(#a{=%BpsCxfQ$hJb0j2}jT)0F; zgjrU;7N=e&1L89lN14XSh(7m+7ghqf$p4HX+b}osY=x6~j(b(-;y}FLdoj5Py*ppP z9)_a%-hOkC_wTGPPFDkSDQ@0aNbNb-z|1Ra5R>9*8SmT;qUx-Y z_J+HnNF1-A)sjMF!YWE;0DQrIalF0GWsof3o~!jlD$JMK>how|gLMt*TMb+8AW|T~ zi0pS6xdoU;+}q3s{3B`35(Ov1?mD=4+@?XAp!sLgEo<2hmif({f}Q|)#V+$&u5g|1 zCoCM?FYHgOwvPi9X%yX+ztjxr+cn(Y<*_P0Xd1TD+ONm-hN%Lf0T0;<4T0B#8(`IB zsrsJQO>OP1dRw;;M0}m$i(Bvod@#Rt>9Sl|5CY5;EaeS{5{iAD#&$xSAjeGn__C8J zggTMJy6*nK_oK~X?~$$k2o>mxO&2=F+b2*=V+@IRkULEg#;#Q<9vbZ_*_{6?He&-l z>RHtu^M{~MeP(de18|;Zvq0;7HKFbQ^r681=@;xJx6Q|sGP;ed%FDixj8;&)a2FNG+sd z&_G}KWX4*)(zoE+Jkx5h5z`S&)3 z$`4U<^zd*IDa)Rg^LB&z3kF7d&*3t+`!GIn1$!CUsmKOXKjLy%n&CskxtAsibMURW zP-NPZX>?!sZ4{LX7F$zc*{=iW6vIqCX6Sl~k?Y=JQE~5lwa%Eb%v^P$k$lfBc$A)Y zuwPzDy>9D2ssVZo7JX&EDqqs4Cpk>Juoa0F6u9ts&;}%P%}N+4D@he~dFWmuHxBm0b!R z$Glw2qxd!NXfKJ-6LI&4d`8~BsvYl2v%S3;1FN8HZ_kOBtQ)^l9T)=)hvDX&^zFQP zcS{cE`#azzb{T=pf46W+9*jy2bK)ICr(F9_SF=(il){I-LszZ|xqmYouMhQ9^XHS3 zpPOHfh3A%g1#n1?VYlTn?soumtRJlq2g#!D)XuuOeh9TBNM2lrj7G?&rrQh3vp|m| 
zg!Wz`eCpCE#hIigj+Jtjym)gn{@0?&A3C4Af2sg~_fZGaVSKzXPZCU{a571K%oKiV%xLBGy!{P9)n>*a!h)RNLZIbXE?+={faw6pCoM5c8y zEf`agOwCxvkuX1*uT$Yz{mffb@@9x~_;?Akzn!GHXiKYOQ(=f{UK2Ov&1C}Eztn4Q zA58!w$1e5^B6Az!jl)A5pu3nhB27GvsX=yT5#*qf>QZ8JsQJjVcP))uhb30C^4PFl zvu$o-Blzp~xto4Y>dif!zS>L0_ik-U7j(wKU&a0_6B)Z=!YEZ*W99s?wibcWOIm!( z#CPWTgjWuidS8BbGU^Zn+`T1Oo^!olf4l)nB+B_rM=Rt$2bamLkAshEG_#h0|3tA~ z@6aWyt2`(OPofc&12QbdX}~fQ7dK!Ub_i&SOt`DZ=urZYNacDHsK7i%H|G8zcFmXk zmRX+@)HcTO;(W5Ez4wh?pN!aPPow5nh*q7zRW8nNV^>p<>;y?#G4S79J!V!fyle*m zZL~bv*L;(@h0JegMejcRl*w^tE4~7?54@s>f~a)wQY@r(sjpMq~uTC!h3M5w@>=U1%CwWYJ*ByLVp-o{Y)q$xP5{> z^>^&fj#b!3)BPB^OIimxxikDY% zC9}I-<&p0v>#59PPM|!R5Uc#BZ)4t>ZMJjC_KWGEebpY`o`>0lk#X=rbt`xNgh0dF zVzX2BGB9&l=X~E1+{{zOb_6Lu0MV%RrQO5WXJ1fh(XgAsUZ255^T>NZrua7mXVMuT zHi?-eu~lszg;$g?tLS_3X@n`j)p*xawEWpyJ1FX0Gh1kX=XW%YuuR6}o@h0jzA{^} zrgbxv$mt|q+o|Mlj5|$pZ0&gnM|TY{b&R38AWS=xQX|IDWkM7}D8MM$ir?Eaf-~mi z;22U)hUS9cgMa-!xHC*@2d805zS1M{LVkZ~Kt6Jxg)tf15bFckJ&HQb49NlQMpv`G z(pp3{NBHrXn%CYB$j_ITr!9aJhdSn4a z_+&pQnASYaEX7!c{ah^C9KRh1KcIK^qScM*y--b6t2Clu)Glc$^ylHqt7rL*0@@bN z+p_Goz`$5rptCr@dZChMGT+*vO^VO@m{f^g>K~U0Q-+vU46^bQcv;f66|(J2;!D7&JWxV_xYJ+y7LGvN zeZ{An<7!Qrr&|@7grqs2kLGOO^f^s$k#0PYwg?2qZ71dTKa8glRcr> z+#C_?Qup7Z{6EtTQ9f*^GDq=2JWnFYVeYNj;SUftH9tKOF&CFH~B3s|cmr!3?^AS?rT?cr*kDS&qdbkR51qgRN2-R~ohW@Ua-*N+8 z+^AVE>S0+fK_@t54|e!&RCC0gOP}Rv?a6;oM|QA_(({B!$=ZYf-zvN0PZyPrn%z}Y zng@Yo3evV?MO1|nSn*udDvZH)j?rO`BYOE)fbp$BxogCd0z4=r+E3N~5N|mkF%3-_;uyJ1OkFJE#TpN87D*)apn8nU#pEIeKs`HJjNzo?(G zUNT&}01;yC_%(#Hwb4jltGjk;UmczR$B^oCA_ys)<8kfrEaHOc5yKb2%EXyUoO9DLrA4Q z)D$UR{ThSym=|V$jXVrdCd~29727ZL*t$4o9Ft#jOSF0c+_Ea})l*``10{I1FS8|$ zf7C^+NQG&B_*+SEQYFz=_F`FdGZB_ym|3pkhn8eLP8konUBBT@W@r$h+Hm#KoMZ@y zs0sF@bY?B-1}bMh#ad?lWcg7|m5)v_d!MyEDMh|^OenxM4o`MQ$PoDV^ahq5(L4tw#=R_BY9o`4kR2%ja`uTg-es2jtc711(MKM~4&p8pt5-{UW+P9pc_Evu==WKXdK$tQBqX9_Tx-M) z^B&gEhv*j!2k}33!%6v6n&jw2b5bU!iyFfu4sl#ZX*Q;fi%@tgSS^pfc&=zCM4jt$Zayk1un0Q;)M zl%O;D?L*5{0tISRXpU=GoSCFHo;1T@M>nWihoF?=qRSa;SuN!Lh(yi)L%(_kRbg7T 
zpqEyvrs77U{Ldq6GJ7SLg{941!y-|8g)OV25`ah~TYIDfOE`h|v|wqex~*oKgy-MN4F zCq6!|=+SWB< z9|3#T{4pfr-rf9DClxg|6&3DDIozgFfXB}RJWqEA$F0jjjUZx8LHWkphMjr83;X&@ zG0XK3Ayw5e0TEq62SpLC!bk=MqKQ9>iVvlRf=t7ipt6t(&4{$3e#}u{RcTPk{;$eJ zovVJk0~tdPqi0t-D;iO%8;d;tgyZ$TBQC3mlE&qP)|9sJpE% zYE+q`XcydoAaz4q0eV5UzOZ63J;7;UC6Wgec4S9yUTtq)R?)KnQKsL3U`$o+EIOrO zLK`9W(cFCFnq1rZUL)C!U;$vR@+czfbZ(I%9PG<+&2lRgMKrl;($l|P*W8sW{ zP(@>2C{dKYCkgX>K*|hY>@5k|rXMhNbu>#&Gkd*kP+eWDvvKvz5G%g2=sNt%BuyRR zhH$d)#C$&pQ!NI^0Qaq@8LkF-<{Vl^l;|fxZJ1`@vns=UNec_C_}CE4n|;h~8iZCB zRbf}4EeOp}#H05tr<16A0()1QqiQ-=A^S=!>S)vzyN2lDIL9i00l_||ij)@LTk$aP zW8270TKoaV=6W|7tiFJuUP+8m`!z0G*4KLi<#dP2Bea0Knd05Rigkw8d3>DqrBRZs z35W|CxRB#09@lS}v*OzVp@*|SDF#(3m|v^0j?W*|a~yu}hj3q(xPQvOLx6cDB7P{7 zF0n>o7{SU#!Lnb{kYA6cTyA{!%da1q6ombNzSwcp^C?qL4rV@lb|;zzb=p7jwA#tW5I1~k#4Z4j&FYK0>u_24wG zVbRji#p0^XYC*ku0U8|g8>F7i;Yqw$QDC$@p|V12?qC2OC*#-EWfQRLdt@2wP9my7 zup=@@mK|CmG(Rq}4QOBKkGNB`(n%tZQ3J->7;q!GzlmAg)bvDL%en#u%iyp$7oJzR zJ>pOh`8-Qqp`LuQJmM`^ZxFh81_V zz&&8Rj?w|nMVsXp>(e_aM|y)AzNUzyq>cs^Y?v=`weM<*^wPZPt4%G?ww7yDq{=IQ zW>W*yn}%j>Ra{!OVW+W&ysI>F4JdJ{FUJ+LWVG4hj*k2z zf>>_#>7=y7kAbQacS&%NPuqsqD-JPve(9Tn7@Ncb?Ps5!hdAvkbJ#&@AlV%1sT|Zd zW#WLPUmZ`L$bE2!Wu0uD(PnlFd<>j*b4DVZW)n}+Ha{$(N(trr#f|^%cfn{vtB15Gc+BTqXlDWqxj5S9H8OBDxwI~SlPfy^WGbAL)y72rl4NpbeJZ!!`lxw` z4Ln9!>k;fZ*YzM99D-S5kP{?gZ%MkpRUq&OWceupDcj(+^rVP+utM1+sfPR5!K~qE zhI>Y_o&BB52T1v8$XWf$0Hibknhs|%Q{f7P>=X;8n{9*c%uez?#`uGjXj(~jwA&N_*QX{%XLY0v zyi%5`>PkjOd_Ha%6K1xu);n;Pb&o67q)(i>eduF|RbhHBs2)(JnV#Hr%?ZIHxi19W&UT&}5g z2SPyb`y}kWm9jP1v1}7yB=A;CHu0vy{4l6fnYGxy7sM=l$;|fL2%9dg#QheT>I_Z# zmK_)!CF4dg5c#n1Iz)X#{B2#%2ItF7r3ga5l}gHrqvAlLO!9H?*Z+7_yB?QX4> zzYsT*`YF?k&wAY%JNuO={~hzSek#AQxW82w@O_J#PhtfQBV7Wa7+Ah)?$MG4w!)&DLo?b)aap0Wur{U zW%9nYx7Zc=A+8MgcGVA1p3)|*_wLKHpXgG(14TVfzw=mzJ-BHjZa6yAxC7eK?m1qi zT!W=9O(L%b7+&xmcf^CQ_ExtL)TtPM)05GY;$fZqkGSGbw!iwYKsl{9hlpZExD-X; zsb+%~J0$t`k);8Z-4?7T%q?1+{I63mT#4Ml*jyK~*g*g<0uFka-jATsf{3h_s%xi?aqrG*TC*526jll6Q8l9$|&&BjL9qr 
zmm^{vxCoog-1MteL{HA{R@y4@=Z?!>#@g3XeiOq@LQYOvYrhuu%U;JC`rd`QOM7>v z{Ue!F#BRVSBr@f5a?9vkDM;^6l0Pf+4%7$ECpz(>A4uw$-lu>l%#ZNY)k~SjfPH_> zTUH2_vVcbEFVKzHD0aM0a201Svx673z(1zOX zFn4}B+*bMvm55-R0Byw#h5cX-?cZ^^#Dc`KSL$Lo*W;3v>taQP;y63rgmHD&TY3&i z9|k!TCqiWY?b5$w1juLXw5~GDA!VAMAniU7ojLxQy!U!cBIQ{CgjF4oJ9?5qgQ@?fvQrUr31c>@Zg?a!_wF4r*Ci~nx+Nr zG2Vonq`Qj{*+?UpTq?pLR!$>C8LlZ}rvif7X(R2BwKYs#S($guSWuHP|7E;09|3@9 zZieok?fFk1#AA$fwZE^1$)r|QzdH!4$6AZ+2AQizpNC4Xxhgs_re-9pY4SY>^cRCf z%M2Sy&5r=7m>2P7DHmQ)nY95K*Cyr6MV{Kl1sFBwrN5Yu#U|1ba^$TSi0N)+HY$kcpikkpGhlm1p9FI@Zf#cA8o zY1f_jMg1@$AJ+v?)jj;9 zSfGw=jizx{ajItdn}$!jp4gv-LB#&s39vt}xNoUz$-Jv=BCh*!&d_K>BMGC+j$e>C zf%|#f4eHLBc22uuWv&Br1SpV0VYtxr5vd&cayo=C>E|L&lg!czT zF4TR4OP0+~<^&on2h7z+r?XCBoBfD&>RPcl`?@8b_g(+ERcUZ|^azQ=j$iuztH_8= z@M@f9{khH<$6M83=8&sxvJrpo|9p#z$+y`Glqb*%y1>CkP#nM?D}j)`q#Wsq_W&*c zKn9wVGG#jm)!LeE36piWF0Qq|6<)XR3*6jn8GGYCqAKJHFhZb9Uhl2Bw4C@iHXC$8 z!3o`40e(WW={M~#49Bvk8!CRM(g{xpEWI^Bpw$0qtltvrSz@Kyd}cmZ4|xT;UGQN~ zWI!t{1oLPs7Y3OW?9#XWZCXt=g1+*3%}j7xl+;%^;}AbKrc*ZqhR(lLbthlCZ{c@o zo^TWOQb@4JXCsWf37FIbiVZW^UuVIH!KDUlYyw3`NM#nbuy_*w&FxHK2b}-@4*PI} zcG9ZN5Vi-r|9%C;u3+Ca5W%#$>AC{8f&P;UBuCE$r={y6px_*65i)yBX#>H>>ZBJ# zRt_&7Rk|6Qm3W<%Q*1oNXDL=V9+Lj&kAqZ_$?`qK)`u-zDyPlOq(M-pQ%W-OPE?|f zc1W-`VI!(^5>f5td;ieMG)1EW#!yiqOyX8vAG`^DbH&-it3{hteFsTRbzz!x0b-MY;P!GNV6W7_eU z_)no*8BJ^Rr%z0Vp>(-7sOQP{*1D@;;$0pT6GF4sS=~UEq{a_Os#Bfce0$!aBJn<8vy76~ zJi}(J&kbAlwh*IQc0T?|g;y!OCT@)?n{MwZK(o%3t7j@f_@C7W-^Hfvb$ltwJN01! 
zPaLiW#h9%W7ZZd{l>Ge4@E`Y>LeDtNzZ7CYVC+wRm_ECMk%$Q=2}`1iU_`V={giYf zkj8In>MAXX!p`!OpJCMr0QUH8w8abbYHK8Dd(i6sP3($?&Bn@FEDyT!3VPROaNX|orDCcayel?-3 zrgllNQ55xQBr#D`h!jAGFxyB`5fvT@_s&4mh@yw0RUt`NUKvEsJnuzZuS%STtY!Jy zZzICdM&!q&kPwP7N_b%_qiJv4jlny&0{nC^>Gm^a(1lu*dRnaLiEAXqa#Ui9_g@?& zkT45t48q62FMd-_Gf-*DrkWntsS!!}zU)IOi08hlMxk@05wEwDu$N?+O{dTv%Qv z-u|o^l08#og!5akat$tBqfxWXkYKRIcC8_{R#?&n{=_Hpg}V9R znDe*cE9L3z3w&o^N*o9}0*VgG^i2}QXm{uWy$1(scYS+&b8-NkwLxjy3;IBrKmlb+ z2qf`-hS=i_&&-%Y=x~&1LmvU9(d6Bk37&{tn}1q(9j(Jl4)Dh;tpGY(^oZ+4v}*|> zXYQGrUhk*EG62PSE^q`^6B#lP&jWPyeb1|CiPcRJ1!AeR=U-=4=}{WA)(kL~@iaC6 zj_>B-W^P5sJvHL9EhiCgT~t@@NdXBz>&E%MiwP&d{nW6hNs=~9y9{G(YC8H4E&?fU ztg|%w-J@nDJA0`haL(8I9r}xfE^F-#S>TiGK8x^Elz5rSvb) zmnGiJZL4=D6_0TY;g#a6L{`GX@gZrKP&|M>f&|w(P`$#FiMcEGfO90cDeDM$a zztMgDPDb4-?%Kyq^BRrOTQ!w$U2{L4+x#T(p^Fq(&!!j~rjc_L_ZZ{YM@-K2Wt@-Q z5glHPeLi0PwgaTs#|pazD7Fdn1yB{SCd54_%c^U{GRtfnUjw5V+;g-WJ8%x@mUcVB zcek}lX>i$r9;9tma6&_}xH6WR)4#>&v8w3ebrj696J>ZEOWWEgMpX8y%HS*X1~qG2 z(nn`M0blo+AfVBIH+FY{EmKGOOeSRqSw#=_{)m{qf4=RN5g$1y{%^Q}2l>z#;Owmb zL&4JkF9$(sz?m_mfLKGIoS@x6#UW5tfW@9FRl0359A&F;f3^kAsck_aM6O0Iqv-P% zBXS5cZ2@*~tzG+8Dy0zqw>OBpTq4|5=fQ46-ost^^>QMT6mvS?~;y}4+qY7#?5Y{2a2Nx?D5H7~k z74Ld@-aVzga^lDSEUC;Y{;!WC2|4jEECeNU`S9B57C1y7MauGNyn#sf`dsx_J*2aP z@6475??9pOGyJx|v@s*AjsVH_GzueXK;gcvb;*_sLQ1F45!ExRiiW4&MNV=@VJk!GQSO5~?VLeFg{uMv%+-Jb>>4xW*)oAUYo?G+**{t8I<3gvoZvN(`U z6)OUF(O+-RUU$$hTv4q5jYQ?(w~HOs2xt=+5cB&-1O^>~(gShFK@R|A%stW=@TfY3 z+L27(USXyq6>f3@$L4Jt^g7Wm>|{_8x~iW07e``jEs-VN?2X_FQDJ97Ba5F}n7S?h z*r?2zn+PQkgiX2J#Iu0*j|`G#RnC*WL>Z>8sNH?Cc%3c%TREg{5Tuj3Hm_x)d}7JK z=M{|BKm%3zygJ|Pc?Lk4;+l%_k7nfS!^a)mDGRBQrv*osKC0X;0;@suv>m_IPcDK+ zw;-aJ}*_kEKa`L|IPxod)@t@0|>urFH9M^_T7amC%RMtOA+ zz0t_+10t=x`@gK!$*L9DlIslq5vyXu2>)E0R;yyI@+;RZs}h4(-{*f?jta$Z-=?+V zrRwG4QyR3A0Yu!6djy{n+@1V2TI>wI{_L3 z3Q&#rq$!J18DdsXg|E@?@r*)2dT3TJEq6tr7WRQzK!O;%InsG$w4^F^xcjW&Qv?!H zG5MyEsxfuRziXBntn~(F8a%a{(bg|%op~FY)`2xPeE4OG_ZhZqF?NgXXc>^GOZ(wX zyiRpp7;}^zMYgz(Q@Y~v3FhTKMxxiC0sQbc6>2(08id09N@ARPqPpWEu^MiPjgs6( 
z)3T!sfHRhq+WboaxPi<^p)G ziK8~pmTg$z2}j_W3&?y zomYm|X)V~Wo=*_yP%(Z3yl($fEYG%l7A)SOVN;8#l!IMlIAjg+s51)CZZg_ zAw$j>RScVAWK#{5W$*Elfk^V@H?(OT+kp8}L=< zgDn`U6gA}$kti5u+yf>ftE~F}5mJ~KBR6-fFv~Hx5S))c>yDS5nG5$3fk4C1`B1Hf@ z6|Hz^S}|kbf^fBS=6xeEj0CFPJ0Ttg^Zj7U7gaKg4}_~yEb*C^10163)Egc;c6fI| zQ29U90o#{}Cul1RRQ#vf++seCwG!iy_mL9`-zV3Y{nL@hbm&f(&`ocsq1%bFRQUBr z4si+(iayG}yYja1M4ZdRg~6O_N3sB;#K}yBj$OX+l^J?y;tn0T#p4Tv>YX}uHTc0J zGxnOs+vynx`;FL_qteJTZ#!D4#OJofzF6@bbH1g+QmOi*=&3i=0?arY3v*R?q!d}y zN|F^i&P2~+-R!?eH0EWJcl@Mm3i-Yc-|a)jpRNY-EyF2?G{Aqd{SmrD@aF*9Ds5Wd zGzXik)(M@W2A$g(dYXhIPuTWy3qQ(%z9=GVy;niKyhH)TVCTvujSawH2e0M7i ze`IZn!SW@}I3+yp=?6qpKV#z8h)79~s=ZLDB|^r{FmUL1;7T`Y~tg%4!gD4F~NizKW!_tI202*d*6Zk8&B; zp_AO9{m$7I!+z>+)?=`mR{77|Idt(be}*T&wH}dNM1=1`4+VYqUKwf%Gv(LKXVzZZ z{-1-jYXYtS8J7Rj-EsQ+oG&vID_hF9CI7s-)10-}1cpp!96V3g>7fmR2mu+j=AJN1`~V0gB|T`euu^!C$t_EAP+!YwEXy zufSHFx;qnin;PtKIjx#28U!`FPqH@pmjR=>Pbxl$HjgH>1xRoerUSZ^p6z)!^uOW| zjrV;3nylbV`qz8vDnG?ZVnPU*^)a?m{rKGhZzCD19!;&5rWA8Xf$zcCf9}^YD@bjS zP|b^^A=9JFoV9XdwOnjerquOI_qgkaW$clDdbq@ws;}jZ|E{hkYm?iNy0%Tc)(zL* z9mqv^p?9ZOT^Wa~Gi}Rghi^Az8XMnvKr&k?{Wiu~Y@=P9Docv1?Bl-G06;JnfO5-BgmrEPXRvaJEXho zr!8JCKhWu87Nr0ELpA7u;+Nv+Za0T}WGItb4Y zWF~|x*hpOsaCis@6FYyD*<>IV^r^-*x>0Dig;XN*qB?w$MlwZk@oXRm-%3wCe)*Y{ zX{5M#8}$e}bo_fUEdBzyo*E@8Y27_<1^C0CC0VNzPQZ8~`ahU#Y@mG()kRVL@yiL&@pv60fqA`RwmS+CjPAQqM=!HCg4{d@ zw86hV^$33gW1koZJIt7-n`>BofAJQB8HBXTMx1$1C$Jy~2e4$h`ZKQ;2N(dO6`<5T z8`$GXbx`*il+o~oSgjE~8LpktD;JWNx~Bn1WTl7N(HGn$S%=X#-Pm~Q2fmP|C%g$U zlRN2r*Ih5~MTN7y^WO)yY9eUn$1eyt1M!NtiI_wtDqgPZ81ODzY>5zHHi(pAIUd;K z)MRnj>%-q07X`?0^@wB~qQlAya;a6<%`KA`lE<*Lwgk z9dYI@@hM7n@uw!Ce_1Xt*s1GDb8kO`Xg6f}(ip0&x6dA*^Ek|}t;%su2*+{@B9MB0 z^UndA;uX50bA)karuBv8HO9ma^t{J!gR?4Y2P2sz?kFUdqMA4)<5$&K?PSiy#pvxV zWX`V_@y?oJjS1zmb@wstAXWcvo6Z%2kVfT%7N|lJ?}sD`-wh@h7a*ouac#W{jC#JP z*Q6}Oo3HC*U37NfOaFD|F0~I>Z1$-*5TeY_I}SHK;MoMOJ^stC zAIsm_^~VgFE(IkY5{OIy3fZ>yzn!LNl_}u?7fc5UtrLokKgM|O(At#D?2pA~2y}%` ztt?2-^rmF*Iu{g1&>K%usr1XFVErpc(_y0Z`M@p2^Dk58aDhz(`dRu3rg*MMc^GE& 
zbfx~4c9idlZa@r%vZ-hT%#FzOyv01D6ghB0`PG)uq3vhyu4e-Hn7r7mMHPTM1cQfG z10Ed`J*53rf-hQ5r7r8txVQb)Xj4P#x}kGRnnS~YKwqRO*pXPhHN=2_=bNp)d5U_S*WwEob+8e&*p^eX*UbHEbCKy5nu#q{}zlN#?rTu zkD-c>2PJ0(IB;=R`)QbR#QZ$ zw#D7HX;#Xhp}pcM-Cc3^Laj0#&aWJye3=Ks?|P@zStjf8F*i6$kVVT=$wDhrPvQMc zKqkh^zrlJ+Vg3H)%W6?}oz(+N92Op&vSnEF1p*o3kp164wyk~--8S;L7U<7?zit25 zogiT&P?R^XuorR;G`#m2E0jnRFz*o%X_ygms8)#g6(ClOLV6-NL=?`N#DY9!L|;$7 zL7XEQ5>psz9BjsEplCQ1F#th0XwVIbF!R+r-6tPeW6Rv5DbGo4*f$ipZPo`c@XbOBIXE&xm`G;R+`JRF3 z#x|$0olD`dFwoU;De86R))D0E4rYDyIXapO3`@6*c`JcdgB{MM117KCI610X|I95FeW0R+6m0qsRi` zrI?-XlSK#8w&+FI$AvZ(lZGHh&&sLo)wt*&~eWxE9(&)i!Ni0Zx3Jo*uXKqJ3&Q zZrOZLt-UJNMQy20&>#x0Swi&NxnS9Ie8BxO<7QMiufF5Ve(2=B*}>{t@|@$PGW_R& zj?^h-aO?Of=>7iaE`{Q|yiqOmd!3Mm640;o7wp&9kAeSE?sDTBBQanAO$*%J1|FZBOVWUr@;})6p?N&5nN&HBjpiNAKn@^d<{W6#N0@$1smhAx()q%VDZZ zY_|${c{f@tmQ^8^;acW*v%}RK-|flF%{8=uGzm5nac0Xd4wj-p0j64oTVQ@9`^j|5 z$W1N3&{>KRW!5itCbPMz{oMT(obDo09uZ-&zzX&xey3KtPOWE3_lc$ zl4p>#TQD~7maw^-_pQWSt9V^#H`eD&L3S5xnEzcW2Z0Od26l}Tek!1^hfHN zY&GiI1lnlM?72I#t^KSwwK}0(V@9A%3PwOE!s5BEb~RKpZW71-Zc-p}tCABYT&M_j z>HV$}!l05JAsXeZtu8b`Ohq7GO`|JM`EuogW5dV%_ir|LA1va~U>a+l z<0yH76RDYpAF|wJZBzraLURLbVY&R%$)d*9Whfm*?dLn^z*b z!)G1h^xer=pjB^>FJFTJgarq4f8yho2j~UI9S$dc?-DYAE|fC*RPojD9y?jX`)#Vs z`z;;{pw?p3eH02P*vq6eKl?8&^R_`TQ!M6a+fVntb+qr5gg`->(%jApo)ldwE=dk% z{6l-IvKo_)u4ABKt_)rDDc}pS$BGBbnkzs$)I$*O8H9Tth;~r{>; z$Yr1y6?K~aVo76lK@qL|r9HRfWHWugLA8qN>SKg7;7W8lh<`htaDa6b`7DSOrKm7Q zBCPnbUI2LS>G5m0WsdhaX)Y&3hg`17IN~K_5w?OGPlnf}0sy9fN-)*`f`RUZ{oOaK zX$va;l`W#v-Zg)p>slF*82t(#Cp*sr=+_u53puowui5C-&@hGba@8f^wlA2uEh^5Q zIs)H_2!VAkfR~pwZ~%hX$vf1B{*?aono%Gm@C4;thlAK3jM-TTHTeAeYEiaU_Z$oY z_7=;p2!ET6|DB7E=iRImMAb?;BG4E@&x#B#GOi$EkO_qB=#W#k{9DgurvHr_u*)1} z3gLr&La@jQ$1b(p>{k_z{7)ZyI+-imZESc>{>_HA6Wiq!!W0`t69gOr{oIT2Zs{HU zd+iyy`7T-WgH?uVrnA{IX9kJSox&0YV$zGVr=lq!mumJ?mrWE(;rYi4gLPs=LJ0!- z;rM}6E_2bBYP9PGtE6{L!R|FkH1GEGe)6ptJ{W!-o&^x%k&qI5POO1I>y|` zxR7faVOQek&M)hEMxCP~VjdW|U{;fdHLV)8M?agMY49|m0TN#2>{hll8E^dd4tU76 
z;gss1o;B-(5!7CNCHsC{?eqFdbDU}Qr`Bwz2qyO>rz!wWV|Rc;5%btqv+e`uQ8{(D z;0~c@>am^Y8aCn>{h;bm+utd_lRxIYHVh_LPKDi5jii$?{`g&5!}cD0NB%Rb@P`#Q z#pU+Yak}_R07RsPjk;|_g{_;%@K_?E|HsxlMOPMW(b}rR*&NTOpQCrZg6sG`M7j#b42+{YLlalX07D7vceFJfQqV&MqdJ{ z^x^~P%(?;LKGDV2(?`yscn$}d_MqG)Q_`KIFAUoufDK)Z@B13P3+fA|^{qK>Hb4OT z&jIyIu#S4VazwphIf4oGC9sF z^3V-}BbnwZwY2?`S!U%sd=^o`#q&LeI#vLORUZjHHvoWBH!*$?{tY>i`4kOqG- zJl<}xJRO1LL@k+L5g&4&qf2u;ov72=;U&p+Ie}F3uZ?;Vr z6s1YCH2*whOr03PHvR&TGH3FSPLJ#B69dZB?8g)T_I3v9Y_JbWVv&xZUmNERUC+4F5%sN zI6=sAeR8y>7XMR_^80@kBrp8`DM+bQYyJcP)6LWX6MTfk3R4{ovl2P$@%AI3pK$#D zB(#6m*t9G>B4>!M%B~qI?o5FjvDN9l{~U4Ed{=8~uGj=PIpTS#4bK56+Vf}K{748f z6lR$>dbpPmorW#-tpmE^-6;WtWG81Up$B`j_~FahYf~Z1qoZTkoY59~dl6u}Apn;$ zFc7>%Xo$X-e@Z_RK{@g4aOMu&vUb<_+|;FuMBzffXY+^3&OwHwRn)A*k%QC5YZ{l? z*IWDK_BEu%$%>yJEb%zO%o_gcNpQAuEAQ9nYkQ~+5?W2SZ<9TAJbM?QXfRh#2*s~o zN?FB(TwP0S>0&~&jeV+1avxBZ4@3PQ33Y8o9x$pdAjUNf2o8$WxNZFNO2GBlNwS~L z?HrLGo_M9BC9ElHJ%4qpITgEA>s_h{3o70gre4KFFffs|82(Bz+*X`qf1Ie0VcPCf zoV??&-zh)+r~%(j)$_Zk8y+G@zj=3@NUV;aGQdIGBiBTy`DfqXz@+@xK@puNulA<4 z+Vlh)x-?RJad`teVl`j?1)4e`s)%%AToG6yyV>~>eknjVf|L9|NF+aj8gsfxLe6RQ$V64uCF`gBqp+chZ|%b+%BFNu!V&~VV2I6zC80(iNy5!8t@@!p6^F<>J4an@#;$?yXia0oM|j z@B8ShAf?J-5@LPg!d0VoyC@xhmU>jK$uM$MzGj|LjCu5E{7~pwLJvbmGkF}Z6@QnfV4r&?(Rzd)Rvx0u{6d1h1Ux^e+99|tkrEZ2*}zaaaj`0W4H z?RMY9dxZI9Zb+R7B~9}sP3vi&J@4^<1SSABG*5&Yr}-MEy?GJGb^3~DK{7YL^cS{J zpYObN4KiTKC^*CaHP~mor~4|>)l8VM$T^e&=6g;{T6^p66^U_4iqb-b5u*4J)hAP= zWl`W>H%Tyje(LM@wY)JMzbiM->2n zwW%PJR#um9PeznlbeA|bqd9ArRpXnbrq0=#d2qnlg(Yp8T}r{4RKhQrk|XDPH|PA^y$r|{DP7Abq?Vx&o%<`IDL{@$~yru=+LszxpGb;@liKWnD>XpTiuB&k87 z|Bzj=0gpH0Nc%eA?AQ55i`~DCuzO37e`V%JTFKS|3)rpZdF6*1&PRi?997rW=KdbxfODUZ~YYP;2hun&1|due&_Au&(tMl`BYvrkgy=o1Wv8 zdiWQT7aicx=Jrp*^C!N=s|BkS?|~PFqMMr2Syg8&(Yd_P{6GQO-45a6=JEkxWtnR5 zw$O5sH$Z2&{KbeRu>hF&`InK)N;jhT-drP52@&DQwScPI6%W*5wovmD4y1zBW!^Ft z{+Aa^3;NG5%4c~>OhoWYfeX{l|L%lQQ~zE6*k8C9|Az~%y$5Cl>1s&bzyG=5BH-Hk zC<2$H3Uxt@(0KVQ%C>uULliIu|6xNCpBMv3!IR4fwTl~#jr9@*3iQ^~P84bmNR2v5 
zo36D)dxpI+%PfAgMcdjxhc%neH#&eQZVUmg0KU*W(h*GuM_Jb$&R ztTvEWzJLJwR*9N{k8A2IQN?fTLSX99f8+Q+h#*3BD|&9SQA3a1&#w3YIe!Jx<{0Fz zRzr+@7*Dg8UFN;qf~xtrdD1iwG>jysnlMBqtJ5o6-;6byJ1PA6B|XOM4*7O&yNgzq z-vaeKLwHh6?6%A1I>4wK%kjI8kvS`OGE~L?)w^qH?hxkauB48$K}X!675_zu);(c& zUS&>Jm}(#rNEDZxz=PONjeYqZ-4P$ikY8+C-V z(diRgbO!=#p=${B`l#i5@~y571a1s64oD!YsSYvMty6L&F-7Py0r3Y>8rF`RrnmhC zWrc0K-#eLSf4GkpYFJ+WK&~U{Bp#^;6z!wmbbJGK4%Yo&dyf76&!nsO^z+KeI4%4% zTR*FX?d>B~O4Xb22!{G&>+6RAOPO*e;iG9;u)dIRO1(eFH@fa~+9ZU;?#+r^ELk?) zoN25_L$?cqb@32&QLxEb<_JWG{v|ZzgyuA48b}f=O)>YpH?f+VlglCuNUyUxP=}v( zo9e{R&FY3n5f#A;LKZool41l<5E*9eB8wfs>A3Hh3L}2_k?T@JEYkGlgui<4HjE}l zABdl&HyO&!EP{A@90`}dPV^nU4A4h(O?6`9-c_2T)eSm|L?Ppak|beVVE;5*M~xIz zr(>9-@40>WG9248Uswwc-Pi+zbLERO7@}uZL!m~96xgT@_%*kaiMIg2-EsD!l2vWV z6B#F$V*TM8Ng&DtBH|5#22x%vjZ1U8i;l0z+|k7Hj`|(YNOBc<_wmwrb5{1tkw9}< zu2qIMaq>7Yn?{||1HCkt{2EaL^tXH2OasxKYoYG12qChn@Hi1>-*Sz*_s*bV>G7g( zf>4*NxkRsWwc7A=HtYd6m7z@(S=K?gBmW?dbMQoN1brFa{Ep(oKI8}YY7s3C3r7Oz zQ--=yuM1955NCX!lfqzKejuNVUh}Dg)h-PAfKZ=S&ZrK{Q-p^K-cYMDvnDhRCl4is z{zo(mYRpJ>Vc~g&O&KX{w%{H84)5qfttFCP$^MPMRt7YJRf7Q1R5@Vt`b8@=voKmE zO%<{zWs&^K#wj~nz$g>ZCKm8u*c!1a{H{w8 zv%XSfqX84sA49ow|5w1lc?vBD*Z|N|)4h_G74t|8P*pMK60=sZD-B&d`9MjtvG2zw zV=ZNq^*@GZq+Eb1JwAjJp(e+)aMBfqXu8d?8&-3_3M!p)**ML{o_MqI#;9g74qlTs ztP4JGL%AeXCiHA#VtSj-u5| z7vY38pos-f&8`Lak2Hi)nn#9~*bokE=oZYL1n;&2)YSxdsmU2wQwov>Pnwht>j{60 ztCJg0Jo-nqy=oN5$Xt-GPNXPyRH3&>@J`=cO(yk^zI8;S{Bx|Mg~3x>Z9jL}A4u&s z9F9Mg_qQ1e{ytgebUMUrpi>v(Lc*rJN@rSbZa$1-Q@KS~>YAS-w!}j!@||2l@UkQo zLv2$SneQ6D)hA1)*E+erNdcrAx!~**@f0b5YNni`OE6D#`O{k?{${E}44e)DuWq9| zljN%Ux6U_51O|}{qU6ZV}3c>P(hLd+8?WM9G^;nncyqILBw%^P(k+2s@o1@20_5RX5Ig|jYI7SyY>D%1 z)z|x?j<_x@(KLqmDpj!+kLE4GjhssuSE=k@NdV5l7LKuwtfUL{143&=hyYkt@<7KX zErTc`)QEb2t!jO(c(7OFnq&7g9gf%pFj`%H@T@_@F71ENX=W%&HUy~hjtskh@xcQ< z6~WGBR05p?J4$IBX;1+0N_Yq>F3dC745o#V+e|)$+L9kw)ylo(vbzI4X!` zljP?-o6fRX*U@C^wA+gW^p)G}$OBD9`M|p8o&`d%pEE@W3pDkvdVuDB`{4~RA+YvT zOstSZ#@D{%y5k!MsNqzHIMt*ZJ{;8Kcwu$n9)GNbFqIG-Pf3QXS$&X9rW3mDt}8p*Mg 
zmNeX>C^pcwq-rVg5Mtk6lJWKE6b^xeBE|2!@z2+M325wsd#*w=v@9vu?-Rz9P(w_} zDDIQ-4&YrOkNX>mGIq^rKw`EZ+oUml_w(Z?)()R4q`kyw@%wRAL_(4zy9tKlgfx4Q zuevRnRS3d;hWUUCz&_)m4(`X~HGLB==rb9Mu{sLY?cftg3QQC@+LVRAVnoseXLg4e z=tzWp3gV2VZftNqGo&GFGJe6)Rb`?Gc8-3GHq(GiK$&9vk(8)JIyRZ=FwDquiB7Iv z1}_v@YN3_ua`@=BlX)T_OGxzO!$sTESsW31vR01n+X{*dP`FBCz~ zC@9Glf?W1YeuN;rRJZblr2GkXdDf0ap9Hr*l`@>@h!g{pc`61#5mig-gfH>9PNRq6 z>-WqH!HA0b=>MNJ$-%@z$e6164UC+q2~C^21q^}%xbBn5TF$R?%YLXO{K|I7YF^97 zST!M|Psg4a%<*7xpwuLZsIEB;Hzvd$^-|BHq~VND7wkL?x&!fuUayer>3lGjB+U;B zyf)a=r=|DUrmxxd9wjy88oLiDFo`5ZX8o-#ugKJBJ&XAL&Urej^FWS)9;2^$UmN{R8XW%3OTdzU3mc zwCb-D%7wYPR!8=)#+9kfp4_pW8GW=ETrCahs285*eN+Hfl zDai3^Omr8OAyDkmeI7X_y@tV2@lt51_FOxI%>C!jl*MUF%S(DCw?TXCIKq^2|slY%`K{%OG=l%osRh@8LU_{$`rfD2V?<>dmHGyVH zqT(TBp{0Z!Qb3V0;gXR0^Xf_ZN7jbIN7?{OT_fdsiR*bYG`bgp@b(K%CTEqe&db-0 z2WmE;H{#eY3CvItXsYk+@*sX8GPf243Z-S)_TcvZtS+%7vf&@BCUBNe209q+^4|ci zaiB3|K$cOhx_`~sN%LMq(mwTErvZy1`P=$rHyuAO1Q*e>?cU%mvA!U6EI8B{5iwb5 z<~wRo+{VPjtGx!tu6{hi(?a;vzcF6DHm^-V?q9XnjT2jCN-!Q(%k%@Y*qrxh(|>Rl zJK&^b2(!;$XHj`#;R>-5Xc2w&nzgx>sAOB~0ot%l&~}po67^2u_Rt23j@SsSJ7KC_ z2n?U3+|F4J(~AAi0n*EKj{uc(zGX;vKSC~J!~gm7fPz0s$QHpBLqE~_Xi zOIGE(D&{D16>0_3gra3;l*md2Bna@QtWX(mh4N@hD^JUiwp zEn0oioi4A}Tof9|!;0eiJ*%#VNArA3XfwJR4Jlr0?Waga@k^vyuU$-lgp#*Z%QI(c|iE*oGKpgFN@)$7#*-UTlIdVL@v3#g*1l|C)p zuckIX-MJkMFZT`~fWQF~%p=TrT=${KNl8O?!3t19 z>jPKXwWM%E3zb2_zo#3rnA-Lyq(Gf07>l`@zPiiw4UPvJCpT9lSw7wfuYozE5DIKRGAfuE z8vL+10IL&G1En;sNT|q^g>M3IIHZC#qzoY@0jk6Bua1cMQ%<<)30Ukre^6{mezW!^focl|M>!FA806A+W=>$ufMuiJ#_ z9F-p8q)S6L)SA!PP=;?3Yt2lGs*kAw&V-pB*zK6@%KSwf8+uGDloj4(80(*;ReDj` z&PmR?5rKaAU=M_UA%VBhAz*7EvuU@1>eZUN}C%ch0jaelt-U)KU3Fb%iS^&}Wq66EC&Ss;dKr93X=otl=k*z%gqOJ1SEM5N)gsq8l83krRFOx zP-=zdR9y=&b7}YD+~T3w*2b>QU4-p2yxtQIlY0vdAXZ zPP~K|QWlZK!A}w3N(QEiGTnw~+fqVqlg5={6=}osB8o&Q#hn1M5#X;lKK1X@^v89F z8jUn3!oRCGZLeG#YEE_{VpCz{2)h~lP~^ZV`d=bM86~4p3d?6WqL{{m#_#hqmktk+ z5ijosPtNqG*N~)mdjmQt`~AU>J=CB4+9b=QKw0#bn}KynW2Mlx*4O5h$iy14^*oNH z^$~t$>Da9A8fOB^`B<}E-jNm?x7OR8%$Gw!4uE)$QS=X?3M?~yP7qX{F+MqdjlW?% 
z<07IHUR**c8UctBrc>B5){koAQPaSw1&#$(jVqd)yE1*CaYF+~IHFU*ee5Zw2naDP6a8O6^1u|M$gVrUV2;XPQ7C99DTf(b0u#-N7D z$jQ@mXPezEH{OyrWOMo+b$S#6!Q2X7-e)9+&5Nflx`@A(QUE>AZMV4lp?@8yMFyr0 zzOewncdH;?8>ut!Ae5;RNFa0op&#!kR@aG|_dIbGYbl$}tTP0d0W+@@@+XCZo;J|M_hKA|T96=h zbz%98NTO*F276*Tbe^)$dTYGFb`4gWm&8VhcGHcB7+#D{PZ=H znf&}sfiUw!6_Y9&X;?sut>{Ica2F`(JeyJW6^D4Jn*d8XgYPcXN<)cvHFe=xY0-M^ zK9$#~iwt1Mn8fK0?$4^ol`fo|8_O0CoP1#2xF4gE*o zL&?c@H#G|(sVi&D*rglUiin!+RplFj3>{{U465b5VQVdeV2$H`AIg&P?C5h1d0ex*KijccH6iNU|NJPjepn1C&wq>28^ME$C#kOZ_`K) zP@I7?hf*O0-VV@F2@g$cWDKEFN^&|UFfi2&Kxq3J6Y%QcNhv9Rh~r8SfHj=zu4fq) zwkCog+Pm`m*>bJi5nYGd9+O--mF&CsG;F>~1UPrY3E&eDr5s`1Xt^+`@JP;V+kY@2 zsC}dalhavprlL;D0nnHf5kRJ-gf#u8gwiNvf3j%*Bx8yv#d&xr)l4+A;&6!>Th7J9 z{x*VxkB_7g&qL-)X09Iv0hqu68>PYQ*-x~)a0b>mKpm(-dN8n9AXbC+1*^4iw98)% z9L2|3U=|e6iYzXKlRHBN(Az6-UpB3)j^7AoTH0~Mx2S1exeiSiA#zAzUeu7DsxMux zbs-${J&PHt9j5Z}(0B)(zveOsuBd;XA;j!C#+_X4oZ)Ivscz)!*enTDEUmJx4Kom& z9Zw%0lb~~+OsztnXMgMaxklTW&QI9sS#J$UdJcE9&?dyHFK&c{eVep9+I&g1c~(7i z(^O3e_vm-ueiy-FRuDgS-P97jteh3D6(AeqWD5{M4FLtFZI3 zS>*fEqyZU%NnXy~&t9ut@k)DfZ?;)2aU;v(?CrkjPiTkDIZY2f*Xc0!)aP@y>`%#b zkj_fh(7nCyM~}D^*y%};Intb|!^i9hQ)UV=cj6sFV^mz`>O#&q)tl>ro;-ff+l3*( z&-oLr@4j%5ZgoB3+Cu2GqJ*m9!OqL0vZiAcxUCyIig{hfGrTHq9bLx{JS_UXeB>e9 zPl~*{-b6ho9o^InA#L-ouEY}V)atlB$o6o20|C=&+NAEIflz>QFteua;r+~QW40TN zXq_iCP!GhQ$Rdh`DMm1?;4sd?{`m9wSmDjVQu#D<8eiS9#AJV(GYiEO4h?N%V+Ybb zlAtw70=9GF15#09PV&?RF}Lm z(GXcdDVB5)HUdYdMmZE*^jjKpNZFu>32+-5)$ zW;7rkE8gl9HxlJcM?^Hti8aZcB7%OF$14F*(2r9`P$SXqXDUiJdaS1au;i0qf9lVW zz|q=S{EfDhhTar-iQ`q%IcaSiD7Me`Cdqu{v|;x_!EmmBI70^`OP{DH=w$c5`0hXe6HG5?+CWNW?})r=jJeK zG}C?_UhXm#Tazd#M)4r>wr{yxf3ha}%Of+CC8&g?rP&jAPH~ACtR~$U?G98{75$& z8y}6FGcx}b*50B;g**1y!%D4(KfVG0h`Wrkgxm!!Ez6*7U3=9+8wZSM^b3Hm+pRu8 zHV$RGL|#;2Paeen)V`y>-SF!&P)*!Kr!iglT=cG{J8<6}odLLdfDTnD7g9$te@=FG z&i_nTw5dY4KX+SD7Kllqw4_LM1hl<>v!8a{e&IsZ3gulbhAwxHtaP8`c*#i5K`A^` zi@1qHkObaA5~6wVQ;a8uL1r;t7jO$%oJ?#NH~h{=BEvgo1F|acGCz{3aq>8#8U(uw zno(d7i}QfSEJ~6a7`J-I3RCmI2<_A+#)Vlfxl(dJ0qDS 
z&^LpV9?@RW_u~@IlK_Efd+f>vv2hWHc`&&+f)m_KFTZuQQoffmRcJM57+lk^JEK%=+H59S33jWOLuST z(f4?)FfrCndqT*tkvZuz{q7hx{X4?I+HRDVP)i1M!eqruIC*+@d`^en=+0Ehut@|Z z{j(YZuK-9cV;=tX<6V;M>oX|J;afv_E~0ohatFL~tboEdkm8dLNa`id&l}3clxR>) z15n$r{n@wp+^AI!M$+B7y7|DXL5D}iAr(aoiflk0SSO%&gKb#Apnp$Snq95SnoDYu z;%nZ-n3+vyVQ)}G(L#~j#H^lE+!}?dHg%K1>u)>5wd}D zZ#$Ss3s=|G^u^iSofZf2W*Q&faf772j3!T9XU&Vqx}a?}wyj@Rv@@b zSl_PJ#<-e5P{Xlh)gGHX6J^0IJZeb)X$^OT@O_ZJ)Pk*5x5zA$QG|K#xMBa4A&d| z6)aWA^a$?7Yx-$_M;j%+768Ocz(-sg(H|~?NncJYh%8|QPqtAUC4^I;jN9UY)j{SswTy5se8b(#cK|IWkvA`+UbzyX7iv?>=Xi|85c zV28F#s!o=W?XtBNp@upK$BN`^0SW>}bUotd^!^;33eHBsoDd5nZJ^s8n6G}!1zzVS z*7lV&7Ng(BrxOA>tMOI}jXBl-n&nx4QhD5JDYHE(Br>ibpH+FqF5!1UzD;&k!=%$^ zyOQg9_n_475B9|00Gx>X6FqCxJaqpraFpGHO^&SX&&(T+UalY7&Ec<~>+U-u5L)2W zEux=K#-I4eAHcd)0b7dF#C1DBKuQ^fL#Ewu)qp~QRED2FK?6X@aliu- zM?69Mr|ve466}bWqKh~(aYl~+?IJNHNCDUpG2-gzoNFTUb!^Q;5hZ7ybP7D3K|+!x z1tp$I(}CG;bmeqH5#f$@&qR45?Kgx;$GKtrZ-K3f$Pjr`R9Xp3aX}@f-}{E3X=+*ZtU{U$Mhe#oAjV{yi7MjRC2-NDJ>n=IDwb1+3%*6oOk7sDu1dSW84#b4BF8E}PgCAJ4NanMRsPKh^mZxK z{c3K+>Xg7?G+xVLUrZnDxH3qUSRMlDrY5@sXR8RM+kFjwo|)C8wSbiBc;141m_c1U zxV9Y>$|k8j5$bu`$=4K_z2mh5)-6AfD1P;KeL%`;q%GdlImEEIOA@AUT518^YQ1yB zwSYL!C3qI;gBDJJ0rLWH@k%rmUE2Xo3HD{z46~jkt|?^}3h0u~hHc!s?Ij0j`x!X` zb&UP1c~DbRL466<@Th8)@0C3PWurp#_2b)VZ&<0y0E!y1G%>9Z>N$KDNb7}zd?E_| z9H3yMR$X;X1j~5A98$0vWTSwoTy2I5!&)N-ai8uiZ#=7+S%~N&LW7+)`+RD++3&zA#+#+ddf_MSWBXQv|DOTyZ^La%gkIC=W=9WVJ5-g=e zVc`pBrx(dmNgd(OYhB%M4?f-+g`e(Do2#{}ov^%^O?a7LD20du`?Zj?J)dcyd-iX= z{n5$loLXTvoD62(iM!!T?v@~44dWjk`xR|{tQye%R5SpAe+QgVMQ7<(D^C(#)#N5ByRT2}sybY1Y!|0f4pjeo=mpwog0*^G z$7i5lr>{`;>RdqZd0s;SHzdi#l=yTPZQz%>cR=e>JbAUZT&dDL=qdUee_1XQDIj9D zS{!LC_I)R#EQnUL4qWs!x%Rp+H2KsNmubPxh>Jj~>+KcxoN4m-{J=>;!_Y(hi@KWU zj-J`1`2xo6yW@c&znQ8()t>x6W8Y6Gfr*kTwT24h4uIyD#CgkG?eD@xz9_=#H}?$R z$bjL&sgidEMP4aQ$J$rLsY7QL@*EA7-|vzkquURZ1{#PX7<-Gf9pk$DmkpghlG5(% zL8s9x*%LPlN0D2)1;f;j%VfCHY?5CY+P+gZjCgj}ki7hFv(m3*_C0oK(tr6*K<4WE zb#wL!nE*VS97TJME4#;Tz8R)!OiSMAh&%71VN(%07u8cycc1!+swnw5bv5i}zsAP0 
zB2<7nm-}j|7!HI#(|L?+vI@-8hDrhG7as|?CuBtVq_Qypv4L}9CR)bqhfmqZZ0~O~ zQ+3Gsu@3tb#%VLKMT7~D_51C)*v4L$w`nXh6VN=hwdUMD7y74&Bw3w#yVOzAJ3NW? zzMf+6bqs$>Rw~gJDyB5pT82J6R1;7oDc>=et*TZu?)X~9MY&9&NW)WBYW7?j zSrZp*hzt$bOvzJA>((8uMDsRQ6nm>>n0wBJ2*)MGxBw6llgRrSO2kOg%!A?b>t{}o z@f4&~D~D>yi5;G0zcVGBXT6-UK<;r!9|Hqm?^*>(FmSkFxpF}C=6WXc9 z)JcvYd&4oT|3u~tTJ@N*Irr0KNObLqh559$U7YC{KS{iMI{Ypv7qL7U5wfu*kt{RX z)>oN03K&XU++=L40Pu!===v16;OD zzo>-MPw;XQ;zC5suzFGu1r$KT124?TlWHK(-?8)M^2H@*FCY>#WV)j4;HG)xK-pxt z#ISqi-A829nLw>aQoPd6xC6~Tg|@Jab=Ordn24b^lnAKEr3C^c1bgK+;rgy`1fyNS zdIhfcb`J=RlEB2CL%g8$19sU3{l4G+rXmLLej#a*h01;P6~D*1Hd_IEx@ANj{b0V~ z)A6^qJh$MIb@1`k$Q~t6G0o;jqdk0X#254G)yI$qn}4)Lb)?QA34$-~F7*gQwRv+6 zB=&tII+9)_{X@65%I`cVQfr|)Dp142|b*|MlvuY{CD`3uN(#Vm8HeG`_8B*hw+$Rr{0 zlKA!1XFP_cXbgy`wp{fr6QQHV*~AU1X6J-g(TW|A%1h|?x5Kmql{Q?P&pC{Q@TW2@~}y3Dqbv zN{Hw0`=X`>A{4DC50ZAv7`W#=krDK^J0S)j@DLaVD(*-9vO8%eic5~OGz{xDDkFgi zy@kN+#lLC)HjUSZ=ia%le&HJQkipU7E&U7sprW<;M06f8L__CIhzcTtciGx8+9AtC z^5LPy!8Fc@_Xz^1lK%zX%sa*{mtqEOCz)N_s~Xx1$rn0$W;LoIt$CGKRjF*RObC<- zW+*R;!`>P7)Yi5A#9X^UOU#p~V$iWu2l0j<;MI!tM!c=JIyB zcBkH}idoyxm~vg&SI643XSRaU8I7-(isQ@naU@2 z77+1P*X`kMRL3yY?DCi(&%ZCll*UZ`?1oXp&bm>boTYIBQ`D0s=}76SUZ+SsF6VO9 zoY3Kd`E6GHL_4DKh{YprH75SD>p@GoXu9%5$31B~^#1%9>&MCE$`1Xjam?G21yTQp zA}5R#^sx!RE)W-*AMNo&Z0LcK&mNP{6CJv!bSv4|as5|H(O^j=*5asv3%r$f3+Q(v zTAMHqJjulMm58xWWUmE~q~|~MTgD6fy$3B!;L^FsW+>!3RF=l*7}SLSGFf(YmRPx% z_x6CNE-^56=bpCwRq^q+r^l8ZSt5T^>!yqTy4~5@%J#hW>|CeQ)A<>HAtF6AYW{d? 
z?l~A^HtyItuuPg;zzm0YW246M?4x(gT-R8A(OHGJtFWiEhrA2mp;~RA<#g9zT>X`~ zNWt&>DKHoMxFi70*^u=AICZ!<6Z5c9TSS;alz;)NsszeB*a(o6$yNxzDHnjgiFo?vRl`H02Zj3mIsc?%W(#(O@fPa#@Wl{I5EDM`>pP{@ z*uVqGt13v>(=Nn?$Ey3ifEeou4Z)lEm?_2&Mr(yTbO&D`?&zUVXFmS436n^{8b8pV z3~Hwb#0;$IC+*sI(P|51yCZPTvoYSg*p-`NJP+9Ev9HAK{&a&7bh z$FM%6v5~0cFC{t-&ZRp2aJnucO=S$YEd~V`&c1RaRHTvtN09WcgIt+_Y72}J2)7q) zb$7>wZd{!OuV=FTqM?TmyN;LT*TD}+t5W!Q=ZuXpSj%@pn!(Na?F`=|-`5JGff_58 zu%M%+cK;gD=>)NzPV>8L_?TSg!`mO3=zE6R(^+e!5b`yx{DO)7sGaCnT1`0;L|h8s zqgIl^?=+_Y?$Lok1=N~L)24Ptw+MpGilhYHKBALhFgz8%Yrx7`TJ0a(8T7S>e%W=} zUxr1D17689jGHxB;Tp#6%z#WNXTxg7{KnCO5P1sm&On*Nl)(f@^7-T8b4vESc(`x1Ehp^Z$|Pp7rIpHUa>Ob z<@0M+m{>GL8p;c_m8!1gB`%*VS)UckS0sIHa%f1R*&|0fz?_s|T&JildlLweiKN zOoWu^Hue1|i9aZ3(dLRe`tj}Bd|-EsC2y$b^vC==hsEus3A!8xSqOEU?$PG5k_STC zA?zi}o%9LYBDJU@E95(_{|**_EZm!f!Q-hv>kjACGQYKW9RIz^Q1ts--$F5`pZ_@W z{_uPH4_kL^v^N3nE5B5Cg0mhbSyVn(p;12D#?6gDw-w~oNRFy%5ZFFLwqHUo!TqoH*u*YaH66b6!nEnrtH@kyU1*17-k2 zFS4cG@cVan1Fm0@^Dh(tA}v&WfUD(9x7ow>DKfXfM~t80UI%lH^RpUxF$@fvBJCuP zi3S4?(FAkGiUCsiK#g*|ibKhm_)!AclN@zw=Y>z{h4q77hHTq;3JWO$b;;u?W0=91 z^b7NYsv%V1ru41NNWo2FW!5@$Mq~s%m(baPyFx z?124)rEXZR)A2$H?fssRk?wM0{g$UvCY?k@K)sStl=7?0JJ_{(JfLI!i31iWVAnsd>Wybpw=Vq_pnIj^_NrvO zDcQJO+PQwmoMV-g(3a!%ywR{?_)>*`AaF8fqN;zrIDk7d|ctz>uMi z{udq?;RIm=<>2@+Is7z$12q2|9`5?5c3IaI_s-P8qJ;&XP;3j!YNOvGs5Jnoq42Aq zo>amy{$b^LD!~lpD-}zNd7CS-o<-={ zi7{!8Au-&)wu!u6e6&Cp7wxp!>LxS#1F+KH)`P`VcYEzU~s4m;2%0S4;y zyCf%S43yjG{}6Rf(SbzG+K!WnZOz2CZB5LH?M!TRY)ow1wrx&q+jjmr=ljn3FRRw> zr|)|2>iyPR(2B>`#`vD5?TyN z1yS5tD2uTD;A!W6FuO>iQ-DdJLo>xv%1w}KMp1B~Wex>c=P$YC^@Qf`7CeYCagm|r zFHOONd%_bMDeXhx2bnRV2F<4W#eXM~>lwxFg^$uWfG(5mOi2n_>T*zGv*&GGlTga!ID(W=2;U=mhB9bP z{P=?l4$I-k?BAp8U5z~f7Wvg^qk`fZ&e{(o>(I* zGWhF;-wrp8AfbA8 z7%==nL|IU+vKgV|?yJNl@ZNb@W!qm+t-a#sAod%L20Vf0-ih#si@nEjx6>{`hf^DQ zt2=x8B-5mnJ4TuPCTuZ&b=uM0!PzKKH(?~7ZktY&R$a3f$+IvrBpG&QKIoRGx%J4? 
z=2=WJEZSC0 zo!-2os%X>ecDgsWVONKH2gT{nn6Gl_5q_;_$R3$Z9+(H+vQ-}$k-d6xwP^zuZTTlo z%$?8W@UQkvshwU64e0_XPhMQ<3=C&ChBsVun}23~)$QJwtG6!!rmJrAJ%SGNx6qN* zZu>8hAMN;+$ym1SkCttpDf3Y8u^-nAL|4>}Pf<*hyYEkfx9<}f>!PzADevw~S@_;} z?<%=+d8GLQqgBy5cvMR`aWa7O>G`>mZ01D;mNltHi+@yO*Tqp1*t9V7;MDJ*Rq{)} z|Bb+gQo~tUElD#Uj$(|X*F&S!rT{O0utCLKQ;1_BRg$T_$IC;mI?DwJi5jbyUK5oX z-oWaNcy^15KZR3+Iz&W`OMiDJG$O?p&8_N;mm(G~NL@G06G$&r+s=>=hmQOz7WD6j z$*f>TFBxjxE`X@k3KUuP2rEOoCDUlkhfAC1Tg5E4VWa&LlDQ7BTqh$_Jm~F9K~KaM z$8ROgB@&hLRQd7~Ug!V>`<1%XY3?;>q18Ob6nQJHUSplJ7UdLG{o`{Js*jeN>Mhb& zwkC8nt0nm(FpDR1mn*xY+9yLCW(lRsh$0KMT{=&dBnA#vaVX9Bk~#x%A+epzQIrV$?vC8>Gs- z))PCJQk+s8qU6yG-Npv-Bx6qxqX_&4!JLjiF#@b)bAxX~0hrR9GjdT1seSnAQ01PELxMi0rq8tZ0_fcOH! zCCIlzAH<>l#VC^!K6mhuf{HAG(u z2m_=U&~`r^z8-d8Ju-0Cii{pWdaus_;&?@5{ge5)&juw+tx6vR)mK=mW9*!Sx~QUjVTN|F+)PPc=d{)n0CF~0~+N#I-^!1|GYb9O&cK-)tG{5iUu{3vdXW^AFDc)CiYgD^FED~HE( zHT&(2Jupa6LHiUE5NP+=d)@=%3!)&Zpx_G);sWa1DfqbFjS3vQ^8^5RK>rBtyLaZ! z6o{5Fjy@1dhyCKWcV>D33Pr5F(fir;W%2w38xjJo4-WbrWKo|72L34fSQ6X%Z3%Y; zP!GlGfe1|?LIQa{d+3%ol?+~N8(rb?HBqha3$GG0Q+HM_7ZUoELUa_D_jY{7dUL7 zj8G|rkeGo!ZmEb|>*bdtHu<&id^D@#7zAbfQ;8A)O0yZf@7jW&-FBhrU;}X0`H(`s z=U+;bdM%&T?g7I3`gARL`%4G`_h4XO{cYAg9mLolzcf3PTSg@ALO6@Q3Z85MaHDX3|>$hy)E`e*{1rD?& zHjJ21O@R@zjL?HUY`NzCZl446yy5QO`3ihS80 zfZFkpDEHg^c|xnpmn?j*dq3M3ysWB{+_MG3NMatY9@ZDth-(=`nhh?J z+2&NTGSc(0Y05Ka!-hT-!C;D7le-=*Q&z&2>6w$*tBInF%_oy3eonnA))MGOyQjJw zK`|859FQpNzb%5Pt{O1*FtAxKJmTf2#u zE;cJ~nabY+XHv=h<;ZDAP-~7j+cN;j47R=opY%nFx^%Yl`xVxab%16W5ryALwC)y@ z7N38?Wyy?fj9qS{Fcremt$HVP=Q%rS4vOuZnSD85yi;g`HDp;55mlQ4n(K9_M?t8g zIb=d{bDdTwB^U(V-4TZ}??1;Kvy4lx8&M1zN`({Fm@vzFjIXvKk!5G+4J-girK}zq zU_5KTeYW`5(cB@-I19dK!pgCpj%=Q-$yzF5a(+n~_mDIEyUoiCaQ#(7zEAcO`gH#& z*Q(K(I-D;aH8nd&o^hLma)u=BukxDk=zI@5CPFe|8u0tWW8vb z9Wvwg-+@^^u8R+dzSRflRXzdWGMz%RozVWyFtz;9mA}53c0pfF=$b+p_X)MS*(9>q zKPJ#L`Of6`12|piIM~lSn?0W)V2j)8Imwn(2kQYIH*=K@qviL_DY)!#y`S6Zge%%x zC&7e;maa5{`q8n(z+!)RUckM<;LO+%grC|MV93b6|h{iKPrVM$~dH$ZYASwcLMH>k> zxY>ka22NetCaKn9)_2{(6ohCd 
zan8%$ZKSh^QZm>jwS9Gi>V0+FE%``27&peksqR2oUK+7Z6SYWj+K0KY%1oGuW<53) zsthIfbos6$S9PSGpWdDCa+>hlV0`3#aua;^%=(R%6C`!0Rnz>x881lvM#E%#EYuQ>cU1!J}n#4s2gLQ5E z_-2T0GPS&=f;HZhWLZ8rU(8EOwjXbrAJn1@4Ug;z4ZvDt z5-)#>zy356E1S+BKE04sKb+f>om96yf=7xsN_K4LC5dTf1Gf>U)-}&8QCTv6t35(a zU#m!{bZc)6s&gIW@$s`D3I}#`iC}R_sjB`#m4JrGIUj%@=EG;J`pv+(z4>%3hs{A{ z{=y}{Rxwoai?}hl5FI!6R>A961efl?!bq`TOaV?bXt0Ne=ZBK>Xnh;s=mNS~(+cBv zq<1cdFnLYh-=AmU5L8NHxT9c)5?&Uw9_h|~O_eAh(zX|IBQuz?i!~k{)HyTvOH|_T zcIS*@>v=%AF+sNaKyh&sPggA3p{T=S_VfS;tap9gqVb%#i`hr@CIan#>&+_jc$No4 z^@0X`SE>(*2~-}2TxpOZY%61kl2>OjuJhzo36+qKofZ{aIg%v(8~lOBod39h9{Ja4 z=qpnMf*#B`E1*6g451_*LZ?^ev&V zIr@Q{CfB7Sli%w$FJn0p&nSUqW{bnpL0^&@;LiNua%PA4+LYxQv7EqRzqjc0c^!|m zg#@z1-C@Q!JjI)dFnA=~K@yT$Ril2?{fU~S4j7E8?VX`7zqPKfw;9Dj)SeGOvl0bm zCHtGDz<46ZGL}D_hc04vB$6qaN8f`ld*&;*_5#+<6MLFBsCu2(sa4T(`RmnXsV8`t zw=$HZibIk2SAbGob3Jz9`ph~v@u1(XF9qN#d@o5cQDI3nyegrn7MH~|fYhOKF4e4) z&QFcap;VFIIC@vA4nY%o4^w7j2`@kdD0*X5bKd>rW`Hsc+N zC?*C-M8Ye0%S$ieaNe&{PTf*t@jucOS}AUcMB*|t%Q2Rz$GdmJ<8S#)FUZ58515!|>Et2A*F#);>|LvnjEt5A^xrkNTE za!qHIiY99a>`E6xAI38y=k?d;_W`VvKWNy`uf5)dHX?OBsTFP2b}1TbF}xBxZ7FyT z9sbne#mA8-a@+t|+3D_{$I((r4`W1M!OUigS|*EHAuO(8Jit+`q+gcp0pH^rJ7en^bktt#8v73}o@Zft_Yz&lOcm!q7r^Z5ynKya%ed z4^kE`rm+BCE!%GSkGaN@c~XngwfwkHe9ByCS}7TbFUrA(AKT{{FH34TDu+~Wbmx;Jr(8G3LjMRpk(>rhQo z#+E1cETH03s)dv&A*^DmQqzLp2h@jNBbe#AEhwmvED1L+=v8Wp{jc?)*}}0P+*o)n z{tYfMJGp|0;Qaf`DKE*gnZajEpMG{kvEr-AL*R~z-x63k=7Fw%4XIJ)&o8EiT*gaxy1v_mv4^LPdbI=3g zLLN{06fpK>sT#v*ziCGubSYpLJ7VqE>XS|b8WtM+WUeS^&lZv*Bf2nD0t=lkRvTK^ z1<9K+s*_mO=u)L0%W#&Cw)WM#Qi;j2E?8Ad_Zsw=w3#{*iF4mknUiT+tw;u;4ar#d z%5#4H+YEEFgba+8pVad%3nx~orqF=+M)$@vcVJ|pH6SlIWMGF}&o4Bth6Wb>QanJC zN5QyDBc4sx*4b1HnDmIYPq(_^OfUJ<=n`CbKwaFjfZFvcx*Jpg+c%|m^Dp4`E${w4DXm@VeWHDd z-5R^)t*4&2;3$`YPx|Cu#)kNw65&^5A|z^s+Y#Lqq*iR_it)V*%P+9~fr7BSUB}F? 
zEF0e2aiKW*h4ct@cWgm^!Y340DKr+@omUQ?Vxq&gf!syl=b=5bixW_aVh@(QSXP%} z%%7>Ni0*qP6py#UzuMZhGL$}J943{YvIfmD3ifbi=&v0xk0RemRz|YC7*J_4%MCtp z!;So*GFR0Isc<+n{u$SB_Sdt>4D0SiWZts3#4{IClj^513c2x)J9H&E_YqLHId+-0M&DK+_g z^_I?Gn8Epz_8{Z#R63&T%~kwMncn&Fm_&>gHK#_czr)8Q=+8jUALLY&@qP99nHh`# zj6Y2U8nsLe8OgN~>IzZ0m$j{ZyMob_2s(P8+XOLKebGn`6GIcPjfw);{IE+I@_CUC zeMjNm7+6Iy8K_)w+<$}*vLbRP*`=99Eq_NfQERh`{3 zF1BAJy7!jN47qXtEy8YNBB{ydm$Ju*?=tv7`!bn*2!wQAO3n{6aXL+T3Vj!|T zyg0tl`4>@m9q&n|8IUgU>WX-&n`CL(+~VHoVD;$1paTM+ghYH{BG(qlLVCpEo22K! zgUr^Hg%_$?z#6BQLwf%aNu*bJ`a@v&EvGkXuXTc{2z4JlgNkQCt)E|_2^!7$nym}! z+5D|PPuy;A50ioJnl`UREkYsmprg?|4E)~x-P&-Lo!M@ECT$z-3x6e+Fl^kl#GW^z zCd3abis}=n{p>2egXy879VE=kLq8$m?XO`AMi=qqJh^E zxHr>co8~#gWOc7w2Uc%if=oLip$%n4BjzkEJh9w=M&DXhRH_3dSq1oY9RVSf7Aa3X zF#p=33axjVbhFR-6gd=#37}xtqiT7mZEtbIc^hS)7;M3i<+LB!vTnZrz@xd+gu|!) zA?@*TEOXYKML<5ayN`sJ`bVyOaQP^I2v2~t(kL?o4u&hPYQ2cp0DeG4nq6$CF%f4#VCJdyO{U1T@J*IAkc6-!MT>!xNc>v7AN_hF(m_Pxid3iM%AX)ND}|vP5;z84U#FosH_yuba0Ctk z8=+gZ>0tiC-9)xGwPw@672bx`Gr0PkqtUjUS*>4v^jxZ%vOmKwCD-sVd8p)B)=&c~ zVhQ6ndVD)I2!n`t8V(`mya-j71FX`x2$dJ+qdJ~=?d-j(*dNl8tHpicWJnj~ryMU+ z_n+NjD5e>60^G~^xu#%@myac{wCe7G5SbTp>GdaeuT^LC#+!TUQjc-1Q>!nXh|g1u z#j9>~kHi5ep{^5&-A(32;R6M3^6J3F{C^cvEKzr%f&D6a4sdF>wag}Vf+O}JosHwx zd+x(|E+PGNkG3VR?~U(n`$lt@%267lVOGcKoz9A;X~}9`mr$E7scF?c+i5XAo||3nd#FK_`4-wZZ%XYsoiaAPaYeWi<>Er;s4{F5*Z1cX zl%>;iG&$+p1wpj6jqw(ClV}r-$70zh&a8WDv>fmoPSD9 z<=F!q^NY^UXy+QxEA-evZmx`!#6T7eNLKUfLP@7^CBVWrhhRx zH;rS9BK~=W)!V1Z!7ojoK(I7%=;snm_R-OB7}}#N*<&DK5w5^knLsz-1vHF*rvt%L z(^U+W+M7!3_(YCx3^y}97*eh7^`_zZ8yp@({N=#DYwKlI3oA(m zOxKnp=~NZAWF|U$8734!s<@f8>*T{Y;Wz{`EHWO~~ zPjR+mwgqP5To-(VRINp$V#*1apB?rw4GYmT0m|CnGILM;QAXvoF2Qm z+jh+$3}x8zOerV^R)cf+XsPjvX$bQ2J{e ztFjdIWz2+!#eRywH=Aqn#?Bi&RvK`y5sbL@EeLAJ#pfK!N-ODkqdoGVbRQ5$p@?Y>={U zV)PH+gpLjBt}GWxEk;$r^Kje$Oa|xuDw~{sc1yTkhnStdvBi+z-K3Y8MnQvmg`1Sk z0-a$kuS~KkeD1<=*BO1)n+VO;4p@*y3x9s zk`)6;2`?xJ+Ie{zk~i^LR`PaRRl?n|HwzpbWl7v1Mk1WCgZElaxoh=MXA^csGT$sY zRNA|cY&deZ>ippopOx6s9|Xm1Nf|^`7*K}`runPGAV>JtZuOcENvck0K$Go 
zK{a?+9J;R*KNRQDh-B{X9v#Is-e2j^hdtGRWq%;goS4SDLsycIX{55sqd+Nma30lK zWkNZRgF_(&{0$wGp}0HJ2&=d*lza7#f+fIJal(E|yqU!)cExD-8$Ps&48GXFV_18S zC3%QnZ$AIF&o2Ia(D`G19`?Z1kNc?XPDVEkAQ3)@OVkg~D(J#rewMa42PS$c!YMh7bw?6SsVcGY>4B zn~0ZZ{_QVzGFcRnsL|DN&Gz9XR097#Q21kJ4P)CUCGB8i>2c@e5O3KyrCe!$IM5*un^cAp1R(){8f)iY zly9?YLO1znoaeW$;?By-*ElY#mRTUjY3rVVeTed=5};Y&jP6sg3p8|AS}LYxQGLUP&;**PB@P^I)K{jXLJT?qg`-&3 zZ8#~Wy>L7IMl)Th!MoI>S*c4)x>(84C+6Bsmq=8UW2{BpW(|ss7DHg5vcP?(SL=PM z0!`b8r2)IgL`&-xLa=Rk`PI9FXs|adW7|ALpnE2&H_h)}6#>q1?8NX<5{GlD? z_%p-RAA~AHnUv$qp-4$YFAQJ1Zt)&o)~UeMo=x%i6Z#yEFWQ;wkC*V^-D( zd$8C;ttybhlFJ=QmXRu6h)H+gsdH53-+y$4>;lpLk_qqEEOfvkrsj~UR*@ras60z) zA}}!B6GCTW#)W^SEwd?pBl#rkZ&QLVYu88`wbF2Nr{~joYLf8MR&9QXrPpg z@P8?!=FdY2FP8su{$~+yG+i_k1O2id~-A1p>PqAV0#GXXf%n$&-=|C>Po=VE70RtyD0hh%1AV`Gcok){Bw zTaE0hrtA3e*Icren{4oCnrzmZ)UDT=)XpoGo8q6EpFXb~PgOddffEZAJ#X19mIXFr z(cMQE`m!v{7m7lVp^q55;EHZe?zM=kkNeLYgzDmNn2*d`KW~avH;0jJ`b+xQeBbvDB zt6b?ypcChrAnaeiKq4G-v0V|jjlQ+;X5~nR+K;#`(&xbHZ%CXJk`saC!;xW)VpTG~SwZ`|ky`9;!vZ!agZ5wzKKjYn$tD z-hpplrDt0w*rfA~2coLxpT#7fHkkD+O&qliEYKw_?;=&X@uOdqpEiIRYs0%d&j;2R zum;z2-)+Suf<+;N)WabD3OvB=b$5M9w|$+F(OO^a$PQ0TK;{@37(g-F*VKcyq-c15 zA!!?_Q&1@LBm-AH0*&vkA6HY%Y!8fV!N2rsIS#+oO4}loU!&2al|vM#rn4#J|1tp6 z%{=LR39{z0Hpm0qf&j>LrNz-a%7zF%OcEWh?(y+#`c_Ta!b1TT+^>3;52oK831BA{RSw8=#?C*h2O;QjdqELR9Jt;%TEht}=BZxOV^=#}h^?;`2Zyn13T1equXZE&9& z4)kQpoiRte_Qh>c={%v?6j^CbG;W0FI<@bLg2Rg)Dwt%A5VS#vE;UkD|M`3CEG?qk zflWEigHXl?q3Gne&a*)u_Uroiw?iXL3lLvPY*%z@)Mm@cEV+lIjSBT#@oq@~ZOvO$%(%jbkYAJ}RI{#&F4mPb4#3I6wYd)MWuvYz`g?n$H7}(^f z6(5534Ik{J3jg>q!qy&Quf) zjkP_7*ZH@=xD1GPzc!~Ao!ZoTMuJ#;{v~z{21+8gc|}XZsIgCfxkFD&h_DNT&~xfQ zcye1YsoV1Ng5~B&Lrj6Q&?5*`9e~|bI@iyYx6bjsZ!yliu)Y5se3Ve~UHg~5L?oX8 zvp2Oes_E14*(>FCC0m0-IbIsOOCa0#pTB7$Lhi5az{-)CRaEoopL{B+Z;Nd-@4M2@ z#j=*>Xs5$nK*-}!89ay>a?3-|o65XDij9s{s>^h>%bWnod7Crq7^I4sbaqCirLJ zm%?x%LQIp7RiOl+K&kmY2)=>h_qJ8U=g5f8( zAe|>jzt7jFdv<4c-&!%`z5^X_#PbdCZvkVQo`o*IeHsh6!EDdWA5sh@?c-`YDE zc@B#oM$nHAMPhGClR40s|{(|Etr3Nx4(i#T|KSN?{5w7KP@01G!$h_xRkjf 
zO0l5|OT;`HS<99%8js`n74XRm#bQ!Zc|~rCyTXm%^EbET{%zErzsbOcR|@gz>Vz=m z-G8r?XN>9HkTYoZ_cKdLea4l37`U4*N@5os&XaTlZHRJf##Ci7YZ z)BPo0IzBkSAP1ZeZ<^^2%OOWuZa^U9-WGYc>8JoFZ!02Z>!6y6G$`?eYc%7BuJgg{ zCK%<+D;j~FJ`tgP8!CJ`kF}dNy>I5!iKYe!*8SS|8|v!n@r}f2Z%Cs^*Vgt}Mxi(p zmrs^7uyQE;YR34yhm?7=F5R$#kXR3uQwBrf(7b}Z74kCG6{bN6OgV==fpc0u=NJsH!_-s;l%>4?7E51tJK z&7ZviFgr^aZCMpk4$sOiMXwZuQW@eC5Go15V)NlF#MYtsjdPQ9o}{jmqr8&{8?i5?9`q^e8**${;4Q+Nczm;+wv1Z3@`qF0L^>r31cn_d>6 zfysRUQ^(};l^Ehq6e|l8wR$-rlNU8kGBUf92}DdqZ#+1wgu>DlTzA62*KI%yENM=J zbm35{UA%-V+7KzEB#S89PC7GUzf+*Se$CXTc9%n{*NfNsCO7xXpB|A?_Q@22R-=0d ztMJxPFD(0yfREdg436vZT@_RP2Q>#E6FbBUQ7?UO+I4mjCbIbRlYb0Wym|JgG1d}$ z^%sw|44s9JVokRhNrkFOWQ|RLXz+CX6!o)kkEv@?9NkKmygmFSbxeo)5G1&FE9lj>C=!x* zBA;zbgrd5g5uv;5*rZZ>5?nLj&KIn@LuLu}J^3kcIe6JF%W-C5fO{i^$BO1FXXAWv z#+`%Dp|Eh$J}I>ON5Mx}CJk>}V9KvmiY3tqH5B_u@=3b~?pm-3xm!nz>4R zkKd)OZf@H>alyacu~6&^;PF#z*P_g9Oa71+0)qA#!u+F*A~NO>k)`Rt;Sqh*KbXD82D!zR_TF39zEeB)s2d0#PkZ# z$@=?YrM?j>Tc!=W96u#CnUfrTBP-eml-G-## zb~#sY;isP7a;^|X-&BYt59x$ueh}?+FoS3X%H$!IU_#leVc_2Ajv)0m@^mL`0=_!F zqdAusQ!U}IC_-NzY~l+fHo|%JIq$p&paIzM>O9T^9Sxr9M>Pq6dp#Kxrk8q-)0sXQ zsW0R1M!2Bv??}zD<74E=ydYn?g2PlPYRmhmPv0Da_nZVok6JQx^xTv$Z~MicNHE`> zf+%rD-hFv8$wVsbEg8NJV%7<1J+6M=dFxNGhUcOkVICT0blsTvMEzMuQ@{K?j4UxE zPl2azRB6z31F>BI>^_FCvgR~Irl?ou!yda@SNU>gV0R}|8h(`vM5+Ww zRIG~ZWe)z*scxV7<-=daG{1k$AR2K7?Xmk^E9z*6BOyc$q280Gdto^ zzCQL^*eWA4x#idLdkuT{?|vO;?FJRZN{ANWiNCDX^pzOVtXaeJ|iWUw(|86>O zzpRIi{axtHV^DX(AmD6vAJmHY#wq3%vjF3+a)xU+?1I0v0ngc5ZIC4?7BRJH=-P6| z>y!-R$bCalYgC3?*!T@JRtQO(+8Hk)*!gz8O;+hy2zTwf@Gjh+zS8i#TDq1$1@VTX z(Rm+4D)({f~T_64Nr1XA>Nr zdNp%@R{`h6T&1=a35~(s-*NYu?oCJHR1vsYgW2?0Xv995+~d8W+leB=b&jmZ?z6=3 zpUno~otqU=`gm2VAw;f>AD?i>G|JAp6w887auG;wfFj}7w-xIo|81EI_GEf@$ypy? 
zwME2ydmUgri<>HxApMi++d&z;u`8Q~C}HQ;KmWp05U@4LDpC7F{juLaX(ajDS5Hg{ z>ylLA$$6K=8xzUM8AXU7ERh9R#vJ3bXy7RwJ~b~LYtJ@Y2Wdw*bCxLRgkBxYRbjj!;5$KGhuSo;dCm+Xq;c+@dgDtleVfYF-3mZUxE|&7Vc}Te&fB&7kVM5F?xesrr0OH z>@~RjGnmd=#!M!unMEL}D>7}h`Brc3y>Y6_fRv#8nrHZY1IDUHLbabAk+L+4ll@t| zFSSaqJ9Q2HR=aq^4i;f8Y@?)-HI<&0{?ZE_%Dn6y^pNc>h<{a19xhO7?0W!)bun_u zXa&#HvM`LbeLBFf1L#0HoK)ZjXy2+Y*wJ3oNo76aTl@1~NlpFQNXil&! z_v$YaFIa8_wS4#m;I%6Lb{O}@sx6&c<1CJ>u~R|n#GSbdnHN26~nj=%r92$ z)i!oTDBb6?QWnIbNKQ7{-^AHpLs+1a(Z9ORsWsf(K=Oo>&*J1kw$gxsw_=0O8g`4* ziy)>%)gjnX7o8=6Bt3tJ8fZf0tz%0--|Bx?Vz3@rt5 zR)FvxYfKY#QGAbmzbNYHon&$fB!GJenT{9?f)WF-|g-O&QA(Qat6i#2n8IJ*3O%$!F2PlUyejWEY} z`dow6OzbU|-g~a&r1NXm`(38TbGrY?!gW5;+E^#AIBjHmn)I(Iq72mc<6NUfJ6)lc zw@QvemhCkCIZ5TVf0EDsIw00g@P(m=*vyYD z;_GMgWBL^ov-|Ee*Jzgk@>Xm=Q?3m&%Y+o!)X0iTA0X4sF^T9mZlSwL7O~?m_ORSB zZk0 zpDvcar?NdCXzgh%-U<0g*lZOwa{YFd{mDXDsA^e;1eqd#uh+Vd>?O1NqQaS(cI2># z`u}M9rs&MNrrX%I(XnkC9ox3;m9ox2TCnxXt&VN^PSFJt9UN>v)QM2YO zWHMuUdrv(>Wn4v*|;3^DZ5BLGWw|hpWP>Sf3;^(IN8H z9#IEbNQcJ<=GE;ls@K;IWKy-M<) zu!PpvzUR12;E!U0l47kZP%)w$(N>l^zbBOLcQ*`Wp3(5Qw1zCJX(PFG4q=1!K6XX! 
zl$MFNuT=oSZlm;0K^g@FJtQrK$Y3xm=$gbW0Qk;b-?<0uz4DW zVas|OG5uhT`KtUd1sFZ+n~l@)`5!AI^8m`cI{%PVHZHDRVNv^%^Y(SLX*6W&yCH3* zgbay_6)ksew;r2*69v=R(@`ff)`Jx;L^Ck&MoEAp{BM#_DUP{AjS_U__5E>sJrQ;J z1#(thzuDD2va#g%Lv4MwCE|^msHe`;fID{idq$KCBh#veTD74s*u!7DS^8)~LX4`F z1?pT5Fn2gX$@u}q$3_6#I0dzh?VfD5i>^yiru%gZ=Bee`?->~!%f|E%h)HYMU<`6A zJR*SLh~`dP>zM4Bt5hSPtBg$(3dG@I!YYi=2+M(T)gF4bD>^R6q* zOszOFc%mFCc%`RJp`7D_a1$wZROn1C#SMm$CoQL zh<;)U`16g#I@VrBC$NLWIhBco9YGmf`SIyW;I~7pVE@?i~QJJ<`6tsTUjAMm2L}bfvlXW zetd*X6&92}SQfLANnpTl+`!JXt9!U>vmVL)d>D#>i#X;H$= z?UhsNg@~pgg(i&QY>R{tpcn9#wn_Y#KU=#Hp*x$X`m3rJvpNAuu z{m7JRS}yW=g^1~&JowF~S=|14vp#Vvlw$g7Cx9(^|5Vx@kFF&fg$c~DbV=wPjA>z~ouO5Z(alc5)j?recoi|)(npB{nu zpkY~W0q5sP4nwh06DAStD>LF}=E4Qi7*veMqf$|>ZxFCvAYOd^*O8Ds#dUqDCy&E{ z!rq8%!BlcawIksDMg9(5hz@`i9vVA?rd3+R z?ml*gIVdIQZQL)H6y!6fos$++QQhR9jZ?gj&K5w?A~QQ8S9cju>lgK)-f*-B_gX4I z+PpN`jCev`MIIhep$0r_7@~>zGSfEdG-w=gm)J+J3OSq&+K>R24`i|#r2+td-bPg` zFs?uvW{y-}t6~dD5v*93D^{%HLV}*VH)PYMO5mfZ#9~e0>OE~Lox$#psR>v-(u~ae zW#A+=YlJSa@{rv?b}3k97xXe1ujX+lNB;Tp-=^Dv$4I>y)C0=QA%nBk7;2}D2WtKz zFz64yQC3~cKvU5shoybftHvo#6ec-s7}h3N?$3)J=d*T42+d1`WJO8ucY>fCk7t_V zch1WkGq8ef3XZZ??p7%mmlb3mTNO8cpNy#g%Kw&UwCw?+4O&By!NYcsnS^>5GYAyo z9M%`xO-SO7905eaaa?300x9Dg~|b8@4-VcLm(o_L1*R&zNDKBb`R#9gR|w3x_F zWa|NnhI*Zr=rqe29%g;A#vW0QjCJ@EFom5W5!rxw@$^gb8qBfSs>iGEslQuA00Bi4 z8Tnc^EDi7h-q!hgTCsV&UpRwr@o{Di6*+^F27o|c^ zbPb zAf^_1Mqy8e+KAZ3{Rqp{pQ+iSIC%RxLIA3Xur*MyzouDEU;oTEMM^OGGSgtm`szZc z-JYQfeshH-CuXpN&IWLM1&GRRg(}mFw`_t<=CzEABxd-IPGDgRM5;nb4qnFCK%gY) zNAwgFeF?FcSW8O&T}B3#j1+McTX11TYR9xq$Bz-=1&LCD;7IDrUhsJcIiBW45d~}% z1;=3-x>Mz6O#PUUci8fWiD1PH5!9*pkBux}N4AWY1&BaX0Leiku)q2|0voMkd1a1S-|4jP$ z{U*+?SbWt0u2^;FZ;4N_C0VBQn*^lEMy?6R4Qh8*z=h0rEf+TjsBwsCy84#B!WYLu z+5yYyS|5I5w_g`l4%hc}ruxFp0q=`T?7J}8zsA)CtY-FgqvW(xu2BNBJ+^9EOLge- z2E&=WXu3yy=Fz?}XCs~Q$7Ec(KLwF3dQs`ROp513!c_!>yJy@PW+x4jA^>bzj_}k! 
zD$hJ)h_-OQLdy}njv|dSJ$a}4Wi@s~OF~0l{FkinS1WhGeDRros}X*zTq|TG>5ue@ zvjH=?3$;Wy=~!Im;pBwh`o6|Z6KUS`trz;gV`*lE0tr`51_ab$;BAeENR%P*uI*Y#me)Vwo=0gK1vpCAF!ROg^0=3# zh`Q`OrR8Z8EZp6i<)S8mfJv-j!Y2tRm)9&(g zJ6{n-=tg46#jfTSu@G_2gpkog*J|GDY-xj|{HQR#uvozT&mKMDi+KKy9=bzbyKuWU z%$`DI7@t1Z8ABz_`OW^yV+rI}me~C>aNOa!egG*nZ`wvcEC)#YlkVfL(MrP)CMwkz zbbP*z5x+t2qHsi71MU!--&;bs#&wZOiSu$y`M$cWRq+Cox_W5gm=q=I;Ra&s2MV5q zL19eXd#22+g6aEO=YFE{@HL5%@J77~&7W*gj;5myO$_Z=Ig4|d11bT$T8(wkvk?}s z)>dobwbiEchXcScd>LT=Mlz!4u#g(xU2`|k?eeQ=ie;Wuhg<32)$Z3?$ZU7+8@gy? z$``rVW+)GB?GM zvh(udbp11epSy-z-Q?mAZfgV4V z2vljI`9cizwS)WJ6H-i-!4(0mJwAP+JQk>Y!jz4i8QfK0tC@JjqH9XGC6mKZrOP

    }Ln%>~gIML)ACGn(F2nw?9R>&Q*9c??7owgl}1#jTu%Bh6cd@}FEKE9f6#;)bC zsV|#*HUyaLyHP68$?+C>*H3D{UAO1!0p&HwS0tXe{Ta}X>rGM5i>v+23TIND@D&e) zs(7n)VDx{k*IcotL~<#l4V9Un$`SW)OaTq7EZLF#hwXgUaVq;GhVP3=<$GY)&1vZz zQ0c?-b78nHu^HriCu~b{DD2G}bnArEvsr+X*8+gJ+kIn&^K45I6KLvV4mAr8!V7gh z)=Y!xLa7j^Ls0V(t+TmX8w_A=QLL=W>Kl;ArWFM(5cjS-CEq1;*U!5-G^=~x%{h6I zOb7AwyF%Gi{EU<|h$5-OdT?)NjnK}$-~aCX4mcG}5~Ync8IQk}&rR~41csGi|8fhf z$pVbOZbG==3;h_-CM^-5z=vZ};>ofR4rV+2wJ)Tg53e@P;RK{}2{$#gZY+!$6Py9`W#STGls!d@S#&W;G4|D^HrA8o-@Y%u~ehp z#S!g%M3FHF=Hs%A1UsseHV(oBW8oVN7}{-kx-rXPx(VaCCTtCdzr=>kA<2}2tpTtE zj4WWJ=oSK-uqyrL8W|oGRk5IN`#g88!qLmz3+D{|gTH+i5-T;w&Rh0c$AABe309!2 z>{me7q#=H5R+3X-vc1wft-=c2dd}8Y%Bql@;DMGEx}lj+7{71ho0Y{8--PR$)F?j& zjvJ+i@M`@ri^TA=tL4Hn=vH;yH_ zDKw>G)rt*;8_0htHU`9R-C(*>(-=Ssk~W0)RKLCn-WpS)b7h9FR*lWTlcwe2K}FvL z!4)rb7+%~utiua2`21Fgz@Qw(DBM6fT$lWo1QEQ&eny%PA~!$JifQzqm8d@}ALp4z z6(xDN4F(pIOaPAfrKA#lbO3O;)GAQ0H~W-Z7vdzU*5+V6oc*hF`Go0tSRdv^tt;;i zdlqoD)xUg?!I&p$cF(26gwQKnc(-#Jho?1!9RIBNKCHJl4i-aE3D|jw;43P^lVtAc zVyh;&ziL<0^a(ZsZEo|JQ>#W4SJ@3R@+yz?5kO?G{Ufmz<4$14^a=Q(Ft3Ff^Ng9# zAz9oKw_ZK^)=jb8t8rVblxUkE6z5e^vHeV>Ob!R=mhQolT^WBi-rrg38w~zFTRmkW zNO$w7`glCr_7mUl`c!3W_`0dL;fo~7tCi?Q91YVIM_e~-4F2q5^p!f_LXhR0rTnES z%>j5W5qPsw4d_LfHvj%*A)K*$1OQ9+P367fb%0p--@l9zZAQVS2~u$0g-6PhIRVU}goFq9N4j7A5%Fhw z{$|mUKo4ze>ixFIgA;{aXB>049(_iF#cqJf$ymfi_Awa8G0s*-YGezLJhs60w~<{^ zcaWMB(~3qfvK(P^lH>5dNimleRz+mr*F#3y=7#)TxEC+OJ7+WD+Zt%I=wAwz5tdQ4 z$2PNq#|+~9&<1!c!!h3~>f8as?GeMMdE3uwX*tE7Fy7vOw~w6MtR#GuBwm zKK0U_8)Jp*4xU=L2;v^Eb^4HEX-B{fkdWL6gHn(^=7`U3pSWatG~naXHXszNIgOU!Bewih<2^HtT>>m=O%mwfbC|9xS1o*$w{C2WTVc$j{ zs>VhXi}dvo4bm;n>J;Ka$$u>Fi@6gDmcEDi1wRZJqTkW}OCjKUCTWUmPd0%a-jc%q zq`m7&Sz4?iVw>?vqfoCDKye$nmH%4bL`F#tMEe0A8r#iyEp3PUAiQf`Pe{Z?{EDk$ zdFWdW{Tsk$DzMfNP0)1M^ktmdU#jA(kxQ*>5tqPJ2eGc<5dTE4r~z0`d2?mV?{nWT~%gqZ@Kfh zi;eL0EL#95{FDoV_DDrfZ}ozbyQ5PzHBx{Lmj^(11s|V(Qw7o6CiIY>kv0)s!q#dh ztrxXS^rPaBjLCd%BBW8`j>mncm8RSzEtK5y8tk5={k|kP74w98J@{FUqG}5JkXlQ& zVx*||{jdRslGtkEu+;Iv!35WMX-_!rPqtwL?XNu1@O!A*mXP|bME<*kc 
zm2EdpoS5+;IMmf=@Y5%MPqLayhc9BOL>xeXmjm02m}zi*h~Bt#M8pP?oAYn_FR|>n zzptYUTMr#@CeUqv*B83y1@+6BN{N~_B=h(-jczV!|4f*cPtT%7{_aGSu3A8DF4@h1 zg7EmURiOUT<+-g)5jUVMkBJ)cLrACw^xm7?*BoVmnlj$)KhElGdk$r*uUl61p-5ty}VB3N~$4_FC&UhDo zUp*M$DCouEUzjQocp(&=8j9C?k4gblSk^i42s%R$Xxxf2wP%rTLnoL&aC~Vh@rd^b z%gQ#6z$!{!;%67q#|7-kLtv!iV`s1_H(#59>G(_j0ec=C8|=TZnHr>c#mxySfU_zc z(Zn!~(l?JT*RDz7m(61@I#)rlRco=9uo$tQqtXk~mMwW_5;|2eJ}+?)&msWknYKZf zIpZs9DG);Mlw-h@K(iYiLP5ntXjtXvxLncf7SK=ZowY$pC%^JfR}qXXe9C!khnDKl zzwxNI;9KaU^?JPtgwsNsdJWI2_!{I zcKETSMRSBoRb-aoYJ*ES5Z)hGG4_cIb#| z0WBo$N_xKiTbIo0xnmqWJfni|kl;7?o@SD+z9wDyF?d2l$UO0*HZ!wPjs*~+CfJ@y zB=2onQ^mRkGrO?omZRHGG7=pZZqKFki@DU@9NcTyOZ>1?Nvs?9nh`(u{}Xp8U{VfZ zi=TG-v$8fYK7~>2YmZe{S6SJeDtY zH6Pe0=8-2q5UN2NirbmIsWKTvnJu`Q*#{t) ze&2|}XJYa;X#t3VE$bq3`mXy^?rKrqCL5iCvISaR>JE1=0@JF_YMVzJD~#X7--91z z>1en-dq}>uJ|A+!^dclBXe@oZr?Jv%R%3N#X7;Puxu^i+F8)Sf7@)9b^7K*B_Fq*K zH#_$u%ea+`xMf~CbTy^IsOH;;;M+XUW}*JOJJ7I4z9h#J`ClZQBsen(#+WlRt-Wdm z=C^*~!jm~(d2=>Ov!^a&$3m{gQo-gM5eL{cse;I5P%~F+e}2;;YXUS^6u=oy2d|T? 
zsY?@_adH8xJ8pJYkYT5?`M1kxCo0AeFHkkJN}eRo2KKLPIU(egvY;MN9}&=)1DgfN zY|1i8<|)hGTs1u(BAW!PT-?^v5bnrfE89@Z=rOO|(3~$3K3ksbi?$ALWL+4u_aapI zMwEzHuZaSQQdxUhTHnvJ4C0D=`1>QINNzChP1XQKeQXie7)L$>B>_t2NuqO_o_{jVdE$iSQY4eBvW9;S%7UZ}`pe*vrICs=WWcc;TL zf0)H>0b3^$iC$l!2pgS1eVZRi({z(WC&;v!8tFcNC`730^CGe}#k#>|hCXoDty{>` z=OVy_2Pq3|nBy&i!i?i<&Z^SY`S66yedlQ(=tr?yk^D}LqBSRXZO7Dl{gytOaUSk z{=v%EO5YtLgqFl}gWfyUH{JQtq$zTj&V2oK;aP@n+C!CdKR2>%N@2yRaO>#ECQU1W zypH@HuqazRWt=Z1H>QF9L!Gg{xT|Z8l*65Z;-DGm5IP)IQGmCnF~>L-+sg_fLny?2 zWq%073E-JE~7)}JxXgCwLN4t}Oz#MuJu-7DdYD8wa-AxG>OQ1=;I0tL@9gMz%vm~w6F z4<$IN5m)G7lzf^23<}NKnX7jWh#ZliSS|k1Xb^W&^HelODhw2q@ zc;jo#L;B-+WNyPVpb!9Kozv5yHAq@aWpB#D7Z6G6Se#@L!_v~eDoe?L<9%{y0?18q zg+F+aCZjKmAjU2?1@iwsBIJ3eOvPA#k>W77mVh&X0s0iY+yT#Fh|_EYNN-^ymA zR<}ZG=}tII8oKgbG9R8lK`g0m<7rg+qlL;P-!pfx%Eu2RE3w|@)I^OJUa!VlF9Mo~iTfM&itAYq< zMm+XqMHJl>ps{ojh=>k1iDzzL#r#s?Y98xFfJ5lGTLoA5@*5G;S_wCIzKtPhdO|71tl`IG}$;8%GR{q z6KYG)jLLRSMqCaIBi6{JHplV$@Jr4-xxum`l(1oCL?;ty=ZuyVCy>yUN_Nm6!D>1l zjyN3hg8)L9%z`TqM20qOsvH)4Ht8}`q)--qPKlquh<|)7B1nhiO>hpp+;PTUS@_NCn0`Ls8kVAdKGTp*-BQG-W5Z zy6A2h5E>!fipc9HD}J1V8w%dw>>W`*P9i5Rn*@XsIEHpXx^|Bu$VLRE4;3h_Eic$O zhc-uA%&_+}O*?u>9CV(VY4@bjQT1+18bCbg!C+7Sg zr6D)WvJ5R_ph)CC4}q4v^MonHFIWAoQMVT=7kK8R?xy_Wexg-l{kR$15?LvQ zY{)GHY88tdPmGsdK~YYrgBcg)k$EmGqbxw91JbrL-T`cNEhOhsQSMqrb~grQ`lrvu z8-2C*0lrZ>k~O|^M5!3FL<*=-*9?d~syuDT=na=!Nv{k!v_@}G39^pJJQi(UL^kCmoYe(QSM&WgRt~mg9 zX%=DL(uh@AMkD?A+dEXQn^m?C=pQrdCBi-f23`Ur#X3 zbrqck2nt6lEy}!%B{L3@lbU(NO+1Tes9#2`f{HAvtNC$FB4Lvr!$TKC@bbS4voJN? 
zr`Q4SSXo1@1mMq=PEF0fl~>^)#P9)yOqPe1U0h8xj`t-%XevQ6qrdT3jf+KZUw2|N z`g1?Prd}Q8^p^L1NW|j917q6?Ap)D2BRj$K>g({tLe$88=$-)0ZYfwN zM9TigQO%AamCMik>{?~g6RBR^*_{eLqA;xSrjpqGL{6jvd%B*rB?G(~=7Q1Tt)vg^ z)?H`qP|N;Y)ppg`m2LG9(hGkVny&9PRDX1Pmpcs~qApDUe>+uxG|j>*>GtU;ZU=PK zhRzT9YcM3hm}S3TPwXalwon0JSjKRv9cu%!N@SKss*eNJAx!8fC$zQ?1;Ov15E;sB*HN(GrV24OKQ)_Bz?g4uw)yVb8%$ zR%5X6M5ceu+ca%E#pyKk*Mie(5JoqpSvANxfgU(*_nBNUa3^y=0O0`tZt9ZUt(j>P zB=cb0*UBM3?h?ldL%EMV{C_)q1sqhbYFE4#L1m4guw!)vT>E%Eyk2F|wIQ_PIo>TL zH(2R=LgYJN`*w-Vc=Lx@;r|{KXTt&Rm7{l^Q)+oW2GuFH-SJcFT46|5E! zj@}D#Ye}7m;-VM5P=5oEf^XbWB(3IL4E{T;s#?%LH|FRG&908W^&$Oq&AkeUa@d}~ zKg-~XB(|03tiKi@^4660quOHd55(drveUycQ5}_1V|OgxMqYh1DjiX>XA}kMiCw6; zG>6`QPo?zaKIsW@=;4>3hw@tS2+csU&NV%Mw^wzY69LUkhQ$+MumVz+8^ znK{Z)da87LaBe!M&3LWZyWrXY$@G%nCi^Gx5;Gw%!1TpGzKc; zroV(l8XJE$rxm9yOc#L?RYz?*(M@$-))m}Q;bVYzO00W0OZaQ zy+@PZXl)3dd3ec#>;pQcL+;LLgQ}j)#3OUMO@Ao@=p*#R`%C|{3Wqs;MZtMH{xw#i zffSD=ffE_GLzM_Fkd7;d7jm=WF_$tmzwUfPPwr}pBNzd z{qeYLB{~0PYx0}n7Xr9~{DZ`C4*sWK#D14}vFSP@EUMJvo!{4r?Yi*VM59-sSN<`HgvB8SAeJh$aoeJPDbG7+}zH8t~ zU#9IoL5GK1Np2bXR!DH@4Fsdwb1LVO?eP7jP1T7FUTq>>e8Rw9Tk-%Icn#}&58~sd zIi5J6=Af#y8}rV-SXOhBFfor8=)P`P+E7psUyz)LXzUG*M#%Kq~+?0`@4O*`@QK8-TKljWBJ23jjbwFkneq{ zud?x6c7q#LASI0KD?4RF4uQgT$&`h3?9KGYIXJw(B&M^~(hs|38OAF6EEeLJXZVK) zfWeJ^ZPz?D0>&TPlM*-ih_}mZ_Or-&WSfv0PTH%umVw+Lh_2U^exMWfkyOX4PN!}#vv^J1#*Wl0`Yp3qKv)tRVJtbk+Id^LcXB9wvc^-sId+j#Un~l0KIz0CR#JrTy$Yga;Cr!5{*?KcZ@V-+k;>GX<|14uz@fH zmDNpE?W_(FJwufHkqqFrpV%6|H!B@Y`fmfti&mj>Lqq|4Qsi zqn0{+XfZw=&E93H-v%mtt}wSo%C=g6!;~V(IHLv&ZEvr{7zxj=SseklLY+2BT=8Jr zwA$F+_MWi6u1j^&wF=x|z1$uF$~j3cdJB~b61>XVp2AqdS1Prks$>+u{>B_jk-l)+ zVFGR7opfY!@{3}K7f6h`9Ff;wQBmT|q?&e`pryNmH_+Js^t_eW%%YSIoDfPbvm~}` z6zAi=%Ddkz;Nf{{U=&);Y?n;Z#eNa1w@(i9;%F%s_#g&N?%`C5E-$T110kLQAcmmE~iAPSG7oXjbTkA z_-c^>r$${l(K$;NhQ~~vI%>djx7Dec_mF*?!G%arFYWiWPcpU8-;B+vDE1h~9vCCR z;S$=ikzC-d9Kzj|)V$n)jE>7pIZiUTOOWBY$qpBF=4P8PFpK3h7>tTYK9`uFfWS~} z#=<6oacNB8>0+(Wn5+e@{xD$F4qLkNuJAl?xBF_pK8p=2<`JHytErX3wM_FHud%UOLPAa#&mW)MiE6=WggWH?VW`Q)9&7mZL 
zh(OR~c$0=59H{6P!{|^I##|?rPmQ^X(grWdmg^tRAB$O4LiSkdRX$#KDQA3&^F$m1CC8Hvv^_`e(#Qa%lQq-uLs=# z9hqtEF_YKb4>j|P2*3WD`j{^p56lH}r+qgKhj{xeRzh?J7=ErY+nEItTvj?~mQwvA znXU0`P1fb;y%0ig1J=|aF;l=fARJ5h5d@j-XJ}cLz__0e zAPms_N&uCbd2`wMn|$D`a&b?7s6cVqqL+=tQq${SZE|20!HHD{U3QA=xCOzxCv*&h z>M63NQ$8gMh(InhNEN(z$@;7eQDMu(qFNwnGA}+UR6DExrb~k-ArRp26?AkQ%Qxwc zxKrY&30Os)&PrTkIduQmQ{&W59T(vT(PT#Im|3fqD@RxDeK3=ENrH1?>w{FV#l&yu z`{n)N@3(^5zf^Bwk~%uYWXSQ+kPDuF7p9IYk~J3zKvbjm%cnqG3t&&&KuJW&n!>LChCnJ!TWY)-ftQLcY|Nwy+NQIgS4y;=-~g?DEr_ z{f$Qam!~^`M;jEvEMb@zMe!l-oU=uo$@QD!x{0YBGM0us1&`H07YNg^Swr%&b;oDQ z;dtx}pqllsah48ubX#b*4=`cW%YujyD~|fPOiLRryNItCbjZJya7h>n5w0DjNkF7Q zu!jE6obCgLd35uzf1VyH%JMSZK?taT+Bk?rF~s9UYvD=~h~{q>b8?bm-W>~Lb%qhh z#g=P1?22ja&hcP?k5*FE1+v*2akND6Js!~=U}4_gY8V{;60q!iD@Fc@V>=+xG3@cD zj{3)2gqK=1N%mQ``{tU|ats2$1H;)^oP>hu@3n>OhwV!)4Fm2Ct+wrMDu^ZpR z*1P14W2m)}NLFgcFo4N(hCd3FO_g1k6gYa_u)L#0FXrEDl6 zOd0Em9z|@P2s!JKikO~a=&()QFUU}0ygqG?0oyRXJUW0NU7b^Kymn;ZKtN1CBvU4>ehwg$ z6%M1tL717YD;Bmm%R}8tI7s&1;PbziN~ri=^5jlf=2#hDEt7tLZo)bcxm$ByQqA=Z zJM9Rojci2sAI662MFKkk_gTX=%CKTV1{N9nSYSmr>KHBYg}S-%uPjmqH1~$yP_Sy` z$y)Qc1ZdQp_nO&UXhVW@a@N>`+yd?sfeaqRqdVl?hn3dEg`iNP_?CN!utv~F({;2U zJ`561yn*laE?B7?DMcxcdpooUK2(ev);JoFt>lgcc-lnua6qjilw6mJTAM%2ra(`T zD>y4F&-O2ZQ%>I?+9$REZs}M>FOES3V%p-Rn1i{AWW&MShfbh~Rl}Eh)_Udek1Up8VsGa_v(c8QlZpdh!YKfx*o-Vfm%4q;l-LF&oU7`cm8uTx&jOPx7U3G^&{c@Q8E~6^QOn6NpzqXqkE?2BxoEifhvvM7a zIvfu5uldR>_?hYX)O)oq#_TkqluOFdoKSFKJCA}sxU@h1O!7P%5bg1-neiV!`qA{! 
zQ8E0==6Wa$bHc3ztgGjod$680OoGh*dOa^wZP|IzN4QvwS&IEe>63Nsa`=9#g-CC( z{p}}h%y2bdCME%+eGU(}e@rkW!D5QW31}N+cy#^3cs26?6_936>_V!`AP3_+eAKZ} z*lOv_axG5MI}Fhh7#V`th@pZqKC;X3B&h5-=h7Q%Mq_%glOmGDk+ULG;P`_4$mVCiGmv4 zq%x*<-HtZB9l^%z=h zCNNw4B5d{!aybI0+nqU!gySF{)^KxppG&cGNh&mNv3%6m$p0Gi!HD$_ztTV&nOeqnI87P9EM%hJUqLUx!2s{>p>HXrH$jHf2!gcCyaunGySFEV$T^vlMNLsBC%ykw zGNvdy;Jy3WKEp*mi-W&+*9xvkY0t;3bzCEBl1J5T>S{tGLuTr%kd<*5MQw95^X>Wv z5LOJ`i@)ufq|wN|s8NZS)Av>`K*lA~9O*pRL<2RmA6%3f4hdWxsZ)(Qk<930S;%lPel(>pNAeb6#ZRz`k+#_jqB=`8 z&lV_*R15o2+{r6VFJ2jVo^7R7O=H>X2W87)tP%Sj_x9h^Nn? zmx5k$#Hj`%H9ZChKEGH#H}G0zlaQeNFyW4#V|S~g(u^H8Z%<1;9u5nsbdSXzFfgo> zPdBHB^;we?1o6lI$Xop8pSu7hPaY;Or6+yo%}Ahsa7$#1xOm_EwepAcvf!BUTzQI3 zwfuUQct;?`Wsw*~q)1G44z8qcH}C+?Jf<4V%^&)H5xsn^zOMe(7NFd2VE8JI=S`+IBtAQQ|vB{uLvc9 zN-^V|ES=qW{KI}b(OC^dnL%e?dr+h<48oZ4U&BIep^is=*p?_xWZoD&*HTX9q^LZq6DMhCZ(L{{ZwV|q4Ax(Wmc`xQtDe3579PGb=&{lGlH3}URFUbHbTayLhf>@ z^s~k#lU)7c(5fuQM$KIIj7%#KmOFDTuhwJUr48D7STN;#IdIe>{xDBp9z7$(no%Ga ztB6Q<==+L(x4Wl{du`kc0Zf?A=&7s>FmmAs&iUMtr^?{Pj0JR>ha9b?1Sa4*5{{W7 z&nFmmhh82D+WLH3ps`0f85LXKbGB)ed5-mv;u9pZ;|wr>rFPgv@&B1O7rP5(8Zh$& z&4~i{1?1yNfO?qQRiIj0orCQuwuSX1D;07 z%e2E2`o^)X9B*!!X%tBFL4dfCl2>_XMbWb)9np#=9c54Qm>aT91fuYi_z3!$|gGySDMU31f@ydkt3-8#jS)dw@+Z@KoL^CX54 zd5i@iFtxB&lz&N90P0KRgel8vizw&rU+;WbbU>Zdhdy-x%9_9GUP|?pCnHsu`n@Ta zvQSjqU+O9Qfd*~6kduBAixWx^Th5^_(RK@EMaj$tT6i`Fa_eUjcwxpwv|Fggvk=L= z{r)8IN-s3k5tTggPQoqknRaCYr@9+rG<*17%993E(I9sYp<)|n(mtfYB9IxfypJ){q$?{B% zgRZJ&Rr3t#`LylS61iZqTcNfiW>w?o=mJO%y*E*d2Pc|;pC%BP1ry`yJrxT52{+&G zqt)-Td^~cFuhy_N&4?JUvPg= zBSkm6mQA|m(tl1l(zXhGZeH(Piy>2KH?SHe5ukurgw%)lL+yFCK@9`xYuVEbdD_9u zgrG_OHf&M8!|>SO_yQ)8FZAnU^7%aI1@1+HcT-D6Su_s)w`MhI=TA!>M5;sxEuul1On{iIqDZ+$Rl*-6%}^CIP3KIQ-l?q#)SKUY65zLYx~@+BDt%Hf9ho|pRw z^{yTq0jI1DeixDPSWh^v_{&FOu-4hemXp@(1IY4^*{jN-gMOP$LdB0F^8&I$dtp$# z$kg#YAeiDSgzdoqs9gab2Kua;xfb8t_Ma7)ePLdf)#|5WZPs_|Rh9N3CJhurbo{0R&7iC5dz%f__=r z2hT?M5K+$ZFx|<7u_CF!j|1XG& zkq1G948r+8A=MU{dI*F<5Khh(9W_Wkkkp4<2-KEJ4ajGZ#CQbM)G7_g#uh6bNMVpv 
zj(P}`mTo=BCy-RfJ7Cn*4-?3S7B3@6dyrIZHAvK!AN&h6HA4duH5I}JvaRLJ0umB5 zHBTE7z2yh@f~J=1L87PjhC|A>P&z;=f~M9R{vTd<{tq9U{2z{S{}1C?{0}#^lzBs* zgQVVD|GfHVeRGR00P+kZb;KT0s3j}_(g`%x+ZoazwKp2lvSlFTzZAXu|FBhydj#Yu zNb0T^R!%Ih+7BA1D2Xx0Ew1b*#TMKg4YbG4U#HZ^7FA(|6%?%NG*`mu=1aM z`@!&_z)Yz^q`>4zT&%4B=iyAm#>LE*dfN?24bYUw9dGtKR)5&pY3-nM02XUCqZ>9H zg=o$A1p%$DEeXpZ)Go|znzxsiPog4B+_kI_9y~|zMyY5)JfWsgKeN?OQUp_Q5>Fp> z3t*H^@81ana@O=8NDd@pQ}fvs{X?emM|v z0fK9Aw|xE1_n*0$i@E7qXZ7!S_S(C;&gnX}#qN`?WZVFu;Xqli2&_Lcd<%3*TAz0Z zt}%$dGZe&_j|ck$j;2VX$1MoCxDm2L_qW^D=CF?(J4lF>$?(tsTx(JfM{_R{Zei%> zA2TS_yT;#|VTODO@)P=@#N(ym;JFD*en3A9{$Ts)1DhX=0Mi&OOht!$fXdb<9mO4O z%UmUvc;D3>%RDzd~-Ms878_l?~eN;Mc4B@UN44}J-K zs1+ueJXpVtnpqJUz>%OguE2ypB5Ie}CK`jWMbUso9Vshh6{X+^@kJ%h7oZ~#5fi1B zBqKVLMvBks`>qy z5OXfAEQy*rbxg>jJws6#`FhnE(!Y2Np9kRDLr~_B(C94+0C6F)$(<16Uir32xIeOS z+;tqq@kHKGO-OyS#^G#)jW(F^b69q~4O%)9T8-fuv%H(eliNrJ{gP)9b~|orEwIC%o80T%>*f8!Y89hX2Y7AlH&C8St(s4ZgBo%?2D6Wi!EaB^==$}~ zWS^gdIsFLpfyr53t3FU8T*9+8VZ1zj-6CU|0Cm3Cw^W@;3Ac8CLuM0ZsM^1cFb{=T zP=+jyt?qpiUNnMMVe39)ukOy`f zWA)_rumfK{73d}}6?iu;M*Ak`SMcF{9uUdGb{@rq2WV!4g+huox51vWAD13SCFCafO%|Fm5-3u9-{ z@_BH!rdC~kJ06*2>2s$}Yz~=$tg8dV&8A{34YR9IZvEHMn<6mxs(Jx3cO1z9GL@_| zW^QJahkWE|NVq-7K1QxwXG5>L#eS5Z^@4mI8klM`Sk*K!l@-c{Jw4=rgHvT;k7!91 zjFdkYs|uMdub7D3Tl7cUOFHX9KiO31qdr02L~4*kqm4%>xKlm$CXuZnN*5_gjUO_> zcLvhQ_|0i3KGoTYipLC#o&8~hjxS|O$ip0hVGUzDFc1acRd2bqw3ERjWM(vFj;SfP z0C!ZRLW=*m{I}!+JYy~Y_lQzF5~~q*r^T!MGyU< zmL~<7tyI8L6wO*QK<{IYn8eU5G~kq>0pj(jFnR^}k(w~Uo17|`q5%U|>8Qr0DnVGP z=Pbq;srFP#d6H~srhxy6ZTqe(J+8zO{P8siL1cpKhZa3re$nhEK|l1DBogjVbC=Xz9JXIoSPa$l(SuNfuvhjj1j8j?mWf2X$52i*k(geqza`2DkG>J{CC;13?Ng zc<_nRs!i5g43J5Ydx;x-DXUpNqj+_%5lZ#> zbz=A{v1GLx2ZdSE*^T3(o0}}ugr@Cq*IODeL)-sYUw^>|JfxKa8GiNB;Urn2a-*t! zrnJ2!G%HGT`@aOlU{}I@P;#0ofn+p6Ih4Eo^R@xp!VyBO%_cqD=HzuztpiuXFrlcn zXX?Rt`!eXH(f8gJlb1}&++ z(B;bFb>Gr+c8X}xtl0Z-a3pm~H26`t^ahf-S?qk?BMIdfP~hP~uT?elO|osYO^j>$ z! 
zfEz~@%o^SeY~__24+}92g^IgGF6xQi5M|^289r1IV~4m9U#Tyh0FlMtxz3QKBpBK- zc5*6nSY{eCp2(@E`>AUa)2uHNxlB)s(W^wR@#WqN<~eV}zbA1CCrW-O(dymhLwUGb zE0OHi_6UUvAu$^&s=g(;xor2 zEuzs0x=t)jLw~c-TeIgR4Npe3jz8_5T^qm#e6{%*n=U`_p$>^2JN(ZJNMWU`%1;8P zFPw7DRygJNuz;LZ@~sBnwRcXOpq0k!`r1jR7(#6#em) z-YJlQw=eSHbs|j7DX}W7bN67|&phB%3NDK7TK^2Kn8Y)RFQWY4zcGq27xj2JPVa{$ z1i9#bd>+>$rVLi(boQgfGIud%=Qy3D9)ivt3_dAmC+e@uJ}uWEqAW)EsHa$o`(0rk z8D`nx((YgBtcLVj^da^6k(h392vc;9=Sh4ou&o@0b3%RKI39TWQ~Sy%sBnX*qqK;U zMHou0wX4t|B13({=O?VvY;4>I13kQZ0e9yt>pV4I7zO@KY2~t^YQ=y=1mb7`5HB?{ zRjcyweQHR+WjSszi9}W2JdvDbEMyu%G?(jaroyC2ygAHCxi58DT(WWEi&@n1-d*vN7Wg*0iJMmc^qY+7e za=-(nDScF7Y!7lsMMlc?Bf>|7x3aO#wjl?_SB|*Ds`V0kvRX5e3x;4-frGy#*Y^Gm ziVx|->kE_@lhmXgpT6S~0ESHnl=St906E!CQ`|T)bnbW5ZaO|1Yn$W2Pq}6hRR`4+ zNp833BSne1|F9ST<;vVuC|;WX|TBJyjCaFj^4oVo;k>trB(K z;kaXigMQUe`z7xY_#6*8r7ux?R6%gvrIFER()v>0X{LwC?Mm@ z8vOjT(F^%@0`(`qnDLYce^B%HYiNn9jl{5-jNp5-@w+QZW~Rz?7cp9ug`Dv*oy`~3 zD(#G)R~Cj+Z8Hchj8dMm9o&M-pz=Ze7NX3%Lp8?le{n{yT6MKlp1c>-YNYQpV7jkw zmb%}YG;>}%iShO~_blJdc?EJ~9f8B7W4w)SB7^>kQL#NNf)2e{9)=NT#CyF?1^;9`;v09o?H=zX6tlul%(w ziY^mBkI$Po_s>07x2Ly>NY8(;5VkbEur^KIte?alj1Fl%+=cI3w})+Ohdw>b4%QKE zOhly3_dc3(br53&rnZD0UzQYDx!e@HByCNm-EGHN5HICDhxF`kq#YRk^~~AOz}C{3 zKCh`oyqVpjNqE_+dh7aV%?wzpNACPx>i*Z2%Kf*CcipD}amn3%#zILi7_uk_3hk?@ zjba_obTx8w1z1SVKYe|k(uN`Wx?;Xyind4JhcC~SpJ*at{(8Fi@87&mVr7}6=xa}n zDe1IJcM!=$&DhhTvoKxvhw0$@cB0=juHJ7ax2wI_si7zFp;dc%zny@@(1vW+R^r7r zstDoIX78lyz4qokST5~ zZNKu`bR+dLzOB!_%ZQF&bHFCQ-jnJ-R=9KU%92y_UL zK7jkt)rY5VX$I!+$(wWZMbGvwscrDvD!HP4yQ;6Oc|y9o8g?6oof3vIvVFcXk$hRB zrFP&>WrFE@fb{?<`NfDsuHCOgH+r03>_1D7Pp;+M0abwFp;oQyX4NDJHd`Rr(GIfc zvuCR4bhS0Ve)96<>gqhYP$c#NU0D*RK)~JR)e$C86AET1vA%{l_Glrsx}Lf{{#CaP zW8|*WKY{!?6QKlv>n1e_y_!Pc!QVGVSQ5y1Lsq?nSA(0<(dHixI36$;Y?Je0hZ(E8 zF0nAWY64#Q+cHzrqJKP9bUyp|ad+xU;*RxThB%(rbVGR*D*WI`%G?%OMY32=|M98` z`7vc}hr==?JzegMr!zKu{GKSjXzwj#DsR>AROdW4MJ|$y9nS5M}wBi(sD+>89;gSuyHIyT2pg|CkC?|cf5#2XDhhoI!{$z_Yb*@RY0Mv^X>cM{H{8l&Jj*}<7iv;@N|GV`;@|> 
z6~5Vv#r=io_e18qca*vxe8&35$UMl*GJwcuohYa_`9Q+p-Hz<8`F>ia?kU*{NrOBY zx`N9#*wrnj?v2J%UHNjBTr}yCMB9m%n7o^-sfN&bO4DD2Ldo$_KK`FD!zRH0x_NSb zyZ>?@^1T#!B-9iWzUh~@?nd7;w==e=+_|P8ST64RsMXE;ba{5sZ}kKYGv5CEJ+@5r3n&>h)TYrRmN#Twy(WaO zi|cq%nIee0dLc1{Ue%T#Vl^K2C#=|U(!Pdn`&i+sYC*G$SER8jJ3hpNA~XH_;WcAvUF%!vXf&zP%{G)GZi(bPH5#_==A}oK|^)A zuYT_*quc7KCbS?rr59D1?lP!V>0e*4V*<)fO?-ZOjOZO&Q`U+2KO7uyqdU91PP6|= zEZ~oTX8P-fPWc|Lo_;eM`L(o4IR2Qw-rz;`z?6fepUTjR^IL@->5Oq-T`&EIZcf`h z%u|v%K{_ua#*oAL%$reoCZrR9B;budYlr=?>aoZl*JLPN5LZuoM|UQpK5Iaiy=B21 zPgU!?S-ue#oE_~bpIl^{l$LDO2n(n3Gr3ZNs;pp@lA0`alYC$i%aU91oYYDvm#cc{ z8x)OxL-U%AiG`O`BiQ!)kzH*(i*~2vvmN}5MTya;KSwyy>F2!S6u-h_``EgmG8bN1kEo7?`Pso&Tj_2Q zNkbKy3rA6HeCtwT`hfsAZV~)hsu3RKz6F-T5oY?tdZjPFvkDGqaWZfQX{6{<;HBCk zzuA4n89adF7B-*B8y=wngjklrN#G!@8A_hg(x;~&8nJe{_;U?#qQm!v0~~In9v)=d zPff?k@lNxFegq!7xTc|Gl^1s$3G43G*ybSGvMa|Lx#>5f3r<)2OrrJBtflBu8mTLM zP_hqLQB)(?D!6-ev0(15?q-QF4k4vx-+yg2AOT5z2scP2$1m247$F(i3UrvE`iUEJ8Bc|6fk5s1p<#g3eVyn`&+1Nz zVHZJT%%kk=Bh?!1&Ig9B&RltLe%|G&>MEskV_Z$9Mst?TQwEdffuJYE{aey~f>V-w zhj5BswS+|+0WdGzV6V1%O(HmT6mOvuF-DSK8ql~>#NkM$(1o5NB(Cz}T`gu|?GuM$ zsPVmtYm@S}F|Hv)?2u@lJJEav9;cDJGTH~m1UocKl+@z#=3e8?Ia?gsYHF=w+Tt}b z3rL`gp01i`q~47_ZBK@fZPARp3K0egYWD4i^3GhYY5ocv?Ue zksQ{}Tr#cg$;~8!x-&*r?cV>UznL za%pMC23&khP0&pSF^GTly2j5GcgW7bd&x zUOOiwpj6A)`S?(Ssxk2~?UmJ9;!fM~a}?hJ>1|oo zb05o*Zxe0%!m5O^c7I`R`XB!4pW(+Ob|!2XUhcHICBz$ec3w6f_O!BPM3Vmr$CnWy zv&}mY(b5#DpfNx3aGY^ z1y*!eFhv`;Qpm5YM55o5#wNzgn>Ag~&wt0ei@*ut{(J6zD-(iLG6jKgbP4W>UYV>>oKW0c~?5t!`su7k&krbDaLjCuxA(EKPuCM zB-N&&x1S4e0%^*v+LvSP$OWv7WKQyVMs{u(TngfewC=qHKlb9j!`El?;C<%OH_I$@@LkMe?I=!TJ znAP_m)&GIly>mya_|ZC=`scTQ)UZ4S8*14U?aV%aHagB*s(3IJo5@V1i^8C2V!Fpp z!YAiu9c3m(p&JGY2R_~$3eES<@8+L4ZVLAvmCX4uyK79-@rvtk!$-dEXbDCZH>Rjk z`!FJVt8aU$d$V}f1i@LnTSXrB`J>xS=N;?>|3v&O3!~N7T|simSV6?utun^MeEG>K zgK3ByuqW*g#R+x7G49=Baw1}g?QZLlL}>;ij)Z*J=@RM;@N0b=284-Q)=4k6NOLh- zwgdxw{8#6Kb45s>2=7x@-#uI3#`|lh*Hxl;phY-#W<<&Z!rzIW-KwU$iO$d8ANWj3 zL(7*`5SD~r+>WQI1K)_u*9nqOtw1%vz12F9>+pWU_w&@u-4e_r`*Jp&FA{Nm=QbcL 
zZI!Bil>5f_aHv>2x$SjG7w}_>X}(hN^{9>%(N0AlXyUy)DZQB9FtnQc$0_9(QNwvz zGI{)xa~I&Ma(kBqw`Kfo0RdwHA?D3uSXiz|`)kdEUlP@YFO|D(02YzV-|B}AJ)>-3 z0KxAw?lMwfm|amI|GdawuCWKmbKU;^V7=4}czhXuJ50;0mg>JhczM|(+1%{yFMJ#YZa~#LQrr`bne%djlV|Xlk+ic zYu=l1EoG_^C8@=C*7GZK2Sa6!atl95&dp3#qb|w#)Fxz39Q#cf~7pJfqkGT_4=c1G}fEF z=jk>f#JD~WKVnml=H>Ply3au1*E}YO>^jIgl#^{bz0&YgfLM&BKRnH=UHKN!9LQvR zS!57YpB9@shxFwPjb3JuWTDU$<7DjMHZN_?l=$-O1b;jX%1?_1=ANl=2-fe*S+F4= zo;R_7pB(#R(?(lMth?J=^ddh@594}PvK&K1#`!J5yaGM@9^lfo34av?(bhl(a3;IagtDh)hOjQXr;b9MM*LCJ9MpiE zVtNgJ>=^<7Y?Mi?!Gf4m5J-Ut{n8Qup0JqnN+)W{&chA??_s7TcNQ0)` zC-;Bvk1|xX(TxE1c@|pOvQljGG~^}l<3z#QnOsuYy^m$(TJ>mB8OT=B(mChJwE2*j zAGI`lMXCwRDWLws+rZfr#;LzZGhrU~uCRHB|mx$S245}2Yd4j8Lhb?}J1JmZ$txXDJv3_aUhZ8w|)%92< zVqZ2gkHcC#<+CKqjt?DXGsqioQCzLu5!oLG9jANO*ujX^lZiI?O~Rv%%QF<fN~T{G{2n4Ond zMmqok(WL3Cn5_|%k&kt<+e-9Zv}!xDSPE<|P7U6s+RmG1G~Y8?CogCXT8EC=5*33! z_f0hmL<~A=G=>i#mEBPc8QVKVCfJ>dDt1>md^^@|3Kgv|OLJa{316}y2+aq5vi7H~ z?q*vfBPO*W$NYUuqHibKO~qnTM+jdX#s(mWos#?@tl&nc$z0=1x3}F8SzSo{iZL^O zzqRpJ&CT|!XbG3#D;8vDg0Ur3$REj*Cl{be&1mZ(&4KiHtbX-gE<)Af`{SHmz6hU| z-D`q&ydBe#<0@n$;D_%>Lx$ygirI_hAlw- z%w=3XRVrs^B20o^O+(5PSXl`%P(xf~pYUy-BbKP(o3qt?;1y$Rm7%q*&K^FpySsk8 z)-Si$Y?&I?UQ<}rlM?#Xq@}2mv&8TFvz+>N&xSd4c#+Iy)}+|2rWrrFT9stjiTj!> zkU5T)5XU~SXt83Q9Zk=qa(?WP2w=W26t9NLL8xlFh^$QMqO8%niUqY=%gE^WR^;A2 zz2c2;QZJyaCrDM=$QV^;W!u|Q3#=}M6SO3NztRWGXikbmDFc^2XzKkJfRjY6#32;=^WY=q{a@dJX2OoQQk(X}F89Jsro8v|vWIfI*Y}dwmHE?#+sa(u>EP$e(#`V%k~^Gh z#_E<+n0KzghxRS{g?}`Fe1OCk5ed5I|$Y+vb+1eCDv%m#J zm5urL2(A;_Pj?8kcZ|_${?wyi!p{TmHPU?xB)Z1Mg>dM)Jxv**XDTUANGc%O=~PBYvhWX+uOL&@q2ro$fMpBy&5uPSxIyvO(6Y$Df_5n##$t&V35!dk!ACQ-}y>;c@gsp!uiiY zxIze*>rUMISOwwmA&8MHGA&E*5x+CC)^aeQ;0ysnmObvEkRgFM#2E|XoE8sp{swWT zmSwpbp#BVz%|e``l$V1QZXulMjK`gK%zwcgM9=^c=s=o~9Hq`~r7DD|bRmLKNXc@L zk`+)OoZ5oN-L>3*!IHqF>H}eMAPH4S_r&EO2@6O>H#h@Lz9b1_ zL&7^w7f4n#2@-zb@_}T<)FB}?mw+NG@{Qw=zg*_uz5WFe!_ZK8T42 zq$O~BV_lh|p^=V8NzH#;OaEGf{ay1&B79eRnF1e!kbK=jf{_%#}I{8-Nx7Azc9`C z01?NY3nEhckFj)uB)R|RN|H=pkQ>Ay#>Fnl!Nblg0g_;2mz3h;5|a?)l#=8S6PFZ| 
z6ejzBBar44EgY@ft;smJ*m;wBvFHHm6+e3;58~mgt%pVxR{doU!{xhQo6C+}j;H4v zYt?_3&I6AfHMkVm-SWSwm#$)GSR4L(WUN*wvv zQ)l>qh>lxZu)@$%;?uEoiI1wI$Xl zs<>OueJG-DZ_YR+6Muz@Z`p{pmuth2XIO<0 zXie6BQ)}BD?mAN>ez{-@gsEYn8F_>=k7_IZRimVrwV&B>AhI?g2Hg$58Mk)PGC#i| zW!T4#pM+Y=NfC=`V?74?lT%=dr^*~a!@oUv=;#oivow1J-FQgURFQA_Dno7Ectoo6 z5a{fri5q;~T9g^BgW$7BFI1OMlnxwgHP_IRxle;YZdN&JGw8Qmg;BbtZBB;y#vfH@ zt1#Lp177&xg+!$qEM}a}&l^xuq5e*2^#!$&D$?noGxBn$vg-q=l@l9&iuAaXM~)!4 z&;t^kvLI8-TUNqg6Sq}(c*39|%Vf4fkJYj^#NlMMC40`9cKg2H{o^>QOZJR2G4>s8 zGQz5&x}1iS);iM2oLx~JlR}h5Tk%luIq$|h` zW(R=*%FxskrT}s^Cx$6*U{o6dtus|lY^ev!I2q6?cg&#k;gsBnSVXth7;~wY?MF{MxU0k+*8UqiLByAa z-%crxA(M3SL8qM|#CcdJ?S9`T}SNxoW3WW;rsDMIo+;z za^5k?ZU5laf{(??i}=uCH=Gmg@1;RG&scfRzZR;zM4^T5EJWFb!fy60-%c`Wp)!u> zvvQq4Y${^7Gm=|SO_q#X8mfqQS){BpJTOYPsKCThvkiX6kxfBq)UQQ*LuF{CTdvu8 z$t@F5r|SQXEkGOkk^1h<7T$t8JCA+x;vN1noZfY1Pc4&m|vSygBZChL#x4uxJe zd#LQK$km2_YHTR~>pqn&QYd=L4lQ6@xk@@szk;xCf^o*>chwkTSeL2JS@Kr5e71j@ zqTDC^2UKl0)RsT$ADW0&}VU>ah>w$*DlAPXE`#oG&6dN`S)xwh0zsMMU zW!`i{ggGI=gH}QV3{#VdG$H;8x{nHBYZK2FP*Tl4mmYSm`-g%#SL zOy`d5vxI-jv&(jDw1V7{5H@tDgI&{6 zaoI2!n04Rdk#zS%NAo#JMgP6}7dYdHB3s{q!v zb)8q0T+DTy<6H9!);=wFHnu-I&#;vyQ@zf2wyf)4w*|PH^xluBZEYJ~Ur3f09G~JG zXB%g(zjo-VN9;53b9w01Oxx+1wcRegFAc3jUxJ;kP|MLPel^Q>gv&86F6;@dc2C015h2`K@xHkMB4Ed} X!D)e)h$N^WHa2b)YHBGJX_WsDN3dbW diff --git a/doc/pub/week9/pdf/week9.pdf b/doc/pub/week9/pdf/week9.pdf index e7f2de84a2f0a4aecde91e6950a8c89ae32a8658..59fdc2a971261873271b687a14cd119fece163ca 100644 GIT binary patch delta 104274 zcmV)GK)%1Z&K|$s9>X!u-WKtG`@a-w0Kw0 z;ty9eUVnFU_0_kM7K{<66fbUW3nm0H+7?iCG=B~b82&2kndo5Ubf2~DX zbxpN@UB>aD?l#wNZod2KTOkWaoaP#t(PF_Y5!MAZf9sp=JNQ6|a_}7CW>vqYLT;O( zX}35c%l7twWN3$K9d7$-^Ko4Vy8COTXHvl1hih)j`1Y>Chx;(%B~xl98KZ(Y=86S3 zgfbT714%@nm9Egl8DWuD6RHYH7?XjFM}B&LMzf$qC{rvnQN{$aXb`EBX^8-%aw4?| zR4;PEF=7}8?na&$Oc2e~Ot7nOZmxc~Vqkk%*Me(BBsYa-k_h0w++4kSLyHx>{H_3D zoGl(a=>}y}U=doZi@#p|Irv5*7T1~xCJIdzp_HA_QV=SEj#v>D_!6lK@CD#8(<>)` zutPEAMFKm*oIWM+$SVFw 
zj0`x%e6r^`hy%r=>oRHmzEze3rof)ujL^v)$2}~&eh3d9FX2v-c?yC51L@~Ta z6muhr`H@{;uOg3Ty$(FKEmA#1hSv<1-`}>~s_r6j8?JRfG@ELO%!X08eyFy8tEyZ1 zZ8}#eMgwKQEp*UmRo^!i!Zo>GNMXu9wkTeZ`W^+TyJvvGbYc&%T`$}1>QMEEyJm^> zcj56}6Ta@l<1QMe3Xhw*>UUif7efRN4tJnzoMYK`UA^{#^+(lEmM|Ju<}G{&A`g4% z>tH$o_p1gmx9;jKz8P@PfR(&|70V!X1*TjE>N>6vevafl8bHu;Sw(hT-S5^zxNr8e zfK@zw*Sy^Y!TmhJFcuAiRPXSM(FZ)Yrmq7f2;_m4P=2VY)MWHQFriSiTvy#Y^as43 z7TiBgOA3$DmDM(mLx`1Ah|R>QIHv$$mr^Ek{*sWDi=`H0DIL)`mt4$$lokRaCQStZ z?^INke`?X$QJHGpx8W{i7I0Gs1=l+;@Tflw`wS$~jh@5~?JFQ7aB;5DzlEgHzlCJw zx~^7z9Bji-6&_dZ?yXN_V5r}1!eF~kSh_G$;RZ*VxUZsf;l5LJ`BM)1=PV3Zg{C|h%>~-= z0}CG*0Us!0RWzf^ukwBt#~{7p$t>T;E#%Wa)U#W+qvV(?hONlPY!Y|DFn9ywU|1^{ zeLyYS-Fg*?VCf=Bdq#B$X$bh;<*G~W!2fNkIJ z<`-jMy-0LnR(6{{JT-k@el#oypyyxehYkuXUwdJ(_KDRT z;Ig7NE$P60L^&!QgIO@xYB!c;+n6ADy{a%a6>o@xnaBcv;yX5`aI0)0=oFvQ;>g0L zrIuo5q6TF*p%?}wSil~D*5~D(pvr~E>0xy`S^D9hAjEby;gODfnxiZRLR7R=G%BWDTxYJb>zpRRK z7gtgCvMS1dovxx{5oInbqU6&SQR;LN1;&|CJ=tu>swg%uqrT2z3um~3aCLsA0=p49 z>j@Ue=1_!@c?5jaM1f=hlEDQBiO1H>ZAK00y>+fe9&aS$bDO;g_^gQnU)JCh?!;bn z5QZV|l}LBGrx6+vr#55cC4xlBW=!qV&r zzYML|6Ty|R>_3xt9D^(I{|i@put~JS+S1N{!6tj8lT;(;4w^)gJSVe(MB1UNOjKXw zD!>I7iWmyu23I}y`(JLc;PVw4T%khC)r33yS!oz_h%h=NvUbpzLAA%H&WuyBBB z4V8!|AM5b6gOl`bGA#iBXf7|=>z6ct!p-?cI3*SW#_UkJ>hq_{%nDy&QEop4h>o6k z>nAz!X7illgk;RyO&`xa48(t@Aq}wL0OIq`ye;)sV;1~So#<9zS%ZrJDN2mVzsguc zK~O^IF|bdg0t^t5bI67LC1+{m>G>=1iwmX$uYo|1z96vh4RZ=PMDoR|1%aj6~ z7gbV&V=22cB1otx9Chh8w{4ey$k5kaQ}=TT9w9daz-Lf71>~-_t9BC$IV_iU8#MY` zzf^VSk3Xaj1ad5|`Z}B@rzZjv>YwU0IY#s>yZiK&LF<`$sINYL?Aj`vKGb~(pbm-xGa z#Umj9GlVtevg+$+c+_sgK@7fKRYMit`coDh@_!8YgC##c9!%k@4ol_W0r)t1fb$-J z|0n=KAKBE~x~m5NUj;nI5NChjL#sPDYXH~n;~kM5(xk?Qe{ z!!b}&mU8(qvl12}>C_#6$W;h)**LclzEsNf734rAJtYg&434|>tFO;ZB|}1xe8k@5 zf0>zmWqm|XzVdt1mETOOJK4|Q?f=+l!@z#8>-|dG@duBt4{zdQ`~^{$w^bKzs)=6} zJP07+S)6ogv^+)|>@uk6!nsz_?7(kc(#(-09Gmr1yD5D5Gr~|AJlUIm)dZl1trOBW zJJxeDA%P_5-vRHt=9i(&0TYu)BohQOF*Pugfp#c=)mr;+<2DlhJ%5F+El|0Q8Sx>B z>Y)c*AG^Q-Eqc4bUV&_IkZmSWqK7NV*|>lG4Ih$4S@GJkFS}h}ZHf_Rh9AEf4(ain 
zTgP)=pLyo{hs(2zSIl!TMv?Cm=kmtEl%g=;PT+@#P~W*+I9KkEMYdekNq#$-($IJ7 zJE0qY^P((88rMmY>yDdMK37lIm;btWMVS+!C?J6pJoOOuxiiHaQ66czmv>1811SYb zlPTfuLg>e)d!~9tRqMw}2&srN_d`uYaM@}y4UH2G8LXBCB%Rv9E^Lq{luq5gv4wKz9Tm|1EDtn%ch zC^P-ANUFL_W~(NKww6n)yzMYy{G~fp#3UTehnk|*qX2S|B^^4Xb?-gjga26mt+y11 z2v;-tz6bo?1F;AHZ0l@Os#t<|6VK~^qI@~4kT+ted(nqE>tbtG@9DT%^rG&__5V|p zMI{N4J;Q)sM4Z2i)0KEJ%ssS=2JhnXlbUnH=5E&grs3~1J`NcNhm6BB!O%<;Mtca; zPW-ca2DCb~A zPz-GR^eVYNiLvdpPvv6Y9;9Zh=X^xVzQBLe2YzGc)KK2qaYv)_&7i&g?c+q?9HuO6 zl_haLKaC5Fu-u*Y*J2FV+be2G=>h!q@+a~5IqZ1VA7w4Qy;+3D>&IlO%QDI9z5*%S z2n?wX(dt79w0g9KUue?Az$ts%_E<-+9K;xRGT z7d!1&r0p1gs$)dwV#PW|#H+fnE>~n{_j2W92LppyU(RQpXc}U|>Xh;N^ECAYS2~Af zefNLutZzR*A4(0YRW>$JKO(~)K6qNhyt+S45_tcDe4@>=Jsd?R_KE0!bW}!jKZ=Zj zwem-haX4v~(|WpkxIRs;)`?Ez@-<`5Au_(*!udp#nKS_5&5?la+SE?vkeyUwpXVDN*x`9fdj#tMZjzOENww-+d6uGDrFC_{HD!AxJaRvl7uj-^bRa$- zOqI^}&IYBev>TCpA5qqShNQ3cZ&>zx7z(|x4O&I>3Vm+h{dy-lmb4DzLx_zbwv*o@ zp^KjqmC0NkBE{RvhO_hWPQI^FM7ERg1N|E#+?R3v%(dpRz&@q%Or(b%L&_d9hqKiW zz=8SxWc_OK)eu|lZTRDK<=b@S5EF@iQb%9^RX}}@DSL>Q%lN;4!|<{Yc_BM48eoE= zFgPh`>!eP_Cx92DkmLQ~-F*tyPq5Xg#+zSX_kVZW`O8}0HWE^xad6a9QOXb@_@u5- z-4!3YchdcOjVX>0XOJM#0CAsbrEi4F1=@m*Ow@M;G_kv=z5_fw&;22l9mJha;NwBQ zxm~j1p(v8E0Til#x)26moFk9>&Sj>`H&p3G=+07IzA={t7!O)4%4`0lZpH@Bn~g z_NaWw)jFCYU*4ctL%x>*7rO>*3Ef)UigL$f2^bE?6-$_mOgMD^v3trCQb!sE_E5arc1j>9(Vp!l z);?n(oXEg4de9oeu&9G6M?}UMpjBk57V&^s2CR#m&P#uvOmPqnN`%BKfFRTrN%j+| zU*h&lq?0>)SgUPXfo=`J0C9n}altm$rqs57vH9-BV3;Bv^cF)EUILF|O(NKn-1EA}0-z?;GkuxHRmmM45<6x7CIQ-!oN;DQ=ToKg&!F$!vtUzZ9h?Bwrup2QU^Z zpZ8?}nsQey=q|3C)_|S_Mww<@t+Qoa)X6+f)7Ey6_Kk;q#*ad6Jmi%o*Axpdi!tAS z{m+EME?{a;!CaN!G>3+s!|x$mbVu9ZJTNh)15Q0D6%d4 z2;JE}bY}#rBQ%Cd~;UrGWj7M~DU{ z?e(o!VW<2bzq&Om3T19&b98cLVQmVNb1)kOGC4LmlYw?9f7M%CkK4EvexG0AM;nQ` zykEKq3T)dHE%v486uZFA7SPz5j1t?LSazoK(%;@g9!eH9lrPDoNM15BMIIiW^PMXd zI(avtlfPck`1hyls~_GfHF3mgt|!-b6D9;P+D^1GgbOvfUQT|SZf3t<|NV!zLiQ#q z;w;bz@8`Syf5WOIv$>RV`seb73b`r&fQzK=Q)iZFIhiv@gmbv~-#9PfV$O+G#{W^w z;Jk3&W)YkP(?E&nS?D_JGZ6gCYm@Ste;^CR|0q=(#3PL3>`_Nne^uT 
zIGZzVvIH3?+OmWqKZ_#HvFRLcTIO#kWw>kmr12?xe`+UXyD;Td=^?K?;dPUdxB@;z zpIMN0ZeKyH>9%c=K)u48QXEwbAYfiR+z@Lc7=F2*2{wJ2&4p3Z+f@w@Y`XAcAF9o} zKW1>q2xT000^$L@l1-F#gv!bF8VFwBmq3HNX|vmwn;DzF+uh?peTRQ{?ToM;H?ST2 z1Kat@fB0ry?(Y3NJ6-w#JFOag@pun3`&2AQ4OZ{hGkClWOUiA3KRc~&n{xYUrqs09 zRrRKMol{#pkklY*YLXLeP6$C7N(|-68sHZeO(@!ID=Kg2${0ee!fC5w`!1NV64SRc ztERQ*T~d5;gilTdp+=?Da*Xyk;1u{%gEg;le?OEaJS^%pc$WWEyBSFE3CBPaY! zOHfkz0CINIaVf zd+-h)_a4YL02yTTP*mOwjYJm$BW1wux(vQ%*WlpY*0a7w@fnu0QtN{@Pj~$FxV*W8 zVK?Bi*esDDuQlAgz=2Kqh`$lLi(S1v!jPnj7zj@}hLT9?h!iFln#b6d5L~oMe+h#& z6=?HMH43*vPwmwoHN%FQu^i+TK~`-B;i4(l!MrG-4d@k~+Ea`qejq*rGNKQlhqVzG zy(sid@i68U-{8u28fbnU=QJIGBLbF|7&kb{7YXq*-Mj;&r1;9MWu0)rA(VQS*#z{HqglC%B zvrA3ovuv*~0L=FK#6DzlSVMHGlpD%fE~}@I_3J~jd;S3jR5)WjDr1bRe;nhfpZX|>Vf))b>-EoF)3O7|5pwp22xEBi0t`^tG`@deZ1=849H0d7o0GK za&NJ|`t5f*S;9-7M+tC9k74Q>0OqV;wwnC&>K|B+CfjwSAXzlRB^Iw)VqMGtR3YPV zon>&EtzeuA2@F;4q$cI(e`!)DWCR7fK@^QdX;+ z<5UF9DHZX+-i)*dJ#m@K&b=e-`%pdDxb+-l&3ydVR+7Kqj7=VJ?0_2lL*lXcl z`n;3Ay-@m+3ZjQnfAa~JaEBrd&U7f!;KzLtYu%VSj+UsLaFB6AdwIH8R1`$5z~$N4 z3&EXyWzrl20Bl1cQY1^yOGs(H$CrlJA2WOVCZD}5X*=4XlwZG}=u>?S{>4pr_%{sy zHw@2tNS5Zk7dZp+UDwo^FSB9I`-{uN+Gts;DA8u1-bIZCe>K!5v1Ie*p6@W>bFW4- zp+c0iru*H76i7lRb-;UiE@u|9NM`D-9 z_sXz?wds|JFym;6C@2(iZot8brhs5B^nsE!Rotz>Kh4Z{Mf|CY?Nj8m!ipJQmfNaq zUbi9+P1Jale_ZkCfV5SWx0w;n4B8vOBJb$HnIpi30I+8kE7WDI}1OGM>2NwDWQQ^K{0p*3@6vDV0wD)|=4*Mp$1&tp9JnTculeaBT z7%ld@*yO9xLcI-ct75Y#U*n88Rr_}`d8d7E=V28ofB9ZbT;kgYBYkv1Ex7ouMGvs_ zcK|MQYu(hh>w?-@bPcp~4JNthe0r)3fP$M?sAKSV^wMY?r$qUmll5kM_j6FUBM z`g6HEvYVhlh7S&ULz&8$lp+jp((fi!**l^(-A}L5Zm1Bs+cbcLf&GbQYCP+oLw)J@ z*1?KKfA^NeLacz$lmyANj1?Tf=yzmb{8}F?V|^B;(=s+aYD=mW-ncWOLggekrqJMm zfkdK~Wx1($Wxwa>^Xl5;5fG<6*uA5Qcn4pEI7Aa@Llo5mh;s*#p4xs37|0F+U?1fC zu=${G39pU@C2nYdP_pIcJMaE!xNqkPF>;#Ke{mZCw*($BS-Q#M-`R_p@&V4{BcU8G zdXTYBlF#oKTi?Q?*e{m0)Y!E;OWOLPgGul=nhy>}vOylwFs#H6DUgqrt5vuQd-Run zO}|Hq&(&tRUxdgTeunfYqd+deA9Q`WOLR79j8I4xdz0RrjGLP``Q{xwwo`I`3>#zK zf7$7ANuvjy_hx0U;=rpaLZy1a8@Wsb$77;bK)5f7>1&F$b)oZyllqN_eh1Xx?YgVuR_>6mfPG 
zaqly`fP2wLJ4uRYsh_8E36Ug~d!MZ;wHZzrWYN@iCwXl3<7FG-5k4=ItC;k=ASg|t za&`Xe5#riUgU-%EK4j90q+vSVX;*`fUg*e2FGm&WZ#$D4iSWTv)`ytDe4M+ee?R5; zQsL&U*Kr%+D=8RG)Zi~KsT21uClW-R`7&yFMaAv&%c60`e3<~m*Al?{?Ae&JIcokg z5b{1y$DHGq2j9@UqG>Noa(6V4aahO(7^FjxJATm}O-l-5y|Xbe<_b!GN8^QHg*=IC z`*m@x2loIhud`>@4l4UK-{`eNf0b!v`Kvv5Mk`C7cSEB_ZfFKyb%N0CcOj}172@YR z0QoY<=Vu;YUUvNV8Kgi`@^UwM=i|hD(>S@%PjU*w$|nr|Xd>??bO*W*1COFmxVQW69Fpat;H&lH6Jc3-@R32+CV z-Cd9=T%6*ca@%JsZqV@VDeg$ayDRP4&`>xBmu*jp+*!h?c@dGfr1~Nsvla7gi2O3I zqfZD^NI05(5n*utLKO60GBA=Q|Jh)mmJ z(#Z_a%Krdz%n(2dWo~41baG{3Z3<;>WN%_>3OJK-wG#s~GB=koF##!m-CJ#ss>Pid@tq8CUsCjf77dU#3$#Fz0zR*H*LPlfW7$bBFaN#Ih?FeJw5?q`N!u@L ztwasUnP+Am4yhu$&x-8xt0Mk>c60T~Td6Y7yb&h5xyv{!1RfO`&$x2Ta*-KrnUFfW zdCb1fZyw6)i8LbrZaZCnUJILV=9_Eb@{iZw-u&g0x5{*7ML~i~X1s?W&#YEK(7(b9 zT6bQU28P1w0#YV_oGGtDs}IK~trTus zdtlOwYDKSS1%t|?C8cE6xXdWcgbVF=%E5b52gO9Ku


    $p2by{!oRdo9%i@S0-O= z*WuQ5vniKkdeT*Rwpqp2HAryd`)h6T>3Tk0&dTs;br+jH#2eN0@o7=kBj&^5;#Z0| zzsC}I+c$_XOdE@T*J{PZ{;nrH|EACdwZHn)&DD2Tt!QhRB`7M+G*mHry!!gvBD=*y z(wOuv`=M_7h&~D!xyTmTS66=vp3<{!E2q_Qq+lECU7%*pE zYf*zq12KvNO_n+5Ld(wW9uojs% zhH-1ovP~lj9s)Nz?0dmB3E%5YM==lQ>olsZ2ury9O~UV?Gv(o)%(l&?jADJKbHfS^ z%NriSBN79DgdHTs5W|$Toe>UH=MhAVgz5^Wj5?+|UI0GgnC==`nls+mewi2=RP0Ww z4)Ece{)!0MSHaa4g+3f%IN>`;c}O6| zDJBpw(6;DY#Dd%e8w^$V#b~5>IROhlC)_ zcF%Wz$!mba0u-m9;lrXPI>^r{kG7dsumC3$(LDua3yEl@DXhXIZhA82+RT(F&ccDQk`K^iB51SR1;y4 z(K3`R&?Aki4;$6Rh|xBxS6AqlR+9Cqk;d@QP+!@9 zbyv+)(c)^e#=2@6S4c#k*J~_46CEPJwd4LYwU%_2X%G4@LClSR0cr;c+CWIB(VK>r z86!^6GK1)WkfgBw_0y!v51 zeF`CN`;=z*;1iP=HOHr?_lcZ;4}CN$d&N_gQjd&}Wt@e=gg>uR8b#sP2s*>*z77Nv7#e)QYTc&+M5v_5Xv-)%!l|7lcYJ3_J|Ap)f*jJAQY&wnJB-wu9+IT9Gs|${$Hz+Au8h;$CB4D-Id^`kJe;llc)MT*p z!0JYURVBeXq$w_q#Q~^)iTie?iKI!qlsfEc!TC9Uyg0lNQ_NDC zic{slCH#Y5Fqo@RG~T6Y{2{yQG04@wEpjh3!~SwkjGN&;4epVDTtoq^_qeg`l?hi# zu0sea&eq+tQvJ(xcPNwVQEAWg`s{fTm2GMwr~KjgfQ1GENY9Cqn;aHNPjy4Zc#1tJ zO@RQw@FT(r=nuE6Lpl$C5&^cP9N5V@tBqEY^gx(9w`PI%Mt9--=Hx2(I9~LNmt;yp3dEeR zqgK)w9qH7#2RktXmp`x*V_dHgXxc(x_e5Ykm?gZ0PsngE>oYnyXI;K7pQ!$om+@R3 zZfz@COI;;;8)z|ms=E%W+J4sv2elUclnm8D)lpw)w z&^BF`!>YcHDYDQ^!bi`+cofz4Iv(Sx$k?5aLihs}8O{euNNU@#siv_3FXdvKqAZ|B z`*kA}MWuXXnoB4uib^S}=X(2!Do!X05`}i-6s2ez()v+~QiA#$g(wAIfvql}rzk0@ zCUfY7sG#zHT0#NG50+|LHQN4bp~efxOxJjahc<=Nz+F6K`fHYa#RaOH()$thP5F%a z=0qu#--IwHRsJxocALeau((=Q2!3Te5i1Yvmw*aM5srIWWybEBaLO4lpwS=@XQ8eY zrHPw~4KiDwt_D#QrDFs0hWJu-(=EiH#ct z`1n(M6z*9L_#q*M4w|~(&5ef#R(5WjqDTOc>rPo0wzKdYZfFR`5l1HyDsNUQU#!+; z*qjKgr#U8%i+Ab0j>mHI5FXs7<91<(!MCu?sQ5|3+r`K4-@zGO{=TFy9!M%=(Y925 z@nubax;Evy3OgL=rhLTMcwTMRk;LI<9iQS=9kPnN4Pg7m*zp8pAZz*~x;v%#XFF z4H<7nAGVa!3IKZDd;;PI2;m;C?R@+OgrGow!5T^FeL34~38BLJXme3B_U2ZNq12(P zHr8rHGbQ_7y!;GSXYf?5H^o45y+cPsB#isK4F5b%Z{vU|^U|H_kz!o$R=$ zlL3&i{hv4C0e#I=g|_Y557)EsjZY1FHB_tR%-ZJbrKhJb93FxvM9fG|44zWt8*+vz zQKdSf?Y4`<<8AzOX>knD9?N2D`#qn3aZuQ;MH|Gi{L+0g5p#q;}O5p^=3X>(7Q^|I0(PgykII@-)GI` 
z-#@>~zD66Ke=%Lpw^eBW7sP~Ls-B<<@TX8wuEWDGzM6#x)nyweQ1I-_hx)_PYCfxyA2@P>mA#JkxL>cfPYur7FLWDo?rum4W76qTg~Sp4 z$M%}Qb^imQEsRo=7e*+PyI?GTTitV;I1qo&U%})a>a;;cd>DK)_mFFneoQ;P$@wl1 zxlTqka-s_c3UWUB*Pn!h1DK=+WXHMkf`u%tc7MBC?XG1*yU+~n?T#Toe;@C>KDG@F zLNIb1QyWh;Xj#A=dfL!&foVC~*w@YmN4IDe2Y64nTxT$e*e4wL@^u=2B{R_qP)bGX zW{(ZP_-gOV_`{9?Oj|R6?TJUimM-_4yk%TK3u+!14TpBd#Rzz|rRmTCwqW6xJ>4;# zfvMvFPiPW_;{ESWZ$x7fafWz{F(k+OWF}hQ<|w3d`Qk@ZG;RpJ6fH7~qlBVxB1xoN zToPUg{&Ub)kb@1>Gbu`cO#hW~T3Pb~&C`@1$$gS{FL(~|jcCMCN+^lKRJ@|l7mZJt z2_HuRNp;cjj-k%Tr>%K)OS~TVK9)=25GNORqCF!anUU{^vs78hFc?XPKUgbUsnjF2 zYZkOy25Fi^vpSOO0m~%w9?txkU<)x7Hf2Z76-qD7=8PHzMm;uv#dO$L8Kd|%-k0|A zEuYvzdRtrLC8Zo3Cjo6r1ggIEZLKzJmCUw>A$!9MIMnDA*Uwg`d+X3a$uE!Hh#- z`D@%I&~boeoB1|>Ik?5Cb=ZC%>(kIxU^pGaleT)O;`ab4=Um-n}dxUR)UK>5ffmU`|d z?w;Uu9?*2RM4)n_9v+Mj&fgzyoT=AGStR|5gu5zY=S-cJG_PreBGW|1#l2~jdI|wb z^!WC)OqLUW!hCn;hPV=H15nn&*&trT=`9K1TawcBAHy-KCdT^UuY5T7gaDf5BeNsl zvn!t@53Sn4Q+dEMM?bqJ<#t@J0?`#_Dy*)jR&_p}Hg@n!r=g%LmBc;pd0b6y*%w?6 z_T@S87qBTv!4pj9N%&(UGPlKu{*6djQ0e~S+>*e52y=2Ea6MSoRWzskQ0saItQ-hM zF%BP4qR=pZk=(3uuncyy!-U`zFcP~pq~ZR`+&zB429!d{u=8{_E8XDqW!O!qI>^baT>7OCd$iK zXH7hTQRQ~O)9f!zX1{z{(ZWiz%odX}U|u~nTJXuys#=`+1QrK-i`y7l+hcKV?&H*$ z9RI<9`_;_1J{yOpS7$%%&MCwP&t~^*TH)}2`6?W4g~AJQXyx?raTuvNe;PTJ!RxvJ ztv#?SPO4h-fQN#Ol(A$>(Cn0YcT8FFZQK+%y$x_ohBnid;K~Boss!cR3Xn;yHk*^xh57w04iuu`j*`XN!4ezWo&aXR9WC>J z`lI}>_)~(S1n_yqM12yt1t+qrG#_kkfPGH*Q{{vYhJ<~3*r$hmdf2CjYSP076Iqbg zMwb6!D!CLXhmjhe+)$s(Ocn^NA=zDptH%>!Rf+0GY)Dt19%mF9`;g8__9IbM(`37b z)T&Oco&V@uq-f2tQm@yr09RYmFgsg+O?L}!D2Luzj#rzCn3iSjd}cKD=dFr=3N3VP zHM2vUkZb-Q4RJqZ`uM#F6sCxN-^>3BP%|Q@ApZKny7N+4j1n}%6ep?Te+;QFhe=Gx zH9kF-5;v}jNt92ghl|_XD|xo8U|Ac2u(jQ;K`4F=3fK+ zU^?9w6#If=Ur_7~urDa~1;xIg*cTLk%7UWX7ZkUkpg7#xf?|ckEa5oYn3wKmXHR;i zM(Kr^%5H701-LC*XkJNrsd|24mn|-HS++J}_?pA4bZxL~-tjcuGJ!4cpLnGSN8_FU z06M86SPErsWOHiJR92_5L39@NZgbYHm-CtjjAS4o`Tye2vr#gt6_#PkLhqnSy zyVjs~x(DSMK&U}rB?o>*uQlb#`RU$ogS~?j)HgI2c(!e7gRzDj1DK{}TMjV#mNpn^ z7yA^m8I9gw4*pz!a8Jl!j;!wh+w%BdDZ%&g5vJmOV3_=KrYjDq6on_}f)~p1(k+|+ 
z<7zr`fX9VPS-yL(TZXm&4Ko%**WzY26=!yX#l`*1kJxM?ZpMTT@#0c>lRXd}hj_>+ zi7IGEg!RM~#X~;~CAH7QRUpr?IQ!x>=L`ugC~6%TR1BbhrYAR!82Jc~t2pulDsq%h z&dcy17r4d(p08r4C#iz+toeUGpap5q52rVN?+P=Y-=o9ir9`R}kiKAYKVFgpGx@$e zq!JaLoF}YP4gIQoFMSwIYp~e|s>uIfO5#NBiIRb2 zn6~`hh|I2kLY(lEdPxL`dR2l!#nXP4kojZ%PHF| zB}^)MHqTA5qIQoq77IA!MMW2r%t0{0-x!{5P#$P0w2ffSDKkTPO5z~FE3SdVSw{1B zINEiDl&K+g^94Wbbkp55{o2THIOPuLZqAl}GxfZFcJsEP`<>4fHskFpqmK-OIAs3u zYmugI@vwTzJi?3iy=%qvYj&DC6AnEqoUP<`TgTGL&8)LHcs(;+roCLK?alZOqSzl1 z&kFpom~(Z!bU0QPBdZFFzN{-G#+(=e)X&u?g(nps^?-@Y1)FArD2QbOWGu=^>POdq zl|_yc1o9P6eZ}cUA6R|6fYCbYTh?W_t@LMYUVnw4Db{#}7MxYN{YB8bz3e6#Aavq~ zbDsu2!*!P&N1?Fb*D$J*UCb0kvQnt)`+}aK8#66N#}Qej!$OMkO>%J4lHF6 z|39+4uCm}f&uW2$$IFY=)Sc!NvUh)f5t3~OA#*3b71(26PAb4v* z;01!HvkRh2EtH!;-38GWk-5AA|G$aIjWzTG_01Gu1YKa*-gB|EHXbGM8+4R^%S?{M zghYk|pyw(%NYqhdXH^cu=X$*yq^nNTsvKk~P&o%t#5$#zo3$L(7VFGSlz=~f4bu+J zkDaX=wOLJX>tRa%OfkKbVQa6ObCMhe+bF=0BDQ6(xOPN)J;>=Vft%aSPvg;(<{lIW6%TJQ+b--B}`f!7JNt!hw9%QjP z9O9{3u<{}ocO3D9@$&&+4Xh(F$(Gen-3k6nby=dAP4jxv1P*kXw_I*1-&#kzoXzA0 z+Bdmao^99(LjxjUK$9ms9&lY(@bwB5$Ai6p0oQLEypwcMC;>Bl`J_}vcqCqA>E}9abhJt=jdF% zb5V5dR@1eU9i6}bIo#PlHY{E9LEyT!Hk@dt?E%m6HP`WgX*t?(tX=ialPttx5`T)u zS7HC#_3riX?;Rc3y7qKQe{VHu<>F*VyBavA)BA%xz`63pg6J7qu85NF2e~Ya@8>y; z@qBumq$8x1cbTxje_;5)w*(=UlO!4|;kul}Xvx@E9&m}4b3Pv9bBH1gkCZU_n2ca_ z6r=R^@vH9WCG+)RG`oZS8-yYG#;DQxAxh7)P>C3XRJedRT%gNie;8*F|7al%QA!JW z_SOl+X%%d^?oqBQRZI#yRyE7jL4-I`y`vY>>b;Z5h<<0<^tV2{bv-$J}rKbm@ z$5F(Dkes5My_z!Zf9UWn*)fnHJ8i|%77tdvmLf@O1N8>ALC`glP=UAx zC!^EfJM^M+GQtXvCbR19+yhgD=V|=?lF4Q*h$zz+f;o)41g zY>vbDmfhkz9?ymE)ue>&qHJw}r{isWL56pNLM zs!}PWnSrR(ItP5vsj=U;R<>wuuH|!>!YRTi&DebyOBy^*?(b1LIFDdFx@SvSIHdr) zGC)P^-&w)KZYH$sX@I(i-Kv2G4-z|zdKLv4hEwzfquDFUqdU^GimW{2RZx@cSE`(p z)Ms%MMfQLke*r&Gnq)K#=!<0<;=4GcsiuEOOciHDSQ;VyT^0Ps$^0fla-rynx;bMN z_S@D?7dLeUH~riNH>CFMmt7>EmAcSh`l0&C`v|j3x!(otYqO8S23>(PJh<}KS&7R$ z=vPf_sLM9Iu5oSJ(r-gJqgFT-c=#bE`K72_i|nnqe|9AR`z*Y&R-2clMXy3st^>m= zmAAr54buSFhP@)wZWDr7HZaUp50b+87S&bD5}cGv=jMtVm=S8%$QsPCz)|JF%= 
z&g<(>fZz9Qj+kq~TURBj8<@sPI)%{!rXj=>uJuNU0V zqNP}+e;_uR4K_3azJv>OJda11nem0gP;YbfRGDVIKn(H9Vzq@+VMO>bNoKrRU7(bG zIz1@fp9WUjf>$m>Ymnd*{w<+wp}}tR2{sH8^WZS8WNO6Mx06K6hl9#ae>_`c=H8yXnKzS>TWjd6VcEHX4c!N( z8$6dw8{4BIev8>;%~>2jgf=~(2fkE;#EM1)>uM0<>;HZryNQ{_x*Ft3plS^wq5esV zNLUxU+FFnPIyK7w7=D$Qb5Gg%LzmOZt_V}Xv${X6bVl70ND?!+rr?aZ#kP!!FV1K` ze~;w`o3Pwqt>tu8l}pv-@Ro`lw!h7#pg+i-8SnOAYZ8q~zBy8v7)l-DiWR^r>Huwu>^xq2n937*q zXg?EvYL0jty1YZACFMhiWznpTMrbBCSFEJTMT#?iIC(zd%ZYWIO!)zg(bU0zuWw7F zuxZ~<+Q8Gj#@)E3rkb&TqH4z7{6w}M=T~aBY@(FFHQoGAy+!~$I@2@*;QKz4$`DJ} zqv6g!2XirklPFmz0XdV9Fe-n|T3d6}HWYr(ukg_6bOXD_^4-kz0SKXG3ZyihJ^;fg zyV{MYZB@LJ`@OMfmre1G`<^u>RHWxAHJeXxzu zX?gSfz<9gxY;X3noS;@#jcvlO$Yn#=SfJ1h9b+LFlT2K$vv@Vhs+a4OT*_sf!1O`d z_Bux-F7dNT7Wyq);`mug*yj43<$kLxbIfY0_hH@2eJZjn;HLyntvCJq;jN01%eB-nq~)!j90WV*;}1nU}t z+hDNE&K~xWdHHmCy&UEj4+cvIlaOikERX8{YJHpA+YRMIkkVR|qEJgw3-~lc%h$sB zheHvGe-Zf?^1`x;aj8BNf>Zf+f#Zr4w?>#AMdR|s!`?jcs!GlXQ~EFV%@;~?M#_q% zw@T#LX5=+7s7Ze`Lj4{xgAjok0y6@>Y<*L7CcwIEY$p@jwkEc1+s?%NgNbe1wr$(C zZ6|m3KIh@yweDlTboX0T*H=|t_zobFIcuk6qJe$W%U*%#Rly=OUklz7@zt5X?`(!G zc%@YvP&dIc4)$Q)8B>$`rPM=F%V$gFeu&RvkY9zP?UqZD9@_{c*^SI?Y*Q(nYg9pK zLbkJvmB2u|HJSqNMy(_CalO3%=+3uJgZ235of#l~F$xIR%q+_!ERahfgyfKxkX$rR zPGQZ<`FjG7RJ0xZ!ghD7lN+EI^vo0u`-f&n_OZEfIV0Is>aIW+P4o7~Zqpg`t;^ap zPPbLoag*sI_VJIYY=G8+H-khl)&R(LH=fWia{+zcTkV^x+xN#A=*vvNGS(6FM74S> z=LG=1lV?WdnM#$X#FTeTyKncWm!K5uJSCnorvo*4F}*crg60}P^{*KTUP?u{#L<@z(3wX6%Tue|b-ZaUTy*Dijr<&J2` z;F>=51eNlddV}yMP&Lbzq}pJxTM^I(dAfo8z>0L`;&s41CzVKlhD6TwjEOrK5m^Va z7Jc@+@0w8VFl#^!`wDgFfe87pv52C_OG9H%ly_g&_0%};^Yp}3@+-g66)m#J`kS-HS9 zc*51$H=38Hnv5el6}<@V7vFhj8%9}76^!>>RnD@rGShrjfQiWHxBG0?(%iwq2!Su} zI>rIO2UQXEX3RvFCy~2PI(<8%zth&IdepPl(Y|J<&6EESGuBMLIcOR&aYrs!998!M zvAcNUndY-4K8o~~sQ^Hw9`vW!;~-Ir>5us`J!n#u1y}7Rrl#R?Uq-?s1dbFa;`(5>cp>tC=BCwTy6Fj4;~Qm_Le+Tv z9aGsnqBG8o)$s^^Da85Xe8}O56UQa`g|Pk=jGnY$wApBvoo* zYo$Bj8PzH^KXxGkjaZrur2mlpmgK(dxlt&*lu*N!R!KJm&WDxFez@C|9B)2Ow(a7p zd-@7_NJyz+Y@&^W>3Lx)ZElj=v1+MKXQR; 
z&HM)t%JQlPPuWJK{p@f!v--FX2P97%l2kUr!zO4T_DLruXo)HG{CvW>c$QO#;gE^giMGza}uF)pa{5;QQ z+N#|LQ$WUSx}1%4IR9U&miMQd>CSb2^6Z5D}uqZ6A@i*&~Nz5_Lod_Gfy?Z z>RG75p+bvM)lXLH!czAOU4_;5eEEe~mQLBnm0*xc7>1)Mir(Y?)`r2dSldhv2Xrlb zkfN-IID9|6&c@!g0dt@YD%-RxP4`AvoN?waYIp2CU1xJ`QcM=`#OM_%U#|zw2(L8J zGPwh!T|6k@y}H zr08OQ)JPZE%^7(Boa&Useb3do@ohc*$7C(-Y2ib3+9&ewD-YL}0L&YVLEB(J&nexB z{Cb-S|IqNzf@62WnmA_S^IxG;k0t!-%K4%0)#fZtQV8{J z(mY0TW1_gm*DroyB*(uO*;7yf{;H|=sOPE5{i7~;Jt1qzvsqWDkQvHGqa>3rlXMBK z1_#wUfu<;kZJDSgkTQJzaH!)^REnjeyfY)m?FE=`x8jpWB2=ADP|3%MnEUMR5!U~X zScO#R{!NDXyLuyJM{$%m(I5s}*(jE@O ztbIm=ex5SYB_<;%v;thOS%RHLcb_icqMl8D`J(=QOFQ4&3&ZU=%CmH`r39u*XzlY=ak-EYC3$ z#OL95XrJ#RdR*SX>nb7MDGc$+5E64~ZjWJqFXgsA!D_0iWat2Jt7MMi;7QQ7oV<}h zlEfH?(${i5eCaj!PYBPFrXIDliQo*Y?@4VwAp2hu_5g9>ZU1KL?Gm6=TF$HCt@mDr zEx0t+l4*q5W@QVWo-psm&0_A97pHHeVtR|GJ?_a|s;0 z0RAg{obQVz;2Qv_eWw2RgzJ?2(Rj;+^2EHc`|@#SOm?v3-QGjFUWu3U5u>I!#W%hk ztVgX%G90X#ZBrMC&sBHj&5$_h=Dgwo)c{pQI-X;3_xes~wM!46&yCIo*^vd_9~|PB z8p2fTL#RH^s+b3f1RZNvH4t~R0h-i<; zNb(fA$HKZ)%nZ2ZF$S~MX@|xTUV4aT3A>nm3)Jf32y428w&>D!1z9u)+(#&JrIOEK zD>;;J$2S3ZaAy$y5Kw8wcUf%ir^^6_&rKL}iCT#z{4Ut85D@90>GLE{~vy#-0c3$mHdtjYyxnKby3Gc=SO1 z8RA?tYEQoOcZ(0si9p2a1|aztu4sbvkjC%Hs)_@US{h{A*>Z)9S-#rU-f8#-pade^ z3GoNgU7>a){A$tks|KYeM2Om;AA&sJbpyjk?C}Nlt8ycEKHs5`PU+)%61tmMkYam4 z`WD42gTl*^W1I zf9E-zoc}w|(bSJQWQ7Cl$j^;LuTt42e0vFkgmL@-MW2#Yk@NyV|86?tk#U80c<^Eoq9`54{4O~%Y`tfz~dU~V5f|Nv2)l7zH=^#WVroLw$ zZ6KBsMc-uao7tOUOq>2JCSM@n&{j0b<6?hgTK!G|XpAo7zGUdz_k4XO#R~pX^++#h z}q1CzR#=iXJeFm}g}ue#qq!zZsd^Y^to#&%vLDGT%e> z&@S=-sHj0_S&;QMnlWQH^XC&{r5^}tZzC&#DV(}-Ss1nLn0S;{o}I**opfFa{|YQo4HUI&XgQ@#S-bh->%OGIFgsfUFIqGTpXpS;6-5(n6V6gCpi ztc<$<5_gc416=a8=6=4u!=&fx1BgO8D)Tu#QGnXtojOYGwCn5n9Z()G|ID zT=Z04G+v^j&kY@-qXMf~Mkt)0zl+RQZXTsYA8*1w$!;e~CFThyCMYF2W%O*vX)ozE zM+)x)$I3~NEAh2d4~pb)$w~A48PgG1J030`xtHLp$e$csa#R`isZp4JtM}~GwexQR z+D7Lkjxo3)j{ubnDT-R*526rHUt#sk5rve~D{ZAS0~>)@XQTgWuyV=2?`xrUnm{;R zI6O>!#mZ?{RbzNzfrkXS6Mo0NbOLq?E)PT%}gI zT-rjSyPG7E+>&@V%Ge}rU9h+(?yWwLKI}3jHZo%nK&{?(M^>ftbSlxu8J1Y-_%e!L 
zDf(ny>J$J2H|ID)zVe}3Cce|YLfrI-2}`h&`R0bru1MT>3zTPxqMthenoq$5eOGfR4XRJWwVNa!o-scB*ev?2J=lbXU1pq%=ya>J zI9RlV&}l)W*f1(3+EFhPM>ixJ?VS*ZW;y=T<&U#2DBh?LL)yoO3&_soY?(>pG zuvQS-=s+p9X0HE&5Q7x8_S30Xd(5G}vL6%|UWGgtPW{;64%-JP_slH*EDxK~-mc&Fc zSWEDQJeq1HDUz82fc&6w==5I|9%MXaolY|b%dN(h)|o!bT?bL9oPyR6DO7|2{`B(R!!r_WpO7Rw2Bk(b_beZgRC*QWT^tkbF;TV`vr-m2XYt>gzMgA<`T_B zNnyrRQTORz=uB*a9o2yi#+PU`etOr>6GBf7e`3TQDWaDEZ$H^zDelk8QtM zM0Jn4>b~l0;y-+s zuWNSWtcbw~f%wR;hjf`tYUc7Lb=~whA#ZHP>9E5Q{zMRF;^Ar9?~Ky)sxBvYEaO|b8%7ys_)&k*QhP! zc<47x+0din9m|FV&S%=810u>bC$}>{A5Y6v1c&Q6!|~{*>t*@`9Rvu`W|N3RO~;kM zCunj%OwfxAHQTqGsFp%gbDFK)3{J?oD!IUpUtgAO*<2$cGNvG3d~(21oSc^-T+z*8;DN?!2AGo+o3u7x^&iu) zXV4V&@0e2=*f^Z^BinPhv>E631?B5jRivNls(hg^C`sTKIwaKHYmM#Bn=oPkg)^&2 z?IFc*f_~|KuZq?}dfZ(?#S-ucT=oG8VDufu?6CQFy0iCxC88b&SmRGql=Qa28r?S# zl#uv5H5z}{;H&F=02=qQ)i(0!;Axhv?d^C{y>xeIp6w``yQ++)xSh07u7E?RF=Jf6 zSxv)2LGVgspzdWTq;JSn#i~(9U>7hmUWVy_zfg3xMrS|@t)_vN*Ozv8fsebwepemL6pmLbNA{xC>&kJakI3>!&(NBxq=yp?3wr(svWr{&^wRu=M| zdcP%LP!(4jA%z3f!bLObyR#r9Ft8MW^+I;r7+g}#WD4aXzN=>D>R+DVxRyM74IxV- z=?o2^TqUdH0C~m+1dDlpHxH%+^_8tBvd^ftZs4~7!iw5~Rcx;@9OzC2 z^SkR?S^UmOt=GeZa&f4C^pZIV&1MnnX{W@DsE{r2StLXBX)F4vW~?b8I$Q-eGT1?2 z%%NfxW*ha7dFc*$ZNV09kkkxYIbbJY_h&P>i+=f?0ABks{3eFimtD8JncXd%A^rda z1dSC{NK=*OM3UdIc(R`0E-$Y3CeQw~$fYHZnYe|;r(fqHt}H^%m&Pd2PvVi8wM-x%$r;ys{6HP_~Nw^Q=WOC|C2Q z(bBVi059_`;I}QP1w6^M>6k$Tu7-D_X=-a(aok@AkvGh2!eyDguJNh}XrAvS(%9zl zKHSi7v59x?gC>K-d47_9B8ike(O^3pEIxX1FQ1{AH*nQelyo8-lzp@v*_c+61mK+Q zc-o~ivdWhzCb79^8-oDJ@-GQi#2vOIi<-!rd;Cp+g4{z@fTdLZyP`M8ht=K z_Hz6q#njoRrm_&b%1G4{fsz~&;cJfA4tZ!S5oMDXA8vOY96{@7L}>>L&4bB8oc} zJw7R?4zGnARzJOw>wl_!VvC7x|B`vQ9IsbPUXGg}Cvz4f6!c|nnXMECvR&3R`{-v3 zzXm--yUw+J&8xVsd@FxkmIOcx0^DL$?GC)K)9kSuGw>;@P0hboRg&A4G(%HW&!YH2)u?-%|d0;OkAEW68qANm~ESb17deDyA^Z;0i?(=sX|rZ3keV3~LZ4`U%W zlFYhU$BDU#wdttXVXS)&>3AfsD?N)KldL#>pb@Bk)OakteNjF)R%j@!7MynDW#$bn z&umV*3gu+C-h!Mjr(w@(ps(E&mWA5mLwP_m{j(KqDNYrOAn%_OhPq>aD<@hCJ&#Bs z{sYvdbIG5Q*Z8mSPui6do*9Ng#?;Q-*@BRXgXMqXxkH*8(Kv0$J}c_COdf^}^qt_t 
zAlnffu%v!_cOa6H9<$NlH>9AJgiR~kJu_v^Vs@5V#?glpfwNQB|I)B!14eO57&?E1 zs=18*py84^s^EqkmU5|exaf@SHHC7SQT+amUZP@NDTm4(O&@$(AJGfjk?E+Y{>K4D zm&`F<2M~polOF>l4Xz#~H)~JnQhb^^XtSTr64_61fovNr&L|C2fZPgIfjomcDK`Ae zl_Ybz7U!3G6pJW-n+k?4ngicDBg`!(3S9eNrae&eZkDnLWOYB3>hP4)cw_RE`h!Z- z+T0s zJIgv#9L*~q4uW8rAt7ugnCma~YAhYDU$v@zVZBnKK#32PVmf<_gxJuT7&=V3T76`p zRKrwIB?cz_@Isx9W)Z?5RZtZ!sKWJff~?Zn$d5>(mr06|wWz?TvQ%PP#9kn-%Jr|n z$^e6;S)WQ9`Eu!HVpbG?uYSm|z-c5Mhe$Fj{P09+?wZ?W2h^-t=A_@6G<1o&VU+}<7wCx-i~XAEw`P$r%JE4ldFE0nG<{T zhU<`}dVSiDD4O657Y6iD=4C9fCiRa&ryw)-QvoSM6=1jgRx zWd7%u8o6De<(X_4rjqvC1k>eFiPaDZdx#HUAt0S{pD{n-8U zmY?$iD?SjY`2uI;NM36$Ouk7iZOgv7m2uwk!v4$I%U=1n-A!E3sa%p#n!~?U=0deU zZmSn4Pl_(+S``93T#;9|o~|w}!1D#>`{#Aig#qKntA)fw>6|!3>mNzK?4PpQ39&oM z9z!j)>>u8h&SkeD%!%97uB{`LM?KH%w=!iI-J;Wwyfn6Y5Tx`tn*Upb>+`9kw4Jc4 zkzpJaNP7ewRe(LqeMJ&dDmMi!t#e|bev#+;G$>M4q}lN?8`8ub#+$zdpsGwhbrIrF z)|j5PJF1CW9Twa{9gfSrfj)48Av_ia9&}F!GWt-$Q|0obFnZ6+Umu*O{Z^~W&^kIfkGH@ zYoX%Xv+0t)U0?sxsHf+1`=uqL$7SpDec8Uc8X2?m-{u+DPv`9g{|IssNewbi%B{LP zH?^UW7d+NR(C;8qn%C5x;8h!r*z;;b?f_j-oZu}K6K$~X#Mfv5onQ{j!l76Gq^DN# z*cI@UGO+1=yb~%r3>eKjp<3MpR5Jr1LD^Jg@UF3)Q;O4VSm6-(_&LoAxdkZ!f>jMm z(t3TdEX}!q3(1i12$>9(YYOqm#d+8`?Mry>yc(EdZlJd&U!{O+fTP?tZuxw9VEeCM zg~$6#a>OcKXrA7H?N0`X=FAE$N{F>17s)eiExdW(ub4*(Sk*6B(-bV5 z$*|L;S44JVXxTIHTihYU`O0lX-&~ux1;#N=Grh8HGqPNcQ6*P%2S;f%1;1Pe05bkv zgd~oFl4_3t?W#!U6hCd0X+Br4%i&)%yZ}L;OW8t2y*1_O$KW%Sz1uIzAqshFJN_Ns z-fg^j@tXV{1Bh#T5`LdS59Y0x2X2mIx3Q21=ZlrG5RBg*9~>x$7#}BbgX$l!=xpxP zF|-Cu(mP#VCqJn>otpX#`Wv*osXjXRc~RoHxpIN>F`2n?0l!pDRBXWtxS9Y98NDc@ zC5aoo$@omb-P3F17D}02U8L|YB6LTJ`S^+?O}Z2xtMaENDBdy@%Da>rGuU0T0lJ2` zmghg*68j(tjAMi;U(}G0piJ!l#M5UgK$E(({U$q7_lf!`C8(|_iX?Be#O;JlFb#U4M#__)rEfV`r< zP8QD5eeEk7Vdf2N*#hm?!@9M#+Alr%_6r&D2D8hXlr@Bnh#os%pEtG9Qn_}P7M(*?o&nqZ$eqrQtvLz#UIAW%g)u_wP8*U zIq&PaX>M#@ol=l$523BjTo%MvK;oJEg}a+~g^N#snqM=dy_CXlhWPSni6fjFS|S1C z){C=k_Vo%9WAhX6O|Q72?Eyl|M*%PUu59mGw(2w+BvVJ}41&tzhyhqPl}eXqn$?~I}$Y9!5UN`V1#P(_4Ni@sPykK}AzE*QOJK%pfbZeDpI 
zrK(CHcxA{gU4nKcqRw#lq;U&Q$(jtSppBpZ1g-*P{n~S|*k69mjWbJ}XwHoSLFpE* z{Y!IU?iO@njp5-r=lW?}$jB#^CMhL^Ri5~sl6uagZ zypc;dBT<3&(5jfCe+04@U~ghQQEI@lxr!VPjTD(?%dvfK_cLm1{{FMWD07ljAgU%J zRYpp)oEC?Ugdft1kDaY8u5S|*a_&eU^`QV)Cel`6gM;VVge3Yzg8e#&JEx`QrQ@&N95)S3AH&_CqERtdYH1%bX z9Yi_RE~$N2X}`x2kOo@m^-uK{V(bj0iOfz3lap+GNz74n(_9IoT)nFLiiI&OR@Ftm zlas$9nf|>-+PVsv7i1NyaK=?uj!a9Ts;X$$8K7JgUgEfRZL#W%O$=;5MlOI>$VYrz z1j&^DEf~8S$d{|&`yEu#P#>AwDir+HGAWWMeJhJz`aXpTC@qR!W5{Jf`S;yOZ_Pk6 z3r@c&eCr@U*pa%rm_+17{6ucJwGFSC&+{8QdmjM!sZG=Z!$=!ab`e@<_%IbLqjimP> z#_Cnn9)d|e8=}r@HFR7}{2lY^+FZN5BT|}tv8Gu!<`fr(kz#TfCDvcLmk*(q7aC}x zfkomO-1!!`vxx6+1R%7X&Ar1C7~vh-HdY zMVB4ii%ktg`26qE{vAdWb}uNs{a`s3J|Z{f84n)3=R5MqYi{RncozNVEgJ)}eF6d1 z3oVzSG*`H15)4pye_luiP<zqD3XB}Qq5#FQT#L|zoY=Y7!kIOs@qhuhb2c>Bp zJV*rWGlOUqDs(WL)tT8@|65}^r70b=$&T_beD$aZ-(eY6NNPo(LFkz2;@3wX-*;?q(rf3~ z4t+64Q^uQ3``lyytD@9bCdQZ>sx}2VXQHpp^Ekfn))~|NfBJ<*2Eq06|&_;W|Z<=2*mCSirIMXT zyeV7RnOm7R-*~j%Q1Ie^yclj;9ZQJ1vT*Lw8DlooZ2H-?R#zTzlTJ;0r$kceVr`@p z{Oyp+TByD1XrTxoNwh}cm~|{vLtwUUDM8H>g`AxPyT;NN=!hNrnznba5?2ny(|HiM znvV1QO_NMpo$Ri*r}pq zCX`fou;ax;vDy6L2NGzX6d)m>x!T`b{?%NBA7ypV`-rpiPUVis6*3nV5z03cs7fwc zwhJnJe=$#}5i0~`?fO8&oi`)%Mn;wUm+7btSHfRN%>Z;0S)@AIq%fI!Zz6+7<_ae} z(=~A5qEZ}i-Xs067*>@_=X27^R2%8Li@X`y!5Lh?-`~EeB9sx{xZgUg?ayGMJ?a;Q zqd4X3C(M$`jMi3L_*)vEv7W&oj#< zsGHl7ZETis?G7FU2xeJgR7a$6bMwUt+$&IO##aHr?tW4EQBcTEGb(4#0nJ&5c69LT|3ka?rxMC~KD1QvR*+Ij{viDv9n` z@3IFlR;g=E9CpMUeyXS(&j;v)3Dw`&!ZP7>=26=2X@?XlDrJ+&rW*Z}zqKx{M+*im zEcHygl^XGs9ujz!&umTc*QjOV{%g`$t!pA6l;eGKMoX;2PBzaUcfuMETFjT408cbA zybnJgqWPg$9rq7O*=2-#WX^fo$KP_e=(qwnVq34ij-7)IQPJG7lBg8(cX08B6X+%i zse`9^fYw>JoGJx2IpZppRV-O1lel1yFQnm$SY%NckYVg23HEvU2+JK(dux$WS{pc+ zpBzPtfv7^r`N9v#ef=Q?fY=62%+dRWX5jDp(bKy163>eQN^1FgiY?ze&6Hy2N zfuxeGJ@UY^$(4W;g!9G8GBI}_CjLPBrvT|i$xEY z6y9M?wvZ!;!@EmPn{lQU)9~&$@x1~Jvyrfg!2QC%kqaP&^w4-LyW+dX>A6FF(&|SK z*F=+J`q)<#qYNfJbO%pJX0ZeOax&T^(LUOvK7W}BVq)!uqpvt zY=V+GnQss~?9NWtk9G*4dOQSEQ4j{XqgW+TiE7pj@3dMHJ9GT2kt~f0?AIjh^jfjS 
zx0qzPCKv{-^v;bYYkii1w#L>q4pGni3-`F*Har%-4~}rwymCgdXl(eH@c1A~%OiLg zo{uVbb^;>$ zszHZ=a@CV$zSEaZ$bl9lTf5;R>#c#hg$L%7wE=RrM1a9q%gyFgOAZ2Znwj~6=J^Y* zs!G-QD>ojG{qt7g*0WWRy(8wqUkI?y+KcZ$U~W!$^mR5hRnaTNV9f}4&8M9dDpDZY zcQS^#$%7Gq04>bHb)oaAy{uhceG;#7>?NIbX)cn4Gwx`)XysIJ(O9@SKxLYp^uytt zA~qxMiNUdM{z|{DrxrC_)qpI@u0zg(x`E%B<1oYOp4^ShNfCpHWeZ-sEQOaZzvjJy z(l8;$&w+%5)8_Nb1;Yk}Al&!lrG|K{`-RW|521WMsyAIo_6I!Kk(vr%p0aL znLLHM=TwW1W$RIFL^dTSI@}Fq&Jfn&hq*I?AgmYqW$q_I@)$lzLSW`QWdZw;3_Q)Q zjl_4Qg`G{Qik$h{46Lb@{2L;N!`1y>EkY(Wa%+ABa`}_?q(lec9ybe$J7MIGFrPu+ zvnrBMip&e^q3Bn+JO@h$_t+Q!gV!E0q^Im(yH5q{OeTZufdM>*$pX0^^C(QXCo%ZE zd(8-pb%A&gX2K#3f&EudUt*W8ch{<)*+^2IL@7zZeeKB& zX49Go|={3eDOb}GFH(m&Wxox{NL3mZNZ(7)b*tkL1#F-~Ok{k{|q z=p#D0gqnPRz{nVk|5nidJgV^N18Lu}T{$^T=K=Vh$yWk^e%|gI7KUIzYYH_Q?7tX( za`J{jmI1W^Gk}@^90kU|0+fiU!+kyAZ!bML-$RW1dTPbQLL)d}HT4GGfy2SORmb#m zqaYp#Y|%jkfCUd^&A!eIU%R~RE<{8h3qhi7NTcX0XuUy4LBE@Y2mgn2U_B!FT(MUQ zp>%#DmQr)^p&32GK*H*Ryij7OYF5aY?c0ab9>kF@MmTu@!JVI&95N-E8?JF3u$YJT zE40g@Y;Q@mzgRIWaHzQF10u>J?662>5Hti~nmvQBpB^O|NQSxxQRE(hOZ~phoLkGx zHzc2fZTf$h&worxAe<}-f@0JF4e6MT|9x$fAPB82M z3CBNBzwdBoVvW_Sjgha}D4?~iF2>^eKHw`ssQbfZZzp{IY^-&K$XCK8qNF)-adyBZ zG7*O@5W0rV4uhMEI^gs1U#f`(VN^i*PN09IV6dk=>f24|YJR){x=EyZ#=Nkr9?~J+ z+8JQ6@t;SF%hYRTd;zb6tAwbUmBxT}G|h4#Yylxsa#*8;@WuX11iw!h?J78ZzK;Xs zsPguUje+)>^mHiaeDH6FNI78J?G#ov+O;Y_-__?O-w77c?GsibLjz!#Knl1H4@dPS z^#L(HzWaPIDG6RcU$oX@odk>MF0Vvgc;=rymKp-{eqr@$l~egY=!*~s@xyp<85UXg zEv4)l<<8i;DxEREfC|*-Tgh*Ll|?`}e)W}{HFuV`ciV(GSrflxlou$fFNf~+5eC$w zVr7DXQqnLFK)5%|OKMbq6jvE6gk=7zX<%dsj~3Hycu)cYgiaK1A;V{ZQ+{+lG22!| zQwU^O>&aq6td}Vo78^~!`Sw&P5b5;?6V9m8vvDE!Zyl9JzSIt^Iv~dOO;g*bUsei; zm_VS#$*UcUcpXSmR<)WLW`v8J^_pg%p0+kbr4X_kUM+zMg7zS>>Y38X?poH7@q?nZ zBIN@?$v$_>TK!imR*o1D4r4(eTi_1()>nYHa`6G<}9}={L+cBuE$1 zAavuQ(_%$Zrus-Y(zp25TaK_-)$p(&$)oWlc2+i#EN~4N+vz!xXv~)KCI)Cdcop=X zZPRYVwohNA)!ZD36OCASd{+Y1K?O_@$4cleX1yc;_FwYCfAfFq)G8E+TqiIcanv!| zWz+d7F-4Hr#VnUJd0072Lk}qq?b+00J+dx}T;sgoP^U6jr*{a13|&Gfqn2Pco5N9% 
z0j9}+gYg9~I<((o-`YfPVCNNagJ%;u*e@e{p;$?9Et?_zN_FUf%*K@upk7aYQtcsP z_N2H0h`flDlY@-q{G~P{7aiMQ2Im%+0p7(BMx%cZRWu4+TtEZO)61sBXK@lFdBz|3 zq`MdI--R0HRzvCDQI7pn?N@1Oi)3gURTjUSu#2 z1MvN^%+iOAlqIw-S+wElDBK|CK+tTpV;uT5Ko{T9)P5G`PzkvNX(h~TCv1QMcweDC z%);qx>`YU5#s8?2n<>Eblr-R914An~L$zK~f&|w^H&GE*%6k4X%e%?h3{>jud##Li zlcfa`UZ|&eAewS=43NB>Z{vzo@`OnmvOHTxktrK?wzhz#{AmTFhfL!>HZH?heVx;o zjM8Il44CF<8i+R*B~TEqS^eV(AW{Bjn;n+!-IUJFmY&c98Cu92gdrWyI9IeFw9@CP ziz%)VmeoHcxZS#XV7nRC!zdOxLP2P6zb|f7G9Og$99>4Q z#D^9YyJU27>*1HpwvC}nOe#es*-?R-^^e{KY$D81zFjgSm~#^;2WtGHQ!_f&j7u2k z-~?J{_qe07aAcm+y$FH<*aRt(G(Lc%YZX#sp{u~+M%J2D?$uxvhJ2IBiX%K}RIk?? zca^8%E791*XLrM%+Lwhzh{~Q0ZWV?-lFOcMSPC&%^jj^1brfhWg4Jr?%hDw-y4?VA zg!7hfzbvjG0>8=R+cC?3;yEf!OxF;O&4SG&;dIQI-UpFn%{qQcNBNu^BN`)b@}Cx2 zVY9o=X`bTO6C$%-iTCbE=WmEX6QWSLdzdKgvBFlzCy2((Tz|nXuO1cs-{#-{_5N}u z)J36zurvM_Ry8;5HpGy8UeuA!V9A24=~L7xsbceGV?pS`7X=7#Ni^cD!%8K}b>)5$=E z_pibPLV1wZPBUL@emBvXfo)Wzf%a4eOQWqW6}8vX0cbR+vN?37SaH@%6{V!vaxDj# z*=*dDWL5vw`tB?kwU~OlIzB&~N$+bzO9?Tc8R#Sq0-*stw@O4B5oDwLU9bt<3fbt- zbPk$Xd9ytjWfaM(o&YAOFm6xHEx}h-r!j4=MB_9tHnb#!ehG0?4V+Ju(?{xT+a#i? 
zGOtEt0aUk3Gt#S^wxP5Jw~pX!te@a#VCzIXY%Rvh)z+bgFVhZ#DCKII%a}J7$ZMqN zIf(}uVG1%UXar)woSC*2>cW_HE*gp|T;SBa>TVhf)ZRYi{8sya zHVJC2e=}>T`prLl?oUL@R~bBw0$~uqFoOF{Kz0zFZ#vXKc}f87eF44iXR)F)K2EgC&kL<_|7U7!Xp@G zK_XG$f6I>d`W+EQ{U8D?)$UDj+S6Vq?tIoG_I*da(-PU9Z>G2wiM1qh_cLT zGiAl++I|4+rVz@da2yP*mSNC{fhD)w?s`((osipTPm^9ok&-}5iX8sgs}U;~3)m$I z;6|9HP;sXmjzb=$U`0AaH3Fk2*MN(|amzjhp_{N8X4CY7#DWyoh!asn9a>-6-de=X zCyopr`nHOPVSqtUL?+1Z|6rtkp3g@87!BdjVM4A5GKq&)VDV3Lhk9Z9bVqiWy^QwM zlG_9Okob$(M)CQVGegS5Bul?+7_dy*W;h!Jm$k|)poi&un2P<_Ff0k|70bS9@wxt* z(AU3*%516l*8%ZWS@UtXMTA^6&GbpZ75U|l3Z_srh?;~SVGm6egG&2e+rM85(3uW& zBT_GZFPSE7?K5ALlsC7fRhtNPOz_9rp)gxC_R~T`h3AVrY5t@Rb)V;mH3J68JXeW5 zN~z|UhuK#^yks@I%2m|M>2VLE{2emr(XDDLDqLh?3zE;_=gU|7!qG!#tsuDNUBbd* z_A<_C9xEqhj9a;_7K5aencp%Nf+me_`HE^WkV04;Zo7G;*=@i*j&B=| zXV_%abf@GVBJ=l-6`JVX6$8u~F~hd=8*`_Qq1rmBEz)ifr1>oG1M;R&NuVfDQck4` zkM36qanc$Myv3EfU}Ut*=g!K|OR>Kyk30!qacjDu$r*6SR1}cWjYzpS&C-`I9?P(c zakOSS)+!!g=c+RmsJK&+X891r9w4X~v&!Pum_T8q=(i(~?Y`A%5CMfSddep^H_QSI zzSYR=s`PMi5_NyuGc~eQudJ*&@u+%7d5-`~Q3~@9`G-Y&Rtv^xGi7ED-OPc`R#8N! 
zFTu;}lu!uBARML(wn^V_)l73hu#M*vos(~7QoJ`yRyai{hHCafP8~aXHiDVylsI4o zoRUs5q1ZV!XRsQR6)rvqlk5}>u6y+tWVskLzTUnV zxYwn7Zmor@WLsI4nx~L2_8Ml{?pGYec##&m11~h!NkfN-Jz=w7ruVX6Tk~^X88MeG zDFtihEDtB4>AJM-0DSW84I=SIos2xSDhU+BUdak&bR~4{r|wt@Bir6|L}$*fvA!ipt=2p2M00f=*KJX|5i4N(+SGxv`heR&` zab?y0QnjfF{Z5*Wx7YU>J!kv}}s8=ce~v>a#dzA)~wDRgpwym9wPI4>1*b_*^` zKRyVh0ZkFGD2m(4v04Aj6plM|6#n%91pW{;6L;m~Y99ji2G8ww+2p0A^=9eYLB>r& z{_5kCAY)Gm8ACt>Dd#8Jwqvq_hVLGL;Gk}%!S;(~Y(hugE7?>usf#Tc2Nr>UH-SB; ztWE7;8%7K>Z4yO#y}X`?&zUA;*#QqT_`p7g0Pt`=6_Jo)ua||(0LGyer|NXFN-^!B zCD?c_NyO0T;$1|*zr1i>UX5;ko8eUB!B#^A&Rn*nhJ7w(+7?{NNNrKOxeEiBN%Fw3 zbAPb_yY46An#$fK))^P&Gh^2&8tX3jKSaG_bm!dnxLw<}ZQHi(c52)0r?zd|wr$() z)W&p*XTJAu{nzs*$;!&BB}y|J@{2uL9HN*5Q6$?Fgk`=Fu4?0Yd0?HL+d1%*&(rI1ul77IOCOF_ffC- zh6pN6bp=^?OqMJ-?dvPA3qt>}AztygpJuV$;yIaiL8`XXAdf*K|VCxI3v~K%2buUXGL?Lm$EoW0tlI zxpg`p?MeVH-**-(bEWI(MxcYu(PTI6|Fz!MW6OnK{+%}589(d?6{PL}s3G;66R-yB zttnmG?l`sd3oWX;w8H>&7!Q@g0=?WR;%`%YZ_YqRNC^YcFad3&HBmleosuy-@quB2 zUoFjfmMEwnPiTVvf{ZI)qH~Ev+m1AnHEyRIBO+Ig2ahQs9_0tHyJWwE(1NxVc+OHr zgC|8O3H|8>w>&5b!9-1LSlNT+iA+Hj2A3iuGntE>q^~oqKu#eG{9PT$!5dr+YFpEa za5RGtnbGGehvpi`%*)c3fI}JWw1E8qdW=T|;@1g(=#= z61$rgXO`RUTp9vc3J}7LM+(Yxj&{#Li4tBCf|CB7hK9uT(p^_k?^bQzOECRuuMZW$ z^q_khc52>T9?%rq3W-Z|ijZm%O(PBZc|{9olTsPEC8G1)clwLb?BD<} zf;%0Q0eYcg2pCEQI!frRoJ;u|Yj`U6~BT@m) zsqiLnaRHwk0QB8%oWOVTt9@Nx`B_v#z?AGgl36A>ySzJ0h6?&1ZBH`9ctVP72m$%5 ztR9r6(rg)Uw%L;aKNo)^jsao*YvJ%le+{KY6v**?#bXIW6vy@#F+AJ}Hxqe4%HjFa zCzrP}@$Q2z8!#hoPZIH>F;?>w2xwg}CJI}23J6b`cnZX>Nc zstG>8?0OcTWmP7Vl%dXvClN?IyB|{0`C*|$fG44+)QKE1_DXQO!&8Q23ak`>E+wFM zEdZY28<6zV9TQ_RWJSf*jVpzKiePXfF!hxEW8OIh`=)J?b7fnnuLx06iZ5CBqR0Jc zx;KywA^7*?Gzb&2)cgD!zBt;qhV4QUpEVC1folT z705rVCy|L7aHI48R;a@ENt@Y6Q55PLzm2fgD*}!JsD|)1#3*uJYum+y6Sm!kA3sR~ zE7_(b3WHc?A8!k&HQvO$u9!D`k^e*ll5&nrt2@Gr91Uj#YxpcG(*0udTFaBEa)r(YpzloDt~xHoEYX7#}oY7(`mnV%dgWQ+!d2*)(_I%9I@Vye&x5u^v0 zaQ8={bMK*5B?3;!^+GQFA$FK?FnBbnbTJ7Yg~$kO8A|1j8;Ln8VDa0&OK~PFucu1@ z90s3PP;)PN1lr*3blqiakJoCZ4WtbwdnhsM 
z3TM4g40VFBT#Lt??ne1OjR*Xs9X|n!i^Nw_yk%pBArFruYRK82QzFX|+Z^6q!)zJt z&-qbYF36oSsDnkQ>V{uy!xg4{N(nVxIN~;jQepzxq0(@T&?BL|c#G8wjED`u-6FB2 zw!+@aKqLKB9J-i8e2Oe*)l64Q9r!5_N-sf-8Lg0^IT1DZ)uwje?TYnvDOD|@;}B}$ zYAjVopxkhv<1w?(#~ZCvcJF&*V%of|QNX6#?DX0i-k$y-%hG%J)Xfa4ZS;{>@>e6Z z%XsywN5EC?%>c#J6{(6C3qb@x4aqnlkUer25^;KU>sD;tE=+_-h%yo+rYfm z0ezzZC+NkdUBMyezAh~0IIgcZ5-b9Y+}*ImdPb<*4vK0rRqn4%kCCR5Gw^!6y{3-} z29X+0+Y9Bjwa2w+=~2N--W$~D05ez9rhO6~(!)vyCx_vRxUU1)S(Yzgs5{kN#^E_}Lv%K_3QC`+hC$FOIGJbF~HHK$Eezw?v8H^PxgcxGv zjfwV7VgXldLWYDiYentt-HGb$WYLv#qp42|lCb)ZN#*QHIiv@JICE{a$u z@dphImekG%_OCWQ^ab){D=?`8O}$?aKt(|~1On`?@(^^{54n&4BeFEOW`a|(p}3lB zTGcOGoMhQR>t*w@A84Np?ODe*XZoHJCaX0;R&C9o7NWS?o&)W^O8y*HXg7g^HR+ZH zPV_$a8Q?NH*YS-x!vv4h%^WU(T980B<BX=D)JZ} za=XC&zWU(nmMbO_c&?SLq)3J`(}{_;S2hIEbnoVwM*S}E=pvd++sNEFf8%8H)7A$P zc=xGTO))xO{_%2zy;zr&ykj}iAGnxax6eWtAp41pKfTO=OSBv6UjFVC=i>#5MNh?X z59f^R&bsr4xbuU#lKsrxm9{uj;#>IxX)}tM7nB|(3cryz2?@j4Wf^vR$t;Lz3+t_FLWa+g+EkeMo5zOlN(yO_ny2f@o9SU-4s4CasSMtD4R~y%n0f1(NQOQLP z&Rvhla*##z&vY-apw(sG2?4qx89a>A*Ns~KqOds`YSP3YNfYE8$-6Pcm20&!Wcy5Q zX=sq!-98`5PK~~>vd0xdZ>6&bweQ7XO1%g`viT*s)4RyIa*V1MkPstu4{^bLMn-e!NK6? zl{6xrWC0Rw1(|d!8GSL$f0w-cJZyS~%op z*W9j=o~>#%h}H`yMZC@S`d2serMp9Fuf=N_ZSQ-)e)j{va57x@lvW(Vz`Df6j-Pgw z2l2W14fM6Eqsb#c?y8Di80fOxxyC<2g8NwUQto`hI+y^>%CUzz$+V$LMhUbpho)CT z#nc2GYI>;Ge2Zgu!+i|RM#)Y$$8|W0QB;)}B&wABZEO=N6R&>{LYO;J>}0z@V;R(A z5jKoBZRpO;>VPOJdoq(WIVE5@w}BqaV10ILAb7^pU?UK4I>=%3%`*0ykK4MYPZ=bV z7fKe^amo1N%~AB})VLBt1{NDJ0?U`#MgYFm=PAyeO>P7rTkuaqp-A2gk|Ft}1xPF4 zoe`()KKJp#PXKToD554cpb22l-HBlHG#9lj$4O5w70Xq?1f!1OQ&2>U&}4E8y~JyUMG;pzvU&yRnH zP?d#8eV+rR@A*~HaoO@G*wVpGLcBI@9gjH~CXY!wHlJM=l7tRLJwPvydGXid=y~6= z-5SP*>{X$XAF8$V_@=b(;J$3sCRhK}r2sFCflmiKHizpG_W6CbJ_by9@3aCj3{&U! 
zem#Aicj~*bzFpsAhj*qg#k1ON`pN4`m0Z(dBP@?gW_}fN-bFWc_P6Fak$B~M<0G|2 zr$Ev4Cv5aiKMLQHZa$h&2(X#gB#)j>;1UyT6lsO)iHVp@nUg?(FKv)u3b`Zk041NA znZ^RdK+nONE9epQmJu?S2ZX#r*ft;vL;mEYmX^{rb2KZ8N6y0U0Q>H9mHl0tTs!R~ zZ_S<>Ybx&o)h96tfi_oX#5-xa?uZClj%{jK?aSH%5y1LwmIo9mBV>9?V3c=dYLt~A ztb3>bRO5~aKd@vHiN%FJXk!<7C4xKL{0IerN(#Ewf0A!yOxL_*AO^_hY_qEU1vQ{K zy)OXWWwu1*+AtnB-^UG+YRVKE0#%No43W8wc20%k(VoO|j}sjtz4Q?15;np^1{(6u zXUAYei3^Rj0e?(Dre1CoQsryNc(xwQQ3hH2@VhG7UH^b6WT9Y-lVG@1?vEzNipvJD zA<=>1aWDBd{bsrb|_aX zG>gc-M1*%b+f(aCL%S(lZk=Wu-6F$ZrwFvQN!J9tS*Pbn1Tx^T8BdY&#Y$s!FUeZT zrmogyR=o{a!|nw`vBRF+nWqXL-z))u_LOmQN~67X+2=CLW-z*0vYFh*g%SNb!6<@` z=(cZVa@yi5UH&w5nAv$%-SfGJO7g3FdMjKC>KrN?YJ{Ex)!aEh8n?Y+&tIMtn!kj` zDTZ#B58YU>*UFa#0`rWxZR);!2PWP0jVnIyylAB@A!h(WcaU$3 zAe$3B8_`gjdNwvsPz>b-!ruPPA8`m?n>rUYt`0sE2RcvbNq+#kkaRjiR`TIqvyj<> zngSN3t>Y6X$*P@n1wbgEP)!vL*keSwauxIWjo$Zn2XWR3ui(u#ca+9| zf3OO5uMj5^%JqjrWn#nj++AAhTWr&RNQdD4D5+|pbu5$_0H>fM3H8?P0H@$k%WPxb zaq@AHJc*+<9;Cwr9#I>>1Ru)tA3$AU(oG=ya_jTJ@p%~qEJ6){CQXR1mryJyZ66a%|?fn53bxZdnDo6i|gW zov6MLyjJC`oPFp`0-UY9A?Z+jr0p$RqQ|KuH;Y~`azku*2e-N$-FTw5ol!=0xHMWt9I*2`<>-17ys{({? 
zKhN$F53MjaFGlp7g^5+6eW7nnbM~6G$Q-&&MC=kY6ovl&r8B_q^VT5jVlkEeeIjvV zqD{qy1ZR^ur!5PgC069%9=Q0*vPGkOHuzaXGFwmTpG%j@hYRs3YTIb({D%#Ejti8F zlJ%87ukueES(k2il$vS3cFkWJ?oL9@7HOw9djSxhd#T!#*RHB7np3%3Gt{H_b-2N0 z)mQ1{x0?nfPS=>kL`p`Aa`!bP5-`OE#rbjhU$7qBmnr|n2{1M2AT0k!!RW{*Z+9T~ z{?+^~l}eA5Po^b}&@D}|Su0aYlet}khzEnyD3VB%^yukM`@;|KnvBAo(@5^4YH+6~ z)&$Jfg$w`4&;EX7KZptN8NK-UdZ}*L%wh~Ns$_O{>eYrC>kN@VVhB;8i@YJ>fKHyG ztMiD?{@q=QvRGK{p{;o`4M>%7efF<3J$bCNkIEO;(WIKIdRbsF--@=@-N7KT+b=N6 zr59f@oAy{29J)ruZ82HWX)(TeWdd>4u+cmXwfRbD*OXyYNgHE%jdM)fszj*Fl$RbC zq;1!e&X(A&8O^SH)x`a>ptd{tL^IG)^V}VbjaXZ8%Xag86L2N?12}U#G3c4C;$5fR z=drEKZnaU;aQKZ=7Nv)Z*7DV3*XT}+aUfGw)(17IQoE22S5%l(0f!L4)23%dYE&82 zQLE{tk!BwoA3MEvFlpV1qTKd?>@j=UN6jO-+g;59!O}|T96xU(2~Y?n_s%d>y~T`NBb~3RLYPu>jTU`4WE---(O;(M+EckIW01{h zueeo8`Gv*gS{to4^KnVM~5 zRA%J&^wznBUkAO_hww{z_F?$}r7D${^>Hv}%D%xt+Ux6^AfWJ03d9eSla^HexjZ^n z;1`PVh9!>UB_bvlI>xA@UfzNSS*CV1y0|mC7}lKpu5V)~N}6`&E9?s@XU-0fU@e=h z=aLOg)ot`v1;l9om)^&x9Ep6tHQl$T6P=Xs%cj3yRPEx``m(18Mzn1C_vvRKZDocs zwEZGjGg~+H9>6a*QU+#Vz$K{1JEf(+%BNm%y>}bd*+KknXj$_HU}1!FFbiyNJ&i() zDWO9FHy8wRkRU@2g4of8xM^?5<9iP@WeDv{2JAb)8We8|L|9@R%Nc8fXeQ|myX3RK z0xURScgl|MUPleY5gQJMHcS9)3Q7ooR%zM5O&uX<2au8OnAAkorD4q2`^J+EaQxmF zV$%+OW53V=S!(a)nOI&FMX_K$9yA% zap2oQn#g}4SW@C(kH;aZW24BE1KC>{>rbQ`m$~XgQl=(Zj?xqm)L1R55_)0G>tI#w zNe)$qn{UZ->(mD*K0GxoyVvWwth+f;ViFXq{%NT?6JMv z0#ZWC`L;Oy5&Ic0MWcfPd47%mEuw2QE+{g<Q3A3}$7!T&1{ReVq z$^tM!=%ftZZZ7B1ob9qO|AYDI3vBBV2XOh^mUQ@)~P-2$jgHtG%@cRKXtJu2xEP>S6KWNJfFlV%c zl}o&J%ACkNQ9k_3h|SRs)X3WtVoe5t3|B|OT2Q2^6sUq>)N`h}#t4i0mwDh}oW$8n zAsLQW^ba->H(!fAQt)$NMrHjgk_0?MSI@2X?~se6Hu>G<1*M}!3|KQiL_^|g^TsO5 zheixP_3{}+voJY6HFptHP>Gul0M1+j^S5!s8N5I`gDzqzKGsr4VjK?cN58{?d%$=H zPVr?~eTn?+YL-(r+^D>l&Q6O-TZa|5lV?XJt-X!_wS7=tZv44z51IuC7qtdQGs7pX zDMPIj%{>RRUnDBqy}*K=cxyc26d(IT8Z<$NfRw^?$bAI*pj9pt8e-#LfD(nAXeN@F zmjo)omB)@Ipm4mypHk$@o&k$RUOOr2i?OvgeQVFGwfCg7?M-Fdr}D;cwzW4<(s#DB znNb=0Q-A$6B&z`?-c0qU4JXQnW%MXd5?a(ru}sZSijp$g^8}GG76Ehc36vugVEk~W 
zq+kh#hIiN~M_C@Jw1%M$P?%MZT_>d|1J&XX5T@BG_5!W<6tVmIL$#ehvW4Lh(V|9# z?WD+30KMZKrU`^02NV<6f~iQcf}*TTo${&rZOU4)&REg2$1WL5*Ux_7p zNrJascUD4rBnbSf2iWZ?z!#K@Etk``2P0l8c_At?LxIqMzIG^!W8`xFKJ_BleciM> z*G7j%F~rZRK`yWzpuM?9`4ZJNIAIL{LtUG;xXCN%AC%#5xrsuG6e9Y&1X;#JWh(;l zHiD^B)x~@J3((=_ouEQBNG8Z{2SnTCurtOE-=rZON#rv|lQ=V&3$;x<&5H*5HkN&V z`{>}N(HD)vS$??>(!iHJ zr!Fc+)tk%c3_jO;(aA}VUh17~w{xMPbSw%FN||q40u8FPrvRT1TcFxjuI&67e_>c} z_=Wfl`(yOK$Of3@KOqt*GZ**&{RF|Dw%_5n&#PZhmS}2{x{TyVhmcMr0gf6FM^r#~ zjxt!*mWEZ1I>z{Vn{$+Sl-c8nM8&jQoQ}dt9QNwxK;RI@`+TQ5J4d99k29Czh@irN=^G7%5>pZDOb|{xOH{LRRDmr2M{kvJ>riWlq%P`i+uoPL0qSi5!bZDp zA5v$Btv(_FHoi8H-cFb|4Yj{x_PWyM57H1N8?5O8HS)Xe>%J9twq|(?kRD_r+cogL zFPLliy}W;@BZ`}_E;MMSnn@;-*pk2#Z`$aK`kfu+MvTFwpcasfb~zAke?M=&W)TS~ zU}w4QyT5YoZns6oX-F4I0DuSLY!vT>7`=Nr5E{qHqk*{J@@Ljo93d*j3B)iA+}b#w zA9`*mK2DQ>J!vL`I0@Z)w~s2tfVTaSgUN7~k>R-Xh=qCgmX=)d5e4|UmT+?jakf$K zB_;^^%^Td{vamu_?e7`as%HX55BJyi?g z2euW}-Nc9@^BuClw*#`tN`d-!w^*g7in*o+Bg*sbQR#>r?0t#iObU2q9w0UR@8Cdu z;tiq5gF$-8WEW&ju_s%gG(d!!;1p<#645)u-=3_5J|77D2XukXi7M?Mk!!bRhR{T>j3y@MAtsaEgLdu)I~@`q~_ql<1v z^GIg?3c9}jb~-HKr&V7+gxbU7aape*^yt3^Uw`+HR%t7X&`8!`g#QTcRY!+EUgF{~ z{&-wLFP{QdY8ijz?Qc{Wa4OywV`|j$w>-+z?+8=`Nw+v zti5Br6e71h0E6XU@T+W?j}5i_h714lQP;+*kP$KG&y(POe|u$h#I*Auq#24zkkt6^ zlo?p1M&k0$w_db9r|Dql!}0<)Cg55R>4j1X;q4($IULD{*y*q;vZG#w=YjM^qHBjz z>o0h(-Hjfa%5#r%<+oS8qxq8r?;3D}ud2KtnOG9j0LoDmhq?i2B`#;(Darv_--sjl z!V+atb6tO6h38M&K}dmYD6yNvD}p)h$8712Jc_+3dr=Rj-l}$eF5@NiKh(Ev&(}pR zW19J^!`;i#M)j5*N+U4C&GVY-|A0g#tAIo2NFI6>_Lk)#;Hd7qq#ZW@Ya`o2l8Sv@ zR3Ih%14v;0jf=jtEW5p^uz*S5!2Ue-@EsT>YjZ>i_ z#{ITEDg4k+(AHZ@fkDROBDF%7fH2iMw);4>t-$RT`{p*qyPEEo_c3wXp`|l|`TO3+ z98A~ZV3u1I`M?&2CeMEQwXV9Vn82t1k3PgqHy|T}518EMs zd;qOssLa1mk>h_tYCQ;c-1)DA#RY#5MoQ`5+2K!M&dOSs`NW%k&N;vPbGFDxl$`LO zD?%)-l+$t9E^FFB`NBDHle;nB0PE|T3CMG`V1{|8W*YPEl zaeYchaBO^SS&n>VxqLGgknf(aQ2NL!7PZKFT=IgFIqMP!(vVSuw}y!^Q{SC9`~pxW z{?e+CksNv$kz&xQCPnfIMI;iC$6el$m=8-)1q;6V6l-R7P`*dc z;HTs1zjz%>h6dxz19W*&WCc+w(vH9PUThh2Y7iu(P~!?Ew8T1jgpIJEjACkFJSF^g 
zD8OU&w!bUw2i4sVB6jE;nMG*`Fagx8Z1iNrr!A{BqF_gt;?K3I=Gw2VC2sGisigir zYxJN0=rBd`LJ>w5k?~c~2sR}{x>Tl=ZhE5x>sff-XUuLZA3cDh|A4KE|B_&m68sK( zpZ?pEKipiFb97e)CDsO+uCmx>fEZvUc?#$2p74WJZ+F(l^_VCuM2m^~lnqdu$+Y;x z7^uXe0#xcKF$vs~Ia1m##ab3K$eG5hxO z=gDGzB%PGy|Bq}XPa&R&KS880UrS0&K4*r*vI3Qis!o3|Nk8FX$S?>*o zhz%4V6BbfU8O;cvDz4Lg?)Naa&_FTUv|50)Ngv+ayaO-- zd|zuhgeh36>7pXV(uUB9ycw0tN!ah(fFt)kU{k}||B*lHl2`OgGFzOtGX4Ru53{#J zn9-%o+V9&Ggo(tk)q&*CxvD7bs~v^A9S`-}C8;!-wzgC&Z%S$Qx?EePICG)_j zG|b|hlQ?9ol7P6>E)8vkz|mgTbpZYVS&z~ngfribvu@Q_x8ZQiD--1y6m^93Ty;cT z(CXwQcqroy=I7uyx`N>e=S--z`hFbCNj>{Yd?lq#spboaA3oo=caxw>WNotl*^a-n zV5twdM+5Xx9fYBE}f{^l-iYW@+-nsnCKbyZIvi4o2Ub7oHt2#>UKfCDTcFzOPF zfOO^&5C56(&-r;i8@`G*6zu8b+Q%oOkFznUWGOsi)3T;jOG&h^|BFy(2SEcp$Fafw zq~p{H&f>xDp+=y6k}zuRbVqU;@~Z+K>Vh}8#SxQ|AEGVuE zN{t-BP~n)5*8`wG95rKc|5b-9sqE=rkth&=*)sd%{{ElXFE=mU!Htoqs#o)Z%Al;|c9(*hW->fS}#u#0mD3EzzMhYGbR z7*j+qiK}r;Jwho6GNJSpGUV11TAx!w3|ov`!~zb)J+)tZ2m2OSz)sT4yiqhmngO<9 zDti7)ItO5{@JEpxiAw}*Vl$R_C`y8qJ{Qx%R;}9T1REux4b+qeEBBEz8u8e=Z za;s^v^t?*?Fd;HU9G1NIEv@7>h3_e8)pAGf*P}Pr84<-(^B2C>FX0h+;7@Hi;a1gW zBO3r^jlMIGHLO5Q<3?HlZ;Yfd5i0CK2867D!ut0*LYZ~W>%i#)Wan}lPf1kkYkcs| zkXUfn`3`;JC*_X0KsluDw!DXf_=eAvsWqe+0KxMq3^ z(u|+B$zancl{Lmtg@Nmn=alPso%YTQFJ+!3j)n`H5?zI0BSD=EyEzJZ-cMuF1h00A zz`0oU8bHM9J*xe#wTu+N93(Xq$g&0lNjPO?q*_lvolOUZ5&#X#NScXM<0y7cl|w`l zf-2tShys;JX0JLZ! z#p}m_O-gr~hM=ZQE1L%n3!4vyk4?})90oQ*fWbogH!EZ6@I@kPcPxc-5p66& z*2sBYgAN7NfD`LBB4rpkk~p&Xii2^(J%rf#NN^!}p~oOA03C(~A28WMh(#b=g(M4` z0tOpkK*&6Kkoe>pl<$F1=y31hz`!81VObA#YY>9NutG==&94KH+K3~opsEtY)E(52 zrcwFuoiPw92bwT?u(Z}h$Rt2Pky>IP8fB1>YmpT|4!wXhgOSRK1&Sidjf9?^fQKTp z6Y!Of5)v>PHPOQ&lxINbf1x!Kv6`3y14NA;?H!$2Dhw)IU=t;Jl~?QgzvABAd591E z*mRnV+Hw>aAr$kVwAAObt(v*AFU6-HPip{?%_*Et{`Z zCZnq7dQX<4U!!4!I%u|xGA!+aMmlu)1Y8%sV~fLJBm=ip8Zc%(Xzy0KRNNn_0n{C? 
z;&Z!HTfF~T8ARK!zX0*fcX_Y1N=^HJ6nLw)&pI>LZTHbU*BD+_PyW_^QqF~?5D6V| z4K$O0ol!?=2>lz76GDBiQ21cT3Aw`+s!W^t6$kWESt3|_>BRt9_Jwv2CJ0F|tW zQKALmwQ-|Mm9u3}m1)CHQVyCP_}8u%`8yst+fd|wqawNm)ol?E*KILPa+kv0tjkE7 z@&tgq!D^EZLw;Pd)Ps(T=sG3wU5^>&hFI%kUn^mol3J`k-d!gv+08(B(B&&MxB)4V zU*M#W78u!#AdnKcNGHUC9;CXa*CpIy&C|>X=A$-ZOV|mqLuir_S9a>Y2v}ZtYiMWd z!tqwZGVewWi$sJOzPzwq6I$u?ELk_Rnf(4!;JrpycPY*|t*0ia$yj}kRbbJRusWX0 z1{<+M)@!teU0z;ZL;3+04sweJ>e1G7P1Nkop82KC&(D3KS&Mj zrqAgBsHnPJ^jIFwSwyl2VM)J%YJ;`8Sy9Qe+w*y!_A)_ z5WtOv=tPR?qlcgYlSL6XgrbO!YgoYPK(w?uKK`eiH9JY4(Vd_B1!zw2n0I2}8|Stq zgzz2Up8FeT4rw9wQC5B99)$nYT^j+Air^> zePXuK!#q8!a+($FnQEOS+=aBPrrfw~sds;|?Xaz0EXs~4r&26Fd~tSGN9;D|(mA?f zq*|h*0KAbqz5}Z}12*RI4%!}R|;%T5V6-a{)V!wh!ZX=_$UG)UminHkMva7xpF9cLx$Rhla5 zeA7^Y@Qy)Ph!S&P(F3Ocy{b*+D&%ks=4w!7A?B%_NOHeTE0xSxxEz8QLAOVE`S-pK z26(ze*lXIet^x0wBaQ6D*X}rlHOmF7y!CF=A)q|5j#A}~n0P%3W=eV>2~nj732grg z@qd6&MS+I@Taq~c(=|0yEC53SfC`0UP+sj3r@*B~epPs(dyJl5Tv*a&#_S7|n^jTa z!PWb(aqOCD_Y|>2f57(CZ!h(2TBGjY;*|~vvSo)>a}T(CJ4mMAdUex9`)bsfGei>8 z3s#RkA3ZPeLGT=L*8^hW2u_FR`S-vcaTDJ`9m3X&xmcU>u$f0>9io*0p#(L+GK$_y z)OZ|MZk$VjI8L{&zm}q&{{g??XyI3SI`mr>t4{D+Qx}vpd$e(EW!JlXuq-hT*CJW!l?QXIWnU?8~) z^Y6GRI%49f)N4WC~X;^%#%>*Iz+ zx3V3#A2=8VOnxo%t!{>NOIZFy4*a|uV=wLk$XQP1zpR`eA>CI%=9B+D614xt2+6Y> z2_X8($%dOG03m#%k{C8fKaxB_5LBi0s8h$VENOk1q-+mfJ@`=wjyE*1KZ>Un>2cWo zq9P7POP}GtVNgnFXU$YuebOae%n?Y&>R?S2*TA^QunB#+!u}R1d%5^(AqW8r@?-5L z5pg;xqgITI0Qh&5)+D0Lv6AFq^4#9}v_99|zL&sG0fgkg)xmgE2Me^pe}{x`Z`+U?QE=56xM1~F)^RO>3XU>3AfF|?6tD|Almjng^^nnfgWmv+?vv76P*|Yix z9o~U5bfaaMhC-mPOK2&hnB0>YPW1xykhQ0ym;wYfa3f4m*J-0c^8!_2`g;l4177nH zQTfLUhV!@7?6aEgG+{K;tF|H{9TB()0IHn5A?Q=j{91xOkU@_;%wCQpbfh&LlhA6wckhRz z&)fdX?C#Bry4A+iP&6#YJuJow+?nMOqo{{CF7e#le6!nU!ibrDUt`aaqIl8(V8w;i z6GPMlY9;6B7mrZON~6# zD^cP!Gk>VQ*Ub$ycdL;NR?y82Q(x0|SlGR-=t=ODO_!-NFZUDC-LiV~e?v8Wu?iTi znR{0h7-R{(*?*566s$x>c;ml`mHU6=7?hcnJ)M~u_&@!s%l`>s?xj{Sf+z{)S#w90 zTb5NViY~|}12K#<>BX&uXzI2v0Cym;N@TMW?%4)nAUfCn8{_S=I5#9XU++O5=i9T< 
z>310g$`X%ciHm0xCXqxbl*&aaL!nU|X3xZZRB)v4(Dto{n76LqaS|Tx#5ac%(Y-?j6^jWo-JrcQJF#dNM8bZO35b+_qboNfgz3wvp%iDdtK7dVj zlJ>m$PVYyvP?N*9{yG+5%kC<>t_N|k9dsW=|81V<(!P-w{ndSv1r!El;m;6`uEzCC z<)X%P?%SLC{?1GqL9CU|RsHp>aYifTWBOzOs)}7zk;JsP>d~|XW|-Z6mT@k8-n(E< z9)iNYR(ZBhi*6iV1~_C8z`blk6EKUg?zVUxH>%)~{`>uAbo83%u4ZoSxy+K_&3x*c z^;5MEB*J?gM=Ht>6U7|B^-9p=dK%xpc%eB1t*!nD@S&A6(LpU_RAPt9h_~C6UQa5eXqah+0NF63 z!P2Yhz-_^tle4heSzuclggI~~2AoSN6FkZ3r7aRQ_v#Im(Ew8@^`#D^1|Qy~^_7xG zIx(tf)e&k|!7n#Cz0dPDrc#dmJbC^O8aPawGO63`2hMXO)g+5Sd4QJ!7v=yk^$v+7 zr~YnR>Qxm}L5Ka&X(WKob-mK|J7fvwFg(@a=J?#gV|U0Jrxl1&olM?$#kHS2 z+Bb!Y#289Pbkx)s;m;_`{Nx?$m={hpA7jL>RR(uVFty|F4#5CleIIL@8@V7~ts{j+ zKQ|3-PU}L#aSXBq9)M!-r%eh_y8??|$p#0##yX4Vvx(X{20n z;*3qb3^}Q~5nZuKkqx!&6(2El3kNP2b;h0e!V%`R4vf-u||6hW+M)kO`A zotq( ztrCxkRL2ATATEb~OcsO6~rrzT$7L^jgB#Hn(m}r6;8bxN2B+@;8$UgK1rYMTg04J%k zQun+FaUmWHxsrLfIUy{9^eTOu8n3!J$o!3Nv8V+7hpc%BKfjkP#|K<5b(ete64x$+ zeyt=?wR_!6)bZy zj)Fj;65>&N6_F9OpZM2y9?;PFPNLl#aB!W*0cTUtqnXXXx7%QBp@6$Bo(PuRB;|4PD+;#Bj244=IwDpv}`w3BtyOSpy-h3U99NB|f) zR38ENq;kU-3z;GDqW7E~7{iJVuCJZBJbOGZx3BdlEHCLNWpf-Uy#cu>$C~l3y!V&m zL=?s#1F$J_j@V9ei;Bjwp<3sIOK`W9?LuWckSto3#@Mj6y^+=`)<|2* z5yQivF{Hr(4@8AB&(@J%`}^Us1AyM&a-|3M`$%U0s)a;C5g{=CY=6{CGzJJAEbMEj z1xSez+11(CGt$$ZR9nGmwN=u}Hv^B3t>#?0N#sJ?P+X0G^ z3v9IEIUi6$aiaTb2M8-hTkwi6ek96rF}1WAgoWz11*tMfA4I-)bTnWx%_WRh(7`sK zghocIc2*Mpz`?k|1-h2wR~G^yHi<~b5IT4~T3F(Kbk$KASUzf`&=qNz2ReiuDP6mh z2zAfihzi+uK#tL)Mo4upJpg)?Ft-CavA_OD%9_Dg$r@4?R-<98YxFTmRDAB?;Z5T} z?!GqXwea`vmUO=tm-r~7&_U8L#>R1A#q0_y{A{MFz5B^j%ExhKtf%YJk4w6O)POODW8`Y`=ePd`n)9f1))k2@cTfu85}j+8UezaHO*W3C=0Z< zV>}qH0;p$^aQ#hM85uTJ%T8m)(67D|-_iln5FZmGlnBup`}wdTQMowhc#x-A#g!vZ zpijl=l-z7RZF+Wq$;sEh5VG)4L8z6*wB=in1z{{A-f5jLi3#@0`sYXTf4nh1X#X?!q)$Qv(*x}Or&0LcGc0r;plC-HjVdwd zG7(MN%4{d2*Fywf!w96HZDW#UG3^xY7VJOHCfRh}8L`Pmhah_tBe=Wm=IhKu9it=S_;JYPFcedec+}o&+qPyT;Id^xe+*_4HZ8FiW z)X1VQ00>_f)fDwW!pjT#aI3{ms}pEiB{KTljrFgx;2}W0)*yxET}cD$iTUn3Q%Rdm{`MexVq!wS=4O9R@UwIi`~^y3W4CvKNgmNY2zl2E6`?zq&A?KabSt508;4-kTEw+)WwJJICX{}2H*lMxXFaB 
z{WE?QaFu$o-ElLtEyweRGxPp`Y`s%(W?i&3+)u2IZQHhO+fF*RpV+o-+g8W6)9KiD z^7VU8)&Jj|n_c^2)vA57=a_S>HO4sIoBAOmyF#l%W(JWIV^ygYx7N9Bg)bF;7xBfc zkj#|PqrGL*>0iY7_j%mq_nuU(o~afjO(fF(7e$lyj-gv%Jtm~1Z3nHW5T-V6bb#J2 zwgn8-D6oT%#>>voph6x(J!A)RStl9X@O;y;QI}js$mEHbx{6gE1DT77a(X#T(zWc= zdw^(MO~VknP)k%s*u$SUPiLzPDhOXWQF9H1yylKIwzBZ!)m#*f8%FUP2ecV12y*Wj z=w3fa3D+)^+!b!T`zM=7f%If57~POd#>8kQ19*ns5nVMG%PfZZRsCkq^jjj5RLMXJ z@hcITwB#+{JLL{(Ue@UmyebWTT(RBw4(rHc0$50&Jq=FIUQC`k|A zzkre*a2?UEVM}EaOFfbaMY*7Y!`EF!KWyyc4JnIjN0=l4@YqU%m&_3;IZ+Qs1^R4+ zh0a>&%#^zKEzl7cGLj%aBl0mN{}zvi?tnwXQo(R;VekC6MJIQ3)470qs6EVR*h@kl~R}s0s|&q-0KiAS{;-MtL2LXns!rgXR~mb-2Vl zJwGl^cZo>grpFL$j4mrE(sHquJY&yfcIeb2&Ck#4cPGj=0A6O6Ttn1H$AvUEQwA1E zANnB}Hf%-w;=C%~Og~I7Kq<|03^k#aF;NMZ8RsW=bT6C~a*clIb0_&Ya-}q-$^&Fz z1w#EC4_Q){abk^G_}CZe;NZ;eKPBWiNOIddafSr7hc1WDCd(!zSE|^jQb$7#k<0)! zGIFEGpF`$srb31X!6N>l<0-oVBTYn392F#RX4vZ}lA+12h``k(^{eI`@j@X$=UoxP$nW^DWi_(fSod~i)By;p1+xDc zGhbra({ZH4rH5}xWpA>#`3uZjeC&M4nKx>JEu~l_E6jAg@M8Glw$~q8r`yxw%uljf z5#QUe%AEVrM?qN8JZfwFRpY0fQ`iQxCWeXL; z7|XSYRY5>JC(V-Xrz0$=p(BuKYVoWVh8@JYZFazGx6O2u!kXWVi|Ws4qOsV12676U z0rC@PXy%7-w(c`ir0K(-i;F*xQS+zw3|0A#n?K1u*~nyk%WiyVW-2Y%>13d0!@COX0uM;AnEX^OzU9PBUkhE6S5$sqfF1pDpDe3gA;jUD2T zeH@>A40o384#o)z^<%uu+J@^uJ_hscu{PCp?#3!8UZ4sw!xvdxS`h|CQ4vdyf@y0- z5*05{b6UtHu`9t#83n3 z)$M=@lc~4g>e#%wE%mRHH^O@nj83@M_FgBHtf0M>H5~Zpavkobm4FJLVb`K4OG!J% zX+sY`m&(hYS~rxnuUBWbip}j-JtHN@`=Mbntsh*X_l4bTB?5jo48uD25&wg897&G8 z=>K(;z-8_8|Df!9r%!k-OlZeio>+`;?rNyBmzh;Y$@C=54vJGNHslOR(n-aB?Yox; zze3V!f!l4N2Ffr2b)4txN^Et0}G30*cv*oIBT~~DB(E^zLwNUrH-*F8LksnNq5A$NO@I&QtMq=5AY<_aT@=g*X z#;m^8OoZ|P=yLWBhZLZ^G0C)FXQ$Q=oWfW|jn%BO>|WBZ;JJ2X=F&x%%B89;P3!oC z+NouEx;Bkv+6Wcd>X(@FsPzv9ZeaYh88zXU-e70JDIb-+$duF1S` zpPeISM^oXj4@fH<${8XFKx)fOCCz*l&ENd#bsI%!(QSz+Mq04QPuU8EC|5kEeR4v)b&N|@giF;$5t7f7QVPbuF%1+*>yYXEn|&A9y>p4=EuuR(Q&Le9Kud~So#QWL#QhC z4^M-;nXZn%$Sz{oX1k!s_$>H9u+$up{`RZT|8>W;Xm61gzR{L0q0jFSU?q*a&-9Et^rt;znEnLlUT<4vcD^| zJo&Z=G@SL}QMJc~P 
z1q7pm63OlK!IgNNVs^1j!ZCh(-)e>b^2?L_^5|bSu^`@2)s1^{D|n6=IS?7DY?%y- zYfCct`g*6TX3xJCdAkMz;bG-LzqvbUcS9In>!!F3c5>7pr;a}+Galo@&hGk)u4uwZxq?C==|VtU~rU zxZ;+w84Pj>`1wdw-my(e0vm4=Zh<1qf`>(;cU8!dF=F2;73p}uMr$SB4T?Kb+MB}_ zz9&!pia-5Pz63U{3lc(8-I$9KL{W$8%o*3YvO9bd>7q%)nKwcfBuQG{8A|FJ@QuOB z$6zK?(Bf{U+sfFAXQG+>*f#=(Gl=`lM#}QxxJ5eBTdOyS(CsumKEo6kHyOJITC=r8 zhz*2iLAxmRBnZ906PMgnziHcjlw%BDqlDhRU0iMB{~&$8xxdz z8#cAmx_6}JTju9s87|U1wKmqwHB6stNcDr$JS;orIpa3;w(w1tnJq`o$;+s?_0A9F#s>l~eMe+@s2T z3@bfUtn3?5@*P%cDX8NFxcZiSA18CSmBXKMASR@=UCpq}*0M6#Z)P>%oLmX1 zMwrYQB!nCdIT&BJ-)V@n{ys`= z%&ynoX!=ZoVEQ!QNI4?O#(?jaw_-l5oOhMht5rS&K?!TT;k?Q);m*Pa*d9d+Mv!8L z3uw5gNA^=519J}Z)y&L$aHx%9Rv?cSc?5rT=y3OQ`Q&9_r?1lkel5b$F|%huD!2cZ z&bl65YE5nRSymu;C*iaD8>fv;d-v@7^PJrKCs`6PK8xEi#QS2EhM{7^JDdxhQ=YXt zkc8M2I5}&_$U13`I7$mrf`8!N6c)@#JzXsTjVTx$c13Kz*g}72l`$w;&{;hYNuVxU zqeSk;Zv?MFLE>l`@TME@ZqH3uA_3KEpYn4Nw`RSr(ke|VJA|sD)?~2i!o*HJqiZsC z^M=THav0Yf+f8wMh5QF9`2{)&UmSnMabRQ#a6J&o_UJXB#Qv+JKQ11@nWV~*YkTwr zLVT{k=}eie`2t1eQ_Qb-Aymo6vc9^-_H0en+!;#$va?v3jvg?1yS+4{nK6pyL&_y% zsFL8x?rKIqLZVmy5#$_uLx2l0F{gw$YHnU=fJ!pZI1Q5slrfPOAaP(yp}IFl$lzcA zs{X<-al}$6OH(qzc9`!4+!syQG%+`oUq!(h5DUD?V0&kdVDsrUwBPz5suO3-y~?g* zikw_=B)> z{dc8@62O_pi}f$j66dc_0!FK%mU4+}(uePCwSA`|TMZ}Ru3q2C#f_7N7Z={@w-(l6 zpi=H_Bel#dmCOQ}S<*v~6TAYV)+h?9)~STp%T9yZ&RoA(RiNC2nDRXGj=or|N0apE zA3W7Nz;(HDs-JR3vAt@lBl4&#*a*|mk32|vPqUx_#E5xA;E61HOtg@4u;WgFQSvmi zEXa%&>oDZ)2pJ_skN_5^!W~L8OGm)uO-RuP>adZ&GwQ4;F}-GjX#?I2Hj_LrV$}#Z zGdu#$$1O5!Dn2$9f+;UD!x$m&ADE2d@NCIBBGGu--UlcoBh0(#T{_gp0I(s%_a%vR zVqqC{7^V|{;2sVcXP6_ZSt6i7MpyzK#S)iP{)jkon99D!5GsZ+F`qXAtkgq+SStMq zY?_{iKs4|-#5-h?#o>sDrG*h8O-fW|DVY=z?R+d71#CN!|Xf z_$JA~$k8a$LZZAd((1O)#BlT_;x$w%7zkuICQe>lR)!^Srr~mq%GZP0K0%iSq}?~! 
z4P*W?U|Q06*gz7+H$tpFtovF2g{!#edH?W+c_o<6N_sanEpe%BH7l0v9FrOZ=(*;k z>`DODNjE<1GH;%oc~sJ?!xDAp+h310*GuEdTDFh3j`z!A9JiXr9>Y1@>Czl-Ue#$N zOuM?`X{1z$7x(4XM2|sN$aScIPV1oLFYW6DV4AEu3BtsVF*Bx5P=c#BN!sbfW_=Hr z{**X{ZQ0Jy9)?7v=ceD)k}cGZUq;)XaV;MIu*KROHyP4qzZ0SRQoR{^j3T%7<6ZrS zPCh(@ylMWbXJRNxDW)z2O!M~sHV5*@_7o_IE^MP!2~O;l(*9Q0i)9VcU|xum{C!Kz|U+T7~IyI2;S;$$1NJ6p&_TRR?cJ)S;>S!CN= z+I7g=YESN*nfx*>iOF79#^{;?C1^lcTC;bWU- zvF>_M>-EEW;7LF8!}#HEc+kMCXWD`$qU4SlE#VRZd(LxQ)>aeAcD1c{ zRrNrqm0rb6r~CDGlBsE{B)LQ{@Kcif_nda3t)Y9}?6x7hw)}vaVfL3xsVa1%<5f@ijHfBxLBav7^DA_6~7f3H4oKk=)(R znvTeh>jmFmxslZT1At$!~XWV_7|(D3n8JUj}XP8TWovf37dqFyy;(JiLE z0Ad`sHTy5zPx!GI16`c@AS_ST8twMfL3pfbcb3qyOqS9q@W_z1a6tE1`)ps0^H80q z>q(Cd?#!{r!`x^JzvO=>o`>ZnbM=%oKU=l!16w}?M}uz4qW0JS$|$&6RR4lwTj40X z?7K8&Cs3F*CBlU}z908k%2P>&JUAT?*dDN)hwoZ9cJ@{L1ekz&&%RLxy4S=ZuX&es zIDl(G@RS|qcEHFYiAmL9B{oFYunr~6_p0Kc4Xjvp8I86HWbuu@0ganldVMz9U~W5d z1J2jFXp&_+;HM~=Ivm2Z{?QJhxcVU8v3pz^fmAlPJ``(?3k?p7N)QLM7q&0-Y%^y@ zj=mX}ovGX9!IC3&lQWL*fg~Hg7HvaG{a|Ld!?Kw|7h`dya~m|sF=7p)m-6=OV?eYGUhL@rC<)8=KF9+S{k zhcJlHTG|oZqtqKKt@JDbfv_N|9&3|uI8v=A!mOJlgh(8xrgSlAO<>Kc=$w-_Dw*~~ zM@Xl%EBH<=Up8*e9y}X1jrf9EFY)2l5j+2<3%xSz)e>t!N-9>vGhy6vz4VO*NNCt| z4Xw3330D4Yk)c)A*>s)J9ig(>8^hdmubz`7S2>h?DZiXL`?mimalfL2x~wVQ@W_J$ zz3Bum0~`EVnc(bOy!m_vH>I9+{?`$&(DoIvWYCsBM3aSzY(W}Pjs9|^oG?6CEgx^} z*biT|u;)bXBwcW)-)Qb845-Th9AUSWo!CB^cR}gbm{2IpVv+ev)X0-f6h83RU=pGI zWImk;0Mkl=^38s@lr3D=c~>CBF7Cqal;`e&hV@rVy4MDw&i2jf2Ul$aQYB;){s`qz zJSmf6f0c2sl#dgB8nk19@+s1SE?p*k!ybP@v(+JY#Iu+^aB^Hod|vgg zVY`F5jdkMa)k?SX)NN5Lxkb}!?eppU;#Dz(N~`<5YAY~^$6o*{Uad#|`#pI7TJx&j zz~*%StnE%(T@gy*tuBm?HhFBZo5TO`w-kEGHG_O%QUls1l1%Ss2Gto|+W8I*251Zs zc6+bD;Yg+br z?|*i#pSR5ml+LG&5#gz#s&1dkWX4fJMNpX(F(iWLE;HHPTO?nAa%KSy!qf z{TjDnckvJG((H}AGHR<5om>1(IO9*#qa@(tCst=yKS&i7;MfwPcGwOv82AwRH^sn>DdKZfzG zW9uH=t5|v>_mS7i!dLZ7AFgD^P98#!mrcZ605fW!%fj34x!86e?CI?fCo8^0);6$- z<${QTbS52~l5{Uui2xQ`#;?=s2?#sN9{K;P$^Lt@H@1dhW&9W8gpx+j1egF}vb($P2U>Ar4FH~r4AOvD+E!0e#b*v|pI(fME=&ZT9 
zBM(=PMPh4>j)>=jL?U()OcjbLc!s>x+s;NAZMK9d3X#8(a4Mq-5QRk}Q%lu7w7LWn zA<%&(*QYL-+i!*|Zf$SYBcNtWlm??oGH$S~!+eW68h}~s$xjnJ6;WAIy5D?fE(Q9d zo;d7Iyccvgep1nBJR(ISV(Se+V6%^*??E0{tyry&cUy%aXK4lWeSQ;VkcMY{i)6-3 z47T!F`KoqlP$1|hj(`YG#bN(J`e-3Q5-7ozeMMr?UygZCJUV5ji{_G}WTN_Lso12l=nPkPw90j%*sqKX5XD%BA1XD$1Bch>Ev3gRpyO4+w&(S+v zyEfYF8X~s0sv*4ln6dMfI758!DYTLP1kF~DgzVIo-OeV`C~-$5yk27T zn!uauB8?Z(5L$sgYcd3sy%f$!JI7jfBBlqMIErqO&}=#9F8P6C^l`pDPXI$W?V6~r zI!HL=9}=(Zn=WgSN<&T+IVPD}4$?ah(*x4{QzYdeQCZdmiE2Bcf42(TuKs8-bxGq_ z%Bz{=BoXuurI(R&!f1sUU_Ed>ux55)L6f9x8}@vf0R)p|&4hGc*QFNkm^-t4ugHyA zbLc)(hiiE>GN^GYut@6fO#}C?!JRq@Wy3UO`ZYu!2GDsGi=VF!x#jjakA|{rL$t^d z@nL*!6?3!Bpa7UTS`zp7h0PsIR;K2%JVB)PLuJsl+ z+&3SmWO}seFYAmwHg~2dNj;NLIXb?%8FG%R;EcUnn29R4BhO*PCraKExn=9{H!}N; z5%FNooX?_A2*g69V);OfWg{FRM&d;@eS!>#H3>>av2aSK4j4I45K_HIDRj z29%Va)k{3DdVv|Lvx|r-v_DIXR!MZ%cVQGsSPP<(e_o|hP#+&xnu-14 zWD!;_nQF|XC9VP1cyqq@IFOtJ{z>7A9W_?sWRRQ*MN?Qg=1-FnSYY7MnC1mu8@N$we&5L|mi z7)<24>)-Pb1uTSJ=}qHPNLhqX3}u%%e-Oo&^`@Iud-;SsVDgr- z>~bt3iN{9{qK-K{{r0*nz*WYsHD>z^_<*4rv+2b0e~We>{aW2;z1zz#LkNS#sEEM) zOK(i^{Rz|I)w_E<9tFgRr(ph%O8UQc8EtCZfLs7@PnBrymQU<=gfQw4BFG`k2-c1y zloB{+Kqw~xS9Coa{PMQ!)PZWc1e*G$bF$+ymprUv#;vd=*7+y6roUosm8q%r%4m2( zip|)Ww!CXmNcOl>Vsiw3wsj^9ep#Qxq5e0EJe=Y#j} zHOmvf6ui^xorKNZUcDw)NqBq~nR=T#Xqc6N$y#2L*SyO*3nbx#scTzU-w$)0-s*Ea z3wjoyhmOQKr?6_9K3qGnB|*79KlAQ zz)4s{BLpa4sfaY#aC16Q2d|Q@!gHPoh)>LzwUV zB7;Tv)$NoGs)E5XnR?G2nmmP0MTZSkpa&iWL{9-6FvmP7P|pBT?!BknABm`WzP#c7 z*r%R~fc6%y`7AdI>a3>^?u#$qUVZC+90>U^{tf=%ntO@xl^dnIB>v&J0b}R$%XLfX+fHi98wi{Wjrh@>k3>_Z@*YG${2Qg|QP6k$n=mfp`*mU&N#tdr?nyuf z?ZXIjQW_RV5%uqLyVgt}p2Fc&pqF5|{ceWreQE_nYm*jAynqL~^YU=*`{K2!cKXg^ z9IuS*b1$-r-ixI&q^(G12UsXT*0=4LPkaaGq}F$-U9c-<&| zbkBmiQeF%?t_YQbvg;HsY86%CvBB_E?9Wo@+51jUgCoK&AraFKNtOL*p#6bQ$?)rY zlN&6-Do*cC^1olECj3P&TRzE&z4%ujqyqt-K;#8Rlv#AG{sTIfhfjim97cD$*L}94 zU&^fuImWEaQTJH?>Mk$MYB-rB-3yC|*B5R`>|tQp?VZ-^?OL20Yf;_LBVkqP`L6YiY0O(1(ecFUSmlVHv_-eT1nKuz6z4 z))p{dx--{u*HQc6D|4fjDt*z!%U5pX6Wz95ddJb3tDO0y#&T1ySQ%GqUJ*bBxJ~zi 
zD#OStu6f+y)hkZ^tX3{K-ql+rgEUc)c%(zOzyf_h%7N=o{qk)3NE+cGfCM=h!Yq`q z2`Lm7LypnfSsIYU;G+_7^nTCC|32ztL_x^z))VcM3pX1{$zt=6e-3uCLlA1w#Rj^Q z^ecwC-1-6E194DfDb2EDJPSzb-$^2+=}vq_+-uih$V~l-utaO^Gw&jgj3PEoECWwR zD*xlhGk;lq$sMXN2sl=s(_i{)+a{&IV|TP5@@e3*{aw`$n@Av3FSW%#tdl`NPWlMz zg;4D04*F>7F#ufwcXKl!g#Ge&=&X|kOHH??iJ00f@@ndlcar}|17aY>-Yv)9o6c~K zV5Pj6{vf})=?Xs)>xd)cCiRUg#Pg!l^qs%&zZM4+h=Q)~tMODF3{-C%tvCmqaz))m zn&|OVpNaL~vG3h;4ywbbJs8u(K_fKnHS--ZYm<(QxfmB_M`2BcQPI6@lJxRTeScGu zD4SzhJxKRBPQoANg);)Z-Z$)aB)9Nhkj0K%^oLV@-pP`WJVZoAa$GCwqZCzG`?w5l zz0&h+j>hUSR7hC)w2NQjgozKQL&15&nOB=1J04+G;msO%S zKmWA-Xc3+t1`SNxhkAe?Fr!4#t3Vm3!XRA724KvBhsyUyL@KkmfFN_P_Ln$bcRevn zMp`^E2W912mCd=Bz6R_s<#M|z^%9alLpXY>4I8*ao9ZBv!eo!qoTHPR=^Qn8`Y#== zWwgYUs7|giZY~2yxT`{RwvKYx&OgOLdSefN!OVt;K4UGQ{ObITM=d+!f$10|uOYb}v} zvPMBt^Q>7Zg)d{8p>mI(bx<$Fsb~Md!sgKKxFhHxOymJBZK_g9AQ`@?O}+6aJMm-t z?RCtSw>4<{aVFbNq}j$ZYAk29rM^*S(>AO4b#Q_pGE+V{)M7*KB^?w6k{03bs1<()5|}={t(>-8AZaxAi0dT!u-GuHa!n zx*J}k5=r*#`Ik;aYScnVtGo_c)axr-MqxZ8HdYI6q%}OiW6^W;(574ll*pJ<_`7cF)j+aV2My(LnARQ^xhv|_OSL8+u6vh}m z%GdX)3*-;;el;1s%a?v!8a#GX1>}46N;1zuBU_eMiQ{$PRWAsc6=WfqSsVLHxcn|e zVd6jz{IsAtqU5SM^OQ_s4>rg;yhHkh`N=OH4c5e8&junV)7 z_G9lSihy6ww)5?P@K%tfl1O&w!kq8lalF(+HXSAXaN+eyjkaZ9N3)FvAsE)D1XqHp z3CJ8u<1)5Sf+@$QOOM45)I!svOiR1X)PI3xe13*tEA^0FOYGNz_XK=HS)@X_{w0wi zBF(72vvHv^LrV%I&e=8$-Jf14XI3AFI*qXGA5jLr8C!af?cLF{Hzl_t_8GeZ2ZuKf z!HX}=)W_yuVuvFwluX>D2$V>Ju*AifGo?^DxwIpsGJlJ%Atk;+3Fa&C{}&$+oXm`D z|34~$<$pNR|ESV`8X#(*u8#eNBfb}1cKw2kmNw-WY9a&y8J z?5vGYn~h80?yYV={uU{|fqybaJ%qFQ(Uf~HS6Ty_6)Dw17G8+&WF#?FV}?(Dl>@0G zT@MFO*k?}9YGtjIZHI|g=n{c+WyNG_rF80iDq9uFBjmy{g%TXlc-7Nu=bMIXXQgBW z5Y*N?dAsl=(0=(2isgGYBZesq8-9 z)~e7cv?%5j-JHuz$49I#7h$W^LANCu{LSt!vKppv%c}4G{qY`UOl|m#>|0UHWR)+% z*)KpY&@ixv?sE^cJL?_E10I`MM(uvR6rMqpHFZ&townL!;4wpI$r87nV|3^p>^AvY zi!X>|`Rwtt&`IUdtyGwgU&4p1?%<#9BgX3H=|YIy@j+CZAwv`)M!#C^K-1#yMJ_4Z zb`+f#+posS>}eOAsWy?1YX5W2P~CLWIj2R8C{4S{tp%uvnD4J!b=sV3bBph0y|8A&!F&@wOHC9$O^4YC;hRGJX3O!wutIP#w>K|GR8 
z8gd7yZQIK6(z%+7ur;MkN&(`rMySX|{dZMI5S`H$C7`*B)+*Orm)SZ@D$`t~0aSp{ zv|MQbEjXj8j+I5)tTbRAn3^J=)c{9wU4SEgJG+)L?Z6$%-vbS2B7O*4YEhhCw@xt9R1xxq5dK4vL`>6**`+o;gc1j+)XomKhYgfNfFTV5~M>W1i!R28m&s z3N6ajg)H|2+B?N9HC?}p|Hf&1f^zeXHw}^6Fpn0O^2v-bgRn0LxWTIbM;eYr^6w-d zF( z0WYi+;-?$^;lh6eEG&)3X{j#!p=!X_MKIj|YwTCeFx?ozc~9#7I^N-oxhy?{$SB-3 zDHQoqy}8YE$25dSypb`!1ma5Ym~vQhA|udGydt*oloth}dmK$uL58X^Xq?<0^?guD zY#ZCEJiS$l+S5<*WlUT7B*B>`bT^Bt0;jclZ5=A#nth%l4Nne$gQxW~MNxk&*Qahc zN`tzBk2SMRp*1Fy`5N7rYw@T}O%9*~0B)|(EkpRlG}<#rD(Z|cb_x5d+78pSZfuEE zaW35AwNvF6)6aw&FS43#&sC0^BbhaOn4z{kTD%ftHzLZl+?=amlUz1ozJMaa$L>e} z1zXurEvz$g%2(aL(B4v<0PLKNg_DUI%J~7CngD0)w0lSp$ zzUc183^3I-!|YGRappsNeG)z7@~F)egU7TB3n0bYAA)P6{BK8K@nh!I9Qgh@i7IS< zYq+hHlyT+A8m0&9rgY1hu{b#CwX<8X>t$D)<|ZL>+XX+x7v9_2@3@@vI4Mq>VCodD zVUL(&hjg+<)+q5379qwEwAId_YxbWHol&msINhKR;=t^SH|bV+S1jChRr@6;CHk@~ z@&*;*C_{c$YH^k=GntLFmZbi(AG>6K4NLGPzf%Jq16yji^Zw7y`JZwUB@IgvAP*@b z=(hC3DJ0EK@t;gGS285ky{We^kI&1i_$<*MPEVaxg*l8wYcXsggCghJ z2Y7dSF`FW;Zn{_+6DiEF5&t;dGS@r4w6V1m$tu}b&&7vo@a8Y48_+^*&`G{2lJb=d z|3&>UWB7x*-i-7iN@jy&Rrp={IH-Vq%Tp=Cg}?uHJSz%_5Ul*FghlG!UO?O(bh{`)I9C+CGwl|P1w(aK^;6%(N_D*FHSxg(K_O|Rc?XQHbNt+vjb zGd3=!aVDevyvjk*uT%S6e(W2hmDVGXQTIe6s#Ja9p^qHrHuG>$6bZE|nqX+^Ei_}F ztUIMX_>~H5X)fr*o`Qa@+m0(~YAyyR_d;!)WrU{(9NC&YagB_D|I1c=s3zlEKFqs+ zVmCVqPCzJ>TShNK56<0Pa2vpT`xxz8xise4<~OPB#E~#L0bPGoQiXl75*IEVp|{%A zg0_54F(ye?vaOo)f18vNE!2m5I(wzIn@_ z^Ir!dt4M3$BDRvYw)O}39N_#ekRha&zSUou?5QxHNVPq#dBzM0uB%9ce?sbt$u>M2 zau@{mZVNk&T#cb!d&5DAtNa}b_jUh(9&FZ8B&<_DsfjhsYAtcuAv{@T#0|+R&ur73 zS2pI%q(^w1dMy4U1@25O+IV6l*4)6|luC!wNk<+;WMvoEWGXW$ZZ3H6Vpgevc-e;B ztMj1o7PTO`r@szq8g=2QtJQYIpLhy=dZjR9P%lpxuGw+Z?(i;a-9 zXhp$PgyXNIo~WB>znO0?Y^qA%3!hcvsof5OHtbDiH$eVA;GEE{jv5T-EEsb!9L~9| z@D4$`vE(IAEZCE|NFEN;ahCUL&#-W+B0c_VWQAS0?YyJ}+bJCzNdi);lh0fNM9QhT zBBg~Hd>jFrr$gkLtpl$XZyc&ry{q;Poolp1p@=0uad=MLa}2BI^Pb=+X}q9{v57wq zM^)XMqZ#ZMNTfweWnvLS${tWkV9ENtFa*&vz^!J57mP!(?o*Ee{krQx zkq73W{PGJW)vUp7B~X5cCJ~PYYI`QP?YWs`RO0i-VlO=JGHp*B>_HiE@F}fxZnp^U 
z-Cxq|gLpW#{mj}P+CxbiG!nC?Wbh(T%Tcb54p{wFT{&ta(;7O;&6X;ZC$fiftX5Y! zF<)wa)>u&qdqM*q9&c8_>{d1C2}pOa%R4M`n9gG4esNj4Y583b!Sl6+O2zR;5$;OqrCY2-3fg4n)XObd>RVIwM@oUOdKCrWCX$)iA1RX6^U>Bh_m ztC_S2!4pth7+FR(+Eh&S9T-PgIgw0o#lT)MB>Ke_U_eUEZZo@H(P@m=p0v@3F8T*q zlGbfZp|H!GF(UO0jWCO;Kc;suXy4+d-wY>sC|XDXicdJQhMy~afFMw{zOHtR$K;b`$TrA^;GiDdMHoJ=6N-TOpq zln(K+Gp{pTdBJRErIr)xvFO0c){0_CUKJ<0w9}S8-D}+Jv(i$y<$1$7joPwE{spxm zb%@j&?9vilh2+^T3rt}K&X{gT;0fXNcHb(244;_FduAd&`YH_FU+qKZS?22PX91Q| zWD}}eRJj(XIIZKAZN7c-g_}5YtG=!z0ph)o`47~6ree97FTB^ej@%XGPkx3s6dzrp z*6I^Js~BpMmb&MZok7lYvN=E8Nr4>J;2B&9qK7ItqnfM@j`5x&@(#%dAUF)}S)FSJ zsgiJj$BXy#M{s z$U85HLpGsX$baX&Ii0;B7n59YOm_+AU>Cfk^pm|Kbo|3A!N(4R^7grF$GCi)JJJ6_ zDbtQXak)*>@&NQy(Le%mqymt$UoK9xUUM(m&~!dGqZ0!>D*OAd9+FIR#NSeT*9x-* zB;Tmh_JsUF_``&v>0cHQJ<&;Q#_j~vIQj)~V?-na>3svBKDJtr;DXEb)eG8bjj(># zK;(aZ0jRiA5g{@-8_MG2@%#tEK7v6?TMR?S#38AH@=t0yO+XW=Sd5hwi6K=wmIR;G znK>o$q3O-rg6c=M$|wZ2`HS>ncm~2)El;2GL(yy4#-X3wtqUx+u7(j-DvtUqXBTCh zJbcxe-#zK2ZR$8Qopec>3>da|p03tLDS>;TDH)el)ev+8^}QNN#C! 
zSaO%H#)@5Y2fzyM%ti$aqP#X(`wxGMXueT;N66txv z2Eb8lGX9Zft7%@aj@Vaj5TRT)HqfMT$tG}Q4{LAU^!ONy4|}ryEk{X?&x`pj+6iaQ zgoTDM4b9bnW*XDPZ*aWG8GDoLuxoQx5ou2`~lHO*`>P5Dszn;#`>u;ru`@5u}kF#E^Jh=SfX z2Q=_xH`App?=EiS4w?)%jLEs9V5WA;D5DDfh7HKnuAe?5OOl4suR;1WGQJY5y}oq|#Z9(0TyTzbDE;c|7F%!~$n?8Kf})!)IWvh} z0&`^qjyDgJ%|Zb7smMvPuQOlCB$Iy08W3SmAN_bYA`u{zMb=*hQ>Tzn`t+tllhlOj zRsq}~M_ZUf_B{`Ivs1Un*#;OMuXDr&zh_a(MV4aTejJlP2}k35vPhC#R?%+uwV+9F zNU$Jn(J7KAW~(zOHiBFUN9X>!I$ij+I#uhl8}* zEd?aOA#{4VGjA25)sUT5&-Zq{eCZC39s=^Z1tNObv>4>xI22#g_7J?}hvQLaJ)Cu5 z>$ovE(P6cJMWaos(bZ&bUF`1D*cz7qqN+Oc6Neju@{Vyd$WFV)sgx4eV%4kwp_4)F z>-Pe6m&K5{>wdn2hz$M~LbCAgwuEm;(&ZqcKp^xL?LKk8_J{||uy8Z65x~k_KLE}- z?%N&?PIe(nG!na0>(e9K@M)pnDM9tjQ8|?-lS!pzqx+~S_`C+>vURcRf0#=Qo`A?- zfLvEV>KPMrKHcw8K^ZZJzwpKBAcjjigM#yOJ%2%Z8B%uQV$XDd41~gNXh|RyhJk)6J%nsH7Vis zfc&by&bO+nKK2A@*MB~5n=nqhuO7swUy$EyQSTbhSHq;!z0AbuR&eNjb;ekgHA&5b zm=#0&@mS`43|mf4J@*1_jp2g-7vTMmMM9db0U#YnjVL?L3@4X913scksUb;d>|6_OxR(kK*^c4_#?1oA5 zI|S*OvhDPBU|uPu3v+Q|)$aiz9eJ)f@vFeo+xmu14G-N8U*HfNURrFFyY;9Lec0uQ zIbx0clU7t>OP0H-i1WcdZwPUFjj1z$if|BUqTc#x6$X`b%eOg#{fof}Zm8V9p(a_O zVv+BNp6G}}fVuCL>XK1VaE8ni!mA1l29_ruQ0x)x^%`g4xte-G3V8CHq-zkdmz)Q8UX7>}OTE)|q#qUe_Z znV%#ou$zekrD<&9FX2UXGly-%8rK5va`Z9lIKZm;?$+i`8ft7aeec#XEpy77XGq|I zbppo$(+a9&dO69d#ei8va}Ac^&N$GYXN-VEGb**wc*R|!0{#P7Hpg{ubD==#DQ}ti z>?KdW0N^I)P5rzNNB7?mpt}+}J?uI#WQVwy_-pUC@sv4@)1HnULEHBvB!C^v?xrvS zjg?R$5{H~(i`d9$be=Ke);DKgOVtgBu%LC#Ph1lIJtE#Ps5hpVeolFUyu;L0hHME$ zgB=uFm7an^q47(1Fnx;@@cQ!u(~GH5?~Y;W#ZUJ8HCkQ6!mvn>*8jh)_cB=30Kv0HIFMCIrao%x>+w_Ijb!kB|f(wWts`tJqxY9zb>iC0* z2U)UQ5!K<@vcJz2X}syYcj#&y=D>aut`yuI0t*^h2m}=X@j3MzwkAR^F@5MA!+UJr zm$x6etR$=^c$aTbtv08DCFKC#6y{rwCA_a*mxG5niplXHOeSs-StIJNBXva7ouCLf z#h-65>H9Z*9}{)U=N~IpD%nYo^1rI#xBUyvju(QZm-cqMZ(6S&%z>BNX8OgMDQmLg zFooTEsv02Q6nb_;5zb`AN&b_-df{eA>nexL_Q|_#zrU#1-Um{|VXg^iulE)}_o+JH zu??;$f=N-ZS|Qmq09^<{iq@tbW<)L^n<1H6nTpq7@L?AJxA%MktBG&(Gw`r=Lc|AN z7}LxS(^1cv!0i#jZMlb2s}`R}ZSLNBAtuheo1d_k(ro>h3?nPBAS}Gx@Qd)kyMu|@ 
zy11w10MYkK83S0&K8X(;*$}d*W09>VhzgL;-dy8*e9?Ke3!$`_#%ikT0aJyUG$AYh zvvhWc8-f%=JFWg3any1GQx2<$$^@kH(j%N{R0N(shP#*lHRNZRb-IV4-nH)FpXFfZC|5@8$T#A7=Ta#{CQf^< z9U}sgGy<%t$j)`^7G8X}On7%S zy{hW-POQ14_WJ{UpRUfbMO%Y^$Q7G67$8`FJM#Np!FjM06sQ>UffD?;S{OaO_q6H-A$~~T$!g)$uOhue1@1@I17~8=I*q%>{2y;UU zVMy?gTG2}%-8YgfwsJm9-GJMr`uj&fIBP8|0!;vpXb4e|$`?yMV&i^fnJ&?$NX9Y+ z73#$|A;>l<_m4Nw2YH&6a7&bPcDM>)&R@QGkNEhG{36wsfU@l2;qmy~>0t2(v6uL_s^5)(uOYPU`vv6t!ByYO!s7uvzTgrd0bh<3 ziN)RE5R<5uUmH{Jx5nKs{3suE*O+gNwk5FZq-}I;Fn1S|*2-#0-krVLB;`dO(lX*TrbWj9F4&a+CRN!EGJL z1OZlNOLHoPBQP0&`Phc=YJMZ$d+VAnlc1X~R{iv-NOEB{lfJ7g3~H*2bqvZRoFm%{ z@Cs1)&hqt+-;WC-hC==FV`y|E^6!2(UGL<21ZnK!nE==24p<^6P7_TUP!PW}Neimb zRqJZ0GK3%Fi`w`9XUJFX#$%r20(Bb0^?b041jXqGzB&hp+}Z_a8XNG|UrlxTdHXr` zWe#vpO3cC-<;BGv)>Vw-l!kc5%{ReX%Rt7r`?+I>hFn&kMb`Mj>k<-9>`r^5PrU@6 z*gtIkH&D0q=`uZsqkDz-U4aJP|2A~iYE#VP8(tG9`eU)gjh*g#=ey3A5^Hft)E>eT zw;F0kiZ04?t9ngxJ@n#q! zytLc7CHZ+e{%^u&qYZ7Ur1H8Kk|9MbX7!P16&otKPW%{Ubj{8_Mp$ACw^g0s!E5Zo z5dsfzqogS9g^?Or(v9$KDLvshm5fro39r5^fgKNa50PGMTT?JrI4L}^3fidRwU#fr zAatNj)!CqeKVXBgjP#l?jM@CWicSgK+x!Z89r$`cXFt3P#t>lweNIq_t{x?n6i4FA zUR~Wg9n_!%mK4-{|7VdClyJbLP=%8rb@>Vq?QCmR)ThIC^AE_@`W5FJq5jOQOHAb$ zCz5_&fEUsp;6ZDoSe&Q5Z2uj&$j%XVC-RYZ%T{GVRXpuMx;&ky*vU!Xy7>gtPI!Sd zx{2_s>E%PhB`8wsb6MNKA|YUXzU;--ir@djt~gCV7YDyoJ(J`v&G&*tJ62Nuf{F@Y zn;i)k1!tfSe`dxAA>_4HNsj#obqWbK;_lR@{Lgh)_utp9D;l4MXU2nj-w6Idy#_g@ z=s$daf9cX4cI6tgV-tSD@LDst+h6$wFZyoQ%XC#+e}GT2`rsr69S;h7_W!zO!AXfO z2U35_KI5pckkM^FC#Dfp>%iNO32Vy)Knv0nt`0~!l-AUR?>=~PZnS-`kD$iy{oPI9 z8;yc_UYM>nIQ>~pUX2NPVu<}R0&{y3jD9l%GwH2*#erJXUYEOpwtZc79$#Nei1K~rLtj=-c@oNn<7ciwVN{&^gcDATs@{cM3sh|b%2rH{tT`F9m{fit zRI_VN*_*3ePT}HOy{IF+X47E|YTv1+iBJ?V){ewcE$2A!uRdNpo)Y?f1Cmma-2PwC z6{p)j(3Onc0XtI9i5k?Y*zmBnGT9Sl1jvRh`&NMkM!1*sbDm@q>0eL5gv8e!w-AxF z1P;mhrmetVj-m)={L|hn$J^rv+h@px+q7r|W)XvdPPO1?za1SsA1C%tzbJ@*(2<+FJ+tQRr^pQPoZ%zy=W10mnrSvb?c9j~Dk{$`ztdU7MA9}j3H)%^@4m1Kd} z16VQ#7l49 zKLlQ|B%=1gP^P|P8g_I&2L&zR@98bN2_?U zS!C9%43}F?Rg_g4DJyZJt8Nvlc+EggxJ(Myj6LF1SrJA|n@HHyS{&0?`DN&Q}kmrD+ 
ziSa-0@g=P3tdem{o{$5l^s``~ z1qmJ*eyp>iKvBQYnHnU!0}Q}8emuMgp$|^+eI}(4hS~=wGyZV}nbJTFeUo@mRo#N?G;z$4R+`A#j_? zB(xxJd*@(_rUrE&R^LDre?}msPF% za9~$seiW$MD)6BatIe|k`KWhlaH5{Bqb|Ihnqr#T@NDd8E|1}KApX|M1QWcrNFN8E6QmfKmcVNPeuD}||`ADZ0M zSrZd?cJD9Nq{R(yJplfb8Ln72H^I-(J2 zVQV3WM|}wFkyKTLb%|j|o9(ZSmV#&`nFK~4#LlAjof0{07 zufnDbE*lcbvDOrR#rh%WQKs?%qF^mwep04cRFEt6$2vT0+!Asn*masL}N|H~Lmt?&l!2SAhRMl^87XeMJzlO6|0 zk!CPe-l=MwELGSG^Q=-DY(LMz5I$~dzZ}nxmdRZeX7il-w{P2EnWg{8u;}Hj)q<)* zkX{@7)iviwiL|93Ur+bqC7B(jdJ)i%*^`)jm$$Ya;&-N>tf~cKa-+X|;l!NRahI3Z zxJdkBXU{641Qh3ON$bLj>E>E;(UzRhn%Zsd560uG&^ntw2D-aoHzlMhCx>ht^Vz{w zPi-76jRs51T3`js^9!F^#~Y1mQrhTcdTqdL(1KkqNOL;jC8`|c5IE48L)e>Heh~7Y?6PdA z>z$sVfMUAg4j0;{j?v=DfQ6_EsG0-#6$2z9)mgO%InSv3?D8V(bR9C+t_2n*oo;)@7I|yQD7qc$xn;IWBz$2zxo+Xcp{X?v{S8E zrsyVsi&K2;#J)*RLzg&PF_%&0)eM4ACcEQ60B+`v-sAu*rH={w7yjc#u%W)AI0Jo4 z=OzWYt=7aHRm0{>?S#kx@>6uWYBC}RQSDAfmqQ2f}3!%u>HjWA_VXf@4CNuF^nONuifgZ>q(1p4!_tF z0dR_@eP|6_lWgO^aM@XdKy)^fa&|k)kL0T%oYM-h8cZcQ$ITyEr-#mKsGUfwskJbR z=@n;d0?p!qMWzkl1NvR)si{)#OH$*G&iaVXYIo?x6_SSw?F}+i4PKg4+8OJldlsR^a2<;D1 zw=I)51Xr6vGdZ`)A)JvsPpO5hrk`~^Uu|!VL}XwUG;J0;O3x!-GlDbURFR1{tY6JM zs%qHgztb`bsI~16Dz*A3RW&cO@$NAsu3(C&*Kl8mMjg6RpU^Jhd zS|sIv$_?9KCB5W25O{`+e}R}jE8?0yv6iM*87&>zS|rhEuiGfGjg)Jb708byl+SK{u+RUAO=k>(9t0>c%v=ifp^NLdi;+=qOOuy=9^23p#NT`CxKdd z%|b!Cnnl*)@z%*5p_N%PX44V)lXywMAYjSHi=74HSqNm$hQPD{e$~`1mpE`00jTssV_o|ot2XjTSR}9XyMZqk=mhi@eF_k5JC}pj7(3-YZHR}@ z>y>)!5zYvpt>o?Bms6#$3ko!P^O(;>iINMybqU=>DYAL<`=@ibz6#=F_rDCE(U2S0 z!|n^Rx`G`+a$9r!+6lPH97I zkMl;>1TT8!S$MXwf1FEZd*Gp2oTgh(!>h19@F%y%JRm-n zK7>RuurpsDw3|rR@GG*W6#mbCfwiBex(2)t@ZiA$pKQSDb$n;QwAC2z${9;tE9k>la(WU@=U%^QC^oq?d0 zynT+Qu{waGIm-mHQA1L=XB(Anzv?uxv)DUSAqKWUbhksm#PI_^wsP(9K1}r_3_Dm5 z>TEcB;BHqbxeWGn=1U(Kp;94tYac;+P%se$@FD8BIzS~Q7>J82(Bj>!G~MM-Uo`1zz%7Dn?Fs~r0U{S0WDsWd|z8ZB7j`pKfh8nRK9&wV8L zsN;9b=b=lT;v#l`ItSAMj3W0(&G&$|Z*2N+noQxzq&EY_p>|)eoP?7&to%7*t-fd) zK5rc1^vLmpeeClCGh0p|+}&M!xL_r8L9~cJ95J4M&T#mww;qZ30Glxt9uUTPdtWsw 
zK?8^rv1<1-jJW>5iSQcBIc8f3-wGwjKM*fKeu(1!1^QbCAp#+RShb@s!dYQp;Iva^ zrOG`q4l;4l}0<(}^oi@kgXM>%e@h z5#TaX&X*jqJn3W5b3^;WXRptI{ho<06b=Z4i=(u^ z;HEKEm#VCwRo+S^2X|mwy$eIh_s&8VJ#<}r{Hf3L<*1{DdW>cY-@N;YVCg`V zd=0UyF&3DfverYCP^K;$f@jWyC;e^q3uPanD>r?3-Y0?~q2VdpL@A5xRXGraQ&IP( zOJgqqD!VuC8V^JBV8cre$*FL@%?p6yyKWPfFZMbs{agOT9=Gr@G1j0lM890{-}pC*>a zK9ABw7d;?#z`rN~#8ZXqsYcoEiFJO}edM(LREnR=~oTqp|2Umx}8DytVu zC<^7OA-7^7K{$ib266^NC=A7V@@Bgm2{|5=Y7UT8$rv^Xt!@i3b}RA>Vx>O}vwtuc z#%ZWU-AIRpzL(?kJc}bh%=_qTTx+!HZUFUK+l+2P_;|q_I%M(Vdf{ zhL$To=odrQpac@rUSLqmCsr`IulxWOuf#9eT{F6>V>24OZ#t?ymCLnLF}O53h6Zy1SO6K16vf!S?+mb7?vGLSX+RLlCxhQ4@=@vTt@ zu2c=;u(eg!24%N61iRX3+qdV_uy*x!Ck}mS$N|U%fLCgvJ}%CiAc~f0BX^^&R-|1N zULdW|DIyYcSTAq1{s3#tC?ITFbpSGN%qTcy{{tl=aXon$lKTztpolSi2&&3HDwrdm zwS3T_hm&|*Kq6-Yk~?QsxHAi{(C(hSRYTp1KmR^HCnszBU)?Hjj~1jDVg7Z34>oXe zTw7^>tOIal`Bi_?0k>KGfxn^y^`ty4fkqNT2L7Ni8KHp}WgeU<;o>9P{Q#j_cRG1G z+CTg1Xn^l04&H6af$KVO=un&)iNeP1hZCNn zoR$4uL9|eN$nIH;P#n=g9RMa`iqqsh>_m_A2N0$Pg29Mh6^KW!m6ZbwsxD!~ zzRz1ZMr)_%kB4%DO{xhZd+ddy_6*WumQAeJkONoc@q!SS6|-KUrW ziW-VWy@-8Bb-k&Xqr8scm|6-6wOq5o{;K#BP9(VVSlgr$PD0g_p`)cnXsir$%q73K zuB_);vmvcCj@*>i89-w3?e(b~Q`@<~CfptechHn;{>QUD!?!!~3u896*VZ0<`OI8JQA|l5&?Q z!((?tA5*HZFNo&I_90qf(L6n36hGW&I&YnfJWj3!L&A{qYT$`yKM?l&V+BBF3jBe5A^vM>F`FdA`QO81i{AZKpP3hCG z1Puz@ojlk*|AvI`yn|YnN=XJ09%Y?n`qq9PRD~uj$KnVzklB>9uK=6lYc5Q z9l2y?0v3~TYkTN85Cuc+=JssNxHl1#4TU#UKYeQ&=D*&9={_Szx;yk0m~Ut7;`PRG zzl9{Z_d8n>$FJXFFm!I8>T+Z!)?|g_j)uB8Yby{N4f%0n7;;q;9ld>GU${c57W?<- zjf;gnE}5DsZis1sW>={5b*SNibj{nEZH@pA;*q8nXfU{YGT zeKz1))x{BKcO5U;y;f9*$m`ENU(ci5x#FEp#j|g$=CKQTeAkm&{him78-3;#4vodK z+KkFk(ENzjXofsafYnS{G{T$gMa|LSwFL#w9n;R*RhMEa5D+U!t z5B&qaag*c8VQwi0lj9$tBJ7&)|D#sp`cKObTB><0u!i0!_Lb#INcI+IJ1b^5p7O)- zB!#2hnZwNzbm_~@2}LOb={0meb8(Sd&#fp{1DSlc`Y+j#ah{p(m+y_1wydG~mF{$K zg97ig;2vApuX@(4%i0G4dA*SM)Oy;K`lD2$IABb`KGyJe*P;kVjEfCq_>Tj|=pqF* z?Fr{OuD9Czg0;57r|%L@He_4V_6gw3wMA!c5nnk2yrLk z&9*K8jf@0SebCb%e;VDxTeLEmq3MVNREJI2OS1m3FBwaU{8k^%W;e>4mUbWr6LVNI 
z<@j(Fm~=`CX3~^fu%lth*0K+LI>b5s9TMO-kj^j%-yku1?t{Urt^7XSn{KBkI|&*& zIHDj-nFT$46x~G;lR?wK=Zk(%FJMjn=#wr0_WRA5U#UXuGY~R3UyGN-_@D8@Mk`qx zXVjQ}wb&k^*I34QPVXJOPuDNm>{R-#uHECY&jvj{cV4Php#j5H=%bV|D1lcZ@SP9S z(qY)KqNIbjPp4upEg_L!`zN*lED>Zju}GGx$|HSy6&2F8#g^yitEWZ{B(eCPeWy8q zK&O^U!Ue*ZJ8zHhim{6Hc2#}#qM4miy32+-9ew1cvt)aN6JKIDKo0Gaix^LIzKu$P z#RX71K=euElMg7#e2#D6RLV&}a*?_^XupbM9`{~hF-dxF*Er3;v{@?8TyJyT!>HPK zHM9CAh*;h&(wj5eg6VMY7Ol7nOECvLt|DJWaTi#HcG7ZOt;zSfJ+u%h$E4{aYDQro z{|!?fY7os6GwZmJi##o0lQCat9gJ{BR26j?_$@7~yMcCyqJp%oA9w4eascPuj3tfN zY45mK+Z6c;Md~-dc*N$TgS#%fG)|v6W6J;7G|QH}Ch0gaU7x~emqp>2fl*`>IE=~W zIa}j=@)Tl~WH@bVh29ak2khbV4)IWP0gU_G*_TqvMP@|iAjd;R)qP%AQCrOl$%8d( z45)jJH6~rQTy!(gu!lvPYc(iJO!x|IXal~C5t9&RWV32W03tg)3|cGopK&e$0Ni2Q z*#zeC*~^YG<%ZGJAZ5<(ix%zsICa5wTTFzmt?3InWA9zw9CTEiEZm4%yyQigY50hs z3EBfnkf@MA*Dd%W!{KlptE;7V$Tj5Zwtk7Fz!!E=OeJ6%Xtf3}tDfxC_oBc#55$zq zFV{m)d!6qQew(GQuOJ0J`5N00U?Mai??momLf#(&wS@0N^PS$ZJ?qI#i44kmaln8K zGQ=4L0-b}<*qM<6za{ULWx zlr@Shd+&@$K6NW|f1S5ZLEeTywtQe>SS zNSOJaa#5142u^xZ__>16{OeK7r7Lw4KDQd#UF1&y-6=8Tkjlr1h1yxny*U#dB_Bns zJ!L5Hsg33q$5mpvU!h7RscjWS&7}zDmrkjoslB5=NH4H#7}^?j`@>k|L*ft6y%~sH zYJL+iDhT6$yBNUIzU9S37eI;$>pLK%X?fRr*1!ieyY zh6;K-EAi+6h}!)^>Ibmc&7b`yGCLrI}F% zr%)GL!)2oMKrEgCF)M%|5p9PDY3A0^z;BB60BL<3zcCjqt}kWJ;yY#YBCR8rb!P%{ ztdwfvfQCSb8r`Ve%d*tlguZylUp>DV^?7%u#G0H0u%l;Zk2n5HEBMOY-;vQG6SQ$4 zeSvJ+{ZL1@c(aLvpJ}2|JVKLx$$n*~DV!uCc>M5NAe?2tv-TE7lT~X@_ephiVnC1< zM<2V;I15NxD;x}eiN^G<tqXDl z;RHbg_LPEVPolv@^D|opIxw8c!f!yk&6$U=*lEek zGpXK=4bT+nhxU2=P9U%A+kRU??~;2Lib_qdoph61x>#lY zrW#ayT3o2ZzexhMg_F=pUW(MQb%1!Y$fEHVoMcgCNK0nhPXoso|Cll)YdbXOk_DAN zmY~JcVRcW04bnOpSs0&;vM`V?jkHk1v`wqGd-yni7s8{)n7?T@3HVmb zYe2rJCy`i1pYW|Vw+n@qb?XA@P)uq8XfExc`%|3F(hbA zAC}+OlGyb9O-xIv8>`hVz7W{AA3+A_4pRvi*0jAuZj`!BoSe$fsyD{FHpY?I?3b?I z2dJK07N}jb7gf0`h8XWa5M2M3em84b?!s|3pCQG(j2?PF5 zGRi#n$0Y?0DZ=7g4d#kx8zKAI(fvkYC_DdZW~>Xer^^8gmZ@RfB;5__B=j?OJ3J0G zUyW`?=60NRy{;eIsxR zh>?nR`f=`V0kMz0#2kWbo2rX9_C%I6>nrD%m+f$+LbKAX%$r^KN1<)tTsxT)iJuXU 
zi^uw2ja&g&KSZ1)1p-z`9Ad@0|A>~$G1i75qPJ!E^QujdxGQgg0ngkR@bln{G$d+4 z88I^>YwDR&cCy}W@u{nn`S%a5Um#KoPpU&DnxZQf`Fd$hAnCq5^GHtK$y!PUH>qkF zz?6VP%6!epj2`-B2Yb?z4$>0Tlka9U9*ws?w3<^kYeA)izk$<~rN~lPgVW|Kk^A|v z-IO(6O6STdG>Bd{CN66?V$rHjbgrcx2O+2?&E_t#+B%e~OKbM`$JI2I;=BEg_A$I5 zop4L5Z1v9}Yxn+{>U%Ze%~pn28S0(n%SJ?YhAxs`JNzDaag2x4L6tv=X$qD8E1CEA z#y0}kkJD=ZjrzdM|FO5mNR`S2<_1(_T5t+wJRb?-WRm%eV>Z*| ztlfO*Nbkz|p5NffpMK7NA$a87InM_J^8qf;4Z(&x%l?5=Cvvb8P)^0j0`>t&%@>a8 z>)*FlhRmdT7r51?(pXoy4^Aj2y)MzalfsCVeLA~lDy;=I{Vc*MxM^36V{^-!HMKq<;q+)IR8-ZHBd1(O47aUYV#P zzT6}xv|L;g3rdCMGGUy1kUcDfaT7#;g%xu?NNt6MOQ6mc4!kr%|GE~(Tq?9sWgExY z{#&hP zS_qy`U~bI_D(}>-II-UESCI|d*i92w?oQ5N1hE`2I~~nv&^Y^% zx|pbjxs|LH_)?>$)vX3w13^|L2b_#o1)u2vc@Ie}{o&5rzO?UN6TkVOgkw&1*|s=Q z%FQF3{Z0B-h30cqzEN{~>ohhIk9M1ogVq*e!7R)QwWd4e$H5MW_-e8>W@}ZvH`~Tx#sD$xb~|UWzm&0VQKXq~;M31IRHK5|?EsuAjyOCkjJ$_D zyOx5 zaX31);>oK5Y&#q(oKN!|HGPX3R~=TDUqAcHcOWmqDM#{vm?Db;{PEomV*R!&F@Fls zDchjtb|&W+slK@ym!>o{>XSzZ^@Y?EN`sN=Tr=D|El)eAUKY2$fhg_QNrb zbLI{6k{~^F6-K@SL=-PK+Lm_)j5xZYaSn|f9W!7LB;FAt=Qg@qez-&- zLsEq#VO>;iBn=2)A)}n%%99~X!XEjEdrjjJC`53u#<~WG;*?I9~Z@RmZ=;jDMgaMNAT;8I> z{vt8<--FRbYR^Q<-~9woO#Wz9phS6d2EFo^dsC`K9+eQ58v1I3PX5*Dy42ikQ6|yk z&du6@lbmWq?&7~bN?z^zyq*42tIY^t3u>o7Hkq=j3ON=mzgw_luO$7A(Eqi{$u^X> zX{kko+|E@kW|*KTTS{vsD;1VOaHn*Q+Xx#b#!Pj2E<2PV*l_2r$dTSp{~@^QxNM3? 
z-Z0-$;lsDe5#2>U29utnG9lj3$pl>*(kFHTBpy{-%ZIY24cr9}rFPI!Q(Tuidg8(} zHYs{F!kIqGke#y4+4Iyi*lY^|chR?xJz5*WYS?VO4t60_h3}$^Y$q89dg3GfT`Xo_ z875;w9^U%f99+}R6}KLSk!_0QE-bkN!VFi+F=_MC^A|t^@-u(?{LA-r=TpC@9htg> zF4r4!R`*2VXv~J(F*E!Vz#Kmz-NhIPqw>QHlbV{Ym!Qt7ey{KO$RM!~F!*0C_#dvV zRlDF{E&!fOO)dO4PHdmrSqLl#BKY14Qv^%^3<_R0(MnScyb28FYeARtzkUB55Kg5p z1r7zER+&q!ur{n}qgP=N4M&vdqSBumvY^?G3n1uTJy&Qhbw&2^U{J|ShBo8$tHXCUjdPflZFuh88-M_4b6%0nLw9-{Xe)gT zH>Bs0H-Z3uJmubVG!f~Z69~iUi-b+0Ix96OpG1Nn>5{axhOM~IK@=op!wY!^KNsM^ zUQ*gwaBXkzKxytut~ZPvt25KtG*sW!wbo{`tE`}B``b~r^m3I4P|+Ymok1WyVouwn z%~;sbd&iz?;Ab%`skY)B6X;Tu3<6>ysdd5%s4%3yViTO_sUyc~iY`VM&%`xnlwb9P z7uO_F7e7c~@x%kVOi0Q+P02mvip$24@8Ka zDMaIlH4IvCjcGpM8kQNq2$sJyRaMapQ7YDL*i}@lmZDhOs(`fP{~Zm6_|*Dz^Cwh0 zp8v0xX8K~coZ#n(udQYu?~K=IVu(o3E!&|%V2ExkwOI?di7&59&EOp2aX;W70AYe1 zntAV2c`)|A%3rsuYn1Q^J0I^f?uju{Yrg6tU{Lm0Q8bh96tP^gI2-?Le6Bd=6Pcr2 zYD$1m!kiRoFl+-+6@F_Fw>g`!WK)) zOz0+d{}xN>FH&J?XV1!G;0M69XP+-*J$lMoH}XN0q$@6y6y~a=D=coDpf$=2SYvj> zb`&hqU2;6s?RXyrgyIQRy0b&3(nJE3hUZ~l)F*~+02_EIetfiG>f9IxgW_gSUu`-~ zT?8u6`Pn=1;W5TyX@ucXoYN+;R;ASj)q@a;SOYB`@U{K;e6XO|jV3}@M2ChX--h-l zqWN=7fxmx@oSLmz~w8hrlU1o0gA>>rPv zSvwc~cE)N;tJlR!@cb0|ilaCpb>>n4%AV2)orccUVQPErl9fFMJrQmAUTXj<>FeW*S+)8Y367R0GlyhMigv3->4r% zjsA<5BFl7i55Dkhjyy;1B`%5j6{tb<_hFDUx>QDDz?``TXkagU2{5uc!60r<;F)!! 
zc*O9^AGeMQ+2F?MDw~Z@zeEG0&9gk(-aW->N~NqR#4XonOJa|z(7G@2^mf_sB8iO& z=^IZL>8%^?h0gE@Kr}t(?RwOJi+JrOu#$8{L0Xy+HS*Z+fp`=R+a{H77}XP!7LnM% z<+)niELK>YOe;J+x}Y!@vnMVj8q^lHue{9pN8@;74FU#^JrR~QatE!W;$!T|0g|nk z&h;vL<|bW7PqJo8ur$bEem$~j5L>=jw`iM7eHRmfH1s!GKpcu-!u~A?3Ti7D3aK`y z!RNW2;=M0uL+$NnAYTPh{M*K}$m|@uq*dlQpYrV+!x6A%{cvc&+f zpHNYr9_WEGhdD(;gOQ4qJf(z+2N@Z!45$E>wwo&>Cz#fOS7B(KWiv?aY}-rDZ{}}O z@4Bxx*2}E{Kt+v?TStxDi_a@e*k%AZOw;96d-dh6{hKdfU1|mf=8jHLH;9dzpeR7; zs_n9(e;|Eh+}jWVA%1DUzXTV)TBgtFwd&3#ap&7>wXJW0BEpfHw4cgAXy#}PjCrHj zu^=ez$}{o4>H<=hbPe9Zcs$4f^I|A!j6c#3dA!9H;NWYU$wWEVdrJkjmPWKr5ASnsn9_nJ01adG~bzDO$;Q>YfiYtPfqYxWCUS6BEY_G z`Gk+v1!?9albX*{V)HUP(23IkJKO2?FV9dvkM|?O!#33{&k)7-9wfs%WwuA?O!wd% z53%$xAj2U`xj{QV3PK_o$VNPeGACBT?Oy<-c$TPNKp5I=&S zsMYDdR^k(X#|9-Op2UR%RmgE5f2Al5w|0{_XU_8Rh<) zbymRld$=?Vi~w3eDrcvZzQ;K=#V36j@Bv9pc4pqnBr-)wEHbB}irnN^nRxQE)v|Db zy0W8gIT#xubYWm4j zHz^Pa^jsuPEVF-!I9FmD84WnQ!uVv+K1cWH2xOUCns6yO_FmkJuE6+SH~OG@FZ<>` zerHLsATwZkV6^5#;zmJ*G**f*$%v&I71r7OalQPyRbQd|MrN`Yjk4L+=rI`hsw@Hu zODenjrdYI|vzws!0b*zdcKmMy$7JImV*FQ6Tf*@2!Z661*qS+;6LB#!r!tiSQ>8-H z10wQDso z$&@t;swGK^w;ox=mLV^6q|ps>>!INRk=nH~jfVY0+m^5LG`G=|G7rMm1^a<5e^3a!K834FiN!O`>_K%qT4=0rSGh<=h!hrQ$7;=+U zgKcE9L4TD8QroDS8CNnTcF}Ha=p*dP2 zd3qj|isaU|P8VZ2FG>cDlS?yFU~uf<$`*VZk+*gOd0}Ehdd<8|0>O5hs@fy?M*R~~ zdS*UYme>mP7(K0fo7E4{U*n)mFsTLqLFu(jBz0v$2Av$9Vd9*ybBhh4XZx$e1!8`D%dXJ!MrT&-%-xEqWwSchh4K#uZ zT|0-*(Dx6h7<#;zf!P&^lI@v|y?+sJTpPUddpeI|E#l>lzu(OCxJPV=MVRoyl5{5I z+bv@LWG_%*0ATBHoH^{y-6(i3>EGQnKlY5*!UZesYMdYHLIPu11S33!k$Z|#a<L(1^}KBDs@DkB|aE65z<1Ryaj5it=%>!g_CDZx;c{T}qR zK-4|ARlUgz?`J8r4Zp0(0sVHDjY!Nz(kaJsE`~sc>&P&dLGw4M@v%U&a;e|D3=coUsnBQ0P)4WKI#r6Gg~EIexR{J5`14TM5a1(Bhd@*;zi~&1hvaD>(`?m!7`Y z##bYQ+&z}dKBS#W5X6X(@++QRtVL4M0_F|`1;AnQKs?E|l~`2To+0f!W2ND$*p^(W z;%Gm@bLLV4ne$MD79$s68kk+{mnzmsyYkg@y4|+a{nIgSY*Y=M7-9BD;uK8i(`1>^ zerCkzMPRTk@r~QCr)X>X9!2<;mS?wR-kS$!OL0*1L=)7lK;~)x_G&CR(EEgU_VQ6N z0=RRHXDv4fg0egd;6$~|*(xDx3$mA7Efcma+gmeiJgat8b`0FMRa`@nLX7dDB@Z29 
z3i#bpT&6F|{OO4{C8xVx5nfLnm;=qOP1Y-5u0xi3F%DG>3vu=O%O@nDBTA#Jny;2(nntLdY0o(aTgW=b0PeEq_ERqu_rdC zcSO(H4z9U>o|1Y@YHg%?WgBUGpT>7Qk7B)!ypNs#Eg)}_V?T}VmN+KQR<;;16(ti3 zznH%*+U+h7S)dWMs<1ry5X_lSAfrg3H^_!A$7d!Q0YE;TA@2Xp&Z)Q5kZ7PxoE-mL z)czlyzA-$qplLgv*tTukPByk}+Y{TG*c)t&jcs$|jcskXo8-&$KHvNP>FMfIU8k>e z=9=!g>#i#Lwwq3HQ*a0rN|0JNxDZfX@leC)a%^^yu_3*X0$lZ(=2vlGsG;IZ70KIo zuFA&WFXpndTB7JAzc*43(uD7=Nlf?3Enm;{c$MQBn5iz3HXSbNho0Xa9Y&BQzS~r|=u6?UN;E3NQ5<9yK4os9>sZ$tSuJZ0?@$RFs6i`OU0d_@mQ^_g)>Vyz{4rsH*pBnf zcK6PDb8aY9p!9jW@-q>CmF#kBL9(GgL*8f3;5GdZBYgdXbi}YS-_b&uER)@yGn^ip zmPEejCHGq#qk8m8h8ltyFHS(@e?yg34J@--mP))9BLs*~ zNJ>+n7V|RW2aNVw1M5hJN^oa911_}95|s>68SxRbQh1E-*kk|rtKE@ou-{_-Mj!5L zniR6S2+T0*Luxk8BcuMjMz+CD>c_fRZ|`Fot975TlpVyYjP_1LL23oID9p?g+`+9# zW4uaa>5mM?soOt+r02Thg~n#Bos%W@Lj zKXzm9^cSy>^Jj@Nea^UID<~vP=tq9tezJo|j{;z;K4kYHv$Z%n4*mD&KAPv5ZFEbnvUxg>FA2=>dGq)R_n5|DR1D_eM^t!SLGxTWI{(c`0Ri7KZM=;x*LOY0x{ z*-5mmC*86#=J-Bd&5L1I;}zUf+_tsbI9QAs6gt6<+=E_>t##AA^p-K)(_^F1<@k?Q zy;W)y9#E+^VvSd@^p71V*I6EZ;xjgV>rsH1kZg zbkK3XCd6*(g{MUq(NK3(*`a1S=YR+H+QcU<*b4~dv$6^Fyj*-aN*#CjzwLYYIpz?% zeT&XVJ zDDfp4x!FPC>bpjIO%KP#h!s44M(GA!I!|jpCGha|R4-Py@Q7G1oHSSP@Q6%s?GA63 zFJ*L#y1SQU&DM7Az^dXn^`@}kBP;{>pd?&_F(}CG_CQT&fhgbS#~Y;M4gLbXB*pdT ztZdCcl$QHCDU#DJeh`N6+C(?xul&>l%ZG%!YzcMe)#mVHkqzsy)2KTdZ81Y)K`6_o z>tU>2meAddyvmCs$6j4# zq(=m%kj?NgsvprdIygAV2(Jq=gYc1!(eQ?WVZxkwoLM)Yx)9s|y(A zD5*Q%wQg7jT7GnOWhOiqIUY*oIilAd(bwft9y}z87)?xdhCET+Fd-oW5up3(h^g4E zp^Zhl)!e+He-$l@Ez7Tfi4Hxq7$!hss{Y8dHv*MK>wk6k*ZJkr%UY;OOm@5|G+wWQ{9rTJyy!;UR z^qibrx39nFQG1VIfw*v>C!*Upy(rPS?^c>&XjpY3K;&ioO_P&v^^$GFs!)!vmVqyM zsp(HvI5Ebbx_U0A)Js(1JGi3lPpI8%Ux~D)(wY;62P$x6H3};nVE(i3Q;-vNEn9AY z(|MvP^tnDFU^+30=>X5(&~q#HT>d)f92ElQ_Gv<`)H-~t^m|yg$yf*APqG6W3)Yn{ zhYu&GJEOdfrkRTP;egPt@KY=xv`QcRq5y%_jrkn-L;7iwX^Xzrs|+W9?pxHH`FZ@JU^4a>PW|smbBc^K zkpp-LDD2*%veBcEK>ikDjJnWBo0@$$#1gBcV^j)kW=UA58&dIeFWy5dAO?F+7008s z)u_uel5g(;a)GvS5N`5?v}M}fRWy&wLg_GIXd#pl>F!80JVFI2h{6)KBQs<4psEC@ z*Jg=8I|UAVtYq`SynQ;)yy{Aoza-UJ 
zL)FB^2WA1!WUZo_xO!1&eO0eJ4JZWEIh@EO5nIk)*F_RKMc$=jo9b{&E2^zz@apZL z^IjL8gN0rc0T=zyIAKho(AZb!+NvUBagmg_$tu`f+bmg+TfFq?%CiTJSfr6H`hw9x z&xBZ$>}z@eZRNptq3S!$i{)k|;KBnT48V**;81uoXRQ!J58Alk@U9}sAq3uV>LG94=I{Y} z^2utJ6OmB>S6~T^QDv@@eq37stFV_yQM#vv#7Bs`P_7^uwkoJnyZNGm)B>7=pxH3K z-ZJsxO}y6HSFE401wzEbK$;V%oE`d_*dT`wRqP=4FPZohv0Vx1SEQaS?a*k(!sc=< z_cN_RP)u?GF^UxGxyL963pid_^Na7fY?XJw!GSS9zCvraQI#`WPTih6zcr8NG;lV; zNU^ZbrMJ`&GufiqX4EQWmMSF!kbDnT4yeOJ7*S8DWux+3*TC?|Ls9{TCTEJmL5T#; zW~2b%{v^)RR+*1qbXC^uhu7?QTO5E>U9fA0sJ?w57EhON>5{8U%6;&EcR;KG@^&dL z&qIEL^if{;wLP2--Afr|0xzi=tKTIE2?3Kz56sd0=FidS?32W_c*Td+Q7Kc>DLuj+ zy3A$fe=WMx%b4k>%e4S}c^ZY7+DB&~G}$u;n^&{WXzKdcW0m_eg}0tpG(j%vM|?C= zlKwf9@|De%raP-oyIO@z;G4_e95o1!zQN}bUNPQNTf{H?eqqzPY8$K$qFb&3Nz_L$z^tStjz+aLUIjQPA0BS}XFZqx{GxlIPF2 zF(Ult{;WbK*POiP$l2S=SNH9Lf@cSNoBWw;QWBu(*)=x$_87zVC=?6X_binUq-!zU zkWJajZdyf6rUS!(=Ew;o4xcnU)J~FhyURJP2r_u`?Jhy%>21~uzYc~zXn_t2RH$#$ ziH^EVMIG{y9fWf@lVBg@Ekv@iFS?=>A(An({&Ui`3lcLgm{Xg*d~Sxl!kTI(xmtxm z^QtS$e9g*(`8n#@!;|r5`dkO@9Ph$~eX-=-P%-nRY&x;m@ECYb zY^j&7R66kv_#WEu>`>3$Lp>MM3;U@k@k!~qgqiklTB376M$;peAxK3YFB}hZ-$T(R{y9BB4o=rVW1?@FgJ7hJ)>1^ z-79x}$uAIfZbSzJ^Yh6+zzsq;C$p7qz(T|(ZnDCkp|9A%gICZ3=z@>6qCUtF+nh zOWstVO*(WyJPL|Q=+4!R=8@H%fA5>ccY7QzQ7q>)c}jsk98Hk2KG+h2?48xi21<2C z8Ri15rGvciy33hekDnBILadHss?$JayMXa9r|gvh_`YTFb-QARTouAq!po=HuzR`q6{w^5CPz)H_Z5RgqQL zIyWq?W1l!9KmI6DD_Aseyr$*0qDlHDq*bU~e4*Vc#3UPG+Crw}S(IdYacMwoDPs#1 zt@?qSeb+6Zc2e5sl()?G#T_+>|2py;S+$Y}RtVQjRruKmbkERV+$h=TAdKmW^2Mx< z1}MzdeC!P@GYAkF76@Gb!NQdNufy!Zbp9+nNF5$h)x{1JB13-@w_z_D&e~QIh)(!O zzTjvrTnY?J&l7AZKio|htu!!-e5w7M7b9PK0w|}0v8N8bmM}K(@BaAJ&U|!HMji1h zWw{mR2$nEPikd$UikePL>Xx`#VMH15UL|0%N8p671EtNr> z>AiFw2O&k>HdOs{fAqFH4F*npIBYjAT@WdAPPzwkE+I$V2&S@hH-{8@-L}t(hA1vn z1}n^nDQf&PB2dVPb(}s#RgxXGkX4p`Vw59z9|17ZBjQ#+P9+WRye^4R+=VrJh9) zjS8bfKZsj-L5Yie&d-}3)`^UO%JZUIL{oKQ3T};r*SZ&vw5*#KCtuXS`j4=<;q)F? 
z(WOiu7UtYHnH2Rmb+nyH=&=r~ijxVy$6b$QP6BjqjpTB!F!%56~ma@O4gnd z-g z0{QKuuGU_ZSZxaCtZVuB*k;@*!9BM3Tn-fPKetf1b+`JXf`kTgknH)?!dDQIn=rd_ zWs+cK&dh}Q3EGYcp9KA#a#jyER}2tYp{sLOfyPP8^WF#G$gK8b&0aA&o_>It>N~d} zU^8w4$WZ81l4Z=9W{BoN-@T4|AdVWVv!8nidWC%3yFKI0`q+R$)q2gj?=u9GCReom zaCmTfea6GDkzk65o7y>LGsDDHE3E`RzQhKBUWJGktNN%Iej2@|La)I{Rs1?kBt@yq zM@_ETAG33=7V<1ya;Gd!>via~45dsD$nR((qEr!di_HHRO4WeWfvxDkP+Z)Q0BcIb z+sK4TKTt$RcI#o5+G(4X#h96JoeIo z-n_q{k{uESW7tYyn<5d$?9aK_q)fr|mveD(lOXNNpZbs40t&V^+_8?sZ3eZwXy4RF zt}l~7I;U$c=bg#ou#haqGugM_QJjG$VusenReYF@5e!^WSz_UnkT5rlK)B_Q?B->y zc6dd+D>hByWb$sbUQ}Oi&F(1AyR>TK?d!s&fp_1OKCIVLSxSWl8=KF58+pZq(qso1GYzB9}65kJfXK-T3<)>LL{+n|uXGszi2}BDEES?p*kZlxaRafN?=R#B)Xk+}@x* zleYl2T$Z}a)3-POynn>sCy9+&)wiSr7iup2bErsO-ZMc%&TCwwY zj-C0^`>05w`r<`--};T;>oi_A4{w$m3>BTUKb_03xZ@rx%Ji_EO;$zlc4PAYxpuUM z8OfMv{I*E{z->h-MZNBl_+j9!=={rX*Vx9<;QBwOL<|T{)(X>AaKbNc_16bRo96>~ zi0fC0yZ;g3Eg_J6z`wX&|GC1j1yPFw&ih~C@(;ur2Lu=Ee1 z+WvoM^N>S(+x_?cm~BJx(9OIMJpZ+T#>e@8TOKV%;`1#C6k1s=V(hXwtg`s5r_E-REaW5+nr1Qa~`wyGw(4wov%G@#hPNibr<)S7~=t#4xJ zSOn0{0Lt0f@M*2*lfF^0f+6Bp7n4;MnE+?O;27Z%_h1?4!TL`d#=(fonniO^#=%fA z0>z;F;lo8?1~I@3Rr-l7&_+kzmBVin1d%;~;If5ZjNy$yZT4f-SU05O1z$|eg0z}zEwrE8 z)`BKS-J8p3^`X+pqs?a?S$A#N?%i?K8Ti2*o-U1G{X%xZ84TMI4_6T4(Lo|y{R7Z@ ze6FYxiCsMraaD0qrO?v(56ECJun>5P@HCe!8<=R8{oc0G3^CwKS`c897T9r4MRml4 zgz-=29J)4Hdr{@;+J8Ed=@dzEt$uvyJ`h)RfmfwZH2rXMLzFci+mQb#pHdA=Y=9 zfA1*4Qoay9KZI9|HZ1WkTtn!18O5PNoC>a)>Bo#wSH{Uk-*1rrqi1(QRBY!8lO0?h=f{2I`!J zFv3<24m>6iGn2)aYo_{z85=wsL47<49)dO=TTenA&^6iaMx7Yj!rfFrn+H9%=NYt@ zT=$3+EJ}?rzo=6zO3BkVE(1S^hEkc^wXT=VU-_XeQmdmc+RK^(@d~cvNEF;}f+`Np zl}tTgXAilDTlP1`WE?1u1AcA;YemKkmOm2n=K!YE#L*V3CL7*ukpROgiyEsK;q%|Z`iRYB zWx@v}XZIoT#tU{`L{kSuIi zVG{#BUXR|@!6I%RKEm3v@d(iq2hSmC1BuF#apRGTs;i0a(kgDw*$wfu%YYxg#}4dZ zP!giX1KY%Ft3cN6PsxMgAF^?&iFU%!fqyoebGkk^Y0`MYW&e)X4q}(1z&{lkn!8^W%4i#>X=XRj z(LPnqpHKJXx$5G(1hU%J0UzJQ@mt?>$DKs*9~}KK(j=ClxboYAwdv`BnY_?g`0v z9i_9gj_5y?OAQO4+%4&+Uj^!j_>$9sc95&pTOEMzl?j-m=dO5*?hp@|-gvl{tgUu3 
zXH(;Ua=_1rr#zt_Mrv!D&8j>QlH`fR8{wd6ge#H~=eP}rD=|^5VHViF6dw{lHwn#1 z%_EA0>(^JjnS$Lh;%U~YQql@*X)zaZbJN=Jvgt$_v{qXMhdC0)M-p6TMk90{F_dH%;smu!Qll1gl2p7*DrKcB&{*Bx5%g z2e)q>FyRmdXx1ECD0rszaf$t?ov6*ZP^0fd0=+8Im-Th8N6d%Vk3jXs{h>24Zc?x0 zj`b^+iv%lp9lMGSK|E>cm&XWbe%A(~X zyI0o$o1_3q+#3%w%hu+9a_^k6@lsGgYrpgUZbjGDx4%(67YydV1Q${RAu98tH%au9 z3MCUTC$gddZV{zd9%z>I}`H)5L6 zSTD40ef_jOi;Q+nVnzzPql}C7ZMp-V*>}|=^tiv{iqvjN z*|ki>qyOpJ*Quj%{6n4<4b)4$lu@^Tpcc)&{%wCg!Zw2#(0Gw$Xh;s+EVWT^*LrhSu*H^uXt3dyq{3%vm|S_dEB2d-_RtDENiDz9wFgQJ7EoJ}VDDiR#WXcj2!O~;6xi31 zWsIQTtz5(rU^j9nKR?Q6w7|o?Y#?w$a$yJC~PW4BvxCe-ICBVH6JN8p+gG0vIA>MwWh)?Pu*lNY>Y{{DzI)qiq;b2>C z4tCFC4xYhqq&P_^kY*909+sc~s3)S#`3zR_>~?Y%i}&raUT7;NPi@CfPnYY*^82Jo z^`|k+7oSKP4yW^M4GFb-)9TDb860twJF zDk~tZqqkaXJ$RqWD#-M<#^KKt21*=Z**O~deKl`Xz?huB*nR@9(2|Y6*85{Ypz@F? z703mV%7pNwWO#pZ#`DYHlN5N`p>ZLpCROfMr}{6YvV=xvp`W<+eFrgYwB9H>#Vl^S zueg#6TOGZfq8fO$khoFPmq@H-L|JJ%E%N;L6<*W#XR^AUb@@`AN5okA_mgctu6e$V zX_HX1T4pq-vgoOwEN_7%NvwxPl7<&qZYX}m6(1k^K=DbhxV&eaHr_j#=&3ln*k>me z8l}(1#=r?K7x^(sfh(uP=BVzzZLdEy6ODSG-VjE5;+nwsV)3j?E??yiyWSXoIg#v| zg+jtyr;Snj(VzC8i?1wYf^O?k$xR*-&3o|~sFcO5tQ z-Ls1DXr!PHNR!1=>CeFUgw)dIlq)cB8QPp+*^xe3?j6K2%d+)@qjwKHuT)BIMy z;_v!DDmO0lpN*UM8P5T$<>=7Af4A&-KAfJ)YVZ9-@HUv(l!#}+Yy6f}+5UVVaeq(6 z{b0WiEYa+l<*(N?nsr9z9-{1SW}DXh{YkqbLeS$c{jt^oP)(caFf~K+uJQz?a5;?L zorq)}VYJ{Yr>{S8Wz8Y!oOlY}%s0?isGay_z*@e62YtEMn~f(8XojyADF7flY380> z`GT6qg^YH8(T)$rJyMMyMKwQ%+&2uox?k&n!D?cs?sFF`PFc=~p})LobN4V)XLXOS ziBvb1#K%3;r%JB)Hq^v1%zZC^tQ?~Gr#@z3&;5D-Dzr$no2ha?IqSW&uWhOP9?Mc^ zXk(r~1E@=Hz32IfFmb<~`-Atu-O*q9o}={p&ebA&NsWS4GizHcqYH(ww|dKt`15Ej zu($k(?x+qyU*&S4(@sd|7Ax$Eu&EGR{F}+gFvor*sd$a^a z>U=>V=u8zT0mvDIK(S8ZN6|ZD@63LL*pyiv#R-OFZZF!aw~&%sNz;`$annUv=28|H zcVK`cLe8HnaK7|!3EuliHMkuMpzl}){?RwUu}`I$5ZZ4h(Fea z2}vU|>(PVN6ku-9F1dPApqw~#?W{N$_MI`RY!TX^e({D_v9~;G(t7EMXeQ+>lZby~ z^{>9E86U;t?hVt<3w3>B50~KI1_qz=FwR*+&r)YAQ1`eRtUW`l!0(xy)CY;P}_NB&pame@rSLl(_&yXRjIJAioeDP5Fes$7hr$ zcih;I$Q+Zh`Ev~$+IhDNEBeL9K!wKvUE_txNjEGTkQ&Bk=uZ_%MwHfpz#)vEG7 
z71JqGMyE3gGEKtvsKb5;vv+nKSLs9b&KQY-HV48P^?A*VmOp&?EiUDEBscO$`?FIg z7pd#~e;~NM|?(|WHRnPB1@k(Q<(P>@ba7V-Q$q6D4;PbYw+w6ZHx-lsVm zVI4H&wSz1>^(;kcNo?Jx7(Uc$zb?{rO}&mz<{+o^R0mH4=r0Q`fke(@V{+jK(^3lA za3RD3QLbE5Ig#J)m&Fm7TtTaeN?S=-s+z+^f6{yI{}s36OFVE~*e7sIOZO|L*R{eO%IqYY^vdc*2}QJ%lDFuei)i!#lO zz~Gzx4`s6LFAd-c0+i1Tm~QJ~0PI45mqMKiAQJ*~p$QmngVp%YXsXWF z=!YgC9Rl>K|23^#2cQlKGBW-eMKl1YL4tzJ0ds9kCID4PP=PgIzD>yjpaKc%wg2im z*#MLvLBE~9mbYLJK!O5YxB)Oga9)7cwh9-3Bn0Tm^FPMj051?ANZ&6l1h20yIAw#_0gHxmMvGM+&?}v+r_dg_4pn(LsC2Z~|RXNG%UENld`(8ab{77B$iUWHUAP!Y(dD9IgvfTvjw#;EMP+ zgc{#UT>#Wt^fbiGEZHW$v;Y_!@a6d!3^qhBm~D0dLmq|@piCPk>gyx!OSv(Gp~qW> zTQIK&rVrNmO9{-FxPVo@2o~a(U}(Kbp5_V>FI=;ZvJXS06zOldtT{AIhyrLROjbm! zbQoDpekf!T53zps+rqd8_;JxeMOvmx8!TkZC%s~LlqCNX!w|{8V60>$K(YQDm{9

    uy66`eO>bXJjSLC9K$t@=>v;-)F-S&V6YR=w;%pR(z9+ANPnQgDGAavjC!jLL zW`Kz2&@#t!k%r_LPc`p`NmDv-aIeZ`iGSfPz;eJAVsyx6mA;#(@`Ea1WKrB(_CLs ze&l?`ucwuY;P2JX7Tj`EkO8>?iN+@hSD`tQV>umzIkwW~4&9q)1banH==CZQd?=FWW2IdXK2)$iBKbf=CM%3%cYta5F3 zSV=;NCF^Sn$c!_|aOB9?Y;5r_1~4Yqfbhn-XjB__y;x;aiMc`($dggYPDITe$T$3C z_lt?#k#+jJMyE}&p)V!Vq(O}#08HQ-f}G&YWhIV&C_E*T$++sh8IoY1`~Zk_TYX7O z3SO%;&Wx>eE{qau2CYA;f+35auXI5XATt``#t!Q~UrOFSU@|_$k3|**=}yHtJ0yrV zUn%=$WlMNkUJ{Rj3ipHUDTOj5+C+H#jKD1aMu=HoV~tJM2gag=mSdTdq64UZAo~cR zX4kFk_RGOTG~1W3lFY|XXC1N@!rfS+YXbfo@~>h8Caa>XnMl7V-&Cp_0b4l*O53EO zc&rD5Z}%%c-DN}!WO{kBft0p&`r?25hkTr$Eq zx@4N5ZZjx8ZDj_KpR2$!sUF}+%!97)r>KfYJDJcYlxl#3i|6mK+N5z$kH`39f79OP zIMvv%Who~_MJA?kG`m+6}*f980KgU%eE$(wo=an%S z)&8UL1j23{cjef(g9P}wCkJw_+w#PzviaotNFwqT(xC)&$5vw2Fnz~*grEyHKOv^G z%A{UUI&sP-sbEiH(O)g=aq9W_lmlzc>-zu!Oqd|g#27wuAzA^oc-rD zD3DORX18*B*rn>7I;^`ZLUQif^5IIIyL+G$L!G?vD*4N)0wc;Q*gjM-i!1$zgP4sM zQY#*t3i8KPW9rjRS$iGbIDL^NepC!c2x=myR^V8rOPC`$wq#OYe7iLxKs8wmv>CUP zaTDns8)(4UN!k^Y+8jc3$(R4*e03bv^=NINqy}rV;13hE%^@33yOXMABvhSKCqj<^{eh5PIU=MR9SCror?S$A|K;Qi<6%+3x z#s`Im{FPk1j(Q|hLvQj6Ag z7fJ#Y=LF!ra>xgDva{T{=i^R^;M82PkJ-m9g83m-+(<@0NOE zCOo11;OykGau$u!F-HbTl?cDtmJMIUy+U3g&7H-UnsS28g;GQ;axaHbqlAKcsgyxQ zj_wVr@n`%n=-egv;TQ*V5gFrHNS0gvVi8tHXio4=7mlvXPGxtcQtn2tYW{1X*F2fe zjKZcHs=vlyU4=5T7C+uD#9o2No8L6klc*0~!|c4ciMFR{mLNNUZ0(T+)^j_3LAUoL z&xK!9hExbY>98qXQ=jpLg+u$hySkPU!wA!)RLuDRe%hOv*&la=++^|HBJI9vNG$cc z-0flLs{_4V=mJS6VGFmH+b?b!VhEhw1=_353q>U9H%sQIG5w6%VP{{t5RLw+_4W-_ zi{KA(%od>qc#7T}@H9WX-IZ31-F)6%zE)2>nlO%Rl}?-naA{NXnlR}7Qyirb<3C~W zdKugd>Xn?zI6FEwdMygj5vEI;syKSMmwIgyq%+Q_pW-TA|~&`Qy}f7tWpJ&{Q-`Yo{Dx^#dGVm1qo;WC9`v4CW;_tqur>_9MqBg=eyx(}pJ5V}^ z3z`fZ6mBUoiY7p%XyT9#10%iTd;Wan*&o%fx5okZI76$prZqX`o4MD?o4H>7h0BBj zNs%Cgw*0?Kv%9a~0%aa7=2o7!BA;wKBgX1s9{M6rH2DbUs;ACOJcRSM;$e35Jp1`_ zbC)7=|2b{1RM$HO2BpzyaNwyo`>b}%U^Nyrl>2!hyd?thv&XOKvo@s$h`alq_qTG( zOc>w4Z9rE+bzzte!g!aodC(~RvD0xvbF)w041G@Es=Vr#^5yG)J8Hr2&y2{QgL$4C zn3*Bv>2ff2j_fXu0^KTh)mFa-PhW%15l!@eW)Csz^u664p6DZG>z(Ykc9ud 
z+L;*(W+n&vOXDg{Z6Y^&(I|iXIlLmdG4jg#Y43{_qwctmvd@`iq$GyZ*?n~=Qn>B^ z_B0cR>>SiXXwN1uIQ~L5?Ylhn8(|xH#;P&{s4-(z_R3RpUS#;Bw} z)w4kRJNg5*FE`@{hNOqY&j!QF)=?U!F;r#$(&%U45g>ls>U6~#7^>O>ambFcbcb_v zc4eb6voMcXVcoWhL%}+&fCigTBfNtwy=4G}qWFUj;#XF^rt9SE znGKyuRb7K71`a1 zChlYRZek&=G^Yaz2IUh7W`f*T=S?Z_ur>Yq!BptXzb!;NwVm9%{K4&Etme>`rtorb z`gr54Z09lv<5HuroAnY#d9#J@?^DRGJ*y8iK3b>8uWg=~;TesDq-jzl>S}SwxbIF+ z7-_=-yFOG+yaDq1{w;=Kbpo6JwGjb_42IiF(BEzh8$0nSzAN|9ip2JC*+Wp(m9Kbm zH*ur96EJST+`Ld>O6F@o^xNB3hnVl_VoleqcOhp4jG)ahVR)4pUO;#4?@f?@Ks_?B z#|YRZoF8vRu(YT1`@4hA{wrv%C|Jdz?{w;qJ%R=9(u8)aRM?rvy>=`SjJi_5CT5mE z(H?#vpCH>mc3jN^4n-N3$b-Fq1Q8B`%a1OIs75=9q|U^F3hm^@%PssjVCUXV=gvtt zBK)tl2&d&H*#EFUSk#L&;Zcl%PzClt2meTfkaX_T~31d zIkHI+0%X4aTF4LN{T_i99ugfvpkQGrN+uo5+ zUjV5xq?N?*tBY{j<}rkT1$YpR_S4u6$_`ZwQ7|& zYURRA8+swFtNAm0htx;w#;#gGpUMlO`xSrNL5Mh}5;GN_iwXQ2X*%*Z(&=PyDndOP zVfy}W$n0LwM-wU09~?49q8kaPrkBu?=;9#nkds>>XB9gbGBtIPW6c?_lJuOU9R$1= z%pR4{q-_Mtsa&!z8mK)B(4n#o-I-+rw&sT|1?KGW8w$9zFXFmW^}Cdc;iK`q#h|qu z=+4l&dgi4stx;HF=KI5k<8mg&gOlhs)Oo}oU#K`yPBx*?6iWn~(T#9$@qKP2Sw4tf z7@afCTLXmW9WP3rWP1Q*k_II5nK$l278p7z!vgJ1F^VD96@9E z9QhIkGse03uN;9)0{@I^2}7nOwf$c3RG5TPece%m&;g3ANBr z6=p&bMYB37pLoP2pz0AKSz$fZyhxW~;4bW}iIV6D<11Oa$IJG^5~k0=tj~1qz5cV4 z2Fip-_GbTMgV_P^0`5c^pXOv!wDnncG%9WDi4RBx_qA^Gg&7*#Uz7eT0 z$lJ^xg&I4?a1nA#YQ-|rlc5&L|FW^yz&mBFJwzv$tSbY5cq?XW-@R0-xhuT!^!o;r zTJ0tJvMY*n>ls0&R4OPrv&XJDvnR)(+!v+3^-Mom%DWMh`~`W&=XkBMX(RGjsCzR~ znIBuu2%h-#2xza+JZ{^%Io}->w|QGNNACQ^zZkmHep<*MBP>> zx%LT_bHy+A8GoOyZ_a4gax3<7vzQWRj9q}dM#u=@ z8E7?!-G3Zn8z&O?zo&1*#8EEqM4mM7Q{bJ2de-i;SR;J+)tKDjaNu7 ztN4@U9lS2I8nVxMaaghDL*jAojfG8#yi5Ktw~_4~IZ64%!4#LPo_iZV(&PX5%1tm^l}@I`leNGSf~q**>%jMOL`DJs>ie+s5m_ z<{LMbiEIn5&tE3x02f!rRn1+hg#lC3v|SM*zw5!~EC%b@TQ(TwkyyOAmfBo_*gBkH zQ6+FyMEnDlFm37^8LzGPcWkJ!Loj;a|Fr*U$cyc`G@q0>thhAs8`=1F{;vtb2et6A z;5Rn%mEgbr45Bh@x8wK`#R@g%H7@F6V66%@c2u$gn>a;Am3{O)-mp|X;pV-eT0OxY zoaoB;CT*%P+X3!TbIE9TJzr=utH;>taU3#IYy1tjZX=!?F!%xKLF`coIx7H>g5=Wx z-q7sqY^+?M(KG<@|L>lq0c!Dh;8~Gn8E??^8OLJ5Q-B|r#ON(cmy5)iO} 
zh*SX)0TDrdR4F1oQbdRZ6hTEP(o5)FIw(bI0O_FA2sh7tp7-8o=KgW#GrKeA%5Dx8 zy4-)6>bB_JXO|XiMO#-yuOOdQ$9JKni7GBUdM{Gj=N(O+N)0kqu59SsL=&=!D@I{*CoY3D#t(sl+e9XeV#CaxvMSgqJ8?+jvJ)>^?NSD(AGharUByn{&@<+_5RcrI_>ln}AD{88ks|Gu z)&GUfsbcne4gF~7drJDPU+;W|fHd(+he?N@!wYgeRD;CJ^=ThMy{fvPL`UBzw6oKx-;sMaRo(zVb zx-7^3sDsJk@o|gFuxqw~L=k@V!LPFT&m4N0mvB}@J6^9WURT?}n;pR~OUAF3>jan! z>u_c+>%|RFr;E5Resf%GUD}92vA_;^o)Nl#!;6ALK)wQ3VfRvc1=RRdBxSk7oKaX2 z8AX*Y?R=$<6qrn<4`FSs5KwQ5&rWl#N#eB3;bBH2E~SYl;mg|0(~guo>1-#xY)jD1 zjLLFD-Sfui)B1snx!red(dWmTd zo&-uL?#9W7i1FM;cFHGD{APB^5~vmS99Z5{dmJJ7y4_HIPQ*SD+r^Ys5fQ8uB_(-9f8t_Z-QGPztdN&)h$s zr>IZ;nN3k2^2a2<5J%-mENrr!p`tKZJG7$!*T`eo?06+(WFtANv)-)4y=EiH3UsqH zU4y5EH-JLE?+;4 zEe1QQ7tw08EsS)WCX_6<95M&TWK7U$rI~o9sexS`@$285#q zjf@nI9UO1Xxg*PZ1kGxTdNQ>UwQuG)*qX(oK%|FS@jsfPp+;eIdMY$1hCIp*DZ`{z zS5UbbJMg$fIFvm-mK+L6JoLPb2P-9@v%PUs_P=C!{}^OP|BFA+9^NEb-dWD>rb~TI5)P z?{w3xHoeKXXqvq0Iydn3n8C3%v+sk6fb-ZmGo9+Wqx`3@yRfr(Ot;udK2J=hLB!{Im2I2T7Sn1p&juukgUt?FBpbtE zK%K`(OF?b}Cx2d2=xPYHZ=I(A293_)%*?hwe=RHJLt1KrGepT<;miXpGfBTXc0KK- z;TD1&4?Tu~-iGJN4f{wHVjnC~X!|WuG}+eYYO4RWZWK44umy8IslGs(5OAOp!eZ`G zUEbg1?t6X%+t*XO`RbJ?Wd{?@fmFG(5Az&7ILqUKm5YPQz)|R07;xff$zAbfQVt9^ zm!_|yxqB8mrrn{tZll;ypo6{$9mk8XytESH!Ey(PFkkgEq`J_GnWQan#eHPGJEx5n zgyUvowbf<6FlS3kt16C4z|r+z=OVIc!;< zB5nF8@uzK}mbQc-CzMYGG6Y8VCDfG%2K+iY_Y~p3O*~eD8KoL5U zl4;xK(+nHloRzxu_cp$roSw?|gq75iK4{)bo(%bB+(eM6e^Gch%3g=FLf6iEq`cI< z^yh{C z_4sw5LavZ=^hJHqVgqH?v}27)(82ObcHmr|&uPVasn$%k&gbTYd`VWTTibry5|OYU znGh2BVC{Ep;^jYc(aRlY8n*yBCB_Ja6SvPlJHyprKz4$vEJ`hEY+Fkh0CUdL!bLah-f4uhX+%x-wX1HZ_W`A+of z&(8O1{i-0YhW2A3V{tF%^Qn)b9IKrpYJLSQkaw#aUaQ?s%+KH9eR1v%W)XEt=1ym0 zG$WX1fJJU0-At{ufJOD4p7u|Km{NMBb(YG$hu}ao+lWv5%1Xr=l;l5bWUu^oH!uES zXhNz{$G7|ir6HBv%PKR(!N;S6$blV8c@Tp|w7AuDL*L}6;BtBbhj9mAL6NuJv_O@C z;+3P2ZSjwRPq$=qy>JbFq>^@kbTHhe8GLeRPC8ucxMj);l%)@~*NJU-2#%$5pZ2^3 zjt=*h=5ll8ew^)-)`p{dmWNvoa%F`YR!?u%nEtt}%7MiH-;!x6CycC}iD8gK-*fRMTZ;c!Nbn5}WhE36jzB6P z6!r8Gx;jV%N?u<1QSa)lf$|EWt>Er159g_~*!4G-z zXS`4rCzDr`Z15jm;muUHnc#zjeQ>iV0GOH 
zY%Wz`l`QsT6$pq;*8Y%h#lyPb#<$1Pd@dtuL7p=Dc4Xj^`8ljP_2?&tyAH~!%MP5T z1YtzaAj($s-EQ0Ag2!u?F~Z}*XqpKMfotk45_7yJJ`Nn7QQ~O-gN`G33%A=-Znr4V zfVlgfx1+tfUlY7PAbjxQ1ij{f^f`u`Q9+OTvYYa{JW>Q7L0;HJL#od8d}^-CoBz>B;4F?x5v%85?Fm(T z{x7_g%`>|*e zpoNuDYAeA-R^OJian8f$OD%BHcuIlPU!)rfQEFZn3%d%tI(rXA_=Wj=_-^ylPrT1h z7kpmftBpbFk-3R_&z&1mr&Ck9r)`K*{SN?2)jCboW8U&oUf^%Sp&s(i-$?fwgU!QX z&}fFjNU1`iqg+h+WkKmNz32Ir_u5)U%^5u$E{zb>+(N}CsD7u+WX3xSIEX6g;qwo2 zo>RWO&A1-Qjo!*lr2O!S>pKpl<{;o1eGUqTp2=IUN6Jo6PY+Pm577SkBml59vYDsq}rWylnj{n(^E8hWa)- zt)FCu{L(7;lM++@0kDqK*u8J^n&$)e>M=rajA9V_4=ZZ2G9}j}4c;Md;Ow35Ynf^* z8D05Kcjf&orFpkS_n=G8!q2-B<#88DI`1Ju{{m6K0}-2+gq=>>qO^?ujg%S`rg=fS z(_mFU8rgq(7Cx8+q-3%@yGeSe!6Ur0107t6>HKXoPhlsyv{Kml1V~seM0XiVarNxaz^X z=66hDE7@pe&wY^e`HR5bTJhZv5{}5*fYEKE)EXvf5g==>kR$t9=RpXFRkHVQzTvXP z>-l2{9>{x_S{WF8|HBu^TXYF6%opZc^PuK|)Q))a~|sGgmzM6dlu6cG)2D7#B&UgCji6P)I`=8l1xLf7MptGw3-Tf^&JBiE#GTOnWiv>i) zxg*415LDGuEE6mf%0yMM2>nME{l#+a*YkF&_>l`XRS1PVc?q^ggT=n589?t0IUY~r z5WU634IhVVMR~15Lm@FgHiBMX+}EMD(&cIeDs*hAmDw5xqjB70O@g+J{%isxf2Y2k zpkNLqT~A~3hF5x4_Xkziz?}><@Zqf`pKQ+XdSQoxSUU=@7@cWdz(14}&=AD4NWRqI z!FypI61^3^%Q_!~0GxObW`D4c*WV~`i$meNEa8m2^OIigF>%56Dq0MB0f&S&_1>Q`Z|0g4as+ZJ(9%^5eAm;;z zz0T<-@7XEFCj+cg)=TGb>n63CB|73id&j|he>z*5&T@MDyod=>zKU7Z1$|RwQR`(` zrEg|!ZSNmv;zLCsQdpAyjz<6Hfx0 z>?NrjS0XuDG5VZpaxa&LO`&UjraT%tAA>k8JrQ64PvSH@d%mKvnU& zsER*aRPpa`uP?s(T2%$-%xNu(>sJMrlGVl*M%PS8U0kn<@5}4E_VP;By1eZ#B`@zU zjVzmgp=-9caedbgo68@rzy0cKsS3xO5r&vm#TB!+s(!IKUBpE zKYm+47-x$IPr4!5G%QjT>*B8$e-6Hph$W0+5=JnUV8*B;T1r+a&@n3`)t5w7z!$)O zW1&}0U`Jx8vjldGJ9A9niB}@nt~gqDwn8l_+r~D90?Y# z1gIm~KQr7am_dSyGr=QSmk|vzaSJ7bxA2@;h*Tj9p_r(m^xK?R&{4`W#Hu9sVkuPz zvNxAk+LpihF?ZC487)14*yz#!&3H6_!oy(isKviC+iGT<0$ecVqBnPMGW*rou2>L# zvdwviC6{TcW=UudloC^%!3F(K1bV-L%cPAELaJBWQ7V2W{gXxBW1*YcJUNq#jx_1f5so z2A@IV-J|erFrCEnsw2#8hjvSUzwBtShLyY(%OG?EQ{D#ZHl7fDPUIu$FRSvliR^}U z+^=`xxqI9NM)CARce4+2`+b64EZK#qUegzo4|s0f*ak`nvtz=h#f5iknIxyHYCM^iZu;cj2RdhzMvIA|jw+h?`@riw#J>A7a!qC zm2s>{-2( 
zKChQLtCbRe=e1JqGq+Oj*GZ|*+DXAUpR<+X%iVsR0~f*R1aatsaUD(8P1xDaxHwIS zB91IG;5(BPlnY$@F^|wOQeI`$P~Ka;8tNF}C>}aOwE!JDIt^bwKGy(f3lKqm7`{Fl zi(wdXEEXd#fv!x5G!_%JC?b$4IMLgm(ZoW)gt;hx(=Is^Cv*mSP0~#WQ6J=pR5bt% z;npeL;OUi)!DuAMy-L0?un zcO=qJN~<$%_+|D`xvyfAI7~TkwMfdEXH=#ZlMrXt=Xn-!01Qsnh=7V&ZQdDl4#nc@{(q$WsJ)(jCD(h=N){z%CMv{KsKld%RJr%MYou4k1N z;yk!I7}I_*UP;UUKXu1bcqKn!-BCODIq~X$5p5E!$iZ!~kS2Stle8o6ciWWg28oO# zSDC54$hCmz4>YSQLN{=Yf_<~4hR=6s_=^rL*Avc@)Uc3NSWnI1s6WLb;)G4p?|HId zs;SratzkX2hNha@V>~tCPr5x8usuNU$r^7SLOPFV%l%(gLS>z&2F21p0IZM0p#}EoNSaJx@Pb9TCKjtc++~cLLtSME6WENlV+95uTcYebA zFoxIN^g);y_v`(XT>Vwk@ucS9waNA;?Uar%p1w1qQ_?B#CQ96dKXc|kEvf=|Vl8EN zLIj0A!l5!rzIoLTi40>qbnUo*l;9oU_N7N%N8oO@t9}#PH)@jh8#4NhKdN@{w;syp z0W~#MV;k;~(~E!^^p8!L9A{=x(|!2KkoD?iBj3DzJM>MsbZEyMq@XFO=khk!MxxlR z=Gr!&8H6`{+Oj6Y>txFQMQB-b*;fcZu!w}yorFxfZHI!~Q2_cPHBkCTLL|L*4E%lDcugkXg+a&dFF5K1v;^My5zNo5u{tHt-(H&s_|9@lkqe|fE($u?F0 z(0@WxWUKD0%WK1V_HK(Q-5*CI9{W@AKtjbxg z;q3;Nxp}C7hH}|05|cR)gP)JItjhhemm;HgE9!f~A!SGNHsz|K?nghSeT3n=b-AU^ zTO9k0j9nlToax2(elaw^`TAn3!XimYwI&e}Wr?;k=( z*8}x(8_DBR7TI$k)we{U>iT+9_SJ0q=Su_pZR@gGR-`jf$6jY5u&cRZT+5hUn15?w z?tE)nSZ0-o$D)Xcz7;9@5(s#4S^-af%oVNQl$(keo_{T_1#tU|){Z3an3G~y`>j6@zrVTobTLRtTFxYF z)`HG8Oj&L&zW;$QR`3$-NPz;L{Ll>$<;p^9z4+V3U&&uZOpT-lf#Z_jCFC&>n#v=b z0S%4?&T`q%0~z&tS20p8x7$<+d5t!aTM(R%XMZ)Rqc&No zFwDAm%3E}}A`1dHuxC~{l~0$F;K*|U1`CHZ&x84`64IC~B|!g%>Cx6cP>D|k+m=S&Gj zv%y^k>+7m*U3Jv<0S*2!M!KxJuN!)Ps{03qXhddzZnss7_WBurz<*+eKl8=_RPe`@ z2tAbj&`PXR!>rIOyXVbg-}d#gT(3voxtcTYEK??Ieb+48?Y3G&$Xx+jTAS?K%N(Kr zIiC*L_4wu?IHQ*SKYM(!>?zEE({4#-^b{bBZELU7u_2 z;v91ym9C9p(z#f)b$`%P*_?_9gSlAP7gte4h?X;iMLav#{qAvHc|+3K@2U@6>8AQB zc_kkC~Y|$IRSo ztW2C^W$qj!bN_N9OZwDE;s*FoQ)c%_HCh+kts9peEFntfu74#Mfk5#{2Y?`_q2E4S zLgw~Nshm%>;Bkq4m}>nxZT4c8xaq<&$@6LACegodAte&oeg8lmK3kE4gM|jAb`DcJ zhwI^BdJ~e$klMkYj~(#h>A|O2dn$S6vB++N&M;(3+v$3cCEbE22P6j$ADw4+b>cW` z3$o@e-cs{|7=N_m%t7>uCIQDsvOEobSfWyJ7$l_k{v;cfy}!&&=6${9^x_n{!X(U@ zm|Al>iIE(jubIucqV+=9;~C_c;9P#Hd!IN`ALv44chp{Cu8TP}q>;@E)Pc&NS0w+| 
zziWFifb#K?GIn^`LAHcKuT`65J+L~ftA}`KYz;ZKzI(oec=KoAkc2pMFKlB zBj{MslaK1~2=VkPj3khdhmgAs-E|-6?^Ez@KY?21a=F`vT-xqg^7=akkF^ub#QRC9 z4{HX@DRdS7Fw?oMJC-`+JU-p;<_DV2`ORgaGr$WSK>H1yiPLZng%6z?yc@S6_XYeS zJ$}&w1Pr0pXn*yRI-2TdGa z!L)Lz_OcX&A?QLJyO@eQQwODfssU0$!E;G&0`_+JdS5!^uLR!-{Gw7OND!1ry zVxA%Zb;whl;0cS5`IM(^1>U-=>7=Zmr+5^PbZLXDgc6fQ(_+zK-MuEE9C>1B7X(#IY58AN4)26a7IN6F`;sdag$;^mOR+H z^U?y$0*SZi*K*sehg(ZA3B;QZl%zJWBPx0K6pSF-c*CO}YOCQN`B_ zU{MmYUt`qV<|EH7O>h$vqq!RjVdZ+Aa-4?2af(GeJ8w=;N=H(s;5jWlM#r2v*`Eb_p7ZBAwg{W;lz5OEnm zI^%QaxD4=VR|e?U6IeSAoJ66Sk|ri}o@IZ0fknIxDO?eg9*PP81AS`pBS7bXAKO!z zx5W#jJf<~-Fm;Rp2VN3PKALoUa8VA@&tEBhyp6SUsUg5p#Xd=cJ?)b;cz<{(L$Vv7 zjJf23iR-lkSuO*c@WLiKm_iNck(u$xa+AC!Yt}#DBP` zh*019N-frFL$3c1-+e7TSh}9PNFAWV3jZn@*3e(PKCB&0YYk`C&D6VWDWQhkB&2MZ z?qQ9I+LIj3`5X=U%5a^7m4BM@o=P$ytHb|Uy^kF$0!V`b2v+@vo<8pAciSe^c-r02 zN0&p;s_%xLQ0BpE8P%|wNs0&~PNs;8Ln2qqIC!Bbpa7L&9VnVpht*2@!|zb`*J{Km z-J9V{f1h>QPI{ZL0J(Er!T*vo~{2_BNKiLVx@Z31zqr6Is}< z9=EIZ*LTo<0XPV)ndDw`SH;2?Y*A^wyB4zkZ~;_@Dr?5J*H2cy9* zu!o(mdCH;Y3BBd64@JIk;fiOFJ}0`O5n-sscRG4!Gyh##poF!WFy+MeQc@WIaZxCq zf?47{0Nn-`8J6q3+J8lG4SaMBF1QAI8?1w8RT*w!PXvF6h=o+~()9~vcYiBr;yJ$N z&$B;O{i!Qk4rJtPqaV01i6ey{TjVLYKf5TJ!TmQw5K=02rN$wyFQ65G8*WBNF zUR!^Pj;^g)4zU74(~vE`IZR`f1Y?91rNsP3|B~;4MpA?Dj((c0c~w&fh58xrrmXb9Pz+2C}mNI5l#)Uwl@# zgjXknl62ggP_*RFFTG87Be9&P)REn+PoDt^Y}t5fleGULe-%@h5PAEUQVusg#Mmgw z!#l>ox5yy&SAR=eTdG=}C2jkvjcM>VR?arYl0jY+!hfJQfj}57*K2` zq$YM=EUHy~tLkob?x{=j$-4=k{N;>SKi^!QefPcQ6CqeIMounoCqgOate;rpm{ewR zc{BMjdvm`$pDK}EpW~ko^Tj*-=1g{fc^-7Onl0A1%hkhdGhZ&&=RaNk@ZI-HPXY^8 zT9U|F@R$v=Y=tY&yvAQ2Ap9TeQlRkJ<}RlXA-l=1xYFk$Tg*vp`EYp~jq=%BT9yo0 z7XGQ0g>f5N65efARX(0AZaVMZ%-5UM{O!|b{^?x$ta|#mT0TN7eqU|q@fX^E6Bggu zg0##BQEs7FnNpQxrL*fLz4__f*lf0%&lcDD>vF_=Qx2G~%ipi>^VzC?dA2D(K_S<9 zsYzp6niMuLi6^|Y%3V1GdfX22%y3^1xTs|KwhYdUu=R-diCHA}=js9Tgb2f|ZX{fB z&J#lDlj_dW7NVJT-2su?SjK&Svy;!YeMq-3?QBtgF8ySYQr1*h%d`xWsl=AGtCuTG zHP~W`;FK@7B(S=vU^?h>;D<`Np*2b~&2_z`S0bT)(ylj?mYgeYQuTqeV zv619F3$j{>P8h*5;YbC4Ug<%v_Al$zv5o^5X^Tb1-?f-b+BG=Q&9iMWP?iY0c{ZBL 
zGAWx}oN_bEtXJ(dzd|OFa-aJPtX=tULRH=Hcc%(}eVXmdk=xgYZU243QvgXu_=!8U zpSZ8uPY%qIlH;)08yL!|Jy9%&Dj1hFeWHDlNsNGl*YwF{L7P~aeSAV;FI~2oZ<&`a=C7R}2*ic} zK^ic*l=JTt4S-By9Q`mzgKe}D=gI97CDNaPj-n+Nd<|BAI9q>tc-$;E^Xu9D{g?9D z)8cx$Iv3e0zn^XLoATjX+Q*mrrS%iunlBfxdv>IiJz_WGu-CplYmLWr^1~VMdp(-YCs~loyOsSYkE9`uVNwUA@n?J0}ziysZ1&d>|<S0+k$yXK9U3|0w=xZbIO%V+_2 za?BceDA)VC{L8z1vuTDEHlR>QENIFLa&jtYbCvgh{`1_clGsz(-K-={3#uW;TDlT2VHBzHQ0PO+&**7G(+tQ1amNfk=xWmlpMRr!f5ayd6HQi+NAMrDZq zYz7^`7g-U}=CYvfr-}(H_B%OzNKi0hS2eeOm*hdMZP3I;YC^Iatcj#stMH0TnFJtX^^78ky+tqsw-dk)PKK7J+@*?8%_I_k=t(@Kmi1isk2D5NArl~>A>lR}Q%S5B zLPuk#uWN@$v{7(9<%b*~hgfUUzpd2qYbv7MLX8BY+f!V_~qv`9U*XFJ!=*viNxs4@>?Q;r^djGkd1Rp{w*kW{! zN&s%(8a2O7nFq1d4yIFwH}8ifl(t1{`u(m!X4C0X`$Pt8Q>8D^R+G&sPtBscGp3%^Yu3U!{z#P3t}Yl#@m2SalJUN>uG| zHSH_K%?ia;-P}MBdbL6gnwU_~ogI&K6<3trF%zf;2n9DjW`jKufX1*VpnbF99ObkLZ zRRInE^3aUVR4K(tJf)HKEKf-wGhZSzI~tk!6VZR-McPgV&|ND10_pHb3O8~3wU9Se zf+_2ES%B-Zw?}2`yP}EebJ^)rbVnslWWj9Eqq!QoSkS0j6rhDUimGWO@c`!^#|Fgx zD=}-_y})iCyTy-cZ+rITu!nLC)4S1M zgG%aKm>zPIGP)D~NOrU6FPz<%zvTa+)bhB>*Xupau0tGWlidLFs!yMPSF^`5$2~o! 
z0X}sl&~1{}W6mcNN8_G8kyj!`mI4E!ev_IrDUM^ZYf(s(T*y&rq<7(V8;a?6I%H52 zLFbcUP+R`ZO-p6cU{pwg$hH}T+e_X+Sm_c^*AXftw9$_xX-uOvocv; z2$bFcr5I^_zq875&Kmli6}g%WcAT^NQO>H9&YJEFGh;jU%HhzPOx z+y9pyl?#Ld_?3ma7y%u$q}eT)tEedUb``}36crrKLqASYnyw*T7^NsAc+5A-qcmcru+`CeY9uApWcT%n zN+Q@23g~YyHB>da__dPdMPQ~`-frh6bq)tQpC4X-nDmt=;5StWBN$F~0K+-eT4$GK zno~Nvo2{d-oU815xmY9l>wxSQ%)6g6^mxu9Nxx4%Ipym*6()XYoQF7*fQ zZL~p5VKQ_@;SY+w4+$yMY3l!HG2HD|*~V}R9~s1gj=q!X>TKHlCmj=NE2nFJwfzq{ zY%~JC`Cn(KFqR5sZe(+Ga%Ev{3T19&Z(?c+m%+&a6a+FdI5U%xQ!IbnIud^8SGd@R z<2lO+NxpB@dB`SkPEv5*!W-`6&0IIEzdEF zTz&HkJ#m^AFd$|$fscR7BtKWv8RHk~MLhX5d$WIN*+zf@m%362!q@{1K=O@1%SXNq zuQL#Mm@dAaPNvj1KeGujE5Q|bg?X$7?UPmKnH)+mai7LR$xpCvIASK zx*U-UY%n!Ci_S+`30XFxE>(-PU7et}h}d4E&_|51T4Q(DxQNnr4H=O%B^X(rZA<|G zuGdrB?hCWA-7xwM-{EGPV&wS)qb7CZyr=@-v9JaJ?Fmi>hW!o^LS5tsM3G7;P|r7` zBLB=HJ{Jh3UZ8({F5XC=%PEPWY{Y|&%cdNtZcVRrR2!uV$a}pc4%@sCF`ugkR`UdQsa1poF%D7( zpyC9e4t12^uM@{L!%7%SKu?hG9L#pU?6g!47MlgC05YF%MA^=VPuPgzrN}ZCMCz4X znt@2r9Y(3~e&=86aO@kmWu+Am=KAl5#tkE#haG=1Vp0zgiD$V+2diA=lCDS_;qJ0! zk54gz>~$l|vPP3Wb_@%-#8u1aSl$O?IG!Qoa zgd4POEriJp^_M#3E0WCagu+j z(U*Cc^SOSA3$6xN38XhUO0u%3cqk7T@mz}y>*Kh!)_ry&OegozQ~KYv#HSdZhx58l zLalw(Ot(7c>`D#FqE7UIA|7RIh`|wq&6h=*)KfL_2iP6h*K6zQ^>lp^$4rZ2IjhbW zYMdo`k|qC>031ld0{@QqfRU8co6&#%^*a9T8|kbnvN4wMLHaD485L?$tq@$esydoh zIlV_M6m>d^`dA(!P5W?N?rD5{X$aTY{Zw<^mJKSv#yIu1tPsF)6Es#(;u=FR7VBXy zN}%*Bp1XlcjQ9|Ijg_F7olNZ;lwqC6t2MxP!AZ(Z^H69+) z2G~c`ZBs>*ZLojMYKtdD6Aw_%T|x==fM&1>^f8SU{!DTfR(}>z$csuR%R|{OzUP%1 z@J5}rETyDv3n+DzlnYdpT%3RR>>;fLH6{+)wn$eP8yp=C!|f%}dQm@a7B5yW>xRWN zs6#vySXQsh`|E;Ly4dAKQ}lk-GjYg6U6&>)tH0^R{%+qLzkPG?a&~h3_U-KC)$3#J zOk1`{`a^2no4MKVZ-ebZCyx=)ACj;)b3?d$%<6f{>)n2X(=Tyfy_kQ!IC=f*yDPPS zAS>ykB;Vae?c8-|ayH-ydzD(;K&v;?DNu@Uho?oMA8fDA8zA1xYO1U3wwYyHr?1I? 
zT_-iKMFTyAnCNQ%Asb-Ggorem(A|pO*=3wmw@KX~Q~AiJ!O!kV9gYv{!0?iR3y1Qy z>$;eot(@SOPD904J4t_J;FEIL-0Hh~czKe(^stNd;d(We5U&qb1VT)Nku zTiP%RP7XnyPx`(p=3E?Vo1OvpE`*Ab<+nIddAPk2zu&=P?#ja?VKo94yCba0-o4og zC;PpNHw(0HQ^hapaaq_BJ;or96mv5pVqF9dcL>A z$&c%B@+2JIsY92lu)NsL`Lj||G55y`wDk>pPog23O;HN2_Mil`wm*E5fDu3-8O>>(#^qybAOfLB_6bWN+X7;j8(%GGE*WG$1 zhindtEvGaX9D$v-zkUG;3^K_^NWrAT1&oZIp5FI+dYIasW@;Y}Om_c$dhqTN3~kLd z#-q_tn=UlqKw~)6Mz&)>%hslI?fPhrgYfuf`saaZ3{7o&A$J-#^3R6@?SERgp?&ll zoscj1@`=DO8%#ZVSDBOy{A=2#0MbXG$*QOPs<${sBCDS800@8y<{2`{ke)GS%LORM z1~w?j&s`)-0L=7FJH40;KxHHWkV=?SlCbKo2#(hz5T-pZwiYzTHcX~xK$G>GaPwp@ zGNkjXB7gN&PKYuHxfn_9o__#Qi94;6I06N+sR8xEZNO&s<3yGVjWJeg^%CKQ z&4$uaBQL>%`_QHL(M;BqNSd_I5Vhxe6SNgdXvr49VfwO5FBy!=2($E~lBL&eU^p(& zbl@0c%c2JV@Ho?=b4u$hyNhUerNME}#lAAF1($z%^Vr&V3=HiMPR5mu$y%c7BlQ>>!oJ zls#v808*y%)gl2VViG|DxjqC5Rw4px@F@SpB#ctLMLjgE**q8=tH5amoxSzrIu0h zk6T-6`GVuv3-0K{dPVQ?S5|0wSfe0bFIi0`Br{z6hx)zd@{ElK>1!=W8>c=~gc@0yVA5JS?`;CGEJ%s?4YL@)nZ`BC$8~ zaWpvhmNN&f8niF~Bh zO(-{jXE)Lv6e=5Sr1}9kd)Pn|L9dCgPo-{XBG|7cn!mIrwp0bEw8K?0M2U_II5OPv zxKajqA1+txZ6TmY>*qz+LA8^jcmW5P-2KQKG3NFEc7Hv8zsS(sc39Nu-gmRe+#6Bf z%;;XF`efUFLaQGEN59MP{vH#3;-g@8PlqCJx%Sa+Z_9Hvo99HVO55|+d;woE{VZcQ$`#OF zW_Kg?GJkz~xh+?}Atg{bQfSB8*Loi(Z?^Pfbg_4Am~u60VXqaL@1}xNSPnbEN|Q3= zz_Z9XTx8c3US^UtiM;OMlLx&CQu~v51zzlrAc+c!#%>lPC{8^V++k#0T4Ci`Mb6^{ zL(>3ODLbi*)GRtShG1BLX|{<9T0;ZCB8?;{xPPlnaX2SX&UeHKr$fuItx<(XFCpJH zEBhuzKf8VURh%Y0q!grJQJKr`9k*YdnIZ6d;7j zQkInvX>7QzOX0ee85h%o{{RYY9m)!2Zj;1~5CSkblL3Ac0y8z2v4{aFf9+aZbJ{o* ze&<(sW?yInjxFCiyDytSwmVIhW|_%Dng;=bxG@%$4N3azSGHwh*@j?Sgtp5I7-1b9 zoy&JFk|NC{MY?Dy?EBAQ>$I=xhGff*Y3kB&Dgi*Bfi0PaB?HZnh7;+wJ-~q*y4b%$ zkGF2;>)pG%;omJq))ndHe}l}V+{pDsOSieqyOe?Lxr^eDs(Z{P~` zm;N|1#Yo^8Tkk+`aSuBq6v9;ebY{$kRg~M6AcnFu%NcqXzgNZUThA1Fwj!GXa+0lS_6lR0yDM|zsp7Ms+G$42cN3Q3Ff6v#Dz9)o%+IuS6R@D&R zeqYNKK%GgaMP{)#d3$Do9{p=4AMqq^ghd{6WBmN>5`_dWu;&KdzhDr${wzqchY$rJ 
zd&0?gfKc-Ac#pz+MA#2D2*+czNN3!$!8u+b!4cn`ZV&kRz&gS6$RjG^N6h{2JinZ@}nxw_D`%i`Tp?^wQtpa6rXdJHKY3a~Z;67f_$oqAr=_C1TFys^T%L`e$6mrj!S~;UgQa-h^y94KzQvC6fTLs)ju>KJH|TD z=ha!mf1M3gwrpA!0aJE3Z$88G#WG4qa~UNqaL_n}adGkD)yX5C=3e%q$i(Jld|6m7 zIziJiiu6O}DW)UWuXU(;MK+oFJt^y~NkrbCNM!&CoTCsCmh`)RPJ(^>@PJ5n;6Z=< zz{&u39>MEc0R^dlOVXX>$23ZMcEQ{fk>0Qwf62E%auTZ1ifP;&HAGi;i4GPl2MQ|a zA(J-=$7b~)P+;7LHG&d1uIrp>ABVAS^E9bu6pB(DQkFT zJk$mW*55x6%BB)*)}KgPL3~{>B*iG0Po68TTSM4qeTM}@dv+d~KZ^{h_Z{^1k^8d+ zfjtD#>c^eGa1b?*muG*q%d;6%)^uujjtR{8*0fIbPSp+^*|zOO?+!PiH*Ea}Cft6( zlfjJ>lX+7W0XUbihyf~p&05=T+c*$?&sPX2iaKqqilVN;?n|3XQEb|5>O~*Ad7x$5 zQCF4(N_LW6^xu~^$)YSL)+KEU)Kw{RIK!DabB4B|Ei^+rKQQFK?)CgBo)$#L3C$O#+J#=vxS!}qi?H$o_UJCE^g3*lqiv`4?!HsFI_ z(Hftv;vf%`_;15DE|V-|&!6z^5sK0qbTq{|qHh4##!%F#p4+E^6spQ>R_Kt=Yf{S< zYMm2|o!c#rpc|sJ4^FG~leSqLO3Uf_CgGuNKbEQvD_c;Hw>y9sq0 zy2>f^bLX2L{0F_o<19{a_pxaXqh z3ekCkDA3ua?bvEt!bh(=rGnXI(c5ihlW6wyl{GE)))OCmkuoAHkF#ArC1kz0 zz&T0MBnq>C@duRUVZ6u$d&qH?%cn&An_(;-Zf|gYgNgi)97I8Y(_+Pq99$-AJaEMi z=MNYBdSRUVjP;DzjHAS49YqJhr6!zz zfU6I>cT+u+Wr6#%YJ6K9*}ZECpW2vqbdIg|M<+Oc%F*c{8~u?4C_0TXS=?9K5qgA= z_eYp7>&aNK z_J*l{4c@EPHMQBp^6s>UfV1}vb*bOJ%HsZvm)pCl7JpZgOUP#Lc z&Q~OU&?0E+zFp>^Vo`_Vw}I|@wT9xw2TbIDS!X#Sc~+-mD3lIJNwOelDJfi--xPi! zA(}>V`V0pVB8=n2d-+Npg*e|!x_?+X;HSS`Qn_(0fJ*N1i`3~s)z#dhEPN)O7+K^G zBwmPz1q&*aKSCl^u}of#Aop%6gfNNJcYabENkNt%bt@L#ll zX(9JAr6|LFUECfEWHQRsK0{H4AJn_d-pZ*p!qQ$4-#J8qLH|5Wm@X*3*S*Zqh4~ak zHMh5cmpfw+)*7e1C>bIIak~4hjTl4s>w=EjtvBoIuZK_Ezp}7Ely@Y#pHbi}3V42# z%+yt1>G&D1L8~0Ules;Gq2haC!;$w|^F9=`Qktrdhl4g*}=3dpdm0mmi91e6W^Mb0u! 
zNZurregIWyF|u)O+D#?7x_RYZ`_9_$sm8&aB+Fl8b;F6*^WCGh7?#EL1m)XE z<;$uQXLLs2tn5Mh_T)SzMf+SWe3wd#QWx#fh0yH6gIJT%)gbxfp1nzb9LJr*3XC?H zVmlNFye_6l-2EZHs{_u~PmktBc=X2Qbk++R%UEUb$ilJ$k*#x-17Xvx{ao$<#5q%S z0H7ggo8n5`*FD;TjG?1@wk1T-6GY22;vF?~8NEW$eHO}?gBE4KzGFI^+3Nf&8Y)u* z5K>Z1TSilNXv$5NtXMOD`ep$*it$~}o~%*Eek;Vss6akKIbJMAScA?+R@osK;qt9` ziKd5R(;Fp+^kaQ!y2S*Z&wNIP^GwcVgpj?YTd|pBo{*({C0>w?jr_M!y1~C*E(~!U zkG5xq?s%54q&$N$r2DVLbptegp&xsgQk~h2A$=V%9sen(@XnoYD>4%U6S2k{O`8`4 zayDyIxoAOY*@`4qw)IBx&RUEn#n)Oq@Oa$FM;0)kZXon@*OachKqOo-^B^33q&;y~ zyT_y<&tCBpfYxXcu9?ovb8PsA-Qp|LA=|{F&};cRbYx@!_3G|)a_~R-=M6WL!HpA> z99b0uI59Yrky9vt-5OnQ+cxk$zrx5Kmb5V|en=vNZ9uoQ#RlZ1^VSD12$Yz%wOEoX z$!WS_|9wY^k}X+rB~fXH1&B;hai?x7zdm-O-Pe zqgQV+b4DBq$6=$%e1vU^9E?Yf?IJ?$(PTE7o_upUwuyaz@>3K9QJA;=U>(J2a(XrS zWoJpt0@${CKr36a(gRDX_1vBHtkF1o$rs9@HUWhPt^zH30O)9%;S|-;`7#IU z7x@gt6wSqdBcr9#v*p6~H72+eMqvHOCRTd36OZ4^WV1?>S8qZYFFs%T;qegS-R0*@ z&N~aY&LqqcCjzGD5P`kdYcE|^;KGMLEZc&Q*ZvGz_*`?*R6%2zZsPEu5Gg^zCIxtkBAIRw|`tj2b^0+Zefj99L!Khe}~hweQOeFN}9B!$)M8 z3s}8qY}%;w$#JH}zDy8+IBo!q&su|1GUgQ>DwDsICVLxfU-*#Kj0+8{t-9 zJgQWcp@rOf>GPOa1Tkz^g*}g>ps(8vH^KUIU$HlM{5tI%h^M7ool|=zfwrw<+v(W0 z*|BZgwv#XH*tU(1ZQHhOJEzy$`<#pOJatt+V2+w|jPaIWbpiOC)@N!|p-2npSx7s; z^Jd7rdaZCyf?ZHon6**$aB4nOrN^@lliuVF#XHya!wnTEs=u?q$Oq;i(KQK8*{r)I zOvt`iKQR{k%9pepyWt`?>L^T&Kd;3@$kAw)Cp5MBAoS&nOQF|mpU}A>!tN#`Yhl37 z<}BIva#W52=R%|A6Q~I3<7yt9BNrdw$EMz8^}L8^%3D6q_F)zFX1E@l=7fE&4leWw zl@LOp=^?nAPE^Iia+4%l%e&0#a=o8ljAa9%sna$PArn<^sHCWnUgWo1{Pei8Q8f3; z4svthy;ogT+$ZY(g4Q)m7(K`v!N$IQ#cXe?b{-muHv?M;r2HEOa=Wx_3Gp4k=|G^3 zSo(&DX7V<;46Ew(3j-agY;9Q)UoU}3W}Gs8q&z#RT=m)FYN*%t9xfbIl->2akl&p1 zM_bjtc*(Um=J@SKICM(qSiR(XiEvq$yMeyn_pTs3K$kGey5E`&n!|KE`}R7YS@umw z%yz6hv^?H0%beH)|5*1)pB)y!Xa+(F_ZRi&`IHk8GQR++v<}t*u@%VqK>iQ2kHArQ z=eE``wT4?%Q7FqisAJ1<9U6~l^(~j%Q zKW||AV~~;Vl+S zUGrWmzy1}hbY5N8jj#1qjJZH1woCW9v9ND*EjFxc7t5c)UHR#utSM7!uumAuOuzYc zu!tZ4M!Z@u$qbx;9Kr-Bh;kfZ3l%|j#9J{sjH=U*c z>0mTOw#pRlA9b{00&)d2F^4uXc9wu^3x;0NVF?H2z77LsO)O(^kh=tSK!IYBq2%}@ 
zLPX-O8!%-m&V+I+KKUj>)#>bzavX=XkD(E1j4&ps0i@KAn-B@G8Nu$qrc@X2UM(Ju z!vd3}FEg%5hw}kGQc1HWCy{6kOayi?3JYx0IL0++cfxXe1;TYF1SJeiAHf(V+P+hU zn_keFN*7>v=i=bt+Yr)0n!KycoIcjEO&sl#siFkR7*iPzAUH?PO<}Gg_ZVYduX2fK zp{)!_VAc8?Ir$gBYwkI`5#I4LbR{yQ6!wJ!5AchNftiZ%pE%P!A<4YShymvZA!Kn9 z(Oa%7gyN`sylyARjU`GE*jKHd%yf_YYc#2=Uh_HYp~dW-!}9#BH0#RW!QB``&=z*B z(*4*MDSQim(pLFe?INnE7*R%D7=yT3^H^Rw8oMn6vmFSKRos}LVzv_3)PHfI?1<-R zk89mvC<%hgg=HF5^l*gESRtmec{uMBkAPbAw-QttQSKX292Hv5w~8grhOdvKj#j!* zi3e11JiS{dn z^N1p>{0gd6-j<2^2B&g^@&Au(f-p1xI}D;F9*WTbG^Fh}*^#!T#jJ*j64 zB$ay|;(kr#p9!Wkc8EL%;yZ5X?_f_msB3kEZxNn?c2%X2B(P+Sa1fw-cr)DU>4JTJ zKELk|O)%aTp=qK~-FTfyR*RyP>fw?JosD+TcjLpru7*hP0WOUcCYgaevyU5y~TjeRP$d_1; z$XD#l>uEz1qw{HX$rUXCx+Ddc%qmfOD#aIZ-<8ENHvKuDDw|h(4`vSsAQN`>S~DRt zbZ5`&->Zttp36&ecwN&I@k&hHk3eBdD$mLtF*i=55Y>l<@8d}-GZPG zM1L32H6slhSuU@y&SYn5XJz(ylxXEqXo8k_F` zT#}VLPQehFODC;Os`+6m=eDlIO0%YHy1)jkrOxEY*8ac0X1fqwG&0@fH>%+fL{f$g z7|L!EEX?H}n`dMt3n27o zR9Z>LC`7cdI$lk(*3y4Fm8f*4O$U7dG(9zL#CxM<+G73r=D~6Ca(A$k{A8PUevGvQ zntL29@gv7_HpEO%tN7Ph_%)PMJ2DFH}ll~Mmxa)g8pSPS# zx14?#-N%OHMur;bg?{(8e>_eyW;V?CsU{!{Vy0VGjNOD2xLKQlzlqr>?E zYZPnbap2rF4jON~>HRBa$#>offJcQqY@QNyFMkm!oUhcSXQHrUo>UwUmLUAe`9)C87!R!M^Zu zx|Ql8eZ(n>9Sh{oaLfABR!geQi-r)^waoOW88gf?`2&N8TtIV&*bZeJO^gYfw7w~B zsJS)y!Pl%y(-JE9<``%T;8pfh&b;eg5W44?W@tAAx)xBnee@f}S!Opbe|mT{_`5KQ zsPotOm;T|hg*086WMT_bWEvSgWy0RHnj0NQ7d|a;wpmq7K90JtVxju${XVz5VVrxb zMw(Ko@HIQwDsTBFfp^}a`H|Nm#AU#L~txUz+`+dNmv%FC=fOh+3mQ6H;R+$kZ?H5CZk|I%fd6DNtarj{LwLk`OJ%qB^q$CK%?h-IF8OUOwbm zLR5e8<62QloR;utA*}q`)cgj$mk_U*w^Z|F(_Qgnk~BF&)PtJHbRd;H1(S?I4=kaR zHhK%Q%tW>Te6w#9pc{VWUofm>j^eLOiyU4V&tdBnk%5P`*->WRUd11*A|-URn+i06 z0x4BpKz#*HBjW*^XVt8s=n(0G{4;#r434@P5{Zcy-K)vrCjT$MFExrR@#X(1jxrlz zB2EU5j{}3x!f{thGAre42gnl{R0YGfp%v&)$hfV)5dY;4csSH6(V?EuoH+}r3zO%b zx{e9vOWufY+R(_~)HNO+rnB!`sSQ|2oKap8$cpf0Lj%WV)TSgJs_AiWGArAUMW8qj zpbN|&_hBt`Y^hwpi`579!Ct9?q5cwe-;;FVE2mgkDbeWPi|K&v=X-)dH&tM5_Ak?r 
zYe63tjTDgv`0%pR-Q^lRtG|x{JxlmPIsNF9e)FsUDBsX+C=_8QTg5lgM7PKx)``bJWoyX#(Wv5*@LEm*HtQ-fmI!G+!TLh*$$+dw5D6%!h9rebT^MMZZeQOF z52xsd-P}9}-NrBN@vX*V8}cn#$*r0a@GsXt?dmMQoi1cqWau+oM6+d+aqw?aI(a!T zTl7&x7njW`)o7Ewj7qAnu!y>9d3%CJ(qd5{wK2M|*478)b=cDNb zHY)~XE>*e$*J~5y12kQv>!aZsQG*=&fuaw*`(KJ!QW*~^OwQmi@jXDY>c19fv$Eqc z#UX27=SZ}+ywNW&Y<^aC6aT#`Y~pA$r02V8dl;j0xbf;!^1JHU=QLnwT zjAP}RyaZ@NUh}dI8gV{eUd!eC5a89`S!p<~Zo5H#!cWSA^rsGIQO^l8{8S z$>^Y1^idndh9$fPg22-e**5F`!uU!@e0agrQxDj#?D3BgwT$7|@>;l(_g8|)X2ghp z#DiD6PfV*HwNGO270#L{3$lx8wtG&rJ)W=xe_Yu$8#R;l6qR0`gQvqP*uBenP+whZ z#CvLg+&mUNU&@S`4r(N$ViPXa+aRpfbWB=Gr?+50qgE(Vj?hM#q2qpUPJ^@V+)FTb zItSz*GnJWvhd89xL^o3p=->th<*ey!&Xy!=E={2~@hrH}T|t!e+BJSR`5lmZ{#wzW zWQ|L4nnM%NaWe?G;jTKhcwlFtbXa2v)+P3Thma&xndBb~xh0P*sZm?JK8+>{s+$+) zp_|yNYx>-rPf?v|e`Z2doun8Qb`bazO$I<*j@}DdX1M)Iz8U|O=;lg72~}Ezz#LBi z+&uUrb>igQu1Wix$R$rISvznH5@tCTe2T<2<2)Q=;DP}h15ZfPWOmj^{g^M%G94m$ z=$G=U*#L~7MtF{K9~iJ3k3)S zvdsUb;L`sU81O;0??;yFj(3*`&1mpMK(u5to4j zeF>PX5TlPmB{Fs?@I7~ixzyztVPCubRdVi?aCrTsn{&ORf`KIm(|Bt3Thr^D%374^Om& z>3gNRk*?}ZLM@pezUT>jOb_--&nmwb5-``Mg-Xv?>Tm)#vT503sA!$-L zFcYHvKy9b`jh{GHlXD2MYpTbYFptU4X@c-bd&lM7^IY80VE#Vn1N+_Rp`Eb;&LSKr z#I{INCcdD)f22n1bXqbF5fjzquKmd7BMGeZ7n1GGpk8&WdC1ao(0eH}v9>7poAJH6 zwuP$0NEc%bipl$Qh!RlcZ?y1WGLINQyrtLDYdQ+KRl<%_F4j|`r|dmWijqGf`2e}F zYRrcKG^PoKx<)Cg$ml8ZRNNt#83SP=3 zp&YpxC4S?5$PB?dt0UEvRfFBbd3}}K)@CW`_|2rbtp4(L1_nSZvm1y!aY|j}JG#QJ zS$G{excWCepRvK_Mpa~HNSILqPj%TmN1*G%zKQ^Je*?bJ5f{K)S_eC(Z(H16#hZpw zUl2yTFI-bWV*5qDT|YYhS?uo>9uIl?A7|niw{xybpV6K7L)X`zXW4xHtw#8bJQ#nc zcQnv#hXrPTi4!dB+!%vNQO653hwKZ3z4j<<5-pibyih>?0BH)nkp6dr`$stXCz+s} ztQ`Nt(A1Dl++aufx5nK!NS=NuY+_!BXqQ2S5xBGcGY6%}Ngps2~%PHHhs427s zNop*4AYUMGxIo2D)pU7x>^Kd)Kaf)&R9xs2AcePG4>DHw>2BJhJIn5}go za4LKqS+fT++LF8$#N zXx~Ln9hIK=?cFHlGf&Yp^!qnP%z*gClc0d#`{R5LP~u@vY|jVo(0j7)gQob$76r!b z*$7P78?yjmks{oI!A`>EM^CMjo%4exj?asg2ONFN7#aNzZ=sa4Ub|6xKLj?}2z5%T zWY&p6DSlcv2Lep;xb&S{kZaViS7ORO02sQ^7E|e!)7k%C4W^I+RSjnD;J7XGGQS}@ 
z=aO?)jmHLM&(?Yx;dHq7H~#)qGf0!Y*zY+hmS@Nf?2~}2$ldOyTTP5qQS>8EJWOVy zy$#W{{H6hfbK7*-PQ0E8OIB~X2A!KvOvXxTU}pHTO_Q6QJuf2G@%4ERUYX_=Ko8FT zqjQk`VoVPJv07DIpD^)l3fe|{2G=4?1(JXwTw+2(YfE04N>FB8wK?ds-0-pPaE>;+$Yov3HIp%<4Q^6C)i zZuS__I$H22`Z5V!ed>fGN5uvLfD2}C;Xso`EG=dO$BT2QFnm$%j>u?42W3y8ZA_1hHUA4Vg>?z10goo~VQa^|B z$t4k^5e#Km8)^6jue>k^h#eh0r)u^{az)s~i7dPm`@-qv4K9KvnWiKbATCXn_`0zL zehw%;L(IRH7TwG~^=j98%eABsyWcAZ2^|pSnne2>g6BLzq=MEsxxwT&=K=PcMS_yg z*MNME*gE#F%PYH`uEM)>t1PZ_8=Ql$(=B3O>Ly)U&CL^MGY*jpayqg*Mrta9E zT}W_ZOfHuo>~Nos6Miokz%IAIA43ICo+$%82Z}zua4|%4J}z0U0~~Wq6j3f4zuF8$ zJ-b$dju@dpRiNGL>^;@rg2NYUUi|#h-Lq=rM%gY4So@&oC~-=D^qq33o_#LqWk*b- z_tf;`yh<>%iuBW&*|}8AsLiI-pMEZ%_Y$D&0)3uZ3XrT4rhmk60Vd)8Rdw1LomYxP zqofoV@@OYz z)FPLSG;vcHW#$@rlv_aTJdC(h8_cvC@W0& z#^*JaL^RPcI8K3fFfgu!%LW5{4sjUYhkEM07 zgoOOz+X;j%Bqk?xOPMOHn79o*pLe_)C)U4s_V|(EIK_yLTQeuocld@oh<5I_OP!sx20u-2wNg$fd&vM*~)nMb| zBpe`ET|uRfqSB$|ADEidxjqAOPhoho>r^!iU8Q0Su=f$!+BB$=!UNDXsVf7Ut6B!A z+M?rVWLjYK7^zGJYTCjsa)`Q;h_JSe9TyiaQ$S$f^(vF%hJiDsG0pO~%@pY6lgUuM zue7XSaP=1$03n?ZD;HK^6OO8?r9J9-*-i(Y8_wb@mbYNb&&3mPU)y}XXF`u<2h;Ej za0pkQ`lc5}v?P>&k&MpmL&ytwLz>%Zt?S2D4idDj>K-!={IN-7VK0O7px068t$WG- z0E>ZErQn9FLWWw-LbgF{6>h{o-w#ckhz$PFzH;)s0>-A`^%}c=g;G6EFC~O#n+X}EkdiB2&PH7eK#dfEG-X`u~B*E#JFLQbi3n_oZ8?Ja>53ODBOLH8@tUZryRhVoI)nqdZl@1 zOO$QID#iOEPF8w+_A8t!=P02P(4M%E`IxHI!hCJ;z2rF9EiS}>IHt^qzEsEqc9z8|Koudc!`#lOMuFYqe8x%i~8y|&Jse)Pm_<@sd zl)y}hv-WI&EzJ!(91f)aphD_=_nbIcI?)9E8`~NMV8I+=8reSc)`j+$7<{veKSsorj!sVu@yr1Y(aX*#ZGV~P$*tBQ_Hq~c4Wb=sITT%BeZn1u&f zIwlk<;)+>Crv=>b=k?0FTLL^~i0Se%9M5;_VglZ%;ag_?(!Zk281tfjEZ7QMaD*2uLNeZiEs~`dN z>3ngZb7^8({RL%1WFcc*+(WAHxw?~7Q7D@VAUQ;(0-XHO;7-8N=;UUj<+BXo2SAvn zX=Z@#ASwqfhhxM)ojQA|Y2x59$WT;_iiVJSbwX=_H_E-7-SS5GgH33mfULc!s2A)X zLn~-BJQ*~jU#hPCwUzyb+qWLni4yKef5IR$uwSrc)lp3Rf(Y@;i-@|1}Y$(0< zbw$VlpyM|Az+wid0fZGxV!I7GHegOtB3J-uJ$eQByP(V=J=IrFY)xlQ8I}k#41DC% z#6bxl5HczSP_)3nHM+vWH8Dbp_~_ohRBKQmDgRI(#ONf{HBp_GJ>Ax_N 
zq0pfgCz#ft_J{k#kg(V*{Qc5t#Aro`*^M}c#0VM7=MbFg?@#AQRIN@F~F#jx~2S!80eX2&`KDY6-umUMBwb2u_@`P^JfY4O+Zk|0|mkB&3ce z)t72%+yIYG43P|rT;^fYGS$e#yY zb8fD&Mj?+KZQ^?Z5ZjxDtFz*2_2~UeT}O^GEr^tcE%yGgN6&o`tM|YqE)0N^hKEGS z8@c?Sg_9a@zPH&9?NB(SY|qRKnUVNxW0nn$e42Lid$1K&L$+6xGI*R(^Tk_?s+2=Z z|7>B6@;T=zSq-^4YpaC-D$np@XlczZKYD6=1&8d+3*68KQUnLw$V8t*eygxi1v!nq zc0~7LJ7@J6+C`;|whrj7%nm@%D&Hnvzm#Iq?t1W04&;QWPgs95{v*86CG9kPw87GK^9eFSLqa6sGp(r|79J!4O8iQmmP1!}CHsX1AvbDXxJ!rVM&CNc7Wel8y z)CRDelZ1Z<#ljh!liZbyftQVJix1yVjhMJzs@`DsqpOKO9q`~27EIpoL3x$TZV zL-$$x-BFW%mOinB$Bp>|$JWqgec`U6TN?>O1fT8euk{*R^G4s_lN{i?{MM99BS)|)$J2OqE9!`2smgZaAK?`A~p3D*N(zghro`ogF$RI|&(3U)D4 zPkz>YPAj_Az(f3^#wK@xaH=>AKCfE0ke3-Te=+=gx-{s)R~{rezJ2FrMozw*Wb=Br zCRzXtda~bM$JTd(X+c%h2%#AU={=X}$O+-qL8s^|$&&hmU?!r)$NP=YT^*e8hN`35 zm1jitwtSV(Rg?gn$|0@hEA4bBhI3nKpphJ}4U}G;rfq(W1@=vfFUnE3Z_S^(IIvGN z=nKDJ1pY> zxVeU>jyl(+`=N6@Mei9K*bXZo)jKu-v|p8aif{3a#WCR03>`09^-)06)q(GuffwEV zS?oDzBK}`vpz;Re`TEEmiFQp(J367-6eXfDpb$gf$PVm4fcpNHZxLQ?^TR`;a{|=v zFyL}=eF+gpyhY04x?zV5)O!@eNIy9%VV4jMv9gh}@n)yz1rYsrdy=6PQ_Z3UhLWIH zlRvm2XjhQ0dPA@_3CQe7%mGG0bE7wkq;goyfp*EfB6?Sa&)F|b_AUo(_50{TnT(H} z&BXnK`Yvx#!#RW1M*jym4DpEaKYJoDTVfvs8Ym+hMT6XEH=|2UVkw@`vJW1 z|EhgG-!5FZOZ}x*JYpVsP*zlhp%0Z-Pyf@Q(mbw8B?Z3#zxm{Ru?uCIP})R`8u{BJ zb3E}np4`pNCA8)x*Vj8Qy+qsIU~Bt{j*e?>d%3C7Q+OpypwTaH$uDoOuFy}mqa&8p zM*J6kEbXW$W?VX1tXe`FWn~oUjw|3QROM`-t)=irD}aYwxyLWH_-zs)9ZgfQid~Z| zLM1W%_b6@GdSk6F`TU{?v<%gKf7N{>lBQ-H*l&m@Q)UqWmFpd5%x}a(Yl!n&KWfj1 z3BaGV?#i`3c3zX0k8OCzv$5+e4f{s+u^z4a`FvqIdBX4JR|pxsGJ`wj53R^hIx0Hyi;7AE{WJ{< z&{OBSdO2JRC~7N&5 z)3iJ;@bCR)9$`T7$}v-VUDE25ZtfZ`TVX%zAs z?u5N4ILK*o#_~sp^9}D;Mhw;r z+IPOFYQMojOVuYj@>(%ME!jG_(5(PqFvrF}X*o?UgU+k{f=FnZ@(MpD7FZ9&( zUYb~JFQyWm2%t<9I}1M5j8j!^kwX_%=^C&pgrpP}!Uyb`4~$)(zF1bXG^zv~B*IPai8hccXOPs|+u>}hf{vcm zcm=|cz=eqz9>zMq&dc@Acle3tI&6H4!Qk%ZQ35(9;0b1r3?3@fgr=iP7C$cPghs@L zk|)r|4(2-(1qw0a8%wyz0!@Uf2U9iJ9@fJf>fL%?y9p7ZkbWJO;+V6GeoU}*HdH^1Udn7h?H@MY4eRtAaY2;| z@u*vrf-d~Cb(@VNUU7YdixsfW=|7D-@h>8Cb)7!UyYltm@F#g@u}wA}#< 
z?7=At%d9ndQX)DyoX$MHowIzp8c^*}nY#h%2gU;i&qQ<$G&=&{$UtDgudu8$E>)w~L1&0+A@o{M=}1VN&;TW&yc zg1+JyL;2ytAi_EQ6tG)7!g+`_@|nQaQP)kMP*{0|)Lt==f!p?7gK*!*9{$-S)ifl; z*GPrn8!~5SD=62^2f@t_#4j)SfzKP%Z$QdX1bfsTn~Gp|AJmzcxUMt-7=FsHj&EW% zgd}K}Y+DPIgVmf%c6tOIyk(ctSAGDT2ul(|)-ee=H6Q}#rRqa=O_khae4Jarn?BY; z#KSHa;A1v};FH>i*Ha^neQu+fkVeeKq-d!Cn$n2}|9-}F^LIDC0*{y!D8Yb?f3@yl zU1bQ;o0_Z2cUJ-{T-Aq?wCTc?`iJieMAVXXn%}C@XEhRv)nClLaPEsON$i6%x*HHb z{*w8D33$(}Q!K6!!kqguPO7M&HlU5Jr)-c*PLGM`R9Qcq^Tq6n`Dt@xf8OxK%G@~z zv|+!?8p$AYp!z*{@OzNsju=)W zat|)sKyO=hYsUmcvvMod4q;!=YIv>^6)XrdHk=!(CbmY1Xwj!#wcz`KRo>5^6ZOk#8S(l zHM*<@52ef^N|#UdxX!h=_|kl2iD&CR*AkgAr{4&+Ji8e7M~$+Vfa(roX=#9d5p_rt z^$3AQ^v7w${lUv_X*wMVWQ>-E%VL$%bOz;^e^KSBpZG@9BtvT(wXDlGQ8#81!Hhca z+llqJ9M^6pv-x9!n29eSr@X#Q;5EMu6DGa z-S_=mGZMrX#l;hU{V;>R-Ua{_v=_kdbHge%dnfhW^zQmFL9nnC+&5s3aeGt>(+Une z22RPl%SQfzVYBnIKdQQ}4XHQg^mDoVDdPU;GHU^ZdZdI^QDqeS{C{C2I<$l%0D~<5<1CTpy9iVgLCOZl#qiIeUaV>$`oFrzt z7UpS@ASw%Bu{>-CIf$I&*?~Xck;9Avw&}%;8_Src;g@vCp;cDRu@+)PmMF|h%(@*Y z-${n25&9}+jL&13D~r{$1}G`!+wO@k?DgrZ$pQ&BJI&|vYz zf8}9glmnbW4C9X)QnmNZniexL)y0>yxz8|xaTBrXO#2XfxYD)IsA}p$>m}@ZR7GBz zRuP?QX!lj;`SWZ!wt04|NM3yKnVJ$Nx{pga$ro6W*M6I3q4%4dh;I3{EOz_&95oD~ zkrGh4YurvC2^TM}-=d|vC$M>2-Y%-X!^IdFQ33qk5~ThW>gq9iQO#5ZW&ircX*i#^ z|EjNNxy!yiwl-T13@23&w;_YVr9eHDflt=Z>?`c!Yt8FETokU-I}oPaoHt4iMTN(p z5%eH5+>$xUoSv@~O}&+;5gyAl+aN^JEk?4&Bs-e^tbyyl6%MI2Cu+<>-Matj$<%eD z*#>xXfX(v0Z4BYWJo%*M;Kyg3WSUW&ypBHCR?-knuW&rjX{1;|RC&Wx_oq(CU z*XQVCmEdU5^_7B)F9kpg-07dr7wGnG&!S!0s$oH{gDnmm((wuC@_fEwT;i}N=&V5c zdA_ZNB4$%3n(O29ck}xAc)q^g-``Mm844ihm#0X0XM9_6R>0Aoa;)z1j>B@z2e8uVy^p%u1CkDX%#kB_UE zD*e*g_j3k)`Fl<%jt&RmTERy34LLji#x3%8`#Z!miGIvb5rrxXW2IcX)FELF#TEcB zAMogrqf`M$c!3h5>>C8%oMK}4SPe;E*w|n=&Q%Cm4onl1u<=R?E?!X#)#{e8NrhK| zF`CU~nH4asQl66xLHsS*%bDR_n&W+lKbsr#q^5?N0*MkdZsSQ0q;wk`UfVVl;hLa2 zfi$y;Q0hi84>#L>hS03AJ`TI)gGJeetKqRzu>sq8^3r%Gy2>&Nqe1GKk6Yl(N++=T zRCIH#|B0xs9(Dg;7np&dExEW32zQkhgXFFeFB1NDIm12z)R^LPkAHf_ySB!-u5F;#a#+>x?o13qr^u`xVjU%3uI%+Tx} zA87qW-ee@dqJ 
zpiV4_Y~$N17Z~Df;C*HS=`H}k&@E0FJ6xFi^VFnfjn-Wjz9{p~1=h!^`F;MSb{J6`n8w%^6*V3&GLnLO5p zib?rAovSIe0?ruI6lp6ApP!vrDy5|QyiEMpLU!Np@xp_g1V_D0>JXMucxC#!fT#xT z?-R2trPa*1qCZ(r%RlSMx*LKK8}twCq&=Rafe)J2Cw{GWZFZ^vKd6hJ=@oaH4G|wg zPG@@m0N-vp+}4DGvtO6*crpeNMia> zu>5Hi3bbunHj&;ZwHk|5J?a-`frQ5ri?5>Q$O20|T01u$;2n0uc{J;mA+x39;%fHn z*svH~Ba{Uh&(sRQ)w3?ytk11DI#P%tGQ!RFZ@Ddx^bXn11MyTK0e6zuaxl(hpug%M zQHWQ9m4oGW9viL$iX(X+#vW)vF+@mssHal|pYjl6&I(uPgm15KOvgi0W)x06^rSa| zo0q-`nYp81?Qxy3P7j=Ixr@`CSyuhTfczUsF3pUH5ZMO}7gC{C}{(h7|tO6(0mNda? z$A0v~^#)0axy*v{&SGvip>#eAZ_UXV(;aqPpepa8&kU9;@>-!yC>OpiQu`h5N37of_t^G_x@pb=qvT(8|)*Ye&v!&*%|09QVssG^=cc!j)is8ob{0p9O6O+vf8|FiA+6U}{qpvYKcFO=q zVR^@GD6d>(uoXa4fxZz9C8m8u>IhOa1pRP!?=jB`fJy)yR#&3vWC zoObGHGkrpY*163^wZ2&jrqP*$5fpK|ByNGAuoq{%_u8oJUdoc)DKLMQx~`DKdlWei zvyiH@P$uNaBKeGB4iVwCLf{ogj$DgUquZ3UdJ_c>=@?)Mj6sGJV5b$IPcP9SsmF9a zIb~pZaam40>WZ!ftvRg%XP;)@_~&$FU{n*x8VNRxrTAj4>FP}kNM(voG?}RhbOF|; zOKWFHGc5`E^$8E~%6&*Iwf}@jP2~?!Vh`Ch%6Sr~n2-YmuT1Pl(Iz8_fjJTnid?$5 z2~VBZ#R#BtuZbs%ZK(cZN?P1WNTelbd7_mug%k(tqwa)8nyb4}IyRU-o0l}&95(zCEZp>F#^&d4OT9B4?s0i@q>$@k|cP zS>9f?6qT7}*vzxYTQ-FNjo#*0lldL>Cmd0eP;TI{81=+Z0i=9-Qx$%$&2h0 z90bUvyp4PfWK1AsEpPXV#QDfqEtc@pB$OnjqS`5}HJkopTK=k_kFI;K{L^G6%UyAz?uFE&yiwhri?Vc(pYV6G0Dx}RautzB2#EQ1+mKVMj3 z;gWDe8!G52|9%j)hsjalp|&7T7_hJSK#_wtZE`nxJJ4IZM`Co1xh%W^K79P!|1*hT zR*+G)FV4)4pg$hFT@!mX9?BJ83fYw3%=hS9UQ~l;v;OWPu_yyV5BU1JPtE*5R0j~; z*w@f!mS5pPyLPH>5_x6`wcN`JHCqu1`aMP5+>a3dUD>rSQtzgQ#RB#W^HU5CoMTKe z2gmMj&H&L)6K7lCyMds|t9(MAr_pGUR+}*{TUW))V1UpifIk>-RT}7H=c*0^*6Lp2 z`}?D_c(XpMHbM~9aEY+aR@nHF%>{6IjIr z%ou&YMsXpbVZ%QyCNiO!upn@^6vi!%?_|L`Se;Z8ySfUu^SW2X1bpi}*39R4|U}qR~anHRq zt<73|&0d^t2wzutPG!E_Vx1P*oU42m<;YyP(~$&RQ94LsdRcY1KU<%dXU_EKW94$n zlE>2h$$3;Kq^j7m!Nd=NozWP{mh@&lGXPFwR#cORQ~FW-qp3eUg@A9bqbl8fP_=w3xH45a3O`f+v1l3yDu63V#s!t%+8KD-Ojah*}9u60R$wm4me_M zF5uGPcJlNG9#6~_0TgR5cp}S;mxx-r9EB@=h4AHbUWm{E`H2O;+RU+l#5&UPC`?jC zLp(6gDgwXbEiaH^z$jrZ9FLz=2E^`kK879i2ps7u(B$;DOcYl)jl zQ`8Icm5^X5D>O?x6)r`l+SbeFS{UmSUEX&gQsT|6$udfM6zAlQ<$xh(nV^)-r7M__ 
zBX$^QRbwb3*M?v<$)ETB#G0`Xb+%<`V`{z88Un8;C&8$OS|Fu8+pHSzvpG_jraV!3Cp~+h z8{c!H$Fm$Cn==0N|W^_%Jia#W#3!l-qn}`><4Z zCx1h7wV{=hy0H)SmX4}YM~Aqc4~972%qt=I zdQ7j=IXcnIvwjQNoxXtU!|Y(UUc;}34SRd)D6s4Ni0~O7Bp}Z$O-O~S@nXE6IEW3} z8)+Ap1K4-ex~SuP?}-?FM2nB*A`aQ*zioAj75J-F6#9)X z*E@oMXet3mIP9ny)XVostPEsERbyz0dl?KY10bdk0mz}V>v_|z1$Wbg)vKjhDDZWHj_+cdni%X76gA|gPX*FK5j|ql|>!Tx_igiGKSNf z+5A>pkY3yEfv&8QJ&M9qftGo8ftHA?+eN}T@JfUM&+F0DuyI>PD--q5bhC}FUVp_F z1^ilSw(Gzt@*24h`o^D_#`xBVlP_D*C1t&PrrO3BH3rsO@*;xqrg2Kndy?E_?w#M( zGT%9e&`*VMuy6;7kHrwlF*w9*u`JJ$qGvs!KzhRxJcJhWb3~?6uJ9;GZp9H92Eszm ztT`0^w@7Hx^Oiic+^e=OP>uUBJsZkH0MeF1@lAJP+2c};A@#XCAE+{2^=(+6y`Z$c zR56-Mfn|Y#c2#JSJ;GayemS5Ch{sXx90t`3c_frWiwMCo6o8cDLHe|1KzHj4jApUa za8>>NdU@K*qJ7yFIDA*Yi#+SM-Xp8$Sutb=LH=1+VNt;McoPsbR=yK(Y{qtP0W2|~ zh=uIOHu%r+pR#?|TmDwO+jC|p&K@@^tzS)5#^|7fvzuBeIp1(SdYxu&u#|CKNc~#5 zSTJARSg-w?FXE8URBGTgYrwtBn0}5HpKB=sArKyaP!TJn7o`E3tjHq-2M9E7JVUd0+JZkE9k$2=PHW!8YlUA^ph(0 ztNiDVsStv{3+UH<_`SXFX>j?C1SQG$(=^H=(J=YEsghG`NzwB6HZvd6#s+!D45n^W zVMU4`7oL9MjO(j6w1s`17vz5v(^(E-M~(Q&Wh1iKp#JrIUYxf${Y{fG^na*&r|3)? 
zt_?G`ZM$RJw#|-h$esb~?6gJCpbOXU$r3QYTfXwRzvyZn3Q%xbwFW~lo%&G7 zYs8}Q`We_U#S<&-RJc=yW3-jbn<@*w-BMa=Q&a{lpq>m?d>5CP4U*cMuPWBHBtc|$ zh(uDysoyUK<98Gtli{8@hquzubKmBq0j@C)EM zCdH;00UhoF76ZGZ=X!s^Xa(=BJ_1s;c(4MRXY`%WJ((o&X_B?cWKUN9nvqo7=W0Cm z_Pg%_pPm!==Hr5H`cw5lyBH3^ppN9siaIee_f`H@LgJDZTiyB!(vT`tM+Smp{b_09K%evREot4-n%Uujb6Q1K7Z_ps^-y z;+bti{RMYB%RXwby`06PShz=S)-MEK{r$Z{fWT;~mycrEJa#Qe*X(jPQJ6Ei$1RCP z=936XW^pgq9mEwrj1yIz3<}D#ajY04x}w2t!zt*ewN~1af*L#VDz~vT#2iuT$ceJnyH-pR18L)`l{3Tlu4iUe|eHK3!(|iqXEq_%FrXnJURPhsn zq=C`Gieay}K9O})7>~CE}^y2a?$!J*aEiB?}L1MFRf&{3cH9V*#QO!CA2!Q+nA z+Z6sYP(*UrrR89Bp2iWNn)ZNG1Eeh+pxHh$%LI{KZTMEwGL*v{uC=mCcym+hhrGsn z7^UPJI3O+$T-s*jr`Th-y58ifO{<}K+4PU$i$uv#SVyWRin%Nf0(Jq0=;8yG0Wub5 zMHM1-96&hI^9eQ*M!Xbh+oh)|fNpfabMNnNXAv_;EME|mIgZ6chqH6mzY2y)LNrn_ zJ}1f3PK>ct-=o(W(svz7ulfOx0fnT_P_1^*geD!3*JsNdUkEJ1V0OaHQEvG0Djpme zps=mTN%_UCFa}${!Ew>9FZNCSxv6SNHb5lz0Vt)ybffsyD`BdSh~UDzVr-)oBJXdI z_mJ*Om(ShYpSNh)CaC;NB_-ZMaT35U0i`B@_{9Nn6|d*q*ku zAp$fiKEcekdDtyQo~1c9(I%Y!0p2;2s>5{z9a}t9tgvOFufF>G6M=zfvksw_4SH?c7^!VQ8rA^+T@Yy zTc!Lupm08h+^k|){2;G?WW~KY)*0jNv9NNPrj>WLW&2-4Q!=lu%DOUn!=IGgfb?C#^I%>bY5(`TrVsyapZIsX(;nG0R5laP2}_`>~3X;#7Lr0mF^c+Rys$^ zl?e9YIm;@l?|6i2z}?2tuVt>4EXCD=?MktZJ*!u6fdu6+{o8&!HeLbLNn~g?oNCTW zO{>u%M1yeM^xPJBNEjglePOM#SQ4V6CY)wk0WFEIwyyWVwE3dpCcMO&FuRJ|yi7&J zfZ&_aie{_U=5Es;xCvvdIK34y-dp{T!6O zNn)ws{Blo06s~!3vS`a zaI#C(x@vKbWzoVd+v+#1n6UZ`qkaW|`?x8clpHGM{d+7OtacU7nV5;`si|2?1qAsI zPOb}_m2eb()NfCKqWJHM?tp5Cx(fntwGs*wc%#t9a%#(>3Wu+Fchwp*TjSWkop*`|!)k0}QfZ)#=SNA3ns02p}g?^cn3 z)NP@4(Qe(UT4!NY)W%vtofA%2azX#|YIy6+maJ>Rhh%bdfR=iQ{1>UZLClCn zxC~;PW4!_Y_guBMedN5Xe+Z0byPM&gHdfx?enbWl^5*_aUD&=Bp2F@=TOGAu?-)D# zefCTo3)%m>s`0hyRB>6f9#9p)4ehb5m^wA3wjeLu3zZ_@uVA*34Y^Pa(aqu7T>Vp# z@_~2y#a?}&G1gA`f-7PO0)cm&BjhhgQa4>b0CzGaP97yP>p}I)_g=dSk7-BaGQ#a@_O&z+~;C}}%3|OagSa6@J{U~!_ zZ4B5cbpWqN`b37yEaZ_DP)L#I3#sr=i98X!+LiMXjKPQe-J7R2l}zBiOdR?XS*~h> z4ubCsrqAxpq{G4`rLHR>VL|&nIFr}g$r~{xR!*!2eTQ;(lufR9OkPX@P>4QyMkh5! 
z2Ztu@jOAWhY4k9K0M2n3jZSy98hEj20S%|UDRfLbgX~JZj4C~ekn`U|F9_L_%~>W0+)Kv-7n>Qd~U96I^<_g{b0sQ{t8_=L|68Bpp@#cx2bc)>g;OyQr1s?n5lH^O4UG9MNCItz#t)+K( zlsK0FXS22rI#UJ>2UFUC3s|AjMD$4Gu3N|vQ~18$3)|A7BXf5zAAtzCS07zNwbTyB zB^Z5|lz3He@3J!&OjkLp>ENhq9r1nPS{>U)>blOQ1EPuf;K45<8{S7CCvdKRNr@Vh z1eY)$lxD7jw{B{eaA`8Q$jY3EpHixt%(?Bt$%GSXkC$b8uz?e`G`SsLv|2?L3)5hqHKO!RHtfu5j& zaL{xjRH1l+-QM<)ae}Y*nbv~rT5~Ah75C6DdoDD+#NM~**WwJI)*!*J_YYoE@_o0W z-X*rzd%XLrp0=W{ETa z8Rz{U7wycS*@Vh^#=$5sYd6G>EDw%~HEFfINi`F>pWGn(n~dOT9D)ap-^6m6_}jBZpE)ZpA_sfCMb zOBdrd6FysAbaTT$nDQY;MJ^`izZ?O$bo`=auVmu~Ae7V|ks#RBcQtgpf@HSnt}=I= zb5-Azw8&{R4RTO+x4Nor?O303y6Wg0YiY}tb#}b@k<=&C#!k{IXF`jB@MP>!z3lv5 zsjfNu!0h;aAHvT3Hyby;-HmjQXea=_I&tYmKVidl?MTfNSkySs&Dc9t8{85wr$srE zB~vrvMV3$ua!Bq&q8z!e=AwJ83|s2To8jcUjiSv$o_Jbo*@+#qOv}W$YX$x(5N7n^ zkH%!~h$Z*;&Ykl(c|?*lBk?ygWmQW@ZSybH)ygVodkqhFFJ4nY59zrMU75EqOB!)R zRSK#~7t>>2O(T|PyLoj^C|fW9gR5WTEq)X{H{NWfk64!|bJ1-OOZ(N@Zw}5T8Y4Pb zzakV=ABl#GoXTxzjxZhM>@h6&hMG>E*ToP7b~MshNC)N}^XVZ`w2h-nh>_wKEV_gB z5IXxeV*az8w%T?-e!G#G`=4my-qJP`_Im&q6I?r4KU5pk(;({hF0&}l|+3}UpC{pF2_S_0q-yX$9Iw82P*3I#q_)j|o zbVw9zP#GOu=TsC3Piii}F5v>yF9e@{4VCb+7CfOB^mi<99vtws1=dY0Wa5sg#(riM zeNn{A(5;bA9bwKOT+?Bz$Z-%4WHL-GyCrM@1v=heUJLr0Pq$cM8R{UpGaRh$*w?D~ zA*Yt_3VSoiWWwjXV~#Z4(+preiSbjM11S##$8^nImmqRiweBV9w zz7JUI6KOWyw`^C}Dsf(8Eon1zPtV1Q!EMbz5LU^1bYR8E2oo?uC&S3vX{RqPMg0Vj zs%NDoyL=8-b^itc*vm!&Yj8{%l>Z#|U|y&;=X*=`0UW37eG2hsKF`yV7iJMqP^{5j4HcvxnU z;n=-RNf}y;=)rhtn#6*BHI(4A^s90LLR$fsdwm+vwv^Ctku3<;rDa8isDrlS<$r2%xbI#)gm|ro|3iybovm)P9Cy2V&7-QOXo~<*$6oV~2D74jZsYVPf zxR|yC9CG#mhj1g@aTS(`tTz>P4JlGAGk&_A1}F^xT(;f^%G@veglujD)&i+Efavce z;3oWvJSWd9%tI1@z#_&|RjGsVG*@;*=`Zc?e%DRa^5N`${W=lu3kLBm9@*g^4wtLXi678?L~JeE+FyaU)~!yc{>!yQf9&p!-pG^ zor4gzpP=0=U2Aq{E*;ITA{=P^%xqf#)g5bX*2RG$jS zk!B}0$*vDEUG9SZTp)hCC2m;)RkzREU;tSnv(e<0`VazUefQO&odW%YKx})se;&N68M9kvH7QcR`Vf1pM z59%%!KKOw)&4rW`D(^aj93?EUS#YLrr)bjdkghbF-i}U^HsQdHm$S`s%(!EJYcmgjD?xK_VG~K2_* 
z`^&|5X_oB+M$Z7n#aC6xD%x#dz*jYk0e#UMdm8|M9`NrS6KClR_D`V|RFnG^@<|s&%XuL9jUZL519>#Q~hi&LK4Y%W-eLO5+=p2tkSC zznTe{DXoAL_-6;p&hmfkGm3son3`L);Rz;#_*$H6AwU+A|RpF}&)_ zDPZfqL>2AF|A<}weaB>FO}=&RT4u-IM;tHHOSd_B1h1W69-k){o(DX_9zVwJ>BnJY zqmvS;BQnP`aH9)djcMFxY4-N_ChaiG3wALb8&lGCH7eakFZWNt#OghxlM{nNAlQ#Y zogtdVoQ|1c(&@KDSCRt{+-{4NC&wKeY3srz&8RLYgSVIUFzma$eMcd2dnF^dbT*uW z=+037p|<~re>3`4;?WfMlkamUmC|tWDCQ7mQ?geaL%OVetP5^TgxN+vs`Q)8XCUdP z55tThq>+PYwx<*ThsQD3(-Yy!#^0~hGk&o4%yJX0D*^us_1TQk1*-Hxgfs&zIZ<@> zS_7m8#SR*mpLQ+sKQb|=Ra>IY(?4$&#r&1L>mL&SEW`)G&Z44D+RUBn^f;u`(%GgDTN zzQw*~?3=_V*_|Jmuo=_TWFQpHG@|8^FFAtmZ-!UoqK;!XrrP3IuCp`q-d_X1vG58M zMzU?gff7SZ29csz?}nQBEeA2g&{4M`COx{Sgi2b#lUN0u)>qFO5-)K?8gT_UP@u9v zrL=tQP-i0k1D#qrviV=`y6ni7Wb>}KB!i6F@?yX9Fe3%u>H7jvu1{?GBF5H7>wn+M znCNdHSZf$jSzCe(MFb374&U>Z*05=K=0^zizjHLxiy*8Vv% zoLJOK5f8mt(!er1BynT~$o|Ugn>iuM`J^V8`?4FMqz_aVCqO%! zjbV0)^^gK6i+aqcMC=1k5eKF12$~~;2(-abdH>-z59@$x z3kGzD7$V0bYw3jYqbhrtROK1r*7*=-3AqV0JSrpN5as?GnNCD|spW;PlH*yDf5Bs%kN=i2Xdo2vG4t+ce zz0gpP_Q*ruiSNNRNZVHx$O9)5t;A-^OU*oa_`=SB z2xMJ^#L|wC2t$N#3!Nqo9XpkA} zNR15l%O7I)JRjlz7~GQ<)?i;jjz@`5ooE%HO@C3qEZhE9IQ(xUE`bZc_hBjq6d!Iu zTJZbOa$*1o9Ea&A?m^iF>}enM@a6V(e*VnBpx*`Xb>!D0qvK!x78bZG)aCTga3haKz|^3xCHw>>HFu-= z|3f70lN`ptk`nvTL0GtwfZeG9B^oku|81I&HBP8N>q#Pu3CapBlM2CMz{&#%5(%)f z^kJ*QWRzVu|HB$fTJvnl1?gAMg~xgRz!W$Y2oiZ;Pt~SpTC+6lx2kxNNvxab6Q}r* zmCe+&C`)P^gwISGW%x@8H(uHry9{bfOa7_gB9J{Uy^J*vfOduc;Fzie*n4P?dpsG` z=1t_Zl5a0aP}NeZ#d5~l3yQ?tnn_>(s+UM}I3y077t6*plEE1Z3YA>mP^Vd01nEUI z@g&XLv?j6at^vJh!7}C>N<{LsqMttNNpRS7I5yM&6l{Tc65Zd1>S8HhAvElRv-^D+ zzyfkPj)s!BH;0{-YJj&)*&i(YY^%unKCp^HBo?*4F~~W>)1+B%TNA_ZvuWBvjEv{f z0+En1Y60y0z|#j8+99u{vpywuTYM&}_qiRplWSiC1X9m`zJw6K7)2bTn5_bG8M zavqxne`=r=96$+%qWqirrj5`ur9gs%gk7;=fRhhTa!XEAl%i>0FfzRoo46dwb6S1y zU~?#JVNlzGGYASxUOy4ZqjYW?3wE-SmZ;EimrZ9eBlWP;LH?c2pmu`ymvaITuvs$( z!~LP#g&Wzcq3&OI%hY2TX8%Z`Dw=A2h|lY2MBpZuK|o@lG9Is`e3)h&b6$i*ZwN9Q zkIY-D##+y%8;slYrd3U{p)^p}t>0N(a^S?HQCo__^dAKbxVADM2TUdMg(?#wE^9a4 zP$RP9K;n3pCUOxKmp3<<ZXBdp?i)2-cW 
z06tS6`~XILxH?NaZp>&%nOV%lwH44&Vv`HMA9=7zrmqTVU?(PrraCxL^U=zu1&CA9 zbdiDDgAChT5{(y#0ZG)rKTr+-CsCP|!WZzj{XXtQ;G}qmtp*PM_+T}6Zlq;=JNw|Y zDqIEnRAzlpHoreaDZPOvH|-daIWNG*=$^nL-T*N#wmO(3CKQm?G1K`Mn9tPbti7C@ zadC4LD?06`^M#P7l|EBL2b^j|f*}-bncn0gw>q8ku0G9SG18hk**GOkd?7kD?z8fF zOX_I4dAq)CyWN&`L62SA?l=I;u-5b|6c4<`oIHB>#3nC=pbdpwBDFai&^V6nu#3q0ZLBp6Zi zsOk`~R$Yv-wsnqU^t$G&GqRu1YRGsd9w&IFSWQ+M_4>1kw#oI^$`9M;YX%!rnb?lU zZmzde&*AuuYg&a(0uD@)mniuFi`b%j0wB?34YR?eBt)t)1bi38K&W!1!5LW}mH=7HhP^)V1>HaPcawsKcd=9W9-357%G zIeiMLST;DP_g^lgk76NgOt_JvQ{n8in6gkg|1wKI3IAo^(> z{*Oq}^~3u8p)C0=|JgBqC!*)#En@^ZC|?JHMjC{kUAhS(vBxiHP@k>MEbi&vu8uot z5E+XgtR^=Xn1(yvbFlSQ71zp3M)WWlVm3C-k(FAF_9Qyg?f>`dS`b#~740^fCVq&e z0%^ZDw7M{bt18@`9VZ3uXs+@AJdSc=gW3&1pfJh^W`xWj=Y%B~*^CVu4s2(LdccF| z05Zhm4}Q$zmx#FtBroKG6bY4bNDZx0FgiFIR}FV%B@%JpYi5zyrD&3cjb$`0zW|hO zYKah8T~5$Q64^jPeZ~<3j%EUxfxd-h6e01?1ByI-5*!2(v;#6dI9i2X8{{}x2uWIzK8P4wF&z%Va1S`7xTLf@0K?8c!4DuN;v_|=ATl%uvtMWqSMVX>aa{#FI~IXI zdq%iveUepJ@PZBR6>M}dYGEV^$_b;@6EUXhXkdDh12MSnI9RyBMF;eLB%Tz&Lij2| zY%IhgtZSsC_*$3RA|c=_=LIuq2pgBNlym#2r zq6b2K@!o8WR{p5Sv8Hyp6WdjwYh=MND-f-Q63+^;!HDbQ zZgSd(M+zpulP%!By33Q880{SZ&04-in|Uj>S)mf{>sVds{&+0Ed3CUv=1KRay8wE^ zb$+I`TuAcZg;3e*x=xF^-+l?JsNkiSryxF)Idit)?>{N5SI82MO;@Kb6%S)Kq!^$M zFVfgo;fJki^ylT(2vt1g2~2T}xes#ytW3{ujqm@Y3| z3%lVHv0?LA>c6xm0uEl2M}(mv`!Z7$<7O={{DoNgx5{uOU&YDPaJ=|%;nm2pD{;)4 z%TGdkA3QeOx?rR{PV*jVhOFZfn}302EZN%(|2@WK8(bW-phSrp)cX&qGclH{0J#Aa zyS}->=&A&sK%1huc~L50xDHB|dKZoMAR1KtSk=Ou6(JM3)YLGmSt#!wi~MYQAwfK= zrQvWEZp`tQNW=Cok;<}0El~9`UlH1NoA8Tm#7gt2y-xjy&PlCD@5ctOSm?JMmIVIL zHDBd|lDIz0REp$=SO=46r6No4E0fm((3!JElP878hm-O&MLv9hdy<)Yj_UPxcXCf2 zzuRW#XGtYJ#pcK%B%d99{r<#jR-D1w*x$<$nHSI~@qgBP=Sk0tVV3s=?Rd>Mdahg1 zL%EnybEZ07fnF10zC3x`aJdQ+wmPkg9{fGT6ft8njJ?vdhl=`KOZD63cpberfe1_j z^6^{4hXuDI>+skBOm-7Ij$f?TbMd#eenX9J+1+KQOUmwxZdH(afY$DwcfOVGPTTFK zG+&Lotf`>8b5kVg9%` z*)@ILu|HIksi8!%h+nd-0iVm&=^4SAbkZt!?2iJ~=uogbEZNHr)ccYv2p8HBd!@)N zST(dH*!<1N_F03&hIP%I$FBLj+x4+PzDb8I=c0fu$Hr4`yLZb6l{lvj>7sfrbTiIr 
z@@mQUvCtW?&30P#jNk@m&qINKk=_|*{8S#*(TCJ~0NaPLgTGvmvlCwsIu&Bb8=-=| zq7$a0MQlEz?@Ys*6D9wy{0}*|^Oo*}sk@4|lZ#A3=k{!};P%YMv$NK?2l)?AUqF)& z7X#=v`UX#*JNPrb!>IVgH|cmUGVVb?JcM<#a|@isMM$Guh#`X&TIO^XZH+PZ3VHT% z2{%u_r~a7`7e@obfowTvGHi};z+4bor4Yi-wKt&c*COH8S;*x9g6WtXlP!h&3TfK_ z|5z6)UZ=e2ED>R?E;T3F)M{D)AJ4&pH2w==Z|q~me^V;&=vgjc}yNfexr5&_U8meYWfPiOLoiz>}+r9Oi zBD9w)QuOK`EZlDINa_0I*BTv0Mgoj-c*4*WWcQJN}OAfbUMdQRy%=!Jipw3&ml*q zO6l2J-{u9fQ06bv#Y%dpqN3L+ISk7g|7;-KHSg!eba8s4F|J$?9H^+Zy z?tN`6`;R{Siz&3+VvC#QWZUp7O{&Wk1hoJmzsaRU;1 zA2Qdqe&zuHXO1bsur0DDPa+?NLLaBT3q<*Vy7f>Z^7>{^B>;VRYJfIwOfyNC`0oGp z-AAw+_ z+W!7Ww6wEAKu-P(XjcworZtIa2krl=ge0w(K=0vI46Mfv#B%Wxr z9_CKL>sMzRbv-t!yJ zx|f>sj}qu{fPnOD6_guIkWeGcEHq-hfHl&UNin++Vj-`M_Vzc*xI_~*Qsn0?BF$3~ zshBOUf=cwi+>tfiHQ-FRk@_@iW^tISUW?&|P)VwZ%J z`)rEpzg2yW)wK}QP|R?y%`i=*ZQF>+aC$axdVXXHxS_(U;wlC%I8Wye@kAHZ*L)ep zkKGALJM_ca=;@=kc$0HosKg%B$`y8B7g`i)TFTM2+oY1rUFMw%qElTe(^YCl&u^rm zYar5AY98I}r*-5r3yEbf?bwZ{9^`sgH?auzq$#kM(a2o_m*qj zT9bnWlsruM^o6MpkY`9F7GYIVk8-J?S>|1F7RCu9nF`@WbT-a}WC_d1X$*9OGVaR^ zGsc1XAoZ(~tB%ku`RQ(i!Tn}T?>y?T%wiVk{b_7x4tHx!!37e>C=6JnzL1t*ycw+> zd62^;ZUVqV6mchuz$YFaJiGLc$;pYU0(M{k(cq`xpgZ%8EO=eMS~)vWXVAaBJ>)tv z$2#J-^G2uWOeAPg+Wi~{Ktl8;5vwRNrwovr-TdW#^_HR=iU7YYqQ>`P@r-5IR|-}^ z)f^9D3*nc;^wL9|r1k|)36=$F@5H3`x=)6OWgh&~o4)?VF{R~B7eqC>WGg1r6pV!g zpiJNBhB)%ft0C?H>h#D$>*T5uCj*6?$?V}+SIM+JatLhsfOpVkJ}(NVxUAb)V9?+_ zoHe`yM_fs#Q41Sr;7-^5MO4i;2&3`+;r+P#bk%d7)vlRhK{@UDpL(S4=kkUSS}57PZBfEA`oKWeSQYP-E3@$;dge1*5<`}=u-zgus(k3Bqykm2`pr@ld_mqZ90b0TE~)RH@}9A|_U3K=tb^n{@v z1Yv)N`X7=plNY8`$Hj-g+J?QWZSQtzpEwj7Q5oSO|_lOl|@pl% zodki3ye)YWjJl9(3=EMZH(s`#=yRc#6P{2VS;17Z<;925d;GE@jtMhH$JN`TA5h5) zxafgIbHwZZpuJvPZv+)2+}u!I*OP?6Fticj)c9tqY1l5T6BAXg!zw! 
zI!vJY#@oM_8P-TlBq%J(p)B$$U(H;d_h)L}eNy;G+sF1=aXwpKqy&DpaFw%9XJPfv z&a;sz7#fxRP5AayKL*E|PSpWN^JzyTAT0)|?bc`dppjvM{IT`ZHN{bIUCs7i3`74w z?F!dT)>0wEB6=-hKwWiUI#Rqs^N-;u>i^JO*r=CFJrqe<1BoER@N9c3EWfCXFI#6)>eK-W17Rcv$_-65e8GJ{ECD78P zN&$FN_P;e>RJ5%IyLUUAf|t#e+NYyP;Mt$LDV2CGZS65O-XPgF2&hhi02t9zIBnF) zjx+)hOARbE47bo3xZ>rJ$srvs=3>dvQ)+C#+r-j55I0!Y*d#|17RV+xRz}#-Cc|>N z5KAOuiWvwdijj7Fa5${eSuwp|Gm-DUdbYG@SL5b|3Ti&Ws7=+e5mDb8nC+@>>x~tV z8X>zRg^35X_yA`FN7=hcKnL{I_F(nfE`cD7@>hS z|3{rujqgUbgq1iKa0jlYT+z{Pabj^qROT6nNkSZo6d0Tkva0OSal8TCJ>Dn3arSZj z_(b3eeptpQXF6V3>JU@F)EeWHtngR`CRr@IwqA<>SqT*;!NUN(8?*E<;;0FC9)sw1mVfu*mjV`q?pf} zNN(c=6-q>GoNf|HAk^!QLP976r9de}2m=Xy(uzMJ@I-=n3LJk&!Xgj}^2@&yb9k%dsJCkgKmIk&nsJb5(FdTr zXTO2H6wZjqmq3t@XKu8U4I#(E{qF0-0Mze@&Ortf8sW%?O_TZH1@`ncWBGX%HO^cv z+%8r8_|?eMDtgE`Tw4k*z%P=qdc2C)12hC6n4nRC^z(~>!|OK(U9}bbC&G;*{a|!x z2DuSa%^VcEz?l#E zG~n*GdiS;$>)b0uQMOmbf-87y7g7a-6oI~kTcSgZ{!uuWkeeT8RY-KbzJ zj@iB0zJ97RV=cA8!IdH$DZ%DWpyjvTl7&loH;0mCZ-+G-P=6g-h#-fWV}NY}V>PHb z$=Wk&TszKSdXq4>%rExrdOM?J(cO9;-ef+Ax_uoUdt3hz31gwS)FUpXYNWa*M*byD zA}dg_OC25gxp(peZoVx=Wfi;k(ZQ+9zzWTL)ryXynEY_CUKVRx6p?j7o`SsCFTG@` z=P91IFCi+|lN}_ zlLCk=^Q;5~zoTZ;+m|kp>cs-*F(=hBo*a2DVLD+K?^wZZ^P$zdjC{*Bho+SvQnn<% z9@ip2Pzvf_Yyd)I{MWaCRTe{pb~jHb>9nsLn2U(LpD|Zo^^eM-iqTJ<581TYvKjL8 z8#gjDb|>LGd41B6w?oRH0nDj5OpK_3ODJ+IA@3Ly0JX7%5{1@h0|g_B%LM)3 zYxXo^IAF9iWn^GF6z`vF)JWewdBT_ea(1MlXi|J>3CO_M0EV7@nFJdyyS?Tq7*G_c zgprQ-%_!EEQj8XP0>jTQ(~lj8-a0WT7%umk+Y&JUsxIzn2g2kW!U-rvA~y&=io5+bD+PskQry>zYKVp+mcF zl*5mJvVr9u#sk&A7sLh>CLhgp9{*jD7kI)Oz9lI+{XTaq z^EwbrR(MGBEKDA|SllyP1jh>@?op`Zfsk>!B$?orREmar7RXq*gJ?z#9^$>Ro-ieb z>7U)phr4)N;#3i2oW9hWa|V5vc4eNWdEhVKDx?L{*^W&*WY&sgGGi!gkluG8PC!kX z*Yyoo1-NH+W1=PMx`LidH}h7xkFM%_xt0Dwnb4r(M@CDiTY1RTLY&yvSOTNSJ)-U^uSzFselgTseRKO zy-31KEoh(w_mDwKRq&tam)MB1Ch7Y2ot5Y@1JFv-Il+LHoSaQKQhR%uur)y*k$~5e zQGE393?A|5W!)PFvclt_jBKYvI9>NXwQRPqWbaJ#Xp*adSvAqA4%$r56D~!ZYN%Wei5d z0>rpI%QfJ`Pjpj5Tws=cY=ueQQ4$43NW|gDoRmll+=cpuBSGY!Lk{%8xX?2a$7rsk 
z*%!o?mE^}-#&V)w&5f@mQ;2WKb`kWkiwOuaLm**<5C6uH1WCaLR*GUJ*hfq1#8K<- zhNY{?)BVHCv00c$xFtUKB1UV>2f?Yd1&oxs_(f6wN;_A(yWPD_qO63b;G`?Efq%8_ zi|}{hCs%Y~I)Gdwmq!)K>u@VHC3briifJ;A#cHEb(Ml1zpZhcBqkm##)FH-|$E)m{ zz!#ZebsSQPSW8?HpJ~m7Cs)J@cu_op2t(z>KQQx~cf<3&GBl(EUW{!FP8uYm0kH87 zRR$I!QM(e+E=&)BYJ-G5g$D;QZo>#_5G1t+Du+YQ>>0*!uhgVk^4n5l$!Js(COTQ0 zxXh<_WWk=t4CvV%BV)~yD4Vs-{IqJ9Q^txPc#Nf1BY=caGe*^mS^&e+{GA z*T8YalOt^zVCw}gjj_dzpWX>Nd*Xs`>ywOP6aPV+xuvC97EOe(B}g=}2AEKq#XW<| zp6pHWs62}phTN@y>ah*<3Al?t^sroKEyn+0mejh%S6s9j<=0JiuwyZDai>~=D#|P< z?F8bV~w&4hVSPrIr%<2DDlDyPj z_m3_h`^f&_^=^@nP6-kfs6Eb1IM#49BJNzmK>D_&@hO4c6JRtLf)1l#V+czl#OD-c+a1xKtr&FE88EC`>I= zMU29yKKE|tPAedU0T>j=1NF0!>H2Ajm-ve?@af}Xe?kFI9f|+AWm=24xw*f6e=W+L zwjlZO;lz?V#~~6#QLpcJh|^mvl{SRdA~FlO6imd&o&VxUzyoU*x8r22wu{k*EWJZs zj-f9kZWCe)+I^j?SbA<>ku!Who>?rkb6$4~5gVF9L`^dRMjS+vi@IA%{Cer@5f{CG z{4Z~Ta4`Qj8AVTnr~08iEo$r7Z?vHJ&HO=nbI7-CtQL(d3@%2Lb9P21#0-eyFAnLm zYSt%i$4X?6)ye+$Gvh4ooL?LBT9(Jf(o8@b!}fD7GQ;yY*5B#!LR&d*{3o&96Ai)u zr$s~lET5)N1*YP-H>Au&U+ms~v-SFI`{u)rDgCy45%(`I>~XsX02j0(qlI(7LVHB3 zI-V639Kl%F+;h*EPKz=(oatXu;g8uHf+?+wAy}%y;DDpH zU)_c`XFZ5Zj|o??rwn^)fH_9-#CK=-HMYcqM%=6GK%Y)ax8_Q=P77%F9TU2`Djmr) z-~4cd_?~qE_m@?4_Ke5KGN4TsV<;K%`SK7!AYp=3O79IP8VlTChbI|!>mdIRTkjNI zS+ul&XUDc}c5K_}*zBZZ+sTfdj(2R^ww-irbZmBv{^y+c;v3@|=VsomHP^)&vualT z>UmtP)d~R7YNS}Cf~frS#BsRT12SH4>&m9CJZ7?#h%f`EekxB%ek#-9B#qaW2bYzx z?P0j@ALDY(GGEmDDxcfk4jzvJgxD$`0QB{prZ2jvxP9$SJQlln4V>5Z;OPK3j@&a5*jre zutZ2lPk#4=(@$>#&&ZZfdtZlNSJO9c>O&hSS>3^B~SS3f^+%Z9--$<5ekrSe zuL#*Qnd?;t(z}Ou873<|MhehHi?zGO`ROcn2;7(mLuOY0RY|TR?fConW9n%BkfpQ7fIvuE=ldU<49~#yvliwCw{pGC7K0x-O@3)z_al*B_&)k`3t7+7 zJj`{Ez|!u4bmXH|-rGsm6npjyoqT!k1WvQ`K{(+bY<#6U;>3nYA&$i3V0s7>G{^21 zYTGQ7EK&mv@~xz$BTYrdhFnqsNf>x|Bdiz+DaO!?#%3sG5W}4I5l{#V=h2BBBB}U# zAwR0XR2CO0!}0)owNw6X38%#8&^%F}KSQOwfSs|C5<%c`I$a(ZZ&qAdo5FyS3Brs5dGfnaFtQ?J7Ngr zQOk4@qK+jY)NtTd-@(&$ZHOs7ZemkzqXAmo!7nJ5XUjiHd*o^O(R(U(toT*jf9;Gb zf%Dhg0j^i9_*o`RD#9`LWhC$v$eifmvlwAcPf0Y=0#_O=cGCd^dH`l7Q1N3Pvb;EdJRb%5wB 
z0kCl&l5=J@Y6t9Wp$V!_UeQ>>RcU;M3R}Jdgf)tqd%wXS+DWgsAEo4!P%nPd5tQ)q zbsBB`W=i5gP-9aY4bgfT9jIyWNM&qA6B`1R3r=xd^b2$dfP0i%jv zg3#=+LlO!d{Tn3~pJ8{zb&>NVFC~qc-jotf zA?>f2lurao|6CWqYD-%OdDVwc0XJ|uT%zmSVoKR3%UF@xEO!E4RP0)H)^yaLhC^!; zNbad(1qV%nb87TAy@g;Ir zx}3Ow|Jq)dX3G0VP$y<>DYNj?M0AB&M!Oti*`(_1iWbd6_J?FK(xFq@Ee_;NjH&|@ zRJ4TLw&YNuEVClX2g&5G*VO!S00smb7f;gMZyFSC-v1Bg;pYBt-9Po26hI8Lc@_oX zv?%MS7P}?A`^{GQ&Y5W}+(C^EdXLYoky&_tWeY?WX2}RrUL%tg-vmp)$)+a9;yKi2 z4Pr=8f8>o7%u1or92Te$mLC_Wk;&w$Hz7OJlEZZAQ2o3`ruRCPPHC`nGsC0NF|!NW zOJk`EJPJwRea?|LT>Fv7j>iE^`wh!h4(q7NHOT@eyg`dp=7kQ5t%C(4Q(ji5@#RM8~RQ&J)Cku^pbk_49rY$25WM!Kl3KpOhW zi83()xbKe@zA|PU}1HZ!O2kBq9Ng3uw|9!#*WDf0Ge{N5(up- zP4rQ5X2pf!?UZN$R|jCPR=3(Haz@N+QHAj5J3n|$Elvo#0bX}9;ozaNi$ zgR*8nOzhAAWk2Q?S&Mr+k(uuym1?$f3 zB@Oy7EH>6VM_<+MWbFLP@K>u0oiKD`!Q-|D8fn0gR(BJhfNeq-W82wK>gaojpz>JH znMc;2w@8~55by#UmKdQ!I)@>vCSvtLjdnk;mj2kX@l+7F#lZFQD=!_t>J{1>+`POs zVav~)6MrM*erQXVZ%CZ=St|7wYR^DA^UBoIn;U(#T?Mkgl>9=11jv$^djuK3vilCJ9~TH`Uw$EZ9HOW z;*mT^uMEUV8&xdK9W-@Z%=Sl|*|MPmb7o&t@6RAgJUA;I-WD=PHdKH@wZ}_8_`1=k zV`pX^`_M&`7~^^u6Oih$tgr9X8oUVNTBs(nU$B7OFT?!z48m}|%C2YM%rHR#D*E|i zi`zGG{k5CJwKrh?p~(J{m39R#yKf1DKzc?oy zY0L(*-84=z4eY?|R=z`ZSa;1vB&AHBmZHoQJBM`6RTevoY`5#}PKxm{+jxavE}aPq zztVuklzM~qakG~{xO8XIB!3W|xR$8Hw#d4`Q1JH?yHjgLG%LQ%>#T-M$G4*r7I584 zvTd;8Xhn2Y(_RR%e8Y+EO!QzKg7>OsAZO-nRou>OPmTA$@&2#N4CoO&R1+aU)GfqXq=5D>|Q_K6o6DYVgt3G>$7+s7)9evyU(?nYFTR)DQ^oei+=?ZA+vcpWj{X{7J zGiJG?LEsitJ?CzRKMUmYiF&^C0#fw&ZZ^Z;mS+W4u68mcE3_j{(XzJhBR67P;NfNX zlHtnduBHSUnA;iTo8#1TdwbUq%qFsL)JVXY2h(P`e;*>ur9|@)YfsCL$xD)k3ktFQ zXf#A?ABLKH8Q#W3&cHE5^AqqjLSlMV`&JRjZPMx`E0HxxVTt9VgnSL@pgZ};;KwYQ zz_A!hC{m6C6u%;$`=+aFuoLzpSC>p|qp!9;Qls5UgG~zRua@Yyi0ncp`)x;wMw`V@ znvR#&Dwo86oO(nz=HXP4SJH6&XRw{xj!*X_bWN21R{;JJj{~c>rm6NRp&jM(Q`faB9&Zk%~ydnA>A$^qk?A`_JJBxd5z`rOrMfqh7d7OcY1p8?9rPu}CzzXO*?~IM3!)ph8 z?-0b=rr9M3pl-(ANJQ~!tqNksLbkWna>!}$wI&KuBkSGcgXnYrGDm71_r!_{-Pz~Z zu4u;CL;_<&S6Z`;OvH{J9wgbFHr6mx)nmRC1m}vERgA0E4){F!15#7%o@<7!x%jj1 
zDq^4#z-P!I^&FS6$0px9Ee?d1m3-itf|DKtva1jVLVz#7<-{*_D+F`^LFsN4{BJqq znLN#x8?@~F4omH8MSKctSL${zRV0Q$4u@_@l~^-Oz23%zI>zI=CXv-zeBKr54!_!& zKHKJYMdB5*br6aQ{pOKc1ZR>ec{QZU;k;oCe2|Pb)Ow)r)v+kmdh~Y4FSr9FLTDu5 z^XLOm)plW|Xfu}hJ2$|$_?@zYw^4$aymJsQ2v^~=`8^<-I2I2*?}rcDt%I84G@WY& z0|~zAPYDZ(C$Dg=1ks%0gUQF6^^#oZ_l^8a}PO_by?@1xz^?o{sjr2#E zLo~-u>8LpLDa2&cO9iK>2v-<#nVL08o<@IxEm?%7nBS>LHnJI9^KUo$COCpaX*i z(lpRiUV6Lowg`&8ujgFj+^QsF_d~@?WU|7o;J%V`1SDtP4ihsC@cr>|g+z%VmMs-IDZ)}# z!fb_EgS>@@CrYFx|DILHleZ~AWA9UZuEKOD?p7$F?o?3&(H9Fyiz4e#Xi72JsP5)h zGRyiVxZ=KJ5RxQx^(4;oEXM+^>-2{ygQTxNE=rY z$C%^lt8RSz8qa6ZrRJJk3XzNO$CpMWmzw&GkX3cdrZTOn)uV-WTF*oC2xrmURWb1l zTmB=CE>5fS7o*cdmGLZkPSZ=#Af^LDQ__*?+~PkrZzF48HADaP{mpBsYIeM>6j>%4 zVO$v-!p3K=naZSHFJC?CCnYvLpx~9?jeULo%_seWmvOb;B?{-|oa-EmapQhb$nxyK z1zxUOa_uDwDUwc$gQlsw-luzghnK!RKQq33iO1)QCvQdWq@YorbVbpH&TNwVvUKs) zu=3f(k?HSYHFnd-;CkFHpO#~HrFCq@AGOTC#P^+=);_rT2Z`^*_F{AbsKKxB9_sVz z4@xU7{=rN*atHtaW~fZ8;W=1%{tHN^HuC_+!5vv!19$$oV1{wi`7kh#r#}yw!R5J30)PfyB|xR=?Rnr6)z1uhv! zM-UQ+Z9HQi%8XX^LQRs_0x}g>BRKHwpE#Q=D)+x|4uX_WJD>U2GWS|V;x5W)sDLyQ z&LN6t8)0&PInJ~@3iGZCf+wo!1v6b-mkebS)n^NhHT!p#SjmtQb|y>LhV6&^?35Oe z6(2_|&}Oir3!$5%vKJgq4V{;ao?g}RPSNpHDoi5JV0Zi6bgieK)X}bz`0-`J)?4lv z@!2QGUKSskry3K*wK=VWSDud`J?uzcwys9*np9-F-1u)Yf1ZadX?RUg6~>}PKe*zp zWOm9C;hZZO3*6{lM4Pl`+p$3T2Q-tv)8Fe9AcE7Tx!R_Sw0+hI)z*&rh5>~F!a}}1 z=%@84tJ@?i1T%mkAp@D#ye?2o-worJUFdoZz`^?wfsQj#s(XWbuavqQgceUMQtQ z;vV&r(Q)y;Z;GgGttPcZv!0Vdzf8NUk!twqlDHm*He!LesGp)=!hqE}_0}fPH0Io| zNo~ICZJ@Q=Sl$mcs*HGS+riHfPpY&r2|gCqET759?yESIpLVJ9a$UlXCQXvHx-gBm zyw(@6jD}@xYMsTdgl1XGR(^R@u`*pwgV!X@cA+J$W7(uY8uv*6>m4MdasoX9ztUE% zz5CNGL!HQQ@X3Y&*N||gPd*2z&RuSo9JaXqY&JT8q&<5uPpbdUgpDiN%%GOW*5uLeRQy(_T^0J$#&2+W3 zlu%e6edpHkK&Y&e6)pf0MqLRyyI#LMiKamR z<1GC@iGZ>&8RDcR4GS<%`MgINjK!R>iKY(mT8kxd2R@{vnFX2ubnW*>FKT zbQy*7qhe}p^!HPW*kfA9Kzuwm&gCOiZ9T-shp&mRKl*1oc&bJbM|sHwzba4L#bhxI zY7zhQ4p6(A9uj9{daG(ux^i0VhTS!^joYxj?znmP|6-ih7l^5=vf|SXdbaN##*lYh zW3Fwlq7*QJW+38uex(P{U)X{=DlU%^VZo-W9IU^;Tvygays~60Ud5KUjD9_dS^Y+S 
zy2PI#LxY5XwjBTjt5;Rp*z30(=Ev>5sI>k^^YN{?4ii{%Z+KomO?KF`SH^^ zh>u4zeuR~DqvoJL(pts!`X&#B35LM&7D+MeC6)dL!qVr#sKK1g##h0w|8-gnj3p)q zZ$vimts*_oaAII` ziS^`rw*&CI0|NZAE#88AkI8YG(STVkK^vRvZ2ZbZ)nCRcyR>_MNbz^tg-ngEmd^8Q zvfU>_QBWrRr(5=kjA4OS385|L=bfT|ZoHL&-!62gZTtJ)q)UFtnDPnP8n&J5nbO{S zLv%*Nz-~e3exW5~oOmr=F6E624!Pu&giYVKym9;o4BZ;3nwP42TFuIPz#z7w650iJ z=Bi3C+eAr=gy7R(c754~K*zbvrX1Hhc zs881EUnti}yB}tYT$oHf;`$RhcE;p2r;`hUSSg76B2+5rW+mkn_ZFW|eawDMV5Xop zuoM(a^;!+wciC^J@{Za6M%7`OSLfLWZtxhB3(zDRnF(Z%58o&9GPB)0aL-O~n!u?d zd``vZ2utVxY~Qu&^40vj-GkoinIiQ!y*0pEY5i$biw_S&TcFNRq&C#YT=N7ERQ~dV z4joJN^;kA1cyOjK@>KN?#(wPq1v)iKHdeQM|8l!VhqWlkRCaD%sbo!H6?5(Wl*x z_w|iDbu! zG4^FZQN;Gb8}CH`F9%D-eDy?h1@3pZFygGc4RjaH3ciN&(uv>~t{+~;Jm;Lm4U`3Q zlTNI=lGv2=-7@JnRevQZ(^-5jIL)G@j>?b(5$eihfq`B5DH_Wk(uF|6v-uy6ef6l! z3&#=;bs)2t5YhrXbWU3-AXRS+WC4q`P>{cYqCbMExAR7UT0fY<-60P1v!O%+vJV(7 zLE_~W`{R_XP3Bn@RhWkfa_YR2K*hd&hp|6yXb!N2LIi;@ABLG~A7P8=)5GS~Ol3Em|~&P z>$mGf{%{!3ObVj>d> z!4uW2#_Ifb>c=TB65NQdPEoQmjyAt}qb?C#>lAG18k)dssfRCYlpiBHdsUIMK(a0$ z`Lk&3oQ<_6f8PDr-zu6kFrW#~2vie*}&l6esAgeYiInYu%j zCP<4}q=6HhDF*>DI{*W#*!%>N(zPZ~et+BJ(kvcx_QDLBTW~=!|8(pQVz6A$?V`X} zRN)F~@2(=E_X>Tig;WNUCsu2jQEIVu%E}E;K3&glLm*!csyA+_1V+1QBDA(m^V=-H z#X|ZLO|D?(phU98#{SgTFE{XGkTgLrz0q)l z8|$#(GLY4(QjUMs!(GP>B$oL!5bX9DkB)0FM~-iajiT;ew@`pu$vQ#nnz-bmQAASB z17PE@Z*o2q_7~A0a}V*KY^hz+}EChKay_BP|F>kw--e3UH}UOs}`JX=60HrL^Cv>ltpV^ zWVpb7NNSu=SsUTH4fK%PERWy(SM(|~^I!-;TgrVWs62Y&aiX4GJ@rx(#{ef#5N z<(VE}`{z~Df6}{aFU)(_to75uhXUdxLV9Y82ouut=WPwLYHr~18M~sImkgG$L0rfgO?NxBT~~ZQP;M(AN0!&LI&Um&Gp41% z`@bSsHd)p%oWL1qkK^j61llkVl;Tpz0B#2Tx9_9BFC9X0cy$yugD#)99d*w~7}zfC zX0Cbs*26#d)(hPSY>~F1oEbDGSh3W$z7G^B4KaBaZyz%je?KYw&}DzC*!Xa+ao^*s?ojs@Be(d)HN+q}=?6%0`9Kxvxe zT+`)2oq(5)-x*3;6yBQHW7$Vvhl>|P8By}Uu1?-K%|2Zyfv;R`6gWYk+CiDiW7(lg z@c)Y=@lp?4bCwUpLp7!`+fe+N&Nb_WXI`8a-V7`wr?M|dOkKYSq@NuxJC{Ax2+D~4 zS=MW%F$PrfdGCX*GDdMPx7!Xp67~+`mJJdB$YUXdThhJekRYXomFG$w@vIqozCBPc zE<8`Rn&Q|#$k>8KrVe18sO#b-;TY77yF*E%hJrQlAV&W=8tV|w) zBO$|@^&O2*P$x;KlM=v7LNaP1nGeKdJdT~m|rB3 
zh(Y-o6?H&+TFKfHwN+)!9+&f`dofoOwdY0N@GiJJ*X3jHH46^G2nXEYu=^PfLF{dM zI{VJckv=r06(!w#CQ+#0WE44FLxOK!r6Z*iV>dT{_(yj4N`<{F(HhrxgEJJWCCkZ@ z3b~T)1kMG}E5zx25-kwA{#-!t`doWETV6WgAA z07%f6e?D01iC^0Z{QQt)#jNv*>`|XjTRxJ#0hyZXZsp$F*2?Y6Tw+k4N}H0%KM zRoUPxZS16>YGz#Huw{dbRv>RZ$7wg%-)Z!-_MR6H_uUm@XOzulTrM}CAj%xDaftit z2rFHq^d$oDLJYCY1{bUXJMzwP4T+87C}~{|vTfZca#)Fz-_^}OQ*Wdm(e<8;Q0I4+ zZp4X@SdjNnT2T)mYrMH=YO!r%r0$tIuqWXW3v85^)?q~ZHVu0lU(@KH&4B8xHV|)4 zqHS7oNMpl76KXMlUyvNC&idv%GX=<(C!0#A4!;Jk=-9~c)NL>`;A}`$RsqBV##8^O z05G8JY#Qv!QeRX6bbyXj3RS>2sNZJtr>3dus(?9QB39?1c38sGZ=}L!i;D?!9wPDF zZOAaz!aJx1cGcP0yJ(D`Us|u&0;4M>C5;nn~l8)rJAf)UszAq%n$BB=vHy5$(;6>1GgJlpNflI{F*nisLk0zG1=ujXx7}rK8SlT zoP4?X)<(>*?f0H+jt}TS#Tj%qH&C z0(AfZKqa+D9e@KxZNfXa*Sf9_&;tPVju@Aq{bL&(m}Qjp#uqxp0@N9Y7(nZr;*~t} zw?u7pc|}Y!VI~VK2Q4)kvD4%embVkM_UE&Ak{qUF8P@p<_F}0*vC8BJx;%hWYI6NrN}U1+&}Xe9ZcXOILw|LQd<2Fl#b_ucK?jl zbW(GUT-gGR(VaE!xKgJ3#@+s`7tOgkH6(f|ik-Bft&2y^v^IJzR9}yYa>g0k#+uk* zW?EsMEz`D0-3G*narkd`@aai_)uL1u{_vUX*MoJS4I!9zuD&Xko4js#RZ*0CoL0C; zRMbng{ab0imj$vLi!zgCoCfh{Bfo^__)*_!0FQxqZvq&pEHYo9|JOt$Mrw{0KoO=e z;Gzt00H1pQAHU3WnNW~NV^43cptpAszW8{e-2z8ZV|`IZN0nuXDYA4!2}&`$7T?ax z=fT2SE<|V2pWjE3K zJB|ep`1Xhmg#+tTx`#!4W)fGHRDi@dtp-PWg>4zjVX>bJqwxGe1ROyu!9s!cSVajO zBehfqo7TYm^>*N{clB9q%|9%2l{0*uhKWj8mEwUNCr83+wJ;bMPSZ55ig&hi23_;U z_lz309`i4wCxVK~{;c?a>mS;D^76Vr3Pu5Mh$jEg(YLwGbJc=~aKq9PNI-LZhh?9v>!kYtf%_&WFFA^W8 zn;kzgd7+TZ^a$XP1pOvJZgOx&uTC7rJ__Es$~r*tHeu?x^+MV2OLZ>LbY%sO7m*_4 zvFT-r!E$h)LSlfM-@SG84dsRXOInUX0pu)EvUST)H~;$ljSY%aC2jYFwJ)1KsQeyK~vYS$3 zcxvFq24hsss)6h98}VSoF5RQ@SPz6l5ZvVv&R8X;cO}KzGN;rrl9WBtph{b0h1Vn& zBUM=8>7DYdz7lT$ku9Y4AGLXSK$GE@I!OJ zBl=SgsDcl-$?z6nn;^(!Jrnt{3W3%Uhy$!6ij1+udB}$<*p??<_g{)V5_Bb3SdJ{l zh6SR)f=9YQZo`rGg`-jC2MEoJaDUU`{+pHgZ#gQH`yhdB5Bd8n|28)u3Uy=dzJ@a- z-T_?X0vh?I3kpU-3^EvO=H20E4@Lg^gx#+3K~18)_hVHVnV47qXDEV0yc*sBst3^R zMpcBo8Sj>2spVlV)Jpx(1U%Sumb6C1^zzS{Km{hq6P`EzS1A-lGz7=UkcE*xrWB;X z2pz0(JmkH|JHy*^*;!Zs!}0NiP^X$OXKYmFJe9?DkjSpV!K$9|YVdkQDe7hN6+-2k z_jJM0j|fp{8GvkBv91@+!Wrf(TDTG+zXj%4w6UjiSuW8q$ 
zI$GEj91Mw8^j|KBF*ANi!JsLkXvQ|4T6$i3N9ENqTW-P?#-6n=2 zFdM_hKa5hlny`gL^$ku;D#k|FE4lT?{bzawvA{3<`KKM$t%;xiU@>R}Rn~a6n#FeS zFB$#>nL4*3Wo`}bVk8Y1OFGapdy{Bnt5zxGZP-**jM&PzgpKfVf<$x0Pb7CWYAYt@ zO3cq1$}12~7@#5|%<_31$=9kyG>ua~^#S09#4-V##`e^`Q1%6n4!^imn%e)jhcwxobCoRRzl9oa}J<*R3 za?gI?KssBakQfy zW{p5S;mG!mj>Ucy9owh#J%K7swf=5k6Q%xuvCTCzzy@S9(wBGgo^e zD|qfH3zRh$fvNt@{Jz(2_EA>rzN~*=o}5YmyY1^sA%ob(@1#Y$aj+q)p1NfSp!kZv z03(1XlaFV^0hvr8S%(aM@QWg^Oa2B73)m;=+DcZDD zMuIDE? z+dIRzzf0Gfg+o%%lUf~>{CkrHJ)?p!DEj)5Q2M0;C3P=9-JfWT#WQWf++QbTgxpGN z&0qvp;2CopY=G^)ZjL6%wsu8v;e&zhybDwH<$AxknuxZ}Hr5Z--?}`a$o>ujlU8#j znC^`J`Ml&Wsv-LQuv|0ts{Sojvn2FnJGt%i#-3Ee0zy|}b?v-K`)DiXfZ@etqUY%pex|Wd`>%$E zG5wG{8zXdXX?x(itpW5Z7xfuTNL%>7KVw(_??a1eg{Shn$Y?)zcOM$mQdi>rc!SZT z0C$`5+0sL1Ypq380iIzu;EHRfOAv4RdHgH6jn`wx^WXs#9(%X!z&NcM-ZhDv{j<4V9T{F}T%zYZ4$U6TwSFYq_fpe$v(zq%etb&vg%+Bo+mB9R%}W&sgNg|1Oyho=_S^e>r< z8}Yrgl+jil#lM<~MD}y_&4xR3;Vo%geI@@u}l^h1gVEf&I_xc)AJ-zrCCn*Aw4e`zBe7CV7+PxZJ_p>MeQ-5EQ6f>GWyw; zF1SK=!xFZx*ZUW`ICW(~DMnJF_SE8o{R3ZxQU|}80YD?*iW1t_!d~6-#-|$-Du|kd z$}hMu@JKpcNdgD?W^~-owU;Pob=QxLmqZ4_yl*+7aTPs`hf=I{&5D=M2UASKl`E)v z>Qqj6d}h}O_vQ`vZz-gS`Ob;Bi_<)Be!OFxvZc|OsXGuI{J(kL&df1BB&pFNmq|*xAoeB`5{TP#!Rh? zYt6jhT!0#IMuh{%pw`BvyT(ZH5rwG}|CYC`azhB43d8WUc~^66pDwT-(t2jumXgH@ z>>8!tbi>rh22W+m6W5I{h!7|WJT+ULbIt13qU+$s+Bobto+gV~AK%vECdfYavZP{l zT9j=?}6# z-vcpB36By8r1~r>Kh8qJD}60aKl)(U+{92ujL5GO|K}8n9ywn4Y1*r zAwR}ac-2P|P30Bkdn#yp-bH3J3mX~^VWv-|iV zscG8h$^^}hn% ze^m}r@5}+Iz;y{0s!0|OV?G>AwFCtN97I2IfCf&TONsk*_B7I6pNnx^sZAj8zOyDEE5Ku znVUQnPA%5~m8yA^4%(lSO%(a;@;jqBN~nOWf51sT;N=^3L^)TjEQVL2nwa4xz7(b! 
z+fvre=*K9GFduEPqSq}3J2P$IPSPO^Zq9~r3?YuxX5nVZF^sgd#+8WzgVEcZ@DDkW z1-aMoqs#LCR&_V+Hm~<&93EOMyz|wJXkFBW^hrvS%)gDOL{99dQ@{3p_xd4=yZ#(K zfi4Mv1;**DJ(d!YD~vqZ;@DJmhp-|Rf{(PQ3Kc6prFDj+?Yxh@AC)$ZBC@Ag%4Vo8 zzyHH6Kj~6GKu-jU6AxkBAfSXrGY^Kj!nX$P*>Ue^k+o_@Oy29gLC;{!$~tA2OM8os zrpL$WiFWCSKfl+)1#O zOn7)3cWtBAJ&yepFC1-Yv3eZ;X;a`uUr@o-XqsafvZS8DVo3ky5!mD@$Pf)dlXSa}wkM_s!3f z;W78|UY7kg){kR$O+03xdbyXx`R49`De$2KA)Du~AZst#Fc~5`_dvy1|J{+044!uO zl`zk&*Mgw^6P4rKl2iyQBv)A9RegAeg=p1Ll(;KlvSumM(a};^_%Usi-E3g$N+i+1 zS{{WI+8s6%B5l+s(mU);@iAIusCIJV+#9*a;zAgIH&$z9P@Vq;|FCL(M)kM&O<-Si zPyw0rl~HdYHsJ^s=cT|mdFSvlQBO1J1L~dvrS~z>8k<=XVDc_X|BABWp$)gMc?Se-#uo7ECED86a@iosmck)l6o9KL0; zgFY7R9-yYR>#Z<|xWt}YBE){tSzwRbDNM^>HOXUl91$ertPsOe@@Z(A2>f{UO>3gC z^mi6Ba-9lY_aXn=RMEt@8U~B0N82X^&;~uqEibHba=Y=6+qB>662Yp*)1GO&_l3IX zY^5x>P{!?Rz;i*eS#GMnEqoX;c|p)iaLo1L$XHpzUo-I0KaV7$AI z%-Ab=i?zjmxW1Y^!I;>#rvhwqZyU>%GAh7jDtzmhq=Q#AM_LDxjP)-qeAe4n)z^I8 ziI3mubnPrsM%;W+eAGx1J*D4nsRce+oJd{zRagzap%n~^)|rfvVJ5er}2<75(3CQCg8z;uy&7; zV%PM$@J;Z8_l5CrAwo&gVA6V9dHgWGcBnWp8k{DOEFC0?|7i?;JXTl)%ZRpC^S1`0< zo!tbkgM1f)_Z+xTK)|s|wND~b%;0@P_q?>#0u|9G#jKTC#|e-_sk9z;{q-Yz+1(L1 zF`ga7Sm$9|Jr}v{=fhx_b8@6w%eO4rNnkL>J?hAZqZlN8jj1nlk;fBtN(uGbk>@SG z!p|E1bka3_Xl#EyPt7^aNyrs?wwOhEF@k@sfN=3c!HDTOJN5i72E|74}KPwn(P8_0qSAlX=goj4`+3 z^{GWv`-ku}qG8ebf2j|X1gO5VWMQ@*##QT4Z^)!BexpIX`k;i{q7|-w27CO@vJzs8 zcglxR3M_!gmKzkG+W)>zL&3~ch9`@y;k>r3S7Ynpl`?Xh`Mxn9`$^XmTHfov!14?I z&O_bTyWbj5supZ?FHj(GS9+Tqr|ozTU5(L$8Ohcz6j$4&Ht@q9!^>Y;`S|#Bdg*wi z{2z(Gcv%J54)A>lee>@M%HPqA!0Xb}Q3Aez3Lq7Kh7_s!&A=G5h@oE_Q%_mLo;)A! 
z8~p|02cu&p+$LQ&0~_qk*@V58p5(zb&tI^NeAI7?9hJ;l_8Z7}Rt@<&#o~q%;tfB`4xx>cA2H=6$gAbQ`dy`F5esoduRXGJqn>{vG5OtTdb2;c-%!XB zaR?mD5pwZh&bpAw;_-jMd5pTQE~!LvldOT%zqjVP0pcNW2fJSW*UkCA@+?@Xe;@($ zsbO9KR4my=tTlMbLDeS9-vu{y;FDk004?pQm0kc!p!1m>$>sb;g74OqU?y?5U|ixN zgG{-X-{&9}jSK6l4|_oFexDG^1}&R~4gyCL8%mbj0*GsT%% z5W3X9-OQs^JbctZA8R3=PTiVJDW;UsB!SBLsV?Dqha+p$&``vfge(qcv3Cqm~pND zJTpTeY`*QG>3k4&iQ<>~u-+Rmts#Jh$8EA3Eh}EmvkE!8)_E^9=-`9yqZb@D(ZnBUzBvBhhR)0C|=Vgl!M9n5n7j6os>|({`;c`2!cV zxIf-cH$-+eTzGILixd{LDc}kcG2ud;v&EL8ln1XFN~i(TYDTWlpIu=d2QFT>X3_?j z=7w;oqv=+jtdVz{2$rIjrasSRgrWT5mTUrrf`OS2l1_m*P|2#mEiMey%?RFWu7VBHfyU|To)=yt_}ZP~wh*Iv8?u#kJoCHO77)ddBaX2CRJ4vA zmEJyfiZZ@&XO}SaR|mI!y};2^;*tq8knmcCv`8>|QgXo%2FN+kpwC|H$>0G2dQ_33 z4DAv%PtUB}`>SG3mj5?!t-Sj}x#%>{sU|u5L_ORwGn5mur|4F)TF@OzuFrw-GvJN% zQyU?Rm8U=2mri8sXA^idYlNyQ^=wJ=!wQ}jS5yFctLAnTzQwM1o2{$!v6=A}a9ifM zMap?oqfybx(PsnYv^3(7c^@mW0gN}9?u#BvWB6(kz-z!ym?0et%hA^JpEnZ`-b^GbH1Wln%LMF+MB5pc?crXWdfs9!P z9%t~GutK%rjj`voegbHCiikOQzoyE52w!gEhkZI^_qlUa0VVfHDB3DvtQUnTXy43+ zmqqf-=k2|f90x~B)Rt#J>U+8CJNDF8C3+!tC|v$MmJd}MfG#45H&%?K?DiR!6TN@& z#ntKMMv*rAvXuu$G0t^B431Q?Y4g10;^;jkL>2psKr(tp)K8xpOnibU$x>~E*n3

    @rfpH&(^M^2T1cOhODOb<5`{Yi2lRMc4Y0ZhLstOfo42~BJ8pZy^#0+W9uEe zGmEya-Q2OQif!ArDzEo5*(4}!oO|~hMI20S1 zIP*v)q3Nj35k-VIJ&na+yXC7K44$@KQ|8`m>1q}y$MRJ@ULCIvU0D16TwQ>|ck#;= zD%JF%7&cs}T_^&AP@@1F{*TVa`9D@wr$2xmxZ`lZ0q=ipU}VUMSTi@CdPq3PwA*s- z##wz{FqOOR<7h|Dj-wNH*)#{W>sHxycRXO6c(8afP9aG5?UG>5NdA9yv(qnVEM5grtNba|_ zj+*VujqS(-K&p2g9D4a^u+#I=YU^Fzzk`hn^|?gG1u~HGubm!s;^d#cR+$;FzIqx6 zGRA_V75$P0TH9<;%lK{RAt36BeIOyg0`FiI$~c~#hd0A+bhHDlF{GK@0`Dt$MDxIG zpFk|K>;OL zkoZ$*DP5D)!BTaL&;`Y1JuOQdiwEll4xQR&m6bC!t1}a2B_53C)47W7li(v7BNPqO z4#4?(c!SHD(FOa|W=S)W-P$wI(ZH1!kCaA@riqx2G*w7-swBumXwEHmJU}bZgT(mA zW5XLVNWjOJgOI2pm@7<5K3!^_({wQ7^zAX#muf=&#Lp|PrVn2eqc*=@6fui-#0WfX zFpZyuHLHqw+;rP1Gs~BvfVY!i$j? zb)uj@(L(9AUmh`cj(yo~ch#Ps-_&e_`%qTvW8Kk5W1#$(Bnk;8X=@RA69=Wzd$dAj zN|G833wtPE#jK5bk^&9vr}f^thMuu}aI0oT9o57OcEbUxaU=igc1m)XOK z0VDT|K=w{^9e+jF=>-A|dZ?{*%a5oXtoFAI2%=@JXX2B{xH6(Dxp69$qz@sALxGOG zspR4_J=W8UTW5=b`jj?x$20Dm$$F+EYuHd0eq)A?(X z*=pX{(OePB4I?U3!@7@RZij+u^Q4c((ES=zSKs9p7QVlqkU;d4rFiIieqtOzMsU$~wanK8VWn zWBqnZBHoSRS0^fu>sh6u%mB$bKIYWnA!pk^?-o=irk5_Dkoi zMevNC`DKGfmnAX5bDSI==M4i%a7e^j*x^whB1bfJHDMjnmJ5K5jw63p&&;i5UV#HV z+s+H`6x}T6vE~`RUS$|EzgnZ|g6=$G%IKmYi2{4=C;3khe19Unr+>^0^ca)-zf5S}wPLTqZh>4e2a_D%p@IDYkChF<`&no&K zDsmcq2%sM*jqXD|vCn0#W={1h9g8K_a-@1h!|dDII{*BJNB}-bw z(H!S~l@E37-D;F=_Q^Qq8))8(sQM$OD;&kA;8lvQqn*me;+x_lgj=@x6~G-v$b8OQ zUyt!V+s@S313I+XUbl0gn|?9i;yj_9chOqEMIlv^0 zy+}7MU@amXs+!DC=}N9-+vK?RGO@JVW~h~5;0sb1q;kDO5wVEI6I~XudSBn%P+v7C zHq29>&nP-95V zv&%r-%Y+jb!(Lj#=;;Njp6qDjp#-m@Lr!CpyE|LFKrFW+3uQkKP=dT(LVYgZ88W4L z(m4U+UpCHd74ghX6*;dSAmW|}wknZ1jglaa7iL6xBX%5R5o9pXGAQyD7!QG{eA+{W zdpIM~8UTMVy_m;=KbnVC0WS>m$Bnw{(gQ#a4~%SV%d-;z-)FOm+&YU%J3dVe2ydZ< z$VR@25`@5NL3Elh8iwyP2^Cy`wdKo69 z6BW}@F!`}#2y^!E^>F2UX|a72ZG8sNG=K%HG=9pcMu}mqu7DW*?|$h6_=-Hehc$ct zAo_4TyXb(DL{XU&gSv-W2CkLbS_hR|V30fh5VE!!&YNgkq(L*ox>{u1A_Qdu5eIt0 zJGw*%_jSqIrQP(0X@g@?<$m)=n6jn85c(Amy2eFG zx;|O3`ws;58#9AcS@QK4nsd*h_}YcxXDlae86<(}LA~(IRKdo|)4EsC+xFxAp$$c- z37DzdKf9WYtbf#tY`{)I`)t0|SGG;=&7mzt7zYL&E-*E+*5f%X^JR%i*j 
z|Ks>_{TKWR%JIM4xFs#S|3EuGa}9`HsSJ^rCpEhBw_xEl4ItrhVNE_}(F1L%TbQ(9 z$<~CG9(u3GNbRKEb5?6u52NLB%MWh%Ts-(Py;~s$p!AI63_{fy)mRZ6OkMk z{c#PtJO_)fL&@&^PJcP@m4d^Yf7<+i;Sjb!<0B)Ur>k z?0wW)qOu;MXx9JKHiF~8?=0T8$++cVPIbu~6mwh)#C>VvR{d6_r!D7X!O#AzBG^dE z;8ui!32F684|OLaT+dHm>J0y}kdzeQK(E1x;mD>KejPjy~NNx00o2q3+p) zZlV^`zXrUcHm&V}xtW6R;1<=U#}6}PKdo==TtrBfW=~k)yAieVc_z_++F$W{?xtDdW0Qu8L_LwjfebGPuO1_UPL26FEnrynESnd&Uu zx)1!W1GN5DAofHWhXbRD?MyjK@S$~Md{OHWg4Q1?N*$m$zlwJ`a?8}Bt}r@B?u+qP z&lX5C4athnlt{IC5qez342va09C!o%ncwoV*Qi=GR!22@YZ0RLOBbhug+2+_bFC`r zaCLbC!L)w_r$w2V#!68^`0k=9km6Y-6+kcU+Ec9xQ4Yyo0OvlM`W1AO5ftHgs|pwc z^tE*V5>L&w8`~jM26d_?U@Z7%6jOpsmE>{^{pYji|9n?s^JM>>MMev}^cm`gJ5@SR zvJ2x9Io6?&jGfWdO4>-gPTD|)G@gPPQTaCCop+=rT^Mq}Nsp~q6pCkK_&K+d?CeD! z1W}=IJ(khg_wG5dXXz6pxW#HJL5IZKZ&N2j#o4Wc_-Rf0nRI-tZCKDV< zYH=C4HCu~~c?Y*)B_>Xg=wa~rlQ;l`21U@gnRMC|97K+;magHKhZc~Uw-PYzsQcL@ z1W$y$L^PKF(D#Xe7@~}ToAqwKRW+}rj=9Rw3f2**F}0ct?*;8fh&bj_249OHck(gy zhDi~0{}=jP)(1y|R9&}-&J&3oDdg_un)3UnbUS1JRJ6vBxi zL0Bnry=j*DqK4MF$e~~g9O`>`G2L`dkiwYky|40mQ`l(p%{o{TFuA4^B{2toc zv9Ch+ETFpc-V=HTBgld*X%9M=+Sc2eBN34eu!)%oaEF@;p9s&Hg{MwqM+}FaiD6l& zX`)!&l$FJrMZ8~e;~4)PYt;p;ZZr@& zB7{5h;7o-G>|v%zp#7@lDtZqHadH-$W9dqcJw%%g~1Rx`w4e7O+1IQv27ESj+-*Fm6Q&gOzKt4#sq%; z3gXLBb$W#0#n?^7u=*g{4FQGpBW#N;5+NTW4TexPpvdMpC;Jn&Z?O0#J6)a$OYsJj zpU*P96hwVjL&(OF34@pi#L?E$~l6eInD0`rD%a)Y+ItD&$F}qVjY9fq8`|F%wj@SyDf1&si=v^VU(!}R=CG;ZqspwrU!;^|^Q+86Btz?m-4E&m7R zLe~0D8m2g1d5$q1?HFnDg-L`OCVPhG=F7(QI3T4i@`aBA{j8gqkDl;8r7%l+XmnP1 zli>d6>m22)p4$i;I7^vA=iv)WQ_q)X74TE(i0Gr(In`f%85yz;7{M+rQQfdSDb<%a zU9gu-K1c3jRuqY!S0F#4EI@e_^JpnVh?YnK7FvC4$;Bom!H1(I0nPIcUdNUgBX%Sb z4>^>f98Nd%11_<7H9gMKFqdj$kcEN!N1$gE3u^KtCTZB5;g9|c4(PBHk07U2>AD`# z=|bW-Q(4$xim{bI~FcQ(2iW?%J)NNVZpvCG;0@i3!fqJNaO-@mKh5<3C59Zax zQANZ)77>k9h~E+B(Gj-~ENXCuG@l{xg<4NH2$7UK%C&VZw z&h;Hb^GVDGLLS5DOECSEn*D)^wl)r35U2&CI~u`CneOq2i9bkBlG!%Mh>~=l>B5vq zn5)rL9FH=`7MM)Fi^nuIy{%v7{X3RlmGewd4fy%UDYtVz*1%guuTG!<6`VAEe_{Xy zRg_e3wj0oksKWrJQ--F=2sA#5r~|=z66HNIWX!*NkpLMHk5u+ 
zC6sk$xcLx!rj9Qfy7FG>Z2+`hO4IB9;;R&Iz3%|a3Af|7rMy|qn9ez?%Vk?|s?Gf~ zux|TT&u{s<$5x=iBp4v!8o8EZm~n;OaC1w$8NFB6uR7WoPt#FN_l;Ps(C0X4^ybT& z%vSBy_?{wZ{3Qy>9XsPMN03}b>FMh~r$d<(x2qR7UuN>#jV&#sKd#<>3MZ_@^6_cJ zsKn}S*@3SrI*S|Ryn8<_w3AFSs66Z1(U<$_R@*)l%P0wjl}GupZMvMhFGRG6uZz%e z<~p0Leu6b4VQ8tG!_L2V@4MNZt?E#)?Wzkr1;*9jUL$U+c1K@9kJ^3v+)lSGA6Th0 znaU0^r^cuLz%bN09om$(#&i$2as;)$E>kcCNxc4v@IGQCz5lD*AuOheAFoF+*^1e1 zKG$l`XE{6*Z+`IzmT2luA78xM7B^|8@6s;U*`b?zP6aU<+?LyKKyA3|jpAdD+jRf0 z8`wrr+qSKz1^u0oO>HQ}T0N9eo$wW}93+8q-`zkYkS6K{qCKKNjBe3XuL_CC9W1B; zDFIWYN2Lmek*H8^7S&ho$-W-CYv~IAL<}}-oUH{f5nW(L{Yi%j63r}WE#HA@sLQ-; zj1p2p!^wGz*qXj^4rL+!SAQ=?P8-3(893f1%hk#Kqy0EJ^Az^!+{>BOQ#N-KTH;r2 zaAc?q5$LgtRS~X;*Gm!_KY#e1eCstVk(=zU!UIZ9xn9&RnR&b)dn8 z;pD0go-L_oRIVy?6sv5gu&CsTwAUj z8%~EnmU@}|y^*p!<;UJ@qq!;)?3D@5ZwvFy#HwS>-~^c6 z15e6mqD5zNm=>D`zygjk$9#BIgxlkt<3L4zIbn`1dZ=nnTP<=v)I5}}wdKBiRI>9R zSz2|^e9d3kb{1Cf7P7&4dko^Os1JqqSX&HG9^~e`QDzStB7$i+jkK=>y}MIi>jzRo zXr=`r?T>joHyMQ1VO^vpR%j-E^j7P1S3l4xD!L1>q z7e__oN2#DWZNfPwDh_?RSyL9Y`e`@!dWPDqm`js7_%h&s1Knezt zAT4jejon%CodBn1fj^=6{Jfi>@RZs{-LIwAWV~G63-uWEdOG) z^hh9^P!*P1T1mJyl_%P$TAfSv% zIiZ4D$e{P`8H=whk>TJ|8lx=83CEwzub$IU@yia&9e^>K^ZV6@N?SC^|486meRlYq zT&bRvWDfYZo^=0@o7e`ThnD?1t3sd0eT!ppHd8-A6E?jwGN}y4B&slG&S?!NeL?gx zU4i@#BJDiCD#|d7N`=WCj!L|&jId|0@W8daJnPMpF_o3=;CN{7JjWer42aD4t>dfn z^|q-R?Y)Lq+hfsd%#KnPTxs_;9!!|~5pNQYg#qAc{^=18zoo|dz4suy46_ZZw)mBy ztB`(PPeIqYcOPU5uONf8+j(qSDgS+G<=^7a{I;59qB~2jB!KrITWp4$USu28u&+g% za~;f&=#X{ls8(D>B?bZX*mkXN;mL!Bf~HAD>F;mpNzwR4F95bsM*IOY>kryg3uPI8 zyaAj_`WNcB?P>=5@Z{sblzPo%Zj{=-^QJ}hZ;Fx9eOvrM+cEM3C3Ejh!4hmtf-KUE zMxyjtgn8tMpb5qcT8PMnzv~KigZ*Hvh1t^%80{o+dBc46qun2FQCuZx8f3MGE?XGY zwbxBaTM^KR*EiF}$lw(B9(MJ^FreH_)D8GO(-n!YTgcAoU`a9*6+4UV!1$8Wafrx; zml*(=dV7%#5n@Ix5(+9Cyr?M+E!dErff9eydHA=fzsS|MXKy`lDv!`cG$a3+KJw&F zUa*0odHQrHB%^Q$P2trMp4_g=yR)mbzj=M!sQ5;kgjCW^EX<|)N68Y_dX3!|{t-w? 
z2G9)x*=iFNMhB&&3eCF=4mnzBu8B>fKuT)CgFTsf551IB7)It{ADRBZ2X6=Xs$jUy z1n0cf7tX25#@##a#%R)vC5N!z$4Gmb_>pJ8wh83>r^QCvwH{%;SZLMOkJ<$e3FE9 zLiq)-x&n<$6V3*pCplN3fibhQrqxG$ThT7H&;Of4liMrO*E9BeDt@ukiqlXhi@&jO zT+z@pP&Bb|-X@*n@o7V^#; z%&YHQw$Jx7wAacIl$zpeMcBzoNF_6%7KP9#g`1Fp+WeYY_{DKvpm7fq*Oo#hb7kt; z-GAAA&^6I0y0Agom&n$$z(c?W@1Bgg%GNPG2Ht$fTHbhk^91l&AlVy%@>&*UcNfzx z4&kb`U&baYEO5mjH-_@nh9SWyaSgGMFk+e;lF~HX*lD4#T)+RRz4%hRRR00cT#UbPB4aoKZ^y82QqWP9S?f9cc5>p0$VXY^TF z1&9t?>c?B^khOtIt>>Kdxk4`WNX>aCNvrBV8zv^q7+R>}+G^WIJm;>TsM1RZ$ei+m zX%qK1099GQVMU1*5$v({w)m93iL3CuPrkRumy=%o{dgts+DSsUNqsHOdN(4ZKOI%c z?Bc|i7$FVTt@P4+18?LNC0*?mmf0ooO09G_s&6edo)%^|*j+=djx9u(xCwBXE+aP z`sP|peC(O5gHq^_He3Lp0>&tbG-JBGbZBA)<1#PdGv{zU*CNy>r69@eMd=PgqLWMW z{f%+PNeE%Sh;i?xB6wSk-tWLF3FB$Ht6Nc5AM|ImuFkZn=Uu|*%%-Xmuktyx&KFbj zKuh9u(9>cDMcNMaIYe1CqZ+A+Z07uvinKWd>mgsQhi0=fw9Z~nKV)>U z1=P}livWJ#f=)82zwgjN10j#`RqR@yQDar}HY}}#>b6SnMgl!b@%8kAO>l|HYIJD) zm+vHR0gT6aO&;ufrlNhVE>DFL9Jxg<5v48F5rD!XVnKrzP%aa{)yw;F2SM zOH*jF_sP>~0pvcTHF?<0nj`zMp#xI(g+l-O4Ki zto@MgGDb_>%o9s>`1`Z8^oH9Laz5G63^v>&U*aqEFFeL5zX|2+*JAsjp=i|EqJif| zX|c^;43&}#mDb7ZU6DGqBX8i$*crn{)mn59(uw)lAV}?WWc`*(Vm>yYf|DS&FQkf` z#r|SIiLmYhG=hF@}#$p5CO{W%JCIF~AVREx`8vYEmmGG>&E znmj3ayD6Mc+%$^BNO_zAyV@B}1q6jm9YN*5%Fuw&?<`6D3euYv^Gnekuor-v+p4e- zk|<%S7)(8W8grewFB#m2}R3&h}7sW}C)oRe4A zp=It}057TH=gKR&DaYAfs&HHeH zxWoS1oDG>eRPMRu_%?s~b?{$z$(;4~Irpbq+D_`A#x3T)tVLm`1!QV=-FE`blSh9WyP(~|FU zqOr5-vV&8pdrHk2XK@a=lQ@jYV4vy%`LSRh3mAhj;8!EU;J2Bzna}%eij{qqk~1N1 zy_?C&r?}r4{lq@7jl-};{QD4#WvCp-(%lQ;EumtStKJ%DzB%Ja)s=wR*gYH&MfcUT zG~caECo4=LNv9}z{fvU6%H(!EwgY)m@nFKscUZcqrJCr{nnQz49{pYYv4&0Hc!E-< zWjITlJv6qxkJ9eAP$RC9oxNPsJK3(KM#GS10P)AdU=vg;fwn$0_yjyyPL2_@K(X~c zN1=g!_64_k+?y5TnshoSR{`};FEhKV0=AwJ)z{9u`nQSUlmvNWLByoN^C2Yp^)x4J zkypQh84jZnK1;e6>k?KtBOV5zX=|T)>n!I21tGEL>T~)g{H)4vsWML`o@B6qy(ied ziP_3EgWw8DSlqiqT(=B6HkEAY8EEBj)3R8d|u;xqTHxr8@dNq;><$R%ru>PKP)3y#ob7 z$&lZs5i+DO<}a$dBrZ28$lLTxZE*(?QH+Z~WHhOM9uQC*{HOCFf^A*#%vS6CR0wx= 
zKCIQu(~?4!LCs}L5sgWp+tu1zzKYByR&yHdbR+I+jNZ8rlV9lPPRqFJ<6#Des9DzS zYAiaJdVNpBAem28r(207G~txYZ!HSqUvgGeEY|2tLZgWq?ebE#^Ys}TGdze(1@b5N z9adu7YZFC!X*nE;4ZoiFmlo=eVIXRmC>5g=sNvSG*73?2AmK8AS#$KyQlLek~ZWHInZ9eOi9vGJdGeRNk z7&mEwQ~&J)fd66HTE{B@JODsd8hF*W1z9YOstOmO_^X-Yg$XldCsfG}W^{9Iy_wu_}ho$kl36;X19VtyFD;5yV{#)YjE3w*WGyB7&Wpc zSe1fYxJh}(Jk*qQ)S`XsR^e1+cO8pCYX{L(-c6urmZd7N*PBU!9t$j5A*u=s@G)rw zVNj~#XJN3C&ylQjQy+y2+3VrCAwEpJ;`#HCC=FcV3QG)}fa}j*$E{P;TdG2O$C3p~RHh_1 zY{tBe{6Io9y_RS5a|Ryo#V7v_Xc!zEEh^l|@<;gwt#h9KLF~V{(BDY+QWE#;{y0ik zSgZH=R5eReW#dndozt{zHyy-aB$8VX4gI9#59T3#f*?!JTQO@ThIqE!+n932?5D1-=AeeEQPR?bN(=p znb&k56v^VhlG-aLWda3aXC;WjP+AEqaXP{|t$B^)wy1AvV)WM>OZ;-ng&eg& z(}M4qG#0pW;&&ezX2R`rk=j{13nOqY3Uk$NJoAoTyPjC zGSnqpU~&)zu9D@#nDd%-|G*;JrzJ(%O$q!7pFgd_a=On^luC}$@GutmK}8C&usu@kyI#i~357%5kt*l&O|B=;mH> z30&axb~BI_4yb4Y_wI>S_ZUiG-@h?(Zhew+S9 ze{_$;=87((CoXt+rMA8!8DAq6erx^NZ6*k@`K|Q^mYAJv#tmc%o@Y^N4NS(vJs&ji0ZX-d9LM&l=)Ho8RKxl9bKa8NYJo`o5tFr~EsCpoYAn zm63O!W|IOw5(qpA#0Sm~M!8?^B;33lDf=PLY-&OxBjNz}-*TGGTl zA|9=?4EUYP@-87yUhL^FCF6!U;MVdyBMV6+-i}B z$#vx?|4czMF6JTTloi~&$J;GC2^J#Sh=0XoFkP8+XEUu?pfC*-uDx&xdORX0|223) z54M#;md6DK`Z8DFa|1?&2?)3S$mo2nu@z8Gn}e4q^^hpcN`%_~)HrY;gTV1w5-t%_ z5DbZqBBGi}b7`L66X5VhU@gd}{OBTec$)6#&SnxW!xOkw@VY@H>>3R6ka!E(9oM7&H-iq~i+} z23zezC*>e%&VV#1bw#gh<|RrM_*(r!&ygsiJdVXveod7^2f5*L+Z z>Fu_G}^WV6@0CO^AFC2vvI*S$!ma?0YG+ zVs!w5Yp&W7?p~n<5vy0PIMxckd-6dHGSXjBaBGH3eFZ3W%EVID262f{hW0W>Ilg_OyeLrB`V&^s9!3amy2Gj?Hux~LQ&ruFAjI{#hyZfy_FfZ+ zTG!c{$&vkIB)9c{9nvMi+Y5UnJeIwW$=m~L$u1YlMo+@}y3XgG`*m|o{R;^p?R?=| zCLN${({|8T8G1oiArGls>$qJUW*Bz1!5Ujpcr0K4ZO_KQPZR2olo`+*fSPqdp|ilZ z)vE}E@#GA52_ink<@MfL6$VI-xG1APS41;@l$aTm-hKwcQuD0ZjVYqZ{82npRRago z@1CK*09);*tigJDyT&u8FrmUXLi1jA!%W)>`yc6w^T8H-`7A0&VLvnrzFJ{_6VN`G zg!Y(-Ba593lU)m@p4J+aLKMV&=;Ww-)GXXRb^Ew2c3N;#a?^%yf6S_d;OKLHH0fOn ztcoTH!0YO5V1^ew=kHlO&os*2fMx+hoc^P?rUuwJ`wipCbm-P+Y0qRN#|$(Zn%9!mZnlgsK4u_QFf@` zxa3`C?Iu0Fmp`A#)0pz)VUXAvBXQuMz@Masr$%YP{~9RO?lh_5LO%m1aGAAeGQxwt 
zXFaUpl>2ZUz5}(gC=5W<-Jnl1G4{&GbtRbur~wy67#{1d`&>aQ{TV!AqikoOkMbPQ z;ra(FP=_BYCX!KM1>zc2r*3-8ij`WV-Dtn?$~Refid zVNXFomKd*&dBA; ztbt6YS@WS(vYw9_1Z2cRT5N7NVD5Dx+oY=y57^jb>BwXOOC5(-kkBnePXbJbJmjK) zc;N)Az%fMh-s@$O?BFVj#*74V8}`*ugPxKs)9Q~`acrkuBWh%xjAG&ziwL4dUnPoM zHp7jC8{bDNzzP`@hz)=4JZqbxBh$adzwpM2!A^2=IIBEu4uok)9p4%0p^Fx=Z+DV; zA`OBXM9Kar$CLjKhl=1;-t!XD+_!@kd{JNpQe10Bk@#q+)$$@=028{?L->*3iBcvv zCr&&Dw}_4rO_NW1i)1*l&m`Ge6gV-S=$8Z~BbkR+$KoM%_b+w#$R+LY4H8-sW!}j= zHmKuwV{O(T+}0=+^q~EGB^+K|xR$EQv#{r*rE43l_3xj43pWhEzJCU-yOA6C%bow2 z^L%|10#T>o>rA+ttFz3rq)E4aO7mF{rX6$u7=chaE!_s6M_0U2*E7;%Wl1nzzhJF_ zH0d0}G2W8d2v7=BNz#>A2QSQMlQqPvlyBT=U)EyzP)NF^xF~+_FNx&RYcHa3i9=o} z3}zMs_h2hX+{)ebk8Tb=SP~t9VV^)8tUJVlam#LTVW@)@-V-kRX;+5Qj*YI1TpsN` ze;|IVP4v9p=tjqi!x)X)*JCLh;Kz&?vpw7U%&B)#Ct)KzhBP@$Uo z_8W;;j;?#DDy@T)$N5<1lcIjZ^xBLBC@c%Ch8^#E*qzH@c8JK3Zj(?ue}MCbrokkx zb^o-auBkU>Y3^@zrrwsrjk;&32qmyJ*ff!B04gZQf4~iBt(;weF#xcNZmfSr`N;P- zNNb`n`d}i}Y&7E=;PjOW1PR`qP|~C2&Krw5)xY5<;U?x^=i+~m1>g6n?Z01j zOBZn=6V{_-@Xte`OcO{q)U&YFRk4$EQ`h--$LcaX$&3>8cTfBL)F2L`z%&T;8I1T# zqPDlY0lgNh0RL;w+Eb3a!DsD`R^H86M3?_gF`+KZ>)|wUX{0ZEykbkz0`t_t7B@D) zG`%ihU|ru!Ajo?WaHiC5Dt=tW9)wSKtC-RX5|keczY<|jaR3w=QVkvoA#>HrbP}U3 z&R0e_FX8vB=v3wsVY?fmOy7ru?`WRDhU*#A8^ zng2oE3-ezBz63BREZ82ogxfR>Bs5lz|0)?=3@Sj~na{BB%PPA_zqV zQrZn!#ufhwVzt9lo0hx9@0qhTwt+4nJ*w)6`Y@iZAd2Q$%LSlH8~=upYDE4rmmh6^ zAiw&2D*nc;_$CTh+4&$lpGC4D+Z8NT6Ldwnt8Y|DdsiK8TeqCn`~UO{-}3sohA)Dh z=tf4AFv=bEwyCwuU0Vd%5#UJ5{k+b-TQoWjpWB{hRYhak1sGY(H~N{nmK(F*RK|;3 ziXrjOFjfrRCCPLC=ji~y*&qV@`O77m3_#zQ2UlitzocV)Ggjh<4B zH_{7x<3hNCB2>dl5V|=!UG_;r*0(!#w&Z6v=E*L=X6k!8_U5!7)_in4LTy@@k3!3K zcpv=aSCE|@ABLPnNvj!j4i-)mYrm*pa8*m8Cn+aY=U)a0oPlACAx7fT1fgwFcki$z zlR5x@SG@n?l+j@4-LVEXC9AbP%vq-J#~c>z!q}XRpO9G)9Jst_ixxV_seFm!zCFay zaiQ_$wib*AueuDc`Drhyyrp)xkbkgDHT+wq1)iq}UZvb;Ecl(L5O!$@+{F!R>@bxX zcns;J_52q6L&j%YZ~XU>D}NXjk13YSHVM6vR*-S#6A(!m*73iI1?PW3 z^5D!|oc~o;t$G6hBLL_mWm?}LfFH=3J66%P8kbX|Wk@Y330g>?P%ioo-kE-*BKRO{ zqiE88I~t$;JAh1}t(kn3((iCaU|>Lg@@}EoF&A4yS7w>8b#2)o{QB{HBgW|1&|ul& 
zIrh0EUZ3hDp^10fMKzs863|siB&#GSVvfgp(3-Xk%v*#>#)4*EXya?|SOglKC2e{= z7W=k@8|R@)j4%sc(sz>AP3w36sjQ0VmGx~ahE4smIpgdrt+Mr|tCGO@OojYOx1MjY zbhy)$dqg0^KwYpTw*Y*~XOW-r6bU;Gzn$GJ?`$F&ey_;@fK_CFNW|FePtFp-=uH=ja6fW=+A0(9Pn4of!kT(9SY4W9 zpq-qj@lHZ)Iu8C+P)^?H^rfC(WPcZnS?sgyQ(6i^P%KX@bk7e34T8`IxF+=Oy@YNq~?oN;6uDK)E*NIz9t$68lsLlk7b2hs~nVwo1O&W>1zQxnAMbaL(qNfy?Uvo?| zstk%t<(&8Osj#&=H0T^@tC-rilP$&PF|pw9;w>q`YU`@F+=m$A=HpO5MBlURw*Hw= z1zQ67YS@d^a*}7%e|#hC>=`L~@=)IO%g?%w3hlOj6t5Bfb@M)M=<@IJW+cI%>`r`_ zX#BzF)2MKk)irK!dZEsj?Fr7$Jg%k|T<5z0zxx`5O<1JC>CIzg?Bstt+BQREx5Fd< z&(+&5lfb0}*W9Bw{&&=!8B5klB+Ub*29ZB-<~F7Hgh9X&o@Tu4#Ri)+Zrv0yhmZTY z#S?Y;RMAeqH+KK|Iy}?hIm}l z;_-r4?TM}$-Kor`IjR#4Nv+JX%U5iH8;jCQq$A7s%iG4}7UjMd64`O(4DrpS#;@9l zUm`enGdkamv!HpG!zdk8&62pfiq=xJ;BY{H^+~P8YB$17n)Ou~>n@V(Xc7@{Q!h3l zISD*U8maFiYuVZV1eK%h?4+IZL|T_fm9#=DO0`m5a+*jb1a>82s3BZRF6#i#2(e)O zxA9niuLT?$eQfh@%*SYOSF{jtU|H^A|mme;-uG8 zSTkXv_>|v#kP7Z6N!005uLVJx7MgS=31<&*we?#XJH6hS9f*|iOeHg&LREd|g$NE= zAwln=D3y>nC6>VQod($pE8&16{cx_wg_k@~od<t>hO+z zQf_*gIGMlo;@gkS6kgc`gQJ1~)$lx!Y%m8RYM%-39#sR>T_QE(r#YZkp2hWv`7+Le zsjQj@I<4Xk376rM?*)~t)p_N)x~hirvkDXzQ-Ft5Djq-S1pz(&pHwUC=KzAlsguw` zFo(Ceu+ms@!0#9gnFzi%sLfIRPa!5ZTEiumKA-3E7e2@u@f z-2w!6cYoY-?tSmaTWfkxS4(xT)qBtE{<^9@@g@Fx>>z!4HXmatKaM|1x%~7w+<*Ko zI^K39YXrTxamJtHenc>e%dvjNnY(!xl)uGY?9s%c3Eln9yq~|>H;E8?+EMk73$%sB z&xNJny+GQp66tXhzc)z-Jq4}5>0bpDCCrunTW@XB_CaKT0bc}7r<}2)_Ca?Yx~EpR z#!h|rt-sWxN2NeM)D`w+&k7NvLsDMnwoXk~Z6`QPB7%$=AE@AeL^TMy0diX(BGy5b05iRW0Pc(40_?H ze>X*g7tXB)CO@dmyj3kvrYEAeF9>dYhKU3PfX~L#H99FjLp{hh`y^1%Nf4zF8aM_t z#n7-tIiISN(e`u+WyR3LlSIPZEB~Eeuze3&uQVa`a!w=d8 zfV4q955M>^ITzxdKcS$tNL3^Pdbjb0sQryeYQ3vuAFT{euM|G+fkp|`x_(gV4i<+< zcX_)0rF{&C6>gDS1vM+rFKufF?cb;|r9Yv-o>f)r#AB61;uc+NLkk92%kzxA?QI>x zEU0B056^vS^s$rVL!}SFi!eC=IlkeVvm33n(Eb=5C$S+^0!Wmo4{)wroA1!ojD zx;AQ|{HeH^K_XKhW@3&QSv&a5d1lJFN=at-Trz)g2GN)0W!$uhWSkOS?QBhe3RWpT 
zUWvoGf(_7gu8h+z7Rr0K&|?o4Qyhp4e7Jn`u-IkPYM{N!?1YU&(`t8Kjt&XPE}5U87oNStiKff1Cy85xC+@h(uHtHaIyQG!>J0sW;d}NS&F~bWGJ>0!upb#n4KR9aFA1q9^Zisb?)?KKamE*+SV#eeVD{Utp z{^oc2YBSt7*c1S-&tsrd2Pu_#disXJ@=~Vg%suXZ^>f*hI~h3n<0m06@}dzCw=C!q zug|ts9yode%N0;w?zRs&i=mGELM|PAWjWkB_q&AVseX-Pvswxtm@b+*Rxzn>)OX(g zZuLTzv940vU}G4|`Xqsf0)g)c_>&5uI4pO#_^nOs{DS2oSR#zDgp1T7=rc982{_fa zWV=(3VExFAZ-{k!??K9TJgC?1``$$&D{s1G^z7&Nz+HHOPV}GOE9@_;@c$q5$Maw4 z54fl$=Q7WN+V!aE`EQPD;F`Qy1_6K$3(1Z+5wAPqx}YVK&6AjL7J3MjYNl9k*r(#R zNyyBPa+x|!USIcn(<#7)s4gZ(jIG=Qg4?h%0;%v9C{TT;Gm2N8clUI09G}L zrZq!RAw zIQxMImTR(y+Z5rPRqcTqRXtbXipC_=-wDT&!`wkug8;RoS*R*;kC+I=7F-#WKiCYC zTu5z}@N7^h9sQlBj59ZpC_faEGl@aAuNtD${iw9q491$#W8!18enN4}%Y6qcgsbF> ztrEXGP!us@l``LoGOkR#E7tO2fOqXMp%YIoe`4dMts!8;O=+4r12SO?$4W~7dC{CA zuAGUC@@HqWeURw|F)*bmIDiKj4_*d~dr+P`gZp!_`cT}Eu2oxkq!6 zH*vK@3G{RMxNbix9}iwODh(FakyBjyD&`yL`1C^<%G~EoDpmB%ik7QPI7BD&@q29Q z1sycfaws;{O{ozzeIKzbLwbHKu91x8R5}d(d$KSC#9uK41e?2Vm9=K+1$`fWu#=LI zA*`=p^f>*L;`wE^7F(E>3*>rIg4$rKF2wRH4QZGcU2V$esX1;H;ZE*i(c1&IoXM|r zM*{GFiUhYsiQq`M-nTi4Dr%=pniY#b%@~bD|04)5h?WH(%IDg;`CcAcj;8g=>_q*9 zGgxt>KqY5Z$Nip^*^DLb6_Zq|bjZdAQX(c<3(^!YHc2Z%TMJLZ1`g($qb$|-X6#(s zJT>E${sUdcc5wfbCv#i-vb@yDKKc$rprZsgKQ8ATg#s5FkNQrPmZWG7=VA;^bM~F_ zH$4SZ%{MwG$mWC5J~QO!titqPEX?{-G>8(t)_O1(oCa} zj)XYrCywZ87Emd@?X^rPLPUs`7vUieXMBDijRCgp2P-!*A}nl>Vf0Tm1=l8i50DYu zg!%uokHZ9K|MHNeM&aiEe_?-C&aZHO392-piWP8V;0>2p0}I5SN((stLLEk4^)^6T zn?lsQnzS0B)NqTPp0dj-#o2uShLmwsfDqYoY696q40)_9UfaBnlJ1nfMrIY$adND+ zY6N%Rq-T-Nq4+Y{vh`A(WsX{6HFiXNFk)7b&O69fgLNl|laBYUT0NrpQVk}wYPf-T zInOy2C`fA?h{%FZ8#5wGeo7>nml%V_7Su3h38kR%}KpIztKmo8 zzSS;?iWx|mi(qIm!>cR5RSH~$BLj2mE1ReN^*kPl6d05T&o=`?gCMMPkVLFQ1sVBb z6R?}HTDA^GlK(c{MMGi1JlKnul#&7#p4^8?R97Tjw|lDzI2`;GaL`=AC4*O#TD0!8 zsR1QgU7%7VAYQh6k>0v!_ zs{X}jH}dO7Q#ak5@>1_MSA$&kmo{=8)(ofCoOJth*;p!Kw}stYiz-Y9!t~=bhNZDb zzMVEp_#dnzlTG`ch(n*)eZNBbZRmj->fTAAR;iFWxmRVs$b0AIR8_HjFJCG6vmT^i z_C%IV@$*88TIQnI)!YA_Kl=sP%XhSZAZf5jmJ*n~9HKOkWAUdBk+5@$2{TYcHEdW8 
zZ^h#053-fRqE^(?ti=cwvK1{NlQ)=NXKq1za?7nWM|XNEyG?GYEZVq;*A*l@G>leE zIgN3Q2H0qHQQcGc-UPS1u)8y+Qh}>;eTctuz8@A$-6zLT}>wH$+t^N$YX}tdleOIbQ6F0A~%kP6HUpoR(hSa!hd^g5bEH z3r-hb)CzxoQjipyAm_5sm~DKTl>>si_wT#(3~D{&#(U4k_l?F|eMs8L65@JFMC;5M zOZ$RK(hb)AsOtFz(}RBl&%rj=!JYgecl^!f5H?eOyz~U1rjcO+RisrNexA8*vFGDNd9!4S)*V&_F{-Ts7TP!cxQ82jW%}2P zwmW6|A*K7p{^v6fpxSeDK%fl!@%>eFGyo%9=})gwjQieL4{(#3kvia8og7V^{x%Fz zKmIm-f`{f%doB)1_WO?eJKwk1fy&tfpaX%xpbc|5MWIi_5^ja2K#sQ-W(x)(P931YjM7?&_3kk zxG0@T9cDoBQXMq2Y;)O9+u6qrxUoOP;#2#uw?*XwI`4&&Tbd zq%DgRw7BRAt*GIU8L@J^;q23tNEBz;=*{)sl6m)90La!6>gGKk--WrVDPQ=JAZi*BKMS4GmGE2S;H3E zPxh9aAtTI^RV&+rzr6#5@;UVX=YTs|yfpuB{E|+nbKCzLzl;{z z-R5ij|LA2;hHm8f3S#pg3GM4}`~Uv&*htw~Sy@4-ijbHPUlxA<34E&bg>?eeUSGRm zi=Am{>44qybanpL+dCjF zNh*G5(xg;lgc_%?1E%%6IW-lS*3f_jpy=fYx0VkSAqT5O0YHk!LkC3*$3v3R@vxfr zV4DRJLY0CAB=}>%CjI@UO^U5V$w&-~iX#b~_y-n|Ey(JJ5)x~DpjHB*juOknV}6k6 zwZL^Xf%he>9@GSK%P&>`fqKm>oyQe*i-hNV0SZ6b{i4)Zw}n?5mzX4P+1$OStJ1E zy-dFC4TirRu$~N|KmZs*Z~!4w49NqIy)K$XkB3b-Z3-kSHY8X@FUtg5pHW|b|Evf< zQ*taEK3Q@%SHJ6pEO+Jq`)E!WEc?P)i@;2+z4qB!cB}_9%Xz$ z5jv~r-c|QNym*gK?j5C@KSH2T2pp9Y&QBsZC@FuD0#@UA8Iwdbi8QF&Kf*l)h+e{x zW9TBNbvsTxZ@3Cuq*L9-UCJ%N@tMzag#|L(-5SvYdrlso@tL{6TKUj4J$#%4CaolfUgbP-*gTxl;15M!?0)( zz98rTmON;GJFsXVDZuop7w1>MY5R3o0s2H{8NrhK1lP2QdoFgc_<0zeAq9dJTb$Sf0 ztX1?%Ycv2f!2!qRzz9Uc@E`p0w4fV6+ttLkErT|aX21qSg01qSXU8#t^9c5A3vbKq zziZZ1BN#y>9Bn~=5BC@QMxGh=Qkb(Tv}1U7?*s+|CxJjdEAlb-1Y>1;42g@6frpMy z_^iJH1{W61{)poaBdr93q>Zq3UN0>S6Nfynbw>g`Lqe0l?gKBl@*o@$(SRU%<=uz+ z(9j`dt-j=ZveakJiC_W1Lwn58jN~q&&`Im_{DviAG!v`~Cw%sK-G5XC+rKP)4{9o= z#6%038v>`v5>#Pl&>&%9nTh%sP3-v1jYExLo#J5mF$W>c0znalbT4MM8yt_p`8sZL zHK-5V?;GQ}GsK!(=O>y=MKbr$-}uw}vJeho<^KIcR&Q!)KE5zJ!SII#|1FB*ez4me zbDS+Z9bYLe-$ZVbNtOv$8XiShk62#RqZ#g+_5q1>W{VTY{ zYw;ydcwI)V%+rL!nt>m`>zVZQL};p6XF@~c;rC3Ky_1bOYZLky9Pp+ zt+#}K5j(2+wGryaW3Y9|;=HTz{zYJ-EBYc|&iYCWM7ifb z!4TNw*1Yi&vN%c{bZ_&-ew_&}fo(C8hFLBwfEtkgX?frPgS3Y2~~&NF%dNJCl#0EU+ZHSEQ93nFdc(NVb1!P>;vj z^51nfG)LzTEJ}VlKk}@1XI0XX)J$|&#PZF138v%ARk2c@blMKQW3q=R%|z|*^D*W@ 
znf7X8Wr_qB%gL~u;vL=n=y0f`K%Z~V^NzuK?W^#25~>SZ*>DN(va{_VU?;Cq#&Jb* zh+OgLZlQ#CY!Tx07>>CK6<;WY>@H{LY$kDdK||rX&~M^t(HLwx9kU!rQ}Xp68$Wcp zV{N1BB5TG?A43+0djDc6DXqyry5#`Gqq!^i{jj>8kNecp=e_K8N8IL#RB6$MP71r< zBeFlfncm%q1-8GHkAP3l0PXH{;|P`%MJ;7H*sUc7xV<|U@Wbx3>hrw&NJ5=cneWpB zuNis`w&ag(`U(w;x=|Mx1v3f!apald?R8Bpdw^7xxkQ7q9It?4R$hq#l7zr zPy?IfTfdR#mj!9*Jd)WOE-p1$-!C|(=eI6;MtCiv#hKVBPaUnY0<}s@g5CVeo9e70q7Z_opue(v&oOVTC%Cl3CsRq-g}Z{>m<$iTr9y2Fd>lwQ#zI6#AY= z&@6Q!QPShopOSN``#0%?C>v!hPm05V0{J|vUeN{Phcp#+AdtJs7Pa$T9Vz);8dMn0I}K}t&{A)c2xOmPOitl%}g$2 zd8^w=X}M)8V%c!sk}_H|F+6MbY{6A4npy7=dju%gHQZw$M>nDBFGCsEj)rhY#}u!Q z*h`Fn=^}ZtciCukrxQmqclWKoQz{9Te71GVl1#SD4ms1cfxxSz2nGeC#kRk=(X_og?J?$tbJs`lxY^p9^w5%r{Fb86 zEV+{KilbU1VaTBYOz#_p5X}Rs8!YGVAyj)+;MZ5@+{fvPZQ^xeXRymxU6@+ert`V9 zh_0NKv`p3nZn03V9w-jusca-P3ZLvnM|l0))Si|51@sv%qbjVs;A}77XqC~*hsk=- zCds@CF@*ze>4kQ#7C%0U2lUxi>t4j$WroZ?l%|CKQhWP$Q!B}iH5L3?%m8&S#Kz@= zAOC6R=yK|*BGYCQYu&fK(sxm4)g!k2S?RP=S-E(f>7~+=*?bZ^h|?&~wsw)BX5PW) zt>l(d1dM2UxuXg1sF-LW<7ss}k+<=6P4$5RW3Bm4oENEP&8L^P39@xx9uh|WNbe+GzXihP%RI}wIuH|o5 z(H9-0ae3cu5n-7g+^qfvS2|#!moPkrRB*sD{w9B{fkgS z52#d6R0C9{KQP(wozWY)w9pI}L=mc8^^uj@cF&X;Y8owwT&J&lj zapnb@KW2=?4*NAQenoujEGkMng^IP806ALJU>F9P%vAMQblYQU%5&%+9{qqHZG%La zvlq1;igcX>l1n;Mm$km-ZR6!1G7?WUiuI<$s*xFb=7PnWE1AjG2kxKlgQ+Wrie{RH zBpr0Ty~iUq(J5SpehUBQ(C{dc?;TEFk65^QR>rO32F%w^A{gFL`HKhFZc^4TfK9A_ z<7sHNl%Yg6eaL(BH`&td73B!*v1`a7@F6uu{-7((XOt7dcK< zGJdK}WB^0RH4&rF6=@aoCy8DcHdG8$cFCUj?CMO$KJNZh2`s&F@lkDDOTg~CyVO)x zZ8*Z;9toScZ^!6FZ3(%l**s6l11;}4=#Xol9V?3!p6#yINdw$VGWLe~hkp-1dLvlm z3nrw&57K)5MQ*QnK%E=c#0Z<;FeQp&uYEmFxGZZI!e^)(T60VHj=XMT<`sa0%Y8b=#G48<>Jx8;`%^ zLgLxIs@PKZr`nPib(C0ty)tugp5**wvMG?_m&L5im$4FOjR*<0*x}mGQ=97?TjY;F zoh7_Ag)c8Nq>TAp?|sT!@|Z1ELdvOs+*S_x_C#?y#J1>g^qfmxfm4GWt2nG$OYUBI zY*%IB%`Q8htpJeDTSUw^dY~ee>KFxIOyRwe&%$SjljTE6v@%D2kiENg| zMiMbe^@WeFH|tB)ne`%QC26cQ`n$sP<|1|m(D;(vrNM4I6s&OV5A#Dt$L}qx zkMc5-P# zFt)0UHeYeAN#rNm5j={6c693uqqDwArCa`5D5R;WlpoTpkeq5NO>3*|j#zgvP!V15 
zE=w(|B4ZQXte)-EeE4hYJ`hPQm4Upf)-HYSWtV2E8^1*HHugogddd?&Kans&`8MmnP+M#cwV7N#?UN ztRcqnPe$ESsq8h}#EAzu&g(@De#>J#`bUL%>SE@rPE9Y9nvWDtg{p+mwociw zdrU`GcXaTX{E0Kd;O$w=+Yk970?qG+^WyiYPz;5>zPiRb?yzNQDIbkof9_#13F+;gJE@LV_NpzpB2pqoG| zD143U0GMk@aRn$rf(Bi_+&)qL0L?8mo&a$O(7wl4U+7IDMy?)x=o zl7O#l(##AsDlgmrcdEk4!^-<#E?RAu1?gtL$J&`)R1B?FW;7y<0BwA$W|)j(IO4J9 zx>NVoF@5zCW54$-O3Grj;Xfz&O49f?jMx zD^p4}QDbp`IIJp%YPu|JB_{~FhC0zm@=|ru zzp|9V`fO>k&_Ym%=__Er^}w1&8bu2_4`Gq<{F@H`hJR)%fd*LvT_jD$&)*1RLuZ22 z+Jdc7*VC2U1A>$-USQee1>%x1P8}EpBQ6p00?3o^R!}5qAQiL1(8kDTi6yPlP;FR5 zD`8I|0#(+_5)guMuOVeff5Xz=HAX^#HEJ=VdPcC2my)8uq4-09n}Wk@_WT+qSbuu9)#U7&gb#^5qw55Tmo(-059t&c>5BKxpx(O&V& z@QCs)>S6e4Bl2`@fVWzO+7JsJJ*7SK2{=VdWTFK866kp_wml2jHo^@mcbFE+Fh)0| z9L076_*IqsP2h|(NRp&6I{pgY{_?$DQBp$hceOwfWz7Bi!P3*`LF?;$A66Zv%IDuS zm+HC0mW$)Lih|fPr$OouyR(48eJ6&hgyFn%f2K0h1>x2>V8DY>tO(@~o}qWL`ipbc zDm#W|SZIOj<>9hEY?~e2z3K^8JDl=ZePexp*1Vn>MSxERXjV!uLY5p_1>6Vs*vP(+ zV!}ki-*3o<0dfj9BjPFkn886JH4%`1y8~+nw-9u=;6PCI(_e6t47T$;o0L>kB5w*FYT&#I$>8o1A;{{##HdW0oL!p6+WN}Tj^k`GW z&FQ=}fmO*|Rw~L2x9-*D+4+`Ddu-l>62;5mupmS`x<=mHIWe%@PvPdkv!bZd_`q?W z zBuZ0a29q;9f%81iU zqXqdrBU`FM=Snc~hr)3KLQj9B3DOR>x$rYB4kw4huiqvQR3>klxds`H7{_@r28iWT zWyUj4GtTwq$-zgeE3(P=QVWkK1vt*_iL3>&r5}>bCwYVFA^@1crv-2)p2ot`6$m_T83bHsOgk-?swKK7 z_yK^;TAOsQjjJ+)#hq7tZUHaEQ=YnM*>1EXI}srsU~H6hLL@HoA50J#%Bfl~A@W4Z z3=hg}u(l(GZ{q@rtE%!4ARKBD_f=1>X8`4NOzXT5DlJAGxmdXQu+e8 z#G}&bpS+U*F|OP=3vPbk=V^iEXwFZ9p7x5rCa+=ToQJ$`!wNZQ$F;yWk;LG&B_pTv zP39Mrc~#n>O=8PXBip$a-0Y6Hw`Af$8>M)6@t)To#4)tkpVrVfV{RmjqdNKg*`&r{2@U(&sVZmMTJe{6^9Ny6qt%@1x7Si{mPMF5Ml|^pl$=}8lg4pYg zzgg1YQJ%~PM2c9%B`k7ENO|Q#akcRA8%UEvn}xjYy|>0CxIU4Rr?hE@e1X=}b zcuGKJfiUNdcxiY4|w~S(h5XF%$WD0EYNZjEDboM$R!? 
zf8+OLEWDLbPG`@8M~;XD|Ur%o($=T&-P(iJJs@UY*L&7g`MFtm{MLm4x_ z%$t}PRBJUlzeA6gll`g;cMNOZs-09g>Vm5xK*_XM?i_Dq(l3RkCPq+QJeou0S&szE zaxBPU)+F%EyMhe=tM@^YzsxF^1@dbZ-gVobx9xB7KuMSoI(Ip}`HzU`WRb2#B(9_) zO8;>GWG_xAm()bUl8ZsnQwMw<&U7ygW4u#%^TPyj!T=dg&k!kKQe<+M3$Fb7jA=CpGCLw zu?~s$6yb%!L*^G0*Gw{zqhs7B054FhUST93);qM^QYqnWMAr}n3B>^LNt9x&U4`}5 zxoSefM{ez4rlnLP1a2%d_@S1sS8IfGmLUnP2#PaIM_sbd zRKJ?i@v&a(KJVg+P{SV;xfs4-*76$uViZ@*<5P1!XN)F7ZES13d7NB=NHC;M71Pn* z)3}sX+i~*0h*3m2pV@9Kli!@JBYe5eLQlN-mLacYXfdc)wc{Sr?QP!nbjLWzQ)Dpk zH-Nk0xfk+3YRG5gl;rvrk{=3+XueoNOdMp%DeVm>NUR(@6)Xwe3Xwgr32Bp?%ZWL> zP!a2bzEQF)px~s=m8Y9tbzk_B>1Zwar&=u9sSE|B$jHok#9aq2AXl^%0^qiQzXbwtxK+-6g@zjNeXzY zx!CX|^?ZR{`(cD=nl*IwwvW~sw>33YnM=a&cXyGinA4S|A5)_8S1?_EYT3m+5VH9j zEI|tfYLQEqNf>hn(Sh7=y>yY9i{!_#*Zuw*<>x;>I)e1MAm%a9e&vnZvo0^4)cDPB ze)?&Pq*ctZ%1UJ~0}XRd0TM>0aYf*MsnfHyia|1He}DOO+(svM{FC;RhI(I}m(g=zkIW`-(C4!71WUMn)kNz7&cAP zLQ^SE^Glpp{LLm*=>cynzf{12J6Ynd|BZ|nDBe%$8Xc&5K=E;|asen;np=g%?HEW@Bz4&IDvjfSZ70C1nAP%Z=+9l-|y2yR{M;=1zZ@Jbh=3R-bou z**JH0*>Ha<)Y<7zx|X?UOV#Lv~-_iU?TIw%}{ z_H+`!^ZmI9erRW}@QeF-qoG>UaP8&hyK0#QTS*CUxpfk&KBun4+X)Ey5{sWca!!}F zA=P`)*?xO=ep+P2@cwfRx*Vz2xkWTAma1+tyWE>DoU;H7OB@4u@Z}E7u7iZYKZo36wQSM?f{Kt0(IzB8YtB$leq5kL2mO}mU9)gY*Ary) z2_-4WD=oI0M$v&P$tEJyf?ue;^@RhxzXLr$Y5HV3bGC>eMv13^?3?v&?=h}u;$(mf zr<6bbZ4;9MuTXXs6vN_T+OfV`uJP>PlbldEp5hdD2wP=B>BjiRo6qG(yGN~U=c*m5 zy)K}>6uT{+SP1x>I_BHBCal0)#vRN}x@zvgjPUufn%Q!h(X;l`IPQk+O^xs!)(`u0 z@(j8&fiK<`)Y{4L(wS?R#m`@!zHV~$V`hXfd~y!(G3LYW;^#Sjm>KJVGUm)u58^V*KCHLf6RJ~hf7F{te_$eyB#sy=pPuvg_`xPBYAR1&6f*Sa$gZGvWtem+B3FPQZa ze5D^mF65=|`U%fpTfOvjYo#JT&4TpKkrN9tq*dFctr=4Fzc{ggc)r=Q?ZbepI#~3= z8R@Kg&Xn>X>3Awe*lQ-mep3&Y#``U^oqzh+;{*(npLQfe{XX&1S#_I9%}m@0^q-h2 zX`~*XJd89_COzqS9DV|?id~&T)8~@#^D8LY+M*Zom&OdbR}E+5Mdv$BOAu;nwIRNv zJe5PP!l}BSSL+5*c0Hc$(+i@5)ivXpC;_BZZ0pRh^O1-+kh0ml8a2f~$3xRTJnoz{ z{ekl)90FlSA9nl)JeBme;6TSR-zza{oOfb=)UZIdT!poLJw9Y@>qmv_(3Jx_2 z0Wa5scXj-vD4#f{1J9nV@$Cm-zr;WNm~kG~Yab1BJLEH(CW9pA7~X#G3r~(sgLXbM 
z?5kDF98^lk7}vA|+?TV@TX(tkdG(x@Jb~}8^Np*#46x`#z!#_wq#vIJPkCnEaVj*1 zxu=ROxlF%nuC2?a{wTILY?4>cG*o#qvc)LD5v0nA@&yd}kgK{Gq>z%*&W4uyN|4s` zSQ{>{EYc*-Xw5Vk5r_!Kch%Z=0nX2kw52F*W9SPegVn@y6%<{%=Y4ta4>L5C(t(M~ z4~>$wTrDIX=(SodT?V<~Vm2KIG+n<^HwB^`rsfcSh8j?CFazK*U=&W)xe;tP*w)XI(Wk)&EJZ`2|6<;Gu96O2{?}KtOMq6%64>Gk=Ip}d_CBir$SJ;`{X0ey64P+Clx@& zB)D8G{UMKzYOdYdgnB`-$zqoe{l!%BTYKS6sFRR8FQ_PC4pxD{1amm6p*3-E(H$zJ z9@Eb+y4>k{$E|Zo8&fKTb6Xc!ya}CMz(v9Yv3(5qtAiK!`3kx*@Y=_jZ53 zpr;#IRNrvgFEenOM{1fk9(eGx!0M>)wwvVOmSHeY7RX{^!D1ox(xeL*% z_1u+S$OyrDdhfjUk+x{9Muhqge*v!K-G>My-S8&7dV3s`{*Qr+D=+1(vl=6Y&@v7B zm%ItOMSy4H&#kIE83Pd=KrAFXRZnMbts_u+(<<(Cjdsn_esPGRV z)nYzGKU|>a5DEA&6X&a;w!HT`d5|Bs8nOcEvw3BS=xd^xyae3SIlU=!rsfP6(y)yg zYtpdPsnp~Th4pprL>1!rGM6#+(l2SDyAZ8;%MUPDsG53ATMN;+7uy{p(-~CwQ*&*_ zEQzzYQ`W)P-U}*_auh0U>G>ve54wIXcoNRT%9_9?490rnRqr(v#C&@=+9gYJ;SajB zoKK-njTVBD_5qq-I?bS~lyd&i=`U&3Z?lw&2DaZ`ahhZ#F>2>Skr$$q$dx8i>EN~b3C_#jQf!AWNxFMpR28r8oowYWp*`Bi1iXM2?ZC}XKLPMnbM z8!2-&PvNsdN;=|tL*R6jx4j;oe#-Pc#z!&iFK+eGp$CrkI+DT|#4rE$dGeUS=Q1|N zAW7|38{YEVwOtEHog6ITy!(fr1rQmTc;nHCh;iA6YL9iCH$cD7ky}o4P3}f75E5xO zN8;Bc&?2X8n{5I^|98B3V?qGFd~aqAxsb;ZbS2Nt|1sRTx_YiTKI`LE`Hw&UX}Hkb z&5gkUs{-i4Y`^_j?Sq?4Vv7~D5!0EfBT8u`JbX)y@m)%k) znWWnSG=fa>iUh;crMZwwu1*+OF-N;FM5tkRYzarbngje^C*C58#y4U-_tWjZFN(9% zQ8xcrC*BuHW4WnTaM!AdVk^XE*O9uGqc{#r#=1ZEr*QpWlZFiTBHYye_RUDH3#w{W z*-A_K(B$4q>INK~;LJPmt-1t5`HloSF9`-z|Alfx@UVd>GXa6ntUN5-?4YVl0P+83 zr!xW7cwF$zlGe^HuB4p2tp8^~Ny^6gMYQzgG60Ox8IPrCLF<^-yei}RN5u9ljgAV1 zyEJ$u7D_@b78bC8iajHEMnv@XIA!D_X(qub06NaF!^XoiGncez3O(zX&ttNxpFT0M zdsx-8*-`7Dr(LsjTs!-Pwu~-Vh|oolh>&=OJjz6INum~sz`IL4ah#;DhGSFXKAZJq zh69}CO9+qpN3lv0vs1KkhF_RulgB}Y)7e64;?DZ}A+H`RD?jGMq^3Do9!>oxTtA_% za_MM0CL2?r9P75I-k62PExECXJlxc(K5aXpsGpJxt(1f3S!mWjQ2s=5hT(y3&7%FZ zWQe9N2SBrEYYZ3^4iR1!PZiK#zSgF6l)@mf0NrZ~|$w4k7 z=g0}ATM%A3cct#ixy__oi_LC3TC-%9Cg(IskYHoq;?GF4n6CS$vlTmrOWk4J{520> zZycKLD7B z@3=VEzicSP|FM7Ee;@U_tO@k&^7C9~dN=ZVELB&IaNzx#N$}S^PAM z!Lg(#svfOMs9LRpTe>76{=EC>e?7z-`F`~p=Fp{mrLcva`L*brww>BSLhr627(YLj 
zGi%8vQ^CgBhV-Cv0l}wpwPJ8P=RgE4G98QKuQ7zBf9)890deFCv#% zx-a<9qB+qiX~zUj%R-H|kIf|sQCzl>nYQ2A@2LLdw2ZGOIxE?-EwPWyL5IzGPShHL z_q#5lMEdc#qt+>J)iSAZA#lprysnXO3DI3Ao$(!vDas_u%nz|)dkkTG6?^*lZN_wQ zS=qy%);>2*g3I06zzCPm%ea4OVIOEpy^N;;$D{9zEXUj(`#nrOI(z33i54w2ojJ33 zk{U2OH~kv@^ys|Ptp^kq|3qpK=h>31;*BFW>-G!lSBk~>H4xQv03_x}o(Lz*PsH0M zN41K>?PK*2`Hj4lFD0EIY)e`LOD^h&CK(KS>x@qA5|n;0WDQGj_uLS>nHN^c3SSMD zVK_Ht1~Zv3L+(2yew5H^YH~-)FzWjTcoEzD4wi#K4n0kqfXCk*9`P05&O~h_}ID@C6|`wy=2=$NyEqV-yY#PN%Pg;WJW3ai=6)Q6zempMWma z3^8O6G(IN$!2cN6;K#huJDR2cr;Uf^sUZoKladn&kwc%&m04(~pr}3A&-FlKRWs1# z?xk>q#i1!$o7XxAr#7QEN(*iLlYGM!3#APU1wMy>5A46`XKJ8jtUA}9I<)8}`KyKd zyM=1cGSooAM>^Li>ROxH@R7>Mtbcg4KpXO8^R{!p>ZKIrhT_AM;i zIf-eF_Ffj0miz-ckOs#cO#FXS^zl#t>euS>0fc`iLLSXykY?Tfxc4{GWLty`Lv3MFFC1i+;T1x;t6=qAo3sKom$VD=mpZ$!)*$~rmJfrRW!OmLJ)sMs*vrC37 z#D)6D%y@CJg(X-j_cYdnTg*Z?iT-0upLvMbC+2zHnD9YgUS+|2725e05RuwsywN%u zdxBRD6gds!d|!uMEf^AoNcg!w=KeV2lknc%?`9V`{Z1 zQGJc%T2XlowOUa{(n!tx;S!ILi$Vh(sP@JfXU($qNOWcCANe*|b1F1CAv%?1+9Be| zvKp(dRHt+JW$lD(YwuTkzu0T9^;wBI$PwJHIT|DEt+fWeF%+#Q(L(>ZA0JZam!(RM`Ec7$YWOnW^$`1t)S z0~Qxz0b#i}JdQi%8JW$j6rwJZ{TDWUJjfo#wipd>8FyG)S>{0j(V}p1;Z`XWDmtoC zafx+)z9_w>@y&)tg9wrED`$G#f$h&;`Mf;21gi^#LfI>{Rl%btCGKWqkxJ4{aN@6b zCaP$;Qcedz2Pl}4k0emxR@P_*Ghg@tBpd}tibGOFM65Uzf0@Eb3TCb+LLX6+qbet31xIn@w9!}i29a3%Y^MKSPMLcV~XNS&MA~yYlsHmv)W%qBB3140U zS(9n1ZP_gN!xBYSfkdw24OKlM4KOvIr&B>rEQfB#l6d;4sy02?T(RAp`&b8oMdm-1 zMV!A>pV1j(F1cVXeapI8eV02Ti6atdBIpWLskQY=58IW8vf9zw=Z%zo93eBgUP*PO zbz4t5E4^xihNWm$6?vWMGPJ_#qN4^LX<4>o!V3BR+GtYGZ|1^`x&4oeWy+H*_tyjt$PkOYi&ix#TD+$(+x6VD@?Jfu! 
zDAi@|%i&MU3Ae}Ld6}=k)hN>8chb~)Fn33xyWqO$5;2vrQCAIrC|w%V=q?yMhM~&k z9Mt>Vq*!H23Sc%9;}|@hxQ>Lf1|spEYg@2-l5uoqE%v%N1zevi%X|7PYn;0X)qZ0G zBFz-?yH%1N$c613*w1VrI)6W#IthFsub1}HA)3`j%C4s?UftRx^%^&JK~I&V_pT{M zKU>nKL%yt%NMX#0r)(4b85eY2ahTWEhNGcZOV75A$(P+A`Z9Kx1KhQTT*F*Xy;35E zF*GKrN&=YIh&D(Q!V4ieAUASaQM7GBz0>fn0h>R(PsNZN3n)ilbq+?t+Ey4*IQ zMa$&73F-%6CsIAUWUo>b^V|0h%iEQa+SniFsc(bqX3~u1VOA^20bWWyUb>@sDnjhe znPl54x@=gg!Bn!QeBhmx3wy&-PEMWOJ**herv$8gGGBBPGE$%=<@4j_ato1?-_poBxc)aK zY3t{k%zmO6sb<=0ydyn-t*2;b8*e%?N_a~bhptwt?~?MG_3{3bJ9nEpR)p4|*)e>LF}zT#GJq4x0=+w}nyMAe2o z(Hl>s*}I`_5z$|(U}Yv1+7LBAKmKHuYbSx@l9a$L)itUxxUR~YI|!TrZFH4)^=!@O zrZfr>0Y-nT4zzsHKd6(9URG!(H4nr*_8%aV`W1B&!~>@f@TC;7$K3qa@mh9d2rFOs zdR-=69srnkH`Ib_{ZyA1{Jwyreo4$@lv4xF2qH=eoAw$dNgtje;%)FY;*CCsG$?*Hb0td+F2PBb z=^dIZYs^h^UDZcA^J^C|S6Vs&1joK$4B{dJX&sQL_37x6^{McOMBpX;bY<; z;WyI2T6|7!OCWX8{(8Sd9+L0T(MJXovZ$HB73A~wKdUq2vr2@v;S{{lW3^#1?= diff --git a/doc/src/week9/._week9-bs000.html b/doc/src/week9/._week9-bs000.html deleted file mode 100644 index eb84da37..00000000 --- a/doc/src/week9/._week9-bs000.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - -

    -

    -
    -

     

     

     

    - - -
    -
    -

    Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking

    -
    - - -
    -Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no [1, 2] -
    - -
    -[1] Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway -
    -
    -[2] Department of Physics and Astronomy and Facility for Rare Isotope Beams, Michigan State University, East Lansing, Michigan, USA -
    -
    -
    -

    March 11-15

    -
    -
    - - - -

    Read »

    - - -
    - -

    - -

    - -
    - - - - -
    - © 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license -
    - - - diff --git a/doc/src/week9/._week9-bs001.html b/doc/src/week9/._week9-bs001.html deleted file mode 100644 index 78e442fe..00000000 --- a/doc/src/week9/._week9-bs001.html +++ /dev/null @@ -1,701 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Overview of week 11, March 11-15

    -
    -
    - -
      -
    1. Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week
    2. -
    3. Resampling Techniques, emphasis on Blocking
    4. -
    5. Discussion of onebody densities (whiteboard notes)
    6. -
    7. Start discussion on optimization and parallelization for Python and C++ - -
    8. -
    -
    -
    - - -

    Note that these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs002.html b/doc/src/week9/._week9-bs002.html deleted file mode 100644 index d9e8847c..00000000 --- a/doc/src/week9/._week9-bs002.html +++ /dev/null @@ -1,697 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Why resampling methods ?

    -
    -
    - -
      -
    • Our simulations can be treated as computer experiments. This is particularly the case for Monte Carlo methods
    • -
    • The results can be analysed with the same statistical tools as we would use analysing experimental data.
    • -
    • As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs003.html b/doc/src/week9/._week9-bs003.html deleted file mode 100644 index 93beacca..00000000 --- a/doc/src/week9/._week9-bs003.html +++ /dev/null @@ -1,702 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Statistical analysis

    -
    -
    - -
      -
    • As in other experiments, many numerical experiments have two classes of errors: -
        -
      1. Statistical errors
      2. -
      3. Systematical errors
      4. -
      -
    • Statistical errors can be estimated using standard tools from statistics
    • -
    • Systematical errors are method specific and must be treated differently from case to case.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs004.html b/doc/src/week9/._week9-bs004.html deleted file mode 100644 index 63514813..00000000 --- a/doc/src/week9/._week9-bs004.html +++ /dev/null @@ -1,698 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    And why do we use such methods?

    - -

    As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and thereby -the standard deviation of a given expectation value. -

    - -

    The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem. -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs005.html b/doc/src/week9/._week9-bs005.html deleted file mode 100644 index 8c6f395a..00000000 --- a/doc/src/week9/._week9-bs005.html +++ /dev/null @@ -1,709 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Central limit theorem

    - -

    Last week we derived the central limit theorem with the following assumptions:

    - -
    -
    - -

    We assumed that each individual measurement \( x_{ij} \) is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment \( i \) with \( n \) samples as -

    -$$ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}. -$$ - -

    and the sample variance

    -$$ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -$$ -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs006.html b/doc/src/week9/._week9-bs006.html deleted file mode 100644 index b4cebfed..00000000 --- a/doc/src/week9/._week9-bs006.html +++ /dev/null @@ -1,696 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Further remarks

    - -

    Note that we use \( n \) instead of \( n-1 \) in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs007.html b/doc/src/week9/._week9-bs007.html deleted file mode 100644 index ac828571..00000000 --- a/doc/src/week9/._week9-bs007.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Running many measurements

    - -
    -
    - -

    With the assumption that the average measurements \( i \) are also defined as iid stochastic variables and have the same probability function \( p \), -we defined the total average over \( m \) experiments as -

    -$$ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}. -$$ - -

    and the total variance

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -$$ -
    -
    - -

    These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit \( m\rightarrow \infty \), the distribution for \( \overline{X} \) is given by a Gaussian distribution with variance \( \sigma^2_m \).

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs008.html b/doc/src/week9/._week9-bs008.html deleted file mode 100644 index 5eae9dce..00000000 --- a/doc/src/week9/._week9-bs008.html +++ /dev/null @@ -1,708 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Adding more definitions

    - -

    The total sample variance over the \( mn \) measurements is defined as

    -$$ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2. -$$ - -

    We have from the equation for \( \sigma_m^2 \)

    -$$ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -$$ - -

    and introducing the centered value \( \tilde{x}_{ij}=x_{ij}-\overline{X} \), we can rewrite \( \sigma_m^2 \) as

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -$$ - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs009.html b/doc/src/week9/._week9-bs009.html deleted file mode 100644 index 07e5de6b..00000000 --- a/doc/src/week9/._week9-bs009.html +++ /dev/null @@ -1,703 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Further rewriting

    - -

    We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements

    -$$ -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term on the last rhs is nothing but the total sample variance \( \sigma^2 \) divided by \( m \). The second term represents the covariance.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs010.html b/doc/src/week9/._week9-bs010.html deleted file mode 100644 index 14a3cd97..00000000 --- a/doc/src/week9/._week9-bs010.html +++ /dev/null @@ -1,713 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    The covariance term

    - -

    Using the definition of the total sample variance we have

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{m}+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term is what we have used till now in order to estimate the -standard deviation. However, the second term which gives us a measure -of the correlations between different stochastic events, can result in -contributions which give rise to a larger standard deviation and -variance \( \sigma_m^2 \). Note also the evaluation of the second term -leads to a double sum over all events. If we run a VMC calculation -with say \( 10^9 \) Monte carlo samples, the latter term would lead to -\( 10^{18} \) function evaluations. We don't want to, by obvious reasons, to venture into that many evaluations. -

    - -

    Note also that if our stochastic events are iid then the covariance terms is zero.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs011.html b/doc/src/week9/._week9-bs011.html deleted file mode 100644 index 5980ce77..00000000 --- a/doc/src/week9/._week9-bs011.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Rewriting the covariance term

    - -

    We introduce now a variable \( d=\vert j-k\vert \) and rewrite

    -$$ -\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}, -$$ - -

    in terms of a function

    -$$ -f_d=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n-d}\tilde{x}_{ik}\tilde{x}_{i(k+d)}. -$$ - -

    We note that for \( d=0 \) we have

    -$$ -f_0=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n}\tilde{x}_{ik}\tilde{x}_{ik}=\sigma^2. -$$ - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs012.html b/doc/src/week9/._week9-bs012.html deleted file mode 100644 index c945828b..00000000 --- a/doc/src/week9/._week9-bs012.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Introducing the correlation function

    - -

    We introduce then a correlation function \( \kappa_d=f_d/\sigma^2 \). Note that \( \kappa_0 =1 \). We rewrite the variance \( \sigma_m^2 \) as

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{m}\left[1+2\sum_{d=1}^{n-1} \kappa_d\right]. -\end{align*} -$$ - -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    - -

    Note: code will be inserted here later.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs013.html b/doc/src/week9/._week9-bs013.html deleted file mode 100644 index 39f540a0..00000000 --- a/doc/src/week9/._week9-bs013.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Resampling methods: Blocking

    - -

    The blocking method was made popular by Flyvbjerg and Petersen (1989) -and has become one of the standard ways to estimate the variance -\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely -\( \widehat{\theta} = \overline{X} \), the mean value. -

    - -

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: -

    -$$ -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} -$$ - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs014.html b/doc/src/week9/._week9-bs014.html deleted file mode 100644 index fa7b50aa..00000000 --- a/doc/src/week9/._week9-bs014.html +++ /dev/null @@ -1,700 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Why blocking?

    - -

    The blocking method shows its strength when the number of -observations, \( n \), is large. For large \( n \), the complexity of dependent -bootstrapping scales poorly, but that of the blocking method does not; -moreover, it becomes more accurate the larger \( n \) is. -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs015.html b/doc/src/week9/._week9-bs015.html deleted file mode 100644 index 72673cc9..00000000 --- a/doc/src/week9/._week9-bs015.html +++ /dev/null @@ -1,711 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Blocking Transformations

    -

    We now define the blocking transformations. The idea is to take the mean of subsequent -pairs of elements from \( \boldsymbol{X} \) and form a new vector -\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of -subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and -so on. -Define \( \boldsymbol{X}_i \) recursively by: -

    - -$$ -\begin{align} -(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ -(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + -(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\tag{1} -\end{align} -$$ - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs016.html b/doc/src/week9/._week9-bs016.html deleted file mode 100644 index 74235b3b..00000000 --- a/doc/src/week9/._week9-bs016.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Blocking transformations

    - -

    The quantity \( \boldsymbol{X}_k \) is -subject to \( k \) blocking transformations. We now have \( d \) vectors -\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\boldsymbol{X}_{d-1} \) containing the subsequent -averages of observations. It turns out that if the components of -\( \boldsymbol{X} \) form a stationary time series, then the components of -\( \boldsymbol{X}_i \) form a stationary time series for all \( 0 \leq i \leq d-1 \). -

    - -

    We can then compute the autocovariance, the variance, sample mean, and -number of observations for each \( i \). -Let \( \gamma_i, \sigma_i^2, -\overline{X}_i \) denote the covariance, variance and average of the -elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of -\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \). -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs017.html b/doc/src/week9/._week9-bs017.html deleted file mode 100644 index 628dc398..00000000 --- a/doc/src/week9/._week9-bs017.html +++ /dev/null @@ -1,714 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Blocking Transformations

    - -

    Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that, with \( h =|i-j| \), -we can write -

    -$$ -\begin{align} -\gamma_{k+1}(h) &= \mathrm{cov}\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}\mathrm{cov}\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \quad \text{if } h = 0 -\tag{2}\\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \text{otherwise} -\tag{3} -\end{align} -$$ - -

    Since the quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, \( \hat{X}_k \) is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean \( \mathrm{var}(\overline{X}) \). -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs018.html b/doc/src/week9/._week9-bs018.html deleted file mode 100644 index ce4aec3d..00000000 --- a/doc/src/week9/._week9-bs018.html +++ /dev/null @@ -1,711 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Blocking Transformations, getting there

    -

    We have

    -$$ -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\tag{4} -\end{align} -$$ - -

    The term \( e_k \) is called the truncation error:

    -$$ -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\tag{5} -\end{equation} -$$ - -

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs019.html b/doc/src/week9/._week9-bs019.html deleted file mode 100644 index 0fd8978e..00000000 --- a/doc/src/week9/._week9-bs019.html +++ /dev/null @@ -1,711 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Blocking Transformations, final expressions

    - -

    We can then wrap up

    -$$ -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} \left[ (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \right] \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\tag{6} -\end{align} -$$ - -

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    -$$ -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \tag{7} -\end{align} -$$ - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs020.html b/doc/src/week9/._week9-bs020.html deleted file mode 100644 index 5dc169b1..00000000 --- a/doc/src/week9/._week9-bs020.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    More on the blocking method

    - -

    Flyvbjerg and Petersen demonstrated that the sequence -\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjectured that the term -\( e_k \) can be made as small as we would like by making \( k \) (and hence -\( d \)) sufficiently large. Because the sequence is decreasing, -we can apply blocking transformations until -\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by -\( \widehat{\sigma}^2_k/n_k \). -

    - -

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs021.html b/doc/src/week9/._week9-bs021.html deleted file mode 100644 index 12543a6e..00000000 --- a/doc/src/week9/._week9-bs021.html +++ /dev/null @@ -1,932 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -

    -
    -

     

     

     

    - - -

    Example code from last week

    - - -
    -
    -
    -
    -
    -
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    -# Using Gaussian rng for new positions and Metropolis-Hastings
    -# Added energy minimization
    -from math import exp, sqrt
    -from random import random, seed, normalvariate
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from mpl_toolkits.mplot3d import Axes3D
    -from matplotlib import cm
    -from matplotlib.ticker import LinearLocator, FormatStrFormatter
    -from scipy.optimize import minimize
    -import sys
    -import os
    -
    -# Where to save data files
    -PROJECT_ROOT_DIR = "Results"  # top-level output directory, relative to the current working directory
    -DATA_ID = "Results/EnergyMin"  # directory that data_path() joins file names onto
    -
    -if not os.path.exists(PROJECT_ROOT_DIR):  # create the top-level directory only when it is missing
    -    os.mkdir(PROJECT_ROOT_DIR)
    -
    -if not os.path.exists(DATA_ID):  # same for the data directory; makedirs also creates missing parents
    -    os.makedirs(DATA_ID)
    -
    -def data_path(dat_id):  # build the path of file name dat_id inside DATA_ID
    -    return os.path.join(DATA_ID, dat_id)
    -
    -outfile = open(data_path("Energies.dat"),'w')  # opened for writing when the script runs (truncates an existing file); NOTE(review): no close() is visible in this part of the listing
    -
    -
    -# Trial wave function for the 2-electron quantum dot in two dims; r is read as r[particle, coordinate], alpha and beta are the variational parameters
    -def WaveFunction(r,alpha,beta):  # returns exp(-0.5*alpha*(r1+r2) + r12/(1+beta*r12)) as a float
    -    r1 = r[0,0]**2 + r[0,1]**2  # squared distance of particle 0 from the origin (not the distance itself)
    -    r2 = r[1,0]**2 + r[1,1]**2  # squared distance of particle 1 from the origin
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)  # distance between the two particles
    -    deno = r12/(1+beta*r12)  # despite the name, this is the whole correlation exponent r12/(1+beta*r12), not a denominator
    -    return exp(-0.5*alpha*(r1+r2)+deno)  # Gaussian part times the pair-correlation factor
    -
    -# Local energy for the 2-electron quantum dot in two dims, using a closed-form (analytical) expression; r is read as r[particle, coordinate]
    -def LocalEnergy(r,alpha,beta):  # returns a single float
    -    # No numerical differentiation is done here; the result is assembled from r1, r2 and r12 only.
    -    r1 = (r[0,0]**2 + r[0,1]**2)  # squared distance of particle 0 from the origin
    -    r2 = (r[1,0]**2 + r[1,1]**2)  # squared distance of particle 1 from the origin
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)  # distance between the two particles
    -    deno = 1.0/(1+beta*r12)  # here deno really is the reciprocal 1/(1+beta*r12)
    -    deno2 = deno*deno
    -    return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12)  # the 1.0/r12 terms raise ZeroDivisionError when both particles sit at the same point
    -
    -# Derivate of wave function ansatz as function of variational parameters
    -def DerivativeWFansatz(r,alpha,beta):
    -    
    -    WfDer  = np.zeros((2), np.double)
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    WfDer[0] = -0.5*(r1+r2)
    -    WfDer[1] = -r12*r12*deno2
    -    return  WfDer
    -
    -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector
    -def QuantumForce(r,alpha,beta):
    -
    -    qforce = np.zeros((NumberParticles,Dimension), np.double)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12
    -    qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12
    -    return qforce
    -    
    -
    -# Computing the derivative of the energy and the energy 
    -def EnergyDerivative(x0):
    -
    -    
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    EnergyDer = 0.0
    -    DeltaPsi = 0.0
    -    DerivativePsiE = 0.0 
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)
    -        DeltaPsi += DerPsi
    -        energy += DeltaE
    -        DerivativePsiE += DerPsi*DeltaE
    -            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    DerivativePsiE /= NumberMCcycles
    -    DeltaPsi /= NumberMCcycles
    -    EnergyDer  = 2*(DerivativePsiE-DeltaPsi*energy)
    -    return EnergyDer
    -
    -
    -# Computing the expectation value of the local energy 
    -def Energy(x0):
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        energy += DeltaE
    -        if Printout: 
    -           outfile.write('%f\n' %(energy/(MCcycle+1.0)))            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    return energy
    -
    -#Here starts the main program with variable declarations
    -NumberParticles = 2
    -Dimension = 2
    -# seed for rng generator (no argument given)
    -seed()
    -# Monte Carlo cycles for parameter optimization; no file output in this phase
    -Printout = False
    -NumberMCcycles= 10000
    -# guess for variational parameters (alpha, beta)
    -x0 = np.array([0.9,0.2])
    -# Using the BFGS quasi-Newton method of scipy.optimize.minimize, with the
    -# sampled gradient from EnergyDerivative, to find optimal parameters
    -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True})
    -x0 = res.x
    -# Compute the energy again with the optimal parameters and increased number of Monte Cycles
    -NumberMCcycles= 2**19
    -# Printout makes Energy write its running average to outfile after every cycle
    -Printout = True
    -FinalEnergy = Energy(x0)
    -# The energy is repeated so that both DataFrame columns have length 2
    -EResult = np.array([FinalEnergy,FinalEnergy])
    -outfile.close()
    -#nice printout with Pandas
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Optimal Parameters':x0, 'Final Energy':EResult}
    -frame = pd.DataFrame(data)
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs022.html b/doc/src/week9/._week9-bs022.html deleted file mode 100644 index e49fae5f..00000000 --- a/doc/src/week9/._week9-bs022.html +++ /dev/null @@ -1,777 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Resampling analysis

    - -

    The next step is then to use the above data sets and perform a -resampling analysis using the blocking method. -The blocking code, based on the article of Marius Jonsson, is given here -

    - - - -
    -
    -
    -
    -
    -
    # Common imports
    -import os
    -
    -# Where to save the figures and data files
    -DATA_ID = "Results/EnergyMin"
    -
    -def data_path(dat_id):
    -    return os.path.join(DATA_ID, dat_id)
    -
    -infile = open(data_path("Energies.dat"),'r')
    -
    -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    -from numpy.linalg import inv
    -
    -def block(x):
    -    # Blocking analysis of a (possibly correlated) series x.
    -    # Returns (mu, variance_of_mean): the sample mean of the original x and
    -    # s[k]/2**(d-k), the variance of the k-times blocked series divided by
    -    # 2**(d-k), where k is the first level at which M[k] < q[k].
    -    # x is sliced and combined elementwise, so it should be a 1-D numpy array.
    -    # The pairwise averaging x[0::2] + x[1::2] needs an even length at every
    -    # level, i.e. len(x) should be a power of two.
    -    # preliminaries
    -    n = len(x)
    -    d = int(log2(n))
    -    s, gamma = zeros(d), zeros(d)
    -    mu = mean(x)
    -
    -    # estimate the auto-covariance and variances 
    -    # for each blocking transformation
    -    for i in arange(0,d):
    -        n = len(x)
    -        # estimate autocovariance of x (lag 1, around the mean mu of the original series)
    -        gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )
    -        # estimate variance of x
    -        s[i] = var(x)
    -        # perform blocking transformation: average neighbouring pairs, halving the length
    -        x = 0.5*(x[0::2] + x[1::2])
    -   
    -    # generate the test observator M_k from the theorem
    -    # (reversed cumulative sum, so M[k] collects the terms of levels k..d-1)
    -    M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] )  )[::-1]
    -
    -    # we need a list of magic numbers
    -    # NOTE(review): these look like the 99% quantiles of the chi-square
    -    # distribution for 1..30 degrees of freedom - confirm against the article.
    -    # Only 30 values are listed, so q[k] exists only for k < 30.
    -    q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])
    -
    -    # use magic to determine when we should have stopped blocking
    -    # (k stays at d-1 when the loop never breaks)
    -    for k in arange(0,d):
    -        if(M[k] < q[k]):
    -            break
    -    if (k >= d-1):
    -        print("Warning: Use more data")
    -    return mu, s[k]/2**(d-k)
    -
    -
    -x = loadtxt(infile)
    -(mean, var) = block(x) 
    -std = sqrt(var)
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Mean':[mean], 'STDev':[std]}
    -frame = pd.DataFrame(data,index=['Values'])
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs023.html b/doc/src/week9/._week9-bs023.html deleted file mode 100644 index 3a01dcc7..00000000 --- a/doc/src/week9/._week9-bs023.html +++ /dev/null @@ -1,703 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Content

    -
      -
    • Simple compiler options
    • -
    • Tools to benchmark your code
    • -
    • Machine architectures
    • -
    • What is vectorization?
    • -
    • How to measure code performance
    • -
    • Parallelization with OpenMP
    • -
    • Parallelization with MPI
    • -
    • Vectorization and parallelization, examples
    • -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs024.html b/doc/src/week9/._week9-bs024.html deleted file mode 100644 index 2d43eb0d..00000000 --- a/doc/src/week9/._week9-bs024.html +++ /dev/null @@ -1,757 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Optimization and profiling

    -
    -
    - - -

    Till now we have not paid much attention to speed and the optimization possibilities -inherent in the various compilers. We have compiled and linked as -

    - - -
    -
    -
    -
    -
    -
    c++  -c  mycode.cpp
    -c++  -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For Fortran replace with for example gfortran or ifort. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. -

    - -

    It is instructive to look up the compiler manual for further instructions by writing

    - - -
    -
    -
    -
    -
    -
    man c++
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs025.html b/doc/src/week9/._week9-bs025.html deleted file mode 100644 index 1121a3ae..00000000 --- a/doc/src/week9/._week9-bs025.html +++ /dev/null @@ -1,729 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    More on optimization

    -
    -
    - -

    We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identify potential parallelism, include automatic vectorization or replace a division with a reciprocal -and a multiplication if this speeds up the code. -

    - - -
    -
    -
    -
    -
    -
    c++  -O3 -c  mycode.cpp
    -c++  -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This (other options are -O2 or -Ofast) is the recommended option.

    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs026.html b/doc/src/week9/._week9-bs026.html deleted file mode 100644 index bed0c48d..00000000 --- a/doc/src/week9/._week9-bs026.html +++ /dev/null @@ -1,755 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Optimization and profiling

    -
    -
    - -

    It is also useful to profile your program under the development stage. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -pg -O3 -c  mycode.cpp
    -c++  -pg -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    After you have run the code you can obtain the profiling information via

    - - -
    -
    -
    -
    -
    -
    gprof mycode.exe >  ProfileOutput
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When you have properly profiled your code, you must take out this option as it -slows down performance. -For memory tests use valgrind. An excellent environment for all these aspects, and much more, is Qt creator. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs027.html b/doc/src/week9/._week9-bs027.html deleted file mode 100644 index 19c3cbe9..00000000 --- a/doc/src/week9/._week9-bs027.html +++ /dev/null @@ -1,737 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Optimization and debugging

    -
    -
    - -

    Adding debugging options is a very useful alternative under the development stage of a program. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -g -O0 -c  mycode.cpp
    -c++  -g -O0 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    -
    -
    - -
    -
    - -

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag -ftrapv does this for the CLANG compiler on OS X operating systems. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs028.html b/doc/src/week9/._week9-bs028.html deleted file mode 100644 index c6698375..00000000 --- a/doc/src/week9/._week9-bs028.html +++ /dev/null @@ -1,764 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Other hints

    -
    -
    - -

    In general, irrespective of compiler options, it is useful to

    -
      -
    • avoid if tests or calls to functions inside loops, if possible.
    • -
    • avoid multiplication with constants inside loops if possible
    • -
    -

    Here is an example of a part of a program where specific operations lead to a slower code

    - - -
    -
    -
    -
    -
    -
    k = n-1;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] +c*d;
    -    e = g[k];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    A better code is

    - - -
    -
    -
    -
    -
    -
    temp = c*d;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] + temp;
    -}
    -e = g[n-1];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs029.html b/doc/src/week9/._week9-bs029.html deleted file mode 100644 index c7c38c51..00000000 --- a/doc/src/week9/._week9-bs029.html +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Vectorization and the basic idea behind parallel computing

    -
    -
    - -

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

    -
      -
    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • -
    • Multiple processors are involved to solve a global problem.
    • -
    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    • -
    -

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs030.html b/doc/src/week9/._week9-bs030.html deleted file mode 100644 index 02a3522a..00000000 --- a/doc/src/week9/._week9-bs030.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    A rough classification of hardware models

    -
    -
    - - -
      -
    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • -
    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • -
    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    • -
    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs031.html b/doc/src/week9/._week9-bs031.html deleted file mode 100644 index 00d8962c..00000000 --- a/doc/src/week9/._week9-bs031.html +++ /dev/null @@ -1,706 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Shared memory and distributed memory

    -
    -
    - -

    One way of categorizing modern parallel computers is to look at the memory configuration.

    -
      -
    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • -
    • In distributed memory systems each CPU has its own memory.
    • -
    -

    The CPUs are connected by some network and may exchange messages.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs032.html b/doc/src/week9/._week9-bs032.html deleted file mode 100644 index 9f51a05f..00000000 --- a/doc/src/week9/._week9-bs032.html +++ /dev/null @@ -1,704 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -

    -
    -

     

     

     

    - - -

    Different parallel programming paradigms

    -
    -
    - - -
      -
    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • -
    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    • -
    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs033.html b/doc/src/week9/._week9-bs033.html deleted file mode 100644 index 2e6b278d..00000000 --- a/doc/src/week9/._week9-bs033.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Different parallel programming paradigms

    -
    -
    - - -
      -
    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • -
    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs034.html b/doc/src/week9/._week9-bs034.html deleted file mode 100644 index a5e43a70..00000000 --- a/doc/src/week9/._week9-bs034.html +++ /dev/null @@ -1,733 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    What is vectorization?

    -

    Vectorization is a special -case of Single Instruction Multiple Data (SIMD), which denotes a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied by SIMD instructions. -

    - -

    Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at the time. -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs035.html b/doc/src/week9/._week9-bs035.html deleted file mode 100644 index 4b8dbfca..00000000 --- a/doc/src/week9/._week9-bs035.html +++ /dev/null @@ -1,714 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Number of elements that can be acted upon

    -

    A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. -

    - -

    The number of elements which can be operated on in parallel -range from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. -

    - -

    In summary, our instructions operate on 128-bit (16-byte) operands

    -
      -
    • 4 floats or ints
    • -
    • 2 doubles
    • -
    • Data paths 128 bits wide for vector unit
    • -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs036.html b/doc/src/week9/._week9-bs036.html deleted file mode 100644 index 66e077b3..00000000 --- a/doc/src/week9/._week9-bs036.html +++ /dev/null @@ -1,725 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Number of elements that can be acted upon, examples

    -

    We start with the simple scalar operations given by

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized and we have a 128-bit register to store a 32-bit floating-point number, -it means that we have \( 3\times 32 \) bits that are not used. -

    - -

    We have thus unused space in our SIMD registers. These registers could hold three additional integers.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs037.html b/doc/src/week9/._week9-bs037.html deleted file mode 100644 index a3465561..00000000 --- a/doc/src/week9/._week9-bs037.html +++ /dev/null @@ -1,726 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Operation counts for scalar operation

    -

    The code

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    has for \( n \) repeats

    -
      -
    1. one load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs038.html b/doc/src/week9/._week9-bs038.html deleted file mode 100644 index 91d5dede..00000000 --- a/doc/src/week9/._week9-bs038.html +++ /dev/null @@ -1,726 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Number of elements that can be acted upon, examples

    -

    If we vectorize the code, we can perform, with a 128-bit register four simultaneous operations, that is -we have -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i+=4){
    -    a[i] = b[i] + c[i];
    -    a[i+1] = b[i+1] + c[i+1];
    -    a[i+2] = b[i+2] + c[i+2];
    -    a[i+3] = b[i+3] + c[i+3];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Four additions are now done in a single step.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs039.html b/doc/src/week9/._week9-bs039.html deleted file mode 100644 index 15837176..00000000 --- a/doc/src/week9/._week9-bs039.html +++ /dev/null @@ -1,700 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -

    -
    -

     

     

     

    - - -

    Number of operations when vectorized

    -

    For \( n/4 \) repeats assuming floats or integers

    -
      -
    1. one vector load for \( c[i] \) in address 1
    2. -
    3. one vector load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs040.html b/doc/src/week9/._week9-bs040.html deleted file mode 100644 index f32f8c13..00000000 --- a/doc/src/week9/._week9-bs040.html +++ /dev/null @@ -1,764 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    A simple test case with and without vectorization

    -

    We implement these operations in a simple c++ program that computes at the end the norm of a vector.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // The vector length is the single mandatory command-line argument;
    -  // without this check argv[1] would be read even when it does not exist
    -  if (argc < 2) {
    -    cerr << "Usage: " << argv[0] << " <vector length>" << endl;
    -    return 1;
    -  }
    -  // read in the length of the vectors
    -  int n = atoi(argv[1]);
    -  if (n <= 0) {
    -    cerr << "Vector length must be a positive integer" << endl;
    -    return 1;
    -  }
    -  // Scale factor applied to the entries of a and b
    -  double s = 1.0/sqrt( (double) n);
    -  double *a, *b, *c;
    -  // Start timing (allocation and initialization are part of the measured time)
    -  clock_t start, finish;
    -  start = clock();
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // Set up values for vectors a and b, and zero c
    -  for (int i = 0; i < n; i++){
    -    double angle = 2.0*M_PI*i/ (( double ) n);
    -    a[i] = s*(sin(angle) + cos(angle));
    -    b[i] =  s*sin(2.0*angle);
    -    c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (int i = 0; i < n; i++){
    -    c[i] += a[i]+b[i];
    -  }
    -  // Accumulate the sum of squares of c (no square root is taken)
    -  double Norm2 = 0.0;
    -  for (int i = 0; i < n; i++){
    -    Norm2  += c[i]*c[i];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
    -  cout << "  Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs041.html b/doc/src/week9/._week9-bs041.html deleted file mode 100644 index d2c79367..00000000 --- a/doc/src/week9/._week9-bs041.html +++ /dev/null @@ -1,801 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Compiling with and without vectorization

    -

    We can compile and link without vectorization using the clang c++ compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization (and additional optimizations)

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 10000000
    -Time used  for norm computation=0.04720500000
    -Compphys:~ hjensen$ ./novec.x 10000000
    -Time used  for norm computation=0.03311700000
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This particular C++ compiler speeds up the above loop operations by a factor of 1.5. -Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 1000000000
    -Time used  for norm computation=58.41391100
    -Compphys:~ hjensen$ ./novec.x 1000000000
    -Time used  for norm computation=46.51295300
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We will discuss these issues further in the next slides.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs042.html b/doc/src/week9/._week9-bs042.html deleted file mode 100644 index 1d38757d..00000000 --- a/doc/src/week9/._week9-bs042.html +++ /dev/null @@ -1,790 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Compiling with and without vectorization using clang

    -

    We can compile and link without vectorization with the clang compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -fno-vectorize -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We can also add vectorization analysis, see for example

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    or figure out if vectorization was missed

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs043.html b/doc/src/week9/._week9-bs043.html deleted file mode 100644 index a8f6a19d..00000000 --- a/doc/src/week9/._week9-bs043.html +++ /dev/null @@ -1,724 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, criteria

    - -

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    - -

    An important criterion is that the loop count \( n \) is known at the entry of the loop.

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The variable \( n \) does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs044.html b/doc/src/week9/._week9-bs044.html deleted file mode 100644 index a009250c..00000000 --- a/doc/src/week9/._week9-bs044.html +++ /dev/null @@ -1,726 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, exit criteria

    - -

    An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -    if (a[j] < 0 ) break;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs045.html b/doc/src/week9/._week9-bs045.html deleted file mode 100644 index 208b044e..00000000 --- a/doc/src/week9/._week9-bs045.html +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, straight-line code

    - -

    SIMD instructions perform the same type of operations multiple times. -A switch statement thus leads to a non-vectorizable loop since different statements cannot branch. -The following code can however be vectorized since the if statement is implemented as a masked assignment. -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    double x  = cos(j*1.0);
    -    if (x > 0 ) {
    -       a[j] =  x*sin(j*2.0); 
    -    }
    -    else {
    -       a[j] = 0.0;
    -    }
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    These operations can be performed for all data elements but only those elements for which the mask evaluates as true are stored. In general, one should avoid branches such as switch, goto, or return statements or if constructs that cannot be treated as masked assignments.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs046.html b/doc/src/week9/._week9-bs046.html deleted file mode 100644 index 69aac9c7..00000000 --- a/doc/src/week9/._week9-bs046.html +++ /dev/null @@ -1,724 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, nested loops

    - -

    Only the innermost loop of the following example is vectorized

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs047.html b/doc/src/week9/._week9-bs047.html deleted file mode 100644 index cfb30d7b..00000000 --- a/doc/src/week9/._week9-bs047.html +++ /dev/null @@ -1,725 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, function calls

    - -

    Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable -

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      a[i] = log10(i)*cos(i);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Similarly, inline functions defined by the programmer allow for vectorization since the function statements are glued into the actual place where the function is called.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs048.html b/doc/src/week9/._week9-bs048.html deleted file mode 100644 index a733da36..00000000 --- a/doc/src/week9/._week9-bs048.html +++ /dev/null @@ -1,752 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, data dependencies

    - -

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i] = a[i-1] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value \( a[i-1] \) computed during the previous iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values \( a[0] \), \( a[1] \), \( a[2] \) and \( a[3] \) from the previous loop will be loaded into the right-hand side and produce wrong results. That is, we have

    - - -
    -
    -
    -
    -
    -
       a[1] = a[0] + b;
    -   a[2] = a[1] + b;
    -   a[3] = a[2] + b;
    -   a[4] = a[3] + b;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and if the two first iterations are executed at the same time by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs049.html b/doc/src/week9/._week9-bs049.html deleted file mode 100644 index 81e0eddb..00000000 --- a/doc/src/week9/._week9-bs049.html +++ /dev/null @@ -1,727 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, more data dependencies

    - -

    On the other hand, a so-called -write-after-read statement can be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i-1] = a[i] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of an anti-dependency that can be vectorized since no iteration with a higher value of \( i \) -can complete before an iteration with a lower value of \( i \). However, such code leads to problems with parallelization. -

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs050.html b/doc/src/week9/._week9-bs050.html deleted file mode 100644 index 41ad4205..00000000 --- a/doc/src/week9/._week9-bs050.html +++ /dev/null @@ -1,725 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Automatic vectorization and vectorization inhibitors, memory stride

    - -

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    - -

    When dealing with arrays, you should also avoid memory stride, since this slows down vectorization considerably. When you access array elements, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs051.html b/doc/src/week9/._week9-bs051.html deleted file mode 100644 index 650c41f8..00000000 --- a/doc/src/week9/._week9-bs051.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Memory management

    -

    The main memory contains the program data

    -
      -
    1. Cache memory contains a copy of the main memory data
    2. -
    3. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    4. -
    5. Registers contain working data only
    6. -
        -
      • Modern CPUs perform most or all operations only on data in registers
      • -
      -
    7. Multiple Cache memories contain a copy of the main memory data
    8. -
        -
      • Cache items accessed by their address in main memory
      • -
      • L1 cache is the fastest but has the least capacity
      • -
      • L2, L3 provide intermediate performance/size tradeoffs
      • -
      -
    -

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs052.html b/doc/src/week9/._week9-bs052.html deleted file mode 100644 index 3a54f757..00000000 --- a/doc/src/week9/._week9-bs052.html +++ /dev/null @@ -1,704 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Memory and communication

    - -
      -
    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. -
    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. -
        -
      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • -
      • Even if you do not use the data, it is moved and occupies space in the cache
      • -
      -
    -

    Many of these performance features are not captured in most programming languages.

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs053.html b/doc/src/week9/._week9-bs053.html deleted file mode 100644 index fe227f06..00000000 --- a/doc/src/week9/._week9-bs053.html +++ /dev/null @@ -1,725 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Measuring performance

    - -

    How do we measure performance? What is wrong with this code to time a loop?

    - - -
    -
    -
    -
    -
    -
      clock_t start, finish;
    -  start = clock();
    -  for (int j = 0; j < i; j++) {
    -    a[j] = b[j]+b[j]*c[j];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs054.html b/doc/src/week9/._week9-bs054.html deleted file mode 100644 index 3b7c0212..00000000 --- a/doc/src/week9/._week9-bs054.html +++ /dev/null @@ -1,700 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Problems with measuring time

    -
      -
    1. Timers are not infinitely accurate
    2. -
    3. All clocks have a granularity, the minimum time that they can measure
    4. -
    5. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    6. -
    7. Always know what your clock granularity is
    8. -
    9. Ensure that your measurement is for a long enough duration (say 100 times the tick)
    10. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs055.html b/doc/src/week9/._week9-bs055.html deleted file mode 100644 index b5e0ffa4..00000000 --- a/doc/src/week9/._week9-bs055.html +++ /dev/null @@ -1,703 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Problems with cold start

    - -

    What happens when the code is executed? The assumption is that the code is ready to -execute. But -

    -
      -
    1. Code may still be on disk, and not even read into memory.
    2. -
    3. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    4. -
    5. Multiple tests often necessary to ensure that cold start effects are not present
    6. -
    7. Special effort often required to ensure data in the intended part of the memory hierarchy.
    8. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs056.html b/doc/src/week9/._week9-bs056.html deleted file mode 100644 index 3e234143..00000000 --- a/doc/src/week9/._week9-bs056.html +++ /dev/null @@ -1,700 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Problems with smart compilers

    - -
      -
    1. If the result of the computation is not used, the compiler may eliminate the code
    2. -
    3. Performance will look impossibly fantastic
    4. -
    5. Even worse, eliminate some of the code so the performance looks plausible
    6. -
    7. Ensure that the results are (or may be) used.
    8. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs057.html b/doc/src/week9/._week9-bs057.html deleted file mode 100644 index 8c76800c..00000000 --- a/doc/src/week9/._week9-bs057.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Problems with interference

    -
      -
    1. Other activities are sharing your processor
    2. -
        -
      • Operating system, system daemons, other users
      • -
      • Some parts of the hardware do not always perform with exactly the same performance
      • -
      -
    3. Make multiple tests and report
    4. -
    5. Easy choices include
    6. -
        -
      • Average tests represent what users might observe over time
      • -
      -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs058.html b/doc/src/week9/._week9-bs058.html deleted file mode 100644 index 716f9a3f..00000000 --- a/doc/src/week9/._week9-bs058.html +++ /dev/null @@ -1,699 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Problems with measuring performance

    -
      -
    1. Accurate, reproducible performance measurement is hard
    2. -
    3. Think carefully about your experiment:
    4. -
    5. What is it, precisely, that you want to measure?
    6. -
    7. How representative is your test of the situation that you are trying to measure?
    8. -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs059.html b/doc/src/week9/._week9-bs059.html deleted file mode 100644 index 17184cda..00000000 --- a/doc/src/week9/._week9-bs059.html +++ /dev/null @@ -1,722 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Thomas algorithm for tridiagonal linear algebra equations

    -
    -
    - -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs060.html b/doc/src/week9/._week9-bs060.html deleted file mode 100644 index e66e5ddf..00000000 --- a/doc/src/week9/._week9-bs060.html +++ /dev/null @@ -1,737 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Thomas algorithm, forward substitution

    -
    -
    - -

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    -$$ - a_i = 0, -$$ - - -$$ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -$$ - -

    and

    -$$ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. -$$ - -

    At this point the simplified equation, with only an upper triangular matrix takes the form

    -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs061.html b/doc/src/week9/._week9-bs061.html deleted file mode 100644 index 49cf568e..00000000 --- a/doc/src/week9/._week9-bs061.html +++ /dev/null @@ -1,714 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Thomas algorithm, backward substitution

    -
    -
    - -

    The next step is the backward substitution step. The last row is multiplied by \( c_{N-3}/b_{N-2} \) and subtracted from the second to last row, thus eliminating \( c_{N-3} \) from the second to last row. The general backward substitution procedure is

    -$$ - c_i = 0, -$$ - -

    and

    -$$ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i -$$ - -

    All that remains to be computed is the solution, which is the very straightforward process of

    -$$ -x_i = \frac{f_i}{b_i} -$$ -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs062.html b/doc/src/week9/._week9-bs062.html deleted file mode 100644 index dd041aa0..00000000 --- a/doc/src/week9/._week9-bs062.html +++ /dev/null @@ -1,750 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Thomas algorithm and counting of operations (floating point and memory)

    -
    -
    - - -

    In this specific case we have the following numbers of memory and floating point operations

    - -
      -
    • Memory Reads: \( 14(N-2) \);
    • -
    • Memory Writes: \( 4(N-2) \);
    • -
    • Subtractions: \( 3(N-2) \);
    • -
    • Multiplications: \( 3(N-2) \);
    • -
    • Divisions: \( 4(N-2) \).
    • -
    -
    -
    - - -
    -
    - - - -
    -
    -
    -
    -
    -
    // Forward substitution    
    -// Note that we can simplify by precalculating a[i-1]/b[i-1]
    -  for (int i=1; i < n; i++) {
    -     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    -     f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1];
    -  }
    -  x[n-1] = f[n-1] / b[n-1];
    -  // Backwards substitution                                                           
    -  for (int i = n-2; i >= 0; i--) {
    -     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    -     x[i] = f[i]/b[i];
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs063.html b/doc/src/week9/._week9-bs063.html deleted file mode 100644 index 81516243..00000000 --- a/doc/src/week9/._week9-bs063.html +++ /dev/null @@ -1,764 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Example: Transpose of a matrix

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double **A, **B;
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -  }
    -  // Set up values for matrix A
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      A[i][j] =  cos(i*1.0)*sin(j*3.0);
    -    }
    -  }
    -  clock_t start, finish;
    -  start = clock();
    -  // Then compute the transpose
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      B[i][j]= A[j][i];
    -    }
    -  }
    -
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for setting up transpose of matrix=" << timeused  << endl;
    -
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs064.html b/doc/src/week9/._week9-bs064.html deleted file mode 100644 index 7d9d3169..00000000 --- a/doc/src/week9/._week9-bs064.html +++ /dev/null @@ -1,783 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain c++ memory allocation. It computes at the end the Frobenius norm.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double **A, **B, **C;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Set up values for matrix A and B and zero matrix C
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double angle = 2.0*M_PI*i*j/ (( double ) n);
    -      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -      B[j][i] =  A[i][j];
    -    }
    -  }
    -  // Then perform the matrix-matrix multiplication
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double sum = 0.0;
    -       for (int k = 0; k < n; k++) {
    -           sum += B[i][k]*A[k][j];
    -       }
    -       C[i][j] = sum;
    -    }
    -  }
    -  // Compute now the Frobenius norm
    -  double Fsum = 0.0;
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      Fsum += C[i][j]*C[i][j];
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << timeused  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs065.html b/doc/src/week9/._week9-bs065.html deleted file mode 100644 index 48608261..00000000 --- a/doc/src/week9/._week9-bs065.html +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    How do we define speedup? Simplest form

    -
    -
    - -
      -
    • Speedup measures the ratio of performance between two objects
    • -
    • Versions of same code, with different number of processors
    • -
    • Serial and vector versions
    • -
    • Try different programming languages, c++ and Fortran
    • -
    • Two algorithms computing the same result
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs066.html b/doc/src/week9/._week9-bs066.html deleted file mode 100644 index 455f5be4..00000000 --- a/doc/src/week9/._week9-bs066.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    How do we define speedup? Correct baseline

    -
    -
    - -

    The key is choosing the correct baseline for comparison

    -
      -
    • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
    • -
        -
      • For parallel applications, this is much harder:
      • -
          -
        • Choice of algorithm, decomposition, performance of baseline case etc.
        • -
        -
      -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs067.html b/doc/src/week9/._week9-bs067.html deleted file mode 100644 index f231b190..00000000 --- a/doc/src/week9/._week9-bs067.html +++ /dev/null @@ -1,708 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Parallel speedup

    -
    -
    - -

    For parallel applications, speedup is typically defined as

    -
      -
    • Speedup \( =T_1/T_p \)
    • -
    -

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

    -
      -
    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs068.html b/doc/src/week9/._week9-bs068.html deleted file mode 100644 index 7f0fefe9..00000000 --- a/doc/src/week9/._week9-bs068.html +++ /dev/null @@ -1,709 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Speedup and memory

    -
    -
    - -

    The speedup on \( p \) processors can -be greater than \( p \) if memory usage is optimal! -Consider the case of a memory-bound computation with \( M \) words of memory -

    -
      -
    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • -
    • \( T_1 \) uses the main memory bandwidth
    • -
    • \( T_p \) uses the appropriate cache bandwidth
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs069.html b/doc/src/week9/._week9-bs069.html deleted file mode 100644 index 1ca12bc3..00000000 --- a/doc/src/week9/._week9-bs069.html +++ /dev/null @@ -1,711 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - -
    -
    -

     

     

     

    - - -

    Upper bounds on speedup

    -
    -
    - -

    Assume that almost all parts of a code are perfectly -parallelizable (fraction \( f \)). The remainder, -fraction \( (1-f) \) cannot be parallelized at all. -

    - -

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take -time \( Wf/p \) on \( p \) processors. -

    -
      -
    • What is the maximum possible speedup as a function of \( f \)?
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs070.html b/doc/src/week9/._week9-bs070.html deleted file mode 100644 index 01108c04..00000000 --- a/doc/src/week9/._week9-bs070.html +++ /dev/null @@ -1,724 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Amdahl's law

    -
    -
    - -

    On one processor we have

    -$$ -T_1 = (1-f)W + fW = W -$$ - -

    On \( p \) processors we have

    -$$ -T_p = (1-f)W + \frac{fW}{p}, -$$ - -

    resulting in a speedup of

    -$$ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -$$ - -

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    -$$ -\frac{1}{1-f}, -$$ - -

    meaning that -if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup -is \( 1/(1-.99)=100 \)! -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs071.html b/doc/src/week9/._week9-bs071.html deleted file mode 100644 index fed561cb..00000000 --- a/doc/src/week9/._week9-bs071.html +++ /dev/null @@ -1,708 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    How much is parallelizable

    -
    -
    - -

    If any non-parallel code slips into the -application, the parallel -performance is limited. -

    - -

    In many simulations, however, the fraction of non-parallelizable work -is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs072.html b/doc/src/week9/._week9-bs072.html deleted file mode 100644 index 8f094ac5..00000000 --- a/doc/src/week9/._week9-bs072.html +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Today's situation of parallel computing

    -
    -
    - - -
      -
    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • -
    • Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.
    • -
    • Modern nodes nowadays have several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This often leads to codes which use both MPI and OpenMP.
    • -
    -

    Our lectures will focus on both MPI and OpenMP.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs073.html b/doc/src/week9/._week9-bs073.html deleted file mode 100644 index 6acdfb79..00000000 --- a/doc/src/week9/._week9-bs073.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Overhead present in parallel computing

    -
    -
    - - -
      -
    • Uneven load balance: not all the processors can perform useful work at all times.
    • -
    • Overhead of synchronization
    • -
    • Overhead of communication
    • -
    • Extra computation due to parallelization
    • -
    -

    Due to the above overhead, and the fact that certain parts of a sequential -algorithm cannot be parallelized, we may not achieve an optimal parallelization. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs074.html b/doc/src/week9/._week9-bs074.html deleted file mode 100644 index 0d951384..00000000 --- a/doc/src/week9/._week9-bs074.html +++ /dev/null @@ -1,705 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallelizing a sequential algorithm

    -
    -
    - - -
      -
    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part.
    • -
    • Distribute the global work and data among \( P \) processors.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs075.html b/doc/src/week9/._week9-bs075.html deleted file mode 100644 index 5266e16f..00000000 --- a/doc/src/week9/._week9-bs075.html +++ /dev/null @@ -1,704 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Strategies

    -
    -
    - -
      -
    • Develop codes locally, run with a few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • -
    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs076.html b/doc/src/week9/._week9-bs076.html deleted file mode 100644 index e5f746fb..00000000 --- a/doc/src/week9/._week9-bs076.html +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    How do I run MPI on a PC/Laptop? MPI

    -
    -
    - -

    Installing MPI is rather easy on hardware running Unix/Linux as operating system; simply follow the instructions from the OpenMPI website. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -

    -
      -
    • Compile with mpicxx/mpic++ or mpif90
    • -
    - - -
    -
    -
    -
    -
    -
      # Compile and link
    -  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    -  #  run code with for example 8 processes using mpirun/mpiexec
    -  mpiexec -n 8 ./nameofprog.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs077.html b/doc/src/week9/._week9-bs077.html deleted file mode 100644 index 2d14eea9..00000000 --- a/doc/src/week9/._week9-bs077.html +++ /dev/null @@ -1,754 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Can I do it on my own PC/laptop? OpenMP installation

    -
    -
    - -

    If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: -

    - -
      -
    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • -
    • For OS X users however, install libomp
    • -
    - - -
    -
    -
    -
    -
    -
      brew install libomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and compile and link as

    - - -
    -
    -
    -
    -
    -
    c++ -o <name executable> <name program.cpp>  -lomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs078.html b/doc/src/week9/._week9-bs078.html deleted file mode 100644 index f3e68e76..00000000 --- a/doc/src/week9/._week9-bs078.html +++ /dev/null @@ -1,776 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Installing MPI

    -
    -
    - -

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    - - -
    -
    -
    -
    -
    -
      sudo apt-get install libopenmpi-dev
    -  sudo apt-get install openmpi-bin
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew -

    - - -
    -
    -
    -
    -
    -
       brew install openmpi
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When running an executable (code.x), run as

    - - -
    -
    -
    -
    -
    -
      mpirun -n 10 ./code.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    where we indicate that we want the number of processes to be 10.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs079.html b/doc/src/week9/._week9-bs079.html deleted file mode 100644 index 9564c933..00000000 --- a/doc/src/week9/._week9-bs079.html +++ /dev/null @@ -1,703 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Installing MPI and using Qt

    -
    -
    - -

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    - -

    You may need to tell Qt where openmpi is stored.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs080.html b/doc/src/week9/._week9-bs080.html deleted file mode 100644 index 07a4adcb..00000000 --- a/doc/src/week9/._week9-bs080.html +++ /dev/null @@ -1,711 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    What is Message Passing Interface (MPI)?

    -
    -
    - - -

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. -

    - -

    MPI programs should be able to run -on all possible machines and with all MPI implementations without change. -

    - -

    An MPI computation is a collection of processes communicating with messages.

    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs081.html b/doc/src/week9/._week9-bs081.html deleted file mode 100644 index 6752638a..00000000 --- a/doc/src/week9/._week9-bs081.html +++ /dev/null @@ -1,754 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Going Parallel with MPI

    -
    -
    - -

    Task parallelism: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs082.html b/doc/src/week9/._week9-bs082.html deleted file mode 100644 index 70705a4a..00000000 --- a/doc/src/week9/._week9-bs082.html +++ /dev/null @@ -1,714 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    MPI is a library

    -
    -
    - -

    MPI is a library specification for the message passing interface, -proposed as a standard. -

    - -
      -
    • independent of hardware;
    • -
    • not a language or compiler specification;
    • -
    • not a specific implementation or product.
    • -
    -

    A message passing standard for portability and ease-of-use. -Designed for high performance. -

    - -

    Insert communication and synchronization functions where necessary.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs083.html b/doc/src/week9/._week9-bs083.html deleted file mode 100644 index 4dec0aed..00000000 --- a/doc/src/week9/._week9-bs083.html +++ /dev/null @@ -1,752 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Bindings to MPI routines

    -
    -
    - - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The discussion in these slides focuses on the C++ binding.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs084.html b/doc/src/week9/._week9-bs084.html deleted file mode 100644 index 4ba1f0d6..00000000 --- a/doc/src/week9/._week9-bs084.html +++ /dev/null @@ -1,732 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Communicator

    -
    -
    - -
      -
    • A group of MPI processes with a name (context).
    • -
    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • -
    • By default the communicator contains all the MPI processes.
    • -
    - - -
    -
    -
    -
    -
    -
      MPI_COMM_WORLD 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Mechanism to identify subset of processes.
    • -
    • Promotes modular design of parallel libraries.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs085.html b/doc/src/week9/._week9-bs085.html deleted file mode 100644 index 81713681..00000000 --- a/doc/src/week9/._week9-bs085.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Some of the most important MPI functions

    -
    -
    - - -
      -
    • \( MPI\_Init \) - initiate an MPI computation
    • -
    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • -
    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • -
    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • -
    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • -
    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • -
    • \( MPI\_Reduce \) or \( MPI\_Allreduce \), send and receive messages
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs086.html b/doc/src/week9/._week9-bs086.html deleted file mode 100644 index 0d4d72c4..00000000 --- a/doc/src/week9/._week9-bs086.html +++ /dev/null @@ -1,737 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    The first MPI C/C++ program

    -
    -
    - - -

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    - - -
    -
    -
    -
    -
    -
    using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -int main (int nargs, char* args[])
    -{
    -int numprocs, my_rank;
    -//   MPI initializations
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -     << numprocs << endl;
    -//  End MPI
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs087.html b/doc/src/week9/._week9-bs087.html deleted file mode 100644 index 134f35cc..00000000 --- a/doc/src/week9/._week9-bs087.html +++ /dev/null @@ -1,732 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    The Fortran program

    -
    -
    - - - -
    -
    -
    -
    -
    -
    PROGRAM hello
    -INCLUDE "mpif.h"
    -INTEGER:: size, my_rank, ierr
    -
    -CALL  MPI_INIT(ierr)
    -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    -CALL MPI_FINALIZE(ierr)
    -
    -END PROGRAM hello
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs088.html b/doc/src/week9/._week9-bs088.html deleted file mode 100644 index 4475cf1e..00000000 --- a/doc/src/week9/._week9-bs088.html +++ /dev/null @@ -1,706 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Note 1

    -
    -
    - - -
      -
    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • -
    • It is the operating system which opts for an ordering.
    • -
    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs089.html b/doc/src/week9/._week9-bs089.html deleted file mode 100644 index 64cbb14f..00000000 --- a/doc/src/week9/._week9-bs089.html +++ /dev/null @@ -1,734 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Ordered output with \( MPI\_Barrier \)

    -
    -
    - - - - -
    -
    -
    -
    -
    -
    int main (int nargs, char* args[])
    -{
    - int numprocs, my_rank, i;
    - MPI_Init (&nargs, &args);
    - MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    - for (i = 0; i < numprocs; i++) {}
    - MPI_Barrier (MPI_COMM_WORLD);
    - if (i == my_rank) {
    - cout << "Hello world, I have  rank " << my_rank << 
    -        " out of " << numprocs << endl;}
    -      MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs090.html b/doc/src/week9/._week9-bs090.html deleted file mode 100644 index ad85091c..00000000 --- a/doc/src/week9/._week9-bs090.html +++ /dev/null @@ -1,709 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Note 2

    -
    -
    - -
      -
    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • -
    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • -
    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • -
    -

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs091.html b/doc/src/week9/._week9-bs091.html deleted file mode 100644 index 2dfc5223..00000000 --- a/doc/src/week9/._week9-bs091.html +++ /dev/null @@ -1,737 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Ordered output

    -
    -
    - - - - -
    -
    -
    -
    -
    -
    .....
    -int numprocs, my_rank, flag;
    -MPI_Status status;
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -if (my_rank > 0)
    -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    -           MPI_COMM_WORLD, &status);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -<< numprocs << endl;
    -if (my_rank < numprocs-1)
    -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    -          100, MPI_COMM_WORLD);
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs092.html b/doc/src/week9/._week9-bs092.html deleted file mode 100644 index ae90d140..00000000 --- a/doc/src/week9/._week9-bs092.html +++ /dev/null @@ -1,738 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Note 3

    -
    -
    - - -

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ -is defined as -

    - - -
    -
    -
    -
    -
    -
    int MPI_Send(void *buf, int count, 
    -             MPI_Datatype datatype, 
    -             int dest, int tag, MPI_Comm comm)}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable buf is the variable we wish to send while count -is the number of variables we are passing. If we are passing only a single value, this should be 1. -

    - -

    If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) -since we are actually passing 100 values. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs093.html b/doc/src/week9/._week9-bs093.html deleted file mode 100644 index f2e07cd3..00000000 --- a/doc/src/week9/._week9-bs093.html +++ /dev/null @@ -1,742 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Note 4

    -
    -
    - - -

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) -is similar to the send call. -

    - - -
    -
    -
    -
    -
    -
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    -            int source, 
    -            int tag, MPI_Comm comm, MPI_Status *status )
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The arguments that are different from those in \( MPI\_SEND \) are -buf, which is the name of the variable where you will be storing the received data, and -source, which replaces the destination in the send command. This is the return ID of the sender. -

    - -

    Finally, we have used \( MPI\_Status\ status \), -where one can check if the receive was completed. -

    - -

    The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs094.html b/doc/src/week9/._week9-bs094.html deleted file mode 100644 index 62f58770..00000000 --- a/doc/src/week9/._week9-bs094.html +++ /dev/null @@ -1,710 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Numerical integration in parallel

    -
    -
    - - -
      -
    • The code example computes \( \pi \) using the trapezoidal rule.
    • -
    • The trapezoidal rule
    • -
    -$$ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -$$ - -

    Click on this link for the full program.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs095.html b/doc/src/week9/._week9-bs095.html deleted file mode 100644 index a463281d..00000000 --- a/doc/src/week9/._week9-bs095.html +++ /dev/null @@ -1,738 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Dissection of trapezoidal rule with \( MPI\_Reduce \)

    -
    -
    - - - - -
    -
    -
    -
    -
    -
    //    Trapezoidal rule and numerical integration using MPI
    -using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -
    -//     Here we define various functions called by the main program
    -
    -double int_function(double );
    -double trapezoidal_rule(double , double , int , double (*)(double));
    -
    -//   Main function begins here
    -int main (int nargs, char* args[])
    -{
    -  int n, local_n, numprocs, my_rank; 
    -  double a, b, h, local_a, local_b, total_sum, local_sum;   
    -  double  time_start, time_end, total_time;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs096.html b/doc/src/week9/._week9-bs096.html deleted file mode 100644 index 4f371e52..00000000 --- a/doc/src/week9/._week9-bs096.html +++ /dev/null @@ -1,736 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Dissection of trapezoidal rule

    -
    -
    - - - - -
    -
    -
    -
    -
    -
      //  MPI initializations
    -  MPI_Init (&nargs, &args);
    -  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -  time_start = MPI_Wtime();
    -  //  Fixed values for a, b and n 
    -  a = 0.0 ; b = 1.0;  n = 1000;
    -  h = (b-a)/n;    // h is the same for all processes 
    -  local_n = n/numprocs;  
    -  // make sure n > numprocs, else integer division gives zero
    -  // Length of each process' interval of
    -  // integration = local_n*h.  
    -  local_a = a + my_rank*local_n*h;
    -  local_b = local_a + local_n*h;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs097.html b/doc/src/week9/._week9-bs097.html deleted file mode 100644 index 2e93defe..00000000 --- a/doc/src/week9/._week9-bs097.html +++ /dev/null @@ -1,738 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Integrating with MPI

    -
    -
    - - - - -
    -
    -
    -
    -
    -
      total_sum = 0.0;
    -  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    -                               &int_function); 
    -  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    -              MPI_SUM, 0, MPI_COMM_WORLD);
    -  time_end = MPI_Wtime();
    -  total_time = time_end-time_start;
    -  if ( my_rank == 0) {
    -    cout << "Trapezoidal rule = " <<  total_sum << endl;
    -    cout << "Time = " <<  total_time  
    -         << " on number of processors: "  << numprocs  << endl;
    -  }
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  // end of main program
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs098.html b/doc/src/week9/._week9-bs098.html deleted file mode 100644 index 896e921e..00000000 --- a/doc/src/week9/._week9-bs098.html +++ /dev/null @@ -1,736 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    How do I use \( MPI\_Reduce \)?

    -
    -
    - - -

    Here we have used

    - - -
    -
    -
    -
    -
    -
    MPI_Reduce( void *senddata, void* resultdata, int count, 
    -     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable \( count \) represents the total dimensionality, 1 in case of just one variable, -while \( MPI\_Datatype \) -defines the type of variable which is sent and received. -

    - -

    The new feature is \( MPI\_Op \). It defines the type -of operation we want to do. -

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs099.html b/doc/src/week9/._week9-bs099.html deleted file mode 100644 index 5f9228b0..00000000 --- a/doc/src/week9/._week9-bs099.html +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    More on \( MPI\_Reduce \)

    -
    -
    - -

    In our case, since we are summing -the rectangle contributions from every process we define \( MPI\_Op = MPI\_SUM \). -If we have an array or matrix we can search for the largest or smallest element by sending either \( MPI\_MAX \) or -\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer -\( MPI\_MAXLOC \) or \( MPI\_MINLOC \). If we want the product we write \( MPI\_PROD \). -

    - -

    \( MPI\_Allreduce \) is defined as

    - - -
    -
    -
    -
    -
    -
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    -          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs100.html b/doc/src/week9/._week9-bs100.html deleted file mode 100644 index e4e70a3c..00000000 --- a/doc/src/week9/._week9-bs100.html +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Dissection of trapezoidal rule

    -
    -
    - - -

    We use \( MPI\_Reduce \) to collect data from each process. Note also the use of the function -\( MPI\_Wtime \). -

    - - -
    -
    -
    -
    -
    -
    //  Integrand of the trapezoidal-rule example: returns f(x) = 4/(1+x^2)
    -double int_function(double x)
    -{
    -  double value = 4./(1.+x*x);  // 4/(1+x^2), evaluated in double precision
    -  return value;
    -} // end of function to evaluate
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs101.html b/doc/src/week9/._week9-bs101.html deleted file mode 100644 index 6370ee56..00000000 --- a/doc/src/week9/._week9-bs101.html +++ /dev/null @@ -1,739 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Dissection of trapezoidal rule

    -
    -
    - - - -
    -
    -
    -
    -
    -
    //  Composite trapezoidal rule: approximates the integral of func over [a,b] with n subintervals
    -double trapezoidal_rule(double a, double b, int n, 
    -                         double (*func)(double))  // func: integrand, called at a, b and the interior points
    -{
    -  double trapez_sum;
    -  double fa, fb, x, step;
    -  int    j;
    -  step=(b-a)/((double) n);  // width of one subinterval; n is used as a divisor here
    -  fa=(*func)(a)/2. ;  // end points enter the sum with weight 1/2
    -  fb=(*func)(b)/2. ;
    -  trapez_sum=0.;
    -  for (j=1; j <= n-1; j++){  // interior points a+step, ..., a+(n-1)*step, each with weight 1
    -    x=j*step+a;
    -    trapez_sum+=(*func)(x);
    -  }
    -  trapez_sum=(trapez_sum+fb+fa)*step;  // scale the weighted sum by the step length
    -  return trapez_sum;
    -}  // end trapezoidal_rule 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs102.html b/doc/src/week9/._week9-bs102.html deleted file mode 100644 index e3f6cc51..00000000 --- a/doc/src/week9/._week9-bs102.html +++ /dev/null @@ -1,1177 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    The quantum dot program for two electrons

    -
    -
    - - - -
    -
    -
    -
    -
    -
    // Variational Monte Carlo for atoms with importance sampling, slater det
    -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    -#include "mpi.h"
    -#include <cmath>
    -#include <random>
    -#include <string>
    -#include <iostream>
    -#include <fstream>
    -#include <iomanip>
    -#include "vectormatrixclass.h"
    -
    -using namespace  std;
    -// output file as global variable
    -ofstream ofile;  
    -// the step length and its squared inverse for the second derivative 
    -//  Here we define global variables  used in various functions
    -//  These can be changed by using classes
    -int Dimension = 2; 
    -int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    -
    -// declaration of functions 
    -
    -// The Mc sampling for the variational Monte Carlo 
    -void  MonteCarloSampling(int, double &, double &, Vector &);
    -
    -// The variational wave function
    -double  WaveFunction(Matrix &, Vector &);
    -
    -// The local energy 
    -double  LocalEnergy(Matrix &, Vector &);
    -
    -// The quantum force
    -void  QuantumForce(Matrix &, Matrix &, Vector &);
    -
    -
    -// inline function for single-particle wave function
    -inline double SPwavefunction(double r, double alpha) { 
    -   return exp(-alpha*r*0.5);
    -}
    -
    -// inline function for derivative of single-particle wave function
    -inline double DerivativeSPwavefunction(double r, double alpha) { 
    -  return -r*alpha;
    -}
    -
    -// function for absolute value of relative distance
    -double RelativeDistance(Matrix &r, int i, int j) { 
    -      double r_ij = 0;  
    -      for (int k = 0; k < Dimension; k++) { 
    -	r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));
    -      }
    -      return sqrt(r_ij); 
    -}
    -
    -// inline function for derivative of Jastrow factor
    -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    -  return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));
    -}
    -
    -// function for square of position of single particle
    -double singleparticle_pos2(Matrix &r, int i) { 
    -    double r_single_particle = 0;
    -    for (int j = 0; j < Dimension; j++) { 
    -      r_single_particle  += r(i,j)*r(i,j);
    -    }
    -    return r_single_particle;
    -}
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    -
    -static double sqrarg;
    -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    -
    -
    -static double maxarg1,maxarg2;
    -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    -        (maxarg1) : (maxarg2))
    -
    -
    -// Begin of main program
    -//
    -// Usage: program <output file prefix> <number of Monte Carlo cycles per process>
    -// Every MPI process samples the energy for each (alpha, beta) pair; rank 0
    -// averages the per-process results and writes one line per pair to the
    -// output file named <prefix><cycles>.
    -// Returns 0 on success and 1 when the command line is unusable.
    -
    -int main(int argc, char* argv[])
    -{
    -
    -  //  MPI initializations
    -  //  NumberMCsamples starts at 0 so that the broadcast below always sends a
    -  //  well-defined value, also when the command line is incomplete
    -  int NumberProcesses, MyRank, NumberMCsamples = 0;
    -  MPI_Init (&argc, &argv);
    -  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    -  double StartTime = MPI_Wtime();
    -  // Rank 0 reads filename and number of Monte Carlo cycles from the command line
    -  if (MyRank == 0) {
    -    if (argc > 2) {
    -      string filename = argv[1]; // first command line argument after name of program
    -      NumberMCsamples  = atoi(argv[2]);
    -      if (NumberMCsamples > 0) {
    -        string fileout = filename;
    -        string argument = to_string(NumberMCsamples);
    -        // Final filename as filename+NumberMCsamples
    -        fileout.append(argument);
    -        ofile.open(fileout);
    -      }
    -    }
    -    // Both arguments are required, and the cycle count must be positive
    -    if (NumberMCsamples <= 0) {
    -      cout << "Bad Usage: " << argv[0] << 
    -        " Read also output file on same line and number of Monte Carlo cycles" << endl;
    -    }
    -  }
    -  // broadcast the number of  Monte Carlo samples
    -  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    -  // All ranks see the same value, so they leave together on bad input
    -  if (NumberMCsamples <= 0) {
    -    MPI_Finalize ();
    -    return 1;
    -  }
    -  // Two variational parameters only
    -  Vector VariationalParameters(2);
    -  int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; 
    -  // Loop over variational parameters
    -  // NOTE(review): the bounds are reached by accumulating 0.1 and 0.05 in
    -  // floating point, so whether the last value (1.5 and 0.5) is visited
    -  // depends on round-off - confirm that this is acceptable
    -  for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){
    -    for (double beta = 0.1; beta <= 0.5; beta +=0.05){
    -      VariationalParameters(0) = alpha;  // value of alpha
    -      VariationalParameters(1) = beta;  // value of beta
    -      //  Do the mc sampling  and accumulate data with MPI_Reduce
    -      double TotalEnergy = 0.0, TotalEnergySquared = 0.0;
    -      double LocalProcessEnergy = 0.0, LocalProcessEnergy2 = 0.0;
    -      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    -      //  Collect data in total averages
    -      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      // Print out results  in case of Master node, set to MyRank = 0
    -      if ( MyRank == 0) {
    -        // each process delivers an average, so divide the sums by the number of processes
    -        double Energy = TotalEnergy/( (double)NumberProcesses);
    -        double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    -        double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error
    -        ofile << setiosflags(ios::showpoint | ios::uppercase);
    -        ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    -        ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    -        ofile << setw(15) << setprecision(8) << Energy;
    -        ofile << setw(15) << setprecision(8) << Variance;
    -        ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    -      }
    -    }
    -  }
    -  double EndTime = MPI_Wtime();
    -  double TotalTime = EndTime-StartTime;
    -  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    -  if (MyRank == 0)  ofile.close();  // close output file
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  //  end of main function
    -
    -
    -// Monte Carlo sampling with the Metropolis-Hastings algorithm and importance sampling
    -//
    -// NumberMCsamples:        number of Monte Carlo cycles; every cycle tries to move each particle once
    -// cumulative_e:           on return, the local energy averaged over the cycles
    -// cumulative_e2:          on return, the squared local energy averaged over the cycles
    -// VariationalParameters:  element 0 is alpha, element 1 is beta
    -
    -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    -{
    -
    -  // Initialize the seed and call the Mersenne Twister generator
    -  std::random_device rd;
    -  std::mt19937_64 gen(rd());
    -  // Set up the uniform distribution for x \in [0, 1)
    -  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    -  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    -  // diffusion constant from Schroedinger equation
    -  double D = 0.5; 
    -  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    -  // allocate matrices which contain the position of the particles  
    -  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    -  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    -  double Energy = 0.0; double EnergySquared = 0.0;
    -  //  initial trial positions
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int j = 0; j < Dimension; j++) {
    -      OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep);
    -    }
    -  }
    -  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    -  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    -  // loop over monte carlo cycles 
    -  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ 
    -    // new position 
    -    for (int i = 0; i < NumberParticles; i++) { 
    -      for (int j = 0; j < Dimension; j++) {
    -        // gaussian deviate to compute new positions using a given timestep
    -        NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -      }  
    -      //  for the other particles we need to set the position to the old position since
    -      //  we move only one particle at the time
    -      for (int k = 0; k < NumberParticles; k++) {
    -        if ( k != i) {
    -          for (int j = 0; j < Dimension; j++) {
    -            NewPosition(k,j) = OldPosition(k,j);
    -          }
    -        } 
    -      }
    -      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); 
    -      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    -      //  we compute the log of the ratio of the greens functions to be used in the 
    -      //  Metropolis-Hastings algorithm
    -      double GreensFunction = 0.0;            
    -      for (int j = 0; j < Dimension; j++) {
    -        GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    -          (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    -      }
    -      GreensFunction = exp(GreensFunction);
    -      // The Metropolis test is performed by moving one particle at the time
    -      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { 
    -        for (int  j = 0; j < Dimension; j++) {
    -          OldPosition(i,j) = NewPosition(i,j);
    -        }
    -        // The Jastrow part of the force on every particle depends on the
    -        // position of particle i, so the whole force matrix is refreshed;
    -        // copying only row i would leave a stale drift for the others
    -        for (int k = 0; k < NumberParticles; k++) {
    -          for (int j = 0; j < Dimension; j++) {
    -            OldQuantumForce(k,j) = NewQuantumForce(k,j);
    -          }
    -        }
    -        OldWaveFunction = NewWaveFunction;
    -      }
    -    }  //  end of loop over particles
    -    // compute local energy  
    -    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    -    // update energies
    -    Energy += DeltaE;
    -    EnergySquared += DeltaE*DeltaE;
    -  }   // end of loop over MC trials   
    -  // update the energy average and its squared 
    -  cumulative_e = Energy/NumberMCsamples;
    -  cumulative_e2 = EnergySquared/NumberMCsamples;
    -}   // end MonteCarloSampling function  
    -
    -
    -// Function to compute the trial wave function (not squared): single-particle part times the Jastrow factor
    -
    -double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    -{
    -  double wf = 0.0;
    -  // full Slater determinant for two particles, replace with Slater det for more particles 
    -  wf  = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    -  // contribution from Jastrow factor
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j))));
    -    }
    -  }
    -  return wf;
    -}
    -
    -// Function to calculate the local energy without numerical derivation of kinetic energy
    -
    -double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    -{
    -
    -  // compute the kinetic and potential energy from the single-particle part
    -  // for a many-electron system this has to be replaced by a Slater determinant
    -  // The absolute value of the interparticle length
    -  Matrix length( NumberParticles, NumberParticles);
    -  // Set up interparticle distance
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for(int j = i+1; j < NumberParticles; j++){
    -      length(i,j) = RelativeDistance(r, i, j);
    -      length(j,i) =  length(i,j);
    -    }
    -  }
    -  double KineticEnergy = 0.0;
    -  // Set up kinetic energy from Slater and Jastrow terms
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int k = 0; k < Dimension; k++) {
    -      double sum1 = 0.0; 
    -      for(int j = 0; j < NumberParticles; j++){
    -	if ( j != i) {
    -	  sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));
    -    }
    -  }
    -  KineticEnergy += -2*VariationalParameters(0)*NumberParticles;
    -  for (int i = 0; i < NumberParticles-1; i++) {
    -      for (int j = i+1; j < NumberParticles; j++) {
    -        KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) );
    -      }
    -  }
    -  KineticEnergy *= -0.5;
    -  // Set up potential energy, external potential + eventual electron-electron repulsion
    -  double PotentialEnergy = 0;
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    double DistanceSquared = singleparticle_pos2(r, i);
    -    PotentialEnergy += 0.5*DistanceSquared;  // sp energy HO part, note it has the oscillator frequency set to 1!
    -  }
    -  // Add the electron-electron repulsion
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      PotentialEnergy += 1.0/length(i,j);          
    -    }
    -  }
    -  double LocalE = KineticEnergy+PotentialEnergy;
    -  return LocalE;
    -}
    -
    -// Compute the analytical expression for the quantum force
    -void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    -{
    -  // compute the first derivative 
    -  for (int i = 0; i < NumberParticles; i++) {
    -    for (int k = 0; k < Dimension; k++) {
    -      // single-particle part, replace with Slater det for larger systems
    -      double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0));
    -      //  Jastrow factor contribution
    -      double Jsum = 0.0;
    -      for (int j = 0; j < NumberParticles; j++) {
    -	if ( j != i) {
    -	  Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      qforce(i,k) = 2.0*(Jsum+sppart);
    -    }
    -  }
    -} // end of QuantumForce function
    -
    -
    -#define ITMAX 200
    -#define EPS 3.0e-8
    -#define TOLX (4*EPS)
    -#define STPMX 100.0
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    -{
    -
    -  int check,i,its,j;
    -  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    -  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    -  Matrix hessian(n,n);
    -
    -  fp=(*func)(p);
    -  (*dfunc)(p,g);
    -  for (i = 0;i < n;i++) {
    -    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    -    hessian(i,i)=1.0;
    -    xi(i) = -g(i);
    -    sum += p(i)*p(i);
    -  }
    -  stpmax=STPMX*FMAX(sqrt(sum),(double)n);
    -  for (its=1;its<=ITMAX;its++) {
    -    *iter=its;
    -    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func);
    -    fp = *fret;
    -    for (i = 0; i< n;i++) {
    -      xi(i)=pnew(i)-p(i);
    -      p(i)=pnew(i);
    -    }
    -    test=0.0;
    -    for (i = 0;i< n;i++) {
    -      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);
    -      if (temp > test) test=temp;
    -    }
    -    if (test < TOLX) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i);
    -    (*dfunc)(p,g);
    -    test=0.0;
    -    den=FMAX(*fret,1.0);
    -    for (i=0;i<n;i++) {
    -      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den;
    -      if (temp > test) test=temp;
    -    }
    -    if (test < gtol) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i)-dg(i);
    -    for (i=0;i<n;i++) {
    -      hdg(i)=0.0;
    -      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j);
    -    }
    -    fac=fae=sumdg=sumxi=0.0;
    -    for (i=0;i<n;i++) {
    -      fac += dg(i)*xi(i);
    -      fae += dg(i)*hdg(i);
    -      sumdg += SQR(dg(i));
    -      sumxi += SQR(xi(i));
    -    }
    -    if (fac*fac > EPS*sumdg*sumxi) {
    -      fac=1.0/fac;
    -      fad=1.0/fae;
    -      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i);
    -      for (i=0;i<n;i++) {
    -	for (j=0;j<n;j++) {
    -	  hessian(i,j) += fac*xi(i)*xi(j)
    -	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j);
    -	}
    -      }
    -    }
    -    for (i=0;i<n;i++) {
    -      xi(i)=0.0;
    -      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j);
    -    }
    -  }
    -  cout << "too many iterations in dfpmin" << endl;
    -}
    -#undef ITMAX
    -#undef EPS
    -#undef TOLX
    -#undef STPMX
    -
    -#define ALF 1.0e-4
    -#define TOLX 1.0e-7
    -
    -// Backtracking line search along the direction p, starting from xold.
    -//
    -// n:       number of variables
    -// xold:    starting point, fold = func(xold), g = gradient at xold
    -// p:       search direction; rescaled in place when longer than stpmax
    -// x, *f:   on return, the new point and the function value there
    -// *check:  set to 1 when the step became too small (x is reset to xold), else 0
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -            double *f, double stpmax, int *check, double (*func)(Vector &p))
    -{
    -  int i;
    -  // alam2, f2 and fold2 hold the previous trial for the cubic fit; they are
    -  // first assigned at the end of the first pass, so start them at zero
    -  double a,alam,alam2=0.0,alamin,b,disc,f2=0.0,fold2=0.0,rhs1,rhs2,slope,sum,temp,
    -    test,tmplam;
    -
    -  *check=0;
    -  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    -  sum=sqrt(sum);
    -  // limit the length of the attempted step
    -  if (sum > stpmax)
    -    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    -  for (slope=0.0,i=0;i<n;i++)
    -    slope += g(i)*p(i);
    -  test=0.0;
    -  for (i=0;i<n;i++) {
    -    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    -    if (temp > test) test=temp;
    -  }
    -  alamin=TOLX/test;  // smallest step fraction that is still accepted
    -  alam=1.0;          // always try the full step first
    -  for (;;) {
    -    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    -    *f=(*func)(x);
    -    if (alam < alamin) {
    -      // step too small: give up and report it through *check
    -      for (i=0;i<n;i++) x(i)=xold(i);
    -      *check=1;
    -      return;
    -    } else if (*f <= fold+ALF*alam*slope) return;  // sufficient decrease
    -    else {
    -      if (alam == 1.0)
    -        tmplam = -slope/(2.0*(*f-fold-slope));  // first backtrack: quadratic model
    -      else {
    -        // later backtracks: cubic model through the two latest trials
    -        rhs1 = *f-fold-alam*slope;
    -        rhs2=f2-fold2-alam2*slope;
    -        a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    -        b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    -        if (a == 0.0) tmplam = -slope/(2.0*b);
    -        else {
    -          disc=b*b-3.0*a*slope;
    -          if (disc<0.0) {
    -            cout << "Roundoff problem in lnsrch." << endl;
    -            // the cubic has no real minimum: fall back to halving the step
    -            // instead of continuing with an uninitialised tmplam
    -            tmplam=0.5*alam;
    -          }
    -          else tmplam=(-b+sqrt(disc))/(3.0*a);
    -        }
    -        if (tmplam>0.5*alam)
    -          tmplam=0.5*alam;
    -      }
    -    }
    -    alam2=alam;
    -    f2 = *f;
    -    fold2=fold;
    -    alam=FMAX(tmplam,0.1*alam);  // never shrink by more than a factor of ten
    -  }
    -}
    -#undef ALF
    -#undef TOLX
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs103.html b/doc/src/week9/._week9-bs103.html deleted file mode 100644 index 8fac6fa4..00000000 --- a/doc/src/week9/._week9-bs103.html +++ /dev/null @@ -1,717 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    What is OpenMP

    -
    -
    - -
      -
    • OpenMP provides high-level thread programming
    • -
    • Multiple cooperating threads are allowed to run simultaneously
    • -
    • Threads are created and destroyed dynamically in a fork-join pattern
    • -
        -
      • An OpenMP program consists of a number of parallel regions
      • -
      • Between two parallel regions there is only one master thread
      • -
      • In the beginning of a parallel region, a team of new threads is spawned
      • -
      -
    • The newly spawned threads work simultaneously with the master thread
    • -
    • At the end of a parallel region, the new threads are destroyed
    • -
    -

    Many good tutorials online and excellent textbook

    -
      -
    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. -
    3. Many tutorials online like OpenMP official site
    4. -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs104.html b/doc/src/week9/._week9-bs104.html deleted file mode 100644 index 6e219014..00000000 --- a/doc/src/week9/._week9-bs104.html +++ /dev/null @@ -1,760 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Getting started, things to remember

    -
    -
    - -
      -
    • Remember the header file
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Insert compiler directives in C++ syntax as
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp...
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Compile with for example c++ -fopenmp code.cpp
    • -
    • Execute
    • -
        -
      • Remember to assign the environment variable OMP_NUM_THREADS
      • -
      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      • -
      -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs105.html b/doc/src/week9/._week9-bs105.html deleted file mode 100644 index 13aecc4f..00000000 --- a/doc/src/week9/._week9-bs105.html +++ /dev/null @@ -1,750 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    OpenMP syntax

    -
      -
    • Mostly directives
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp construct [ clause ...]
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Some functions and types
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Most apply to a block of code
    • -
    • Specifically, a structured block
    • -
    • Enter at top, exit at bottom only, exit(), abort() permitted
    • -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs106.html b/doc/src/week9/._week9-bs106.html deleted file mode 100644 index 85a3adf4..00000000 --- a/doc/src/week9/._week9-bs106.html +++ /dev/null @@ -1,702 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Different OpenMP styles of parallelism

    -

    OpenMP supports several different ways to specify thread parallelism

    - -
      -
    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • -
    • Parallel loops: Special case for loops, simplifies data parallel code
    • -
    • Task parallelism, new in OpenMP 3
    • -
    • Several ways to manage thread coordination, including Master regions and Locks
    • -
    • Memory model for shared data
    • -
    -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs107.html b/doc/src/week9/._week9-bs107.html deleted file mode 100644 index 83cdccb7..00000000 --- a/doc/src/week9/._week9-bs107.html +++ /dev/null @@ -1,740 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    General code structure

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -/* Skeleton of an OpenMP program: serial parts separated by parallel regions */
    -int main ()
    -{
    -  int var1, var2, var3;
    -  /* serial code */
    -  /* ... */
    -  /* start of a parallel region */
    -  /* var1 and var2 are private to each thread, var3 is shared by the team */
    -#pragma omp parallel private(var1, var2) shared(var3)
    -  {
    -    /* ... */
    -  }
    -  /* more serial code */
    -  /* ... */
    -  /* another parallel region */
    -#pragma omp parallel
    -  {
    -    /* ... */
    -  }
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs108.html b/doc/src/week9/._week9-bs108.html deleted file mode 100644 index 8302350d..00000000 --- a/doc/src/week9/._week9-bs108.html +++ /dev/null @@ -1,736 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallel region

    -
    -
    - -
      -
    • A parallel region is a block of code that is executed by a team of threads
    • -
    • The following compiler directive creates a parallel region
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added at the end of the directive
    • -
    • Most often used clauses:
    • -
        -
      • default(shared) or default(none)
      • -
      • shared(list of variables)
      • -
      • private(list of variables)
      • -
      -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs109.html b/doc/src/week9/._week9-bs109.html deleted file mode 100644 index 0535ac7e..00000000 --- a/doc/src/week9/._week9-bs109.html +++ /dev/null @@ -1,738 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Hello world, not again, please!

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -#include <cstdio>
    -int main (int argc, char *argv[])  // every thread prints a greeting, then thread 0 reports the team size
    -{
    -int th_id, nthreads;
    -#pragma omp parallel private(th_id) shared(nthreads)
    -{  // th_id is listed as private, nthreads as shared
    -th_id = omp_get_thread_num();
    -printf("Hello World from thread %d\n", th_id);
    -#pragma omp barrier
    -if ( th_id == 0 ) {  // placed after the barrier, so only thread 0 writes nthreads and prints the count
    -nthreads = omp_get_num_threads();
    -printf("There are %d threads\n",nthreads);
    -}
    -}
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs110.html b/doc/src/week9/._week9-bs110.html deleted file mode 100644 index 9be952fd..00000000 --- a/doc/src/week9/._week9-bs110.html +++ /dev/null @@ -1,765 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Hello world, yet another variant

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <cstdio>
    -#include <omp.h>
    -int main(int argc, char *argv[]) 
    -{
    - omp_set_num_threads(4); 
    -#pragma omp parallel
    - {
    -   int id = omp_get_thread_num();
    -   int nproc = omp_get_num_threads(); 
    -   cout << "Hello world with id number and processes " <<  id <<  nproc << endl;
    - } 
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Variables declared outside of the parallel region are shared by all threads. -If a variable like id is declared outside of the -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel, 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    it would have been shared by the various threads, possibly causing erroneous output

    -
      -
    • Why? What would go wrong? Why do we add possibly?
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs111.html b/doc/src/week9/._week9-bs111.html deleted file mode 100644 index f62dcabf..00000000 --- a/doc/src/week9/._week9-bs111.html +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Important OpenMP library routines

    -
    -
    - - -
      -
    • int omp_get_num_threads(), returns the number of threads inside a parallel region
    • -
    • int omp_get_thread_num(), returns the thread number (id) of the calling thread inside a parallel region
    • -
    • void omp_set_num_threads(int), sets the number of threads to be used
    • -
    • void omp_set_nested(int), turns nested parallelism on/off
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs112.html b/doc/src/week9/._week9-bs112.html deleted file mode 100644 index dc1368f0..00000000 --- a/doc/src/week9/._week9-bs112.html +++ /dev/null @@ -1,733 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Private variables

    -
    -
    - -

    The private clause can be used to make thread-private versions of such variables:

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel private(id)
    -{
    - int id = omp_get_thread_num();
    - cout << "My thread num" << id << endl; 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • What is their value on entry? Exit?
    • -
    • OpenMP provides ways to control that
    • -
    • Can use default(none) to require the sharing of each variable to be described
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs113.html b/doc/src/week9/._week9-bs113.html deleted file mode 100644 index 5786409f..00000000 --- a/doc/src/week9/._week9-bs113.html +++ /dev/null @@ -1,730 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Master region

    -
    -
    - -

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel 
    -{
    -  #pragma omp master
    -   {
    -      int id = omp_get_thread_num();
    -      cout << "My thread num" << id << endl; 
    -   } 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs114.html b/doc/src/week9/._week9-bs114.html deleted file mode 100644 index 8de6d5b0..00000000 --- a/doc/src/week9/._week9-bs114.html +++ /dev/null @@ -1,738 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallel for loop

    -
    -
    - -
      -
    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added, such as
    • -
        -
      • schedule(static, chunk size)
      • -
      • schedule(dynamic, chunk size)
      • -
      • schedule(guided, chunk size) (non-deterministic allocation)
      • -
      • schedule(runtime)
      • -
      • private(list of variables)
      • -
      • reduction(operator:variable)
      • -
      • nowait
      • -
      -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs115.html b/doc/src/week9/._week9-bs115.html deleted file mode 100644 index 5ed23399..00000000 --- a/doc/src/week9/._week9-bs115.html +++ /dev/null @@ -1,729 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallel computations and loops

    - -
    -
    - -

    OpenMP provides an easy way to parallelize a loop

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -  for (i=0; i<n; i++) c[i] = a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    OpenMP handles the loop index variable (no need to declare it in the for loop or make it private)

    - -

    Which thread handles which index values? There are several options.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs116.html b/doc/src/week9/._week9-bs116.html deleted file mode 100644 index 604c97f6..00000000 --- a/doc/src/week9/._week9-bs116.html +++ /dev/null @@ -1,709 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Scheduling of loop computations

    - -
    -
    - -

    We can let the OpenMP runtime decide. The decision is about how the loop iterations are scheduled -and OpenMP defines three choices of loop scheduling: -

    -
      -
    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. -
    3. Dynamic: Selection made at runtime
    4. -
    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs117.html b/doc/src/week9/._week9-bs117.html deleted file mode 100644 index 7ff2f4e8..00000000 --- a/doc/src/week9/._week9-bs117.html +++ /dev/null @@ -1,736 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Example code for loop scheduling

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    -chunk = CHUNKSIZE;
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(dynamic,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs118.html b/doc/src/week9/._week9-bs118.html deleted file mode 100644 index 65988f01..00000000 --- a/doc/src/week9/._week9-bs118.html +++ /dev/null @@ -1,736 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Example code for loop scheduling, guided instead of dynamic

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    -chunk = CHUNKSIZE;
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(guided,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs119.html b/doc/src/week9/._week9-bs119.html deleted file mode 100644 index 94d0fadf..00000000 --- a/doc/src/week9/._week9-bs119.html +++ /dev/null @@ -1,756 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    More on Parallel for loop

    -
    -
    - -
      -
    • The number of loop iterations must be known on entry to the loop (it cannot be non-deterministic); break, return, exit, goto are not allowed inside the for-loop
    • -
    • The loop index is private to each thread
    • -
    • A reduction variable is special
    • -
        -
      • During the for-loop there is a local private copy in each thread
      • -
      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • -
      -
    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • -
    - - -
    -
    -
    -
    -
    -
    // #pragma omp parallel and #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    can be combined into

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs120.html b/doc/src/week9/._week9-bs120.html deleted file mode 100644 index e3df3b26..00000000 --- a/doc/src/week9/._week9-bs120.html +++ /dev/null @@ -1,751 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    What can happen with this loop?

    - -
    -
    - -

    What happens with code like this

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    - - -
    -
    -
    -
    -
    -
    sum = 0.0;
    -#pragma omp parallel for reduction(+:sum)
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs121.html b/doc/src/week9/._week9-bs121.html deleted file mode 100644 index 04dff569..00000000 --- a/doc/src/week9/._week9-bs121.html +++ /dev/null @@ -1,732 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Inner product

    -
    -
    - -$$ -\sum_{i=0}^{n-1} a_ib_i -$$ - - - -
    -
    -
    -
    -
    -
    int i;
    -double sum = 0.;
    -/* allocating and initializing arrays */
    -/* ... */
    -#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    - for (i=0; i<N; i++) sum += a[i]*b[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs122.html b/doc/src/week9/._week9-bs122.html deleted file mode 100644 index f39aa7a7..00000000 --- a/doc/src/week9/._week9-bs122.html +++ /dev/null @@ -1,735 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Different threads do different tasks

    -
    -
    - - -

    Different threads do different tasks independently, each section is executed by one thread.

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -#pragma omp sections
    -{
    -#pragma omp section
    -funcA ();
    -#pragma omp section
    -funcB ();
    -#pragma omp section
    -funcC ();
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs123.html b/doc/src/week9/._week9-bs123.html deleted file mode 100644 index a6542487..00000000 --- a/doc/src/week9/._week9-bs123.html +++ /dev/null @@ -1,750 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Single execution

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #pragma omp single { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The code is executed by one thread only, no guarantee which thread

    - -

    There is an implicit barrier at the end (unless the nowait clause is used)

    - - -
    -
    -
    -
    -
    -
    #pragma omp master { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs124.html b/doc/src/week9/._week9-bs124.html deleted file mode 100644 index 7fafa115..00000000 --- a/doc/src/week9/._week9-bs124.html +++ /dev/null @@ -1,820 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Coordination and synchronization

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #pragma omp barrier
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Synchronization, must be encountered by all threads in a team (or none)

    - - -
    -
    -
    -
    -
    -
    #pragma omp ordered { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is another form of synchronization (in sequential order). -The form -

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and

    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic { single assignment statement }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is more efficient than

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs125.html b/doc/src/week9/._week9-bs125.html deleted file mode 100644 index 862c4125..00000000 --- a/doc/src/week9/._week9-bs125.html +++ /dev/null @@ -1,715 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Data scope

    -
    -
    - -
      -
    • OpenMP data scope attribute clauses:
    • -
        -
      • shared
      • -
      • private
      • -
      • firstprivate
      • -
      • lastprivate
      • -
      • reduction
      • -
      -
    -

    What are the purposes of these attributes

    -
      -
    • define how and which variables are transferred to a parallel region (and back)
    • -
    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs126.html b/doc/src/week9/._week9-bs126.html deleted file mode 100644 index 68256fe4..00000000 --- a/doc/src/week9/._week9-bs126.html +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Some remarks

    -
    -
    - - -
      -
    • When entering a parallel region, the private clause ensures that each thread has its own new variable instances. The new variables are assumed to be uninitialized.
    • -
    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • -
    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • -
    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs127.html b/doc/src/week9/._week9-bs127.html deleted file mode 100644 index cd237ae7..00000000 --- a/doc/src/week9/._week9-bs127.html +++ /dev/null @@ -1,768 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallelizing nested for-loops

    -
    -
    - - -
      -
    • Serial code
    • -
    - - -
    -
    -
    -
    -
    -
    for (i=0; i<100; i++)
    -    for (j=0; j<100; j++)
    -        a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Parallelization
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for private(j)
    -for (i=0; i<100; i++)
    -    for (j=0; j<100; j++)
    -       a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Why not parallelize the inner loop? To save the overhead of repeated thread fork-joins
    • -
    • Why must j be private? To avoid a race condition among the threads
    • -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs128.html b/doc/src/week9/._week9-bs128.html deleted file mode 100644 index bbde9f39..00000000 --- a/doc/src/week9/._week9-bs128.html +++ /dev/null @@ -1,733 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Nested parallelism

    -
    -
    - -

    When a thread in a parallel region encounters another parallel construct, it -may create a new team of threads and become the master of the new -team. -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel num_threads(4)
    -{
    -/* .... */
    -#pragma omp parallel num_threads(2)
    -{
    -//  
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs129.html b/doc/src/week9/._week9-bs129.html deleted file mode 100644 index d6c9e935..00000000 --- a/doc/src/week9/._week9-bs129.html +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Parallel tasks

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #pragma omp task 
    -#pragma omp parallel shared(p_vec) private(i)
    -{
    -#pragma omp single
    -{
    -for (i=0; i<N; i++) {
    -  double r = random_number();
    -  if (p_vec[i] > r) {
    -#pragma omp task
    -   do_work (p_vec[i]);
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs130.html b/doc/src/week9/._week9-bs130.html deleted file mode 100644 index 3d44e13a..00000000 --- a/doc/src/week9/._week9-bs130.html +++ /dev/null @@ -1,759 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Common mistakes

    -
    -
    - -

    Race condition

    - - -
    -
    -
    -
    -
    -
    int nthreads;
    -#pragma omp parallel shared(nthreads)
    -{
    -nthreads = omp_get_num_threads();
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Deadlock

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -...
    -#pragma omp critical
    -{
    -...
    -#pragma omp barrier
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs131.html b/doc/src/week9/._week9-bs131.html deleted file mode 100644 index f1d5da69..00000000 --- a/doc/src/week9/._week9-bs131.html +++ /dev/null @@ -1,730 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Not all computations are simple

    -
    -
    - -

    Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads -

    - -

    An example is finding the location and value of the largest element in an array

    - - -
    -
    -
    -
    -
    -
    for (i=0; i<n; i++) { 
    -   if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -   }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs132.html b/doc/src/week9/._week9-bs132.html deleted file mode 100644 index 26b84107..00000000 --- a/doc/src/week9/._week9-bs132.html +++ /dev/null @@ -1,754 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Not all computations are simple, competing threads

    -
    -
    - -

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    -
      -
    1. OpenMP provides several ways to coordinate access to shared values
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following block
    2. -
    -

    Atomic may be faster than critical but depends on hardware

    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs133.html b/doc/src/week9/._week9-bs133.html deleted file mode 100644 index b6b41ca9..00000000 --- a/doc/src/week9/._week9-bs133.html +++ /dev/null @@ -1,727 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    How to find the max value using OpenMP

    -
    -
    - -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -    if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs134.html b/doc/src/week9/._week9-bs134.html deleted file mode 100644 index b3373106..00000000 --- a/doc/src/week9/._week9-bs134.html +++ /dev/null @@ -1,732 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Then deal with the race conditions

    -
    -
    - -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -One way is to protect the update of the shared variables with a critical region, as in -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -#pragma omp critical
    -  {
    -     if (x[i] > maxval) {
    -       maxval = x[i];
    -       maxloc = i; 
    -     }
    -  }
    -} 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Exercise: write a code which implements this and give an estimate of its performance. Perform several runs, -first with a serial code only, with and without vectorization, and then compare the serial code with the one that uses OpenMP. Run on different architectures if you can. -

    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs135.html b/doc/src/week9/._week9-bs135.html deleted file mode 100644 index ecd0d861..00000000 --- a/doc/src/week9/._week9-bs135.html +++ /dev/null @@ -1,689 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    What can slow down OpenMP performance?

    -

    Give it a thought!

    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs136.html b/doc/src/week9/._week9-bs136.html deleted file mode 100644 index 599b6c70..00000000 --- a/doc/src/week9/._week9-bs136.html +++ /dev/null @@ -1,700 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    What can slow down OpenMP performance?

    -
    -
    - -

    Performance is poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    -
      -
    • We do not care about the value during the execution of the loop, just the value at the end.
    • -
    -

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties

    - -Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs137.html b/doc/src/week9/._week9-bs137.html deleted file mode 100644 index 3740456b..00000000 --- a/doc/src/week9/._week9-bs137.html +++ /dev/null @@ -1,727 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Find the max location for each thread

    -
    -
    - - - -
    -
    -
    -
    -
    -
    int maxloc[MAX_THREADS], mloc;
    -double maxval[MAX_THREADS], mval; 
    -#pragma omp parallel shared(maxval,maxloc)
    -{
    -  int id = omp_get_thread_num(); 
    -  maxval[id] = -1.0e30;
    -#pragma omp for
    -   for (int i=0; i<n; i++) {
    -       if (x[i] > maxval[id]) { 
    -           maxloc[id] = i;
    -           maxval[id] = x[i]; 
    -       }
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs138.html b/doc/src/week9/._week9-bs138.html deleted file mode 100644 index 060ae5da..00000000 --- a/doc/src/week9/._week9-bs138.html +++ /dev/null @@ -1,726 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Combine the values from each thread

    -
    -
    - - - -
    -
    -
    -
    -
    -
    #pragma omp flush (maxloc,maxval)
    -#pragma omp master
    -  {
    -    int nt = omp_get_num_threads(); 
    -    mloc = maxloc[0]; 
    -    mval = maxval[0]; 
    -    for (int i=1; i<nt; i++) {
    -        if (maxval[i] > mval) { 
    -           mval = maxval[i]; 
    -           mloc = maxloc[i];
    -        } 
    -     }
    -   }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Note that we let the master thread perform the last operation.

    -
    -
    - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/._week9-bs139.html b/doc/src/week9/._week9-bs139.html deleted file mode 100644 index 5214c06c..00000000 --- a/doc/src/week9/._week9-bs139.html +++ /dev/null @@ -1,764 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - -
    -

     

     

     

    - - -

    Matrix-matrix multiplication

    -

    This code adds two vectors and computes the norm of the resulting vector using OpenMP

    - - -
    -
    -
    -
    -
    -
    //  OpenMP program to compute vector norm by adding two other vectors
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of vector
    -  int n = atoi(argv[1]);
    -  double *a, *b, *c;
    -  int i;
    -  int thread_num;
    -  double wtime, Norm2, s, angle;
    -  cout << "  Perform addition of two vectors and compute the norm-2." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -# pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2)
    -  // Set up values for vectors  a and b
    -  for (i = 0; i < n; i++){
    -      angle = 2.0*M_PI*i/ (( double ) n);
    -      a[i] = s*(sin(angle) + cos(angle));
    -      b[i] =  s*sin(2.0*angle);
    -      c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (i = 0; i < n; i++){
    -     c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  Norm2 = 0.0;
    -  for (i = 0; i < n; i++){
    -     Norm2  += c[i]*c[i];
    -  }
    -// end parallel region
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm-2 computation=" << wtime  << endl;
    -  cout << " Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -

    - -

    - -
    - - - - -
    - -
    - - - diff --git a/doc/src/week9/.week9.copyright b/doc/src/week9/.week9.copyright deleted file mode 100644 index a4379260..00000000 --- a/doc/src/week9/.week9.copyright +++ /dev/null @@ -1 +0,0 @@ -{'holder': ['Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no'], 'year': '1999-2024', 'license': 'Released under CC Attribution-NonCommercial 4.0 license', 'cite doconce': False} \ No newline at end of file diff --git a/doc/src/week9/README.txt b/doc/src/week9/README.txt deleted file mode 100644 index 763ea11c..00000000 --- a/doc/src/week9/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -This IPython notebook week9.ipynb does not require any additional -programs. diff --git a/doc/src/week9/_minted-week9/0A6C47F8068CEC3257BC594AE784D85A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/0A6C47F8068CEC3257BC594AE784D85A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 40141e32..00000000 --- a/doc/src/week9/_minted-week9/0A6C47F8068CEC3257BC594AE784D85A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,20 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}cstdio\PYGZgt{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{argv}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{th\PYGZus{}id}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{nthreads}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel private(th\PYGZus{}id) shared(nthreads)} -\PYG{p}{\PYGZob{}} -\PYG{n}{th\PYGZus{}id}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}thread\PYGZus{}num}\PYG{p}{();} -\PYG{n}{printf}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}Hello World from thread \PYGZpc{}d}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}\PYG{+w}{ 
}\PYG{n}{th\PYGZus{}id}\PYG{p}{);} -\PYG{c+cp}{\PYGZsh{}pragma omp barrier} -\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{th\PYGZus{}id}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{n}{nthreads}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}num\PYGZus{}threads}\PYG{p}{();} -\PYG{n}{printf}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}There are \PYGZpc{}d threads}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s}{\PYGZdq{}}\PYG{p}{,}\PYG{n}{nthreads}\PYG{p}{);} -\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} -\PYG{k}{return}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/0CC5E88A797F0E8C126048DE4396FC7F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/0CC5E88A797F0E8C126048DE4396FC7F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index a4b9a87f..00000000 --- a/doc/src/week9/_minted-week9/0CC5E88A797F0E8C126048DE4396FC7F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{o}{*}\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/0DAB6AD41FAED4AB2EF5C7571274C4D20DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/0DAB6AD41FAED4AB2EF5C7571274C4D20DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 8af4aba6..00000000 --- a/doc/src/week9/_minted-week9/0DAB6AD41FAED4AB2EF5C7571274C4D20DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,10 +0,0 @@ 
-\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{maxval}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{maxloc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/118CEE4B50FB428DD3D8549EB29F0AAE0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/118CEE4B50FB428DD3D8549EB29F0AAE0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index fd3919a5..00000000 --- a/doc/src/week9/_minted-week9/118CEE4B50FB428DD3D8549EB29F0AAE0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{mpirun}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{n}\PYG{+w}{ }\PYG{l+m+mi}{10}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{code}\PYG{p}{.}\PYG{n}{x} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/1490AC872D3FB9D3137AC51EC16D9E880DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/1490AC872D3FB9D3137AC51EC16D9E880DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 05a4949a..00000000 --- a/doc/src/week9/_minted-week9/1490AC872D3FB9D3137AC51EC16D9E880DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{clang}\PYG{o}{++}\PYG{+w}{ 
}\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{fno}\PYG{o}{\PYGZhy{}}\PYG{n}{vectorize}\PYG{+w}{ }\PYG{n}{novec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{vecexample}\PYG{p}{.}\PYG{n}{cpp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/1B144BE7E9B688D03F08657E119E94790DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/1B144BE7E9B688D03F08657E119E94790DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 38ffcd72..00000000 --- a/doc/src/week9/_minted-week9/1B144BE7E9B688D03F08657E119E94790DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{cos}\PYG{p}{(}\PYG{n}{j}\PYG{o}{*}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/23DB3DD9992D3EA60D66BAB6E94FA2C30DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/23DB3DD9992D3EA60D66BAB6E94FA2C30DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 460deee8..00000000 --- a/doc/src/week9/_minted-week9/23DB3DD9992D3EA60D66BAB6E94FA2C30DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,15 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// Forward substitution} -\PYG{c+c1}{// Note that we can simplify by precalculating a[i\PYGZhy{}1]/b[i\PYGZhy{}1]} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ 
}\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{o}{*}\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{])}\PYG{o}{/}\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{g}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{o}{*}\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{])}\PYG{o}{/}\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{x}\PYG{p}{[}\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{f}\PYG{p}{[}\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{/}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{];} -\PYG{+w}{ }\PYG{c+c1}{// Backwards substitution} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}2}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZgt{}=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZhy{}\PYGZhy{}}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{o}{*}\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{/}\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{];} -\PYG{+w}{ 
}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{f}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{o}{/}\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/23E6263994F51499BBF63FFD69B8B6E50DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/23E6263994F51499BBF63FFD69B8B6E50DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 5634b520..00000000 --- a/doc/src/week9/_minted-week9/23E6263994F51499BBF63FFD69B8B6E50DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/25191E82FD3D485EB84FA500B216EB440DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/25191E82FD3D485EB84FA500B216EB440DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 96e32563..00000000 --- a/doc/src/week9/_minted-week9/25191E82FD3D485EB84FA500B216EB440DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ 
-\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp single \PYGZob{} ... \PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/260BBDD86FAABC6792995A2B95ABBDC2B0CAD346A13BD81D9AF720CF234DAE08.pygtex b/doc/src/week9/_minted-week9/260BBDD86FAABC6792995A2B95ABBDC2B0CAD346A13BD81D9AF720CF234DAE08.pygtex deleted file mode 100644 index 61178974..00000000 --- a/doc/src/week9/_minted-week9/260BBDD86FAABC6792995A2B95ABBDC2B0CAD346A13BD81D9AF720CF234DAE08.pygtex +++ /dev/null @@ -1,68 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYGZsh{}include \PYGZlt{}cstdlib\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iostream\PYGZgt{} -\PYGZsh{}include \PYGZlt{}cmath\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iomanip\PYGZgt{} -\PYGZsh{}include \PYGZdq{}time.h\PYGZdq{} - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -\PYGZob{} - // read in dimension of square matrix - int n = atoi(argv[1]); - double s = 1.0/sqrt( (double) n); - double **A, **B, **C; - // Start timing - clock\PYGZus{}t start, finish; - start = clock(); - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - \PYGZcb{} - // Set up values for matrix A and B and zero matrix C - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (int j = 0; j \PYGZlt{} n; j++) \PYGZob{} - double angle = 2.0*M\PYGZus{}PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - \PYGZcb{} - \PYGZcb{} - // Then perform the matrix\PYGZhy{}matrix multiplication - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (int j = 0; j \PYGZlt{} n; j++) \PYGZob{} - double sum = 0.0; - for (int k = 0; k \PYGZlt{} n; k++) \PYGZob{} - sum += B[i][k]*A[k][j]; - \PYGZcb{} - C[i][j] = sum; - 
\PYGZcb{} - \PYGZcb{} - // Compute now the Frobenius norm - double Fsum = 0.0; - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (int j = 0; j \PYGZlt{} n; j++) \PYGZob{} - Fsum += C[i][j]*C[i][j]; - \PYGZcb{} - \PYGZcb{} - Fsum = sqrt(Fsum); - finish = clock(); - double timeused = (double) (finish \PYGZhy{} start)/(CLOCKS\PYGZus{}PER\PYGZus{}SEC ); - cout \PYGZlt{}\PYGZlt{} setiosflags(ios::showpoint | ios::uppercase); - cout \PYGZlt{}\PYGZlt{} setprecision(10) \PYGZlt{}\PYGZlt{} setw(20) \PYGZlt{}\PYGZlt{} \PYGZdq{}Time used for matrix\PYGZhy{}matrix multiplication=\PYGZdq{} \PYGZlt{}\PYGZlt{} timeused \PYGZlt{}\PYGZlt{} endl; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} Frobenius norm = \PYGZdq{} \PYGZlt{}\PYGZlt{} Fsum \PYGZlt{}\PYGZlt{} endl; - // Free up space - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - \PYGZcb{} - delete[] A; - delete[] B; - delete[] C; - return 0; -\PYGZcb{} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/2989E68D2545DF095F8938FE8589B3BC0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/2989E68D2545DF095F8938FE8589B3BC0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index d75d42d8..00000000 --- a/doc/src/week9/_minted-week9/2989E68D2545DF095F8938FE8589B3BC0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,16 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp flush (maxloc,maxval)} -\PYG{c+cp}{\PYGZsh{}pragma omp master} -\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nt}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}num\PYGZus{}threads}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{mloc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{maxloc}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{mval}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{];} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ 
}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{nt}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{mval}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{mval}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{mloc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{maxloc}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/2A64863B652EA217A1D3AE654024CA750DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/2A64863B652EA217A1D3AE654024CA750DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index ae31ccdf..00000000 --- a/doc/src/week9/_minted-week9/2A64863B652EA217A1D3AE654024CA750DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{clang}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{Rpass}\PYG{o}{\PYGZhy{}}\PYG{n}{missed}\PYG{o}{=}\PYG{n}{loop}\PYG{o}{\PYGZhy{}}\PYG{n}{vectorize}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{vec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{vecexample}\PYG{p}{.}\PYG{n}{cpp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/2D780435D05ED64219F7B38EA5F3AB390DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/2D780435D05ED64219F7B38EA5F3AB390DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 76acec76..00000000 --- a/doc/src/week9/_minted-week9/2D780435D05ED64219F7B38EA5F3AB390DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ 
-\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{c}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{cpp} -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{exe}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{o} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/2F38624B127E615C4039128AF3F503E70DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/2F38624B127E615C4039128AF3F503E70DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index ced31734..00000000 --- a/doc/src/week9/_minted-week9/2F38624B127E615C4039128AF3F503E70DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,11 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{c+cp}{\PYGZsh{}pragma omp master} -\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}thread\PYGZus{}num}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}My thread num\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/32243EB5D4E7A20BB0A45AC237E18DA20DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/32243EB5D4E7A20BB0A45AC237E18DA20DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index d014d577..00000000 --- a/doc/src/week9/_minted-week9/32243EB5D4E7A20BB0A45AC237E18DA20DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ 
}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{b}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{15.}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/3258EE875704B37DC577CFA7C25BB3060DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/3258EE875704B37DC577CFA7C25BB3060DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index ad053703..00000000 --- a/doc/src/week9/_minted-week9/3258EE875704B37DC577CFA7C25BB3060DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp atomic \PYGZob{} single assignment statement \PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/36E0347A38C0565D1FE358C3F94B01330DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/36E0347A38C0565D1FE358C3F94B01330DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index c7660fa7..00000000 --- a/doc/src/week9/_minted-week9/36E0347A38C0565D1FE358C3F94B01330DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,15 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel} -\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp sections} -\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp section} -\PYG{n}{funcA}\PYG{+w}{ }\PYG{p}{();} -\PYG{c+cp}{\PYGZsh{}pragma omp section} 
-\PYG{n}{funcB}\PYG{+w}{ }\PYG{p}{();} -\PYG{c+cp}{\PYGZsh{}pragma omp section} -\PYG{n}{funcC}\PYG{+w}{ }\PYG{p}{();} -\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/38B2F39EC7518890F1584A08825C00CC0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/38B2F39EC7518890F1584A08825C00CC0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 8775dbe2..00000000 --- a/doc/src/week9/_minted-week9/38B2F39EC7518890F1584A08825C00CC0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{l+m+mi}{100}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{l+m+mi}{100}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/39BDD06D518805E8CC3772EB120B830A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/39BDD06D518805E8CC3772EB120B830A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index fe7b5855..00000000 --- a/doc/src/week9/_minted-week9/39BDD06D518805E8CC3772EB120B830A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,460 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// Variational Monte Carlo for atoms with importance sampling, slater det} -\PYG{c+c1}{// Test case for 
2\PYGZhy{}electron quantum dot, no classes using Mersenne\PYGZhy{}Twister RNG} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZdq{}mpi.h\PYGZdq{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}cmath\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}random\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}string\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}iostream\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}fstream\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}iomanip\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZdq{}vectormatrixclass.h\PYGZdq{}} - -\PYG{k}{using}\PYG{+w}{ }\PYG{k}{namespace}\PYG{+w}{ }\PYG{n+nn}{std}\PYG{p}{;} -\PYG{c+c1}{// output file as global variable} -\PYG{n}{ofstream}\PYG{+w}{ }\PYG{n}{ofile}\PYG{p}{;} -\PYG{c+c1}{// the step length and its squared inverse for the second derivative} -\PYG{c+c1}{// Here we define global variables used in various functions} -\PYG{c+c1}{// These can be changed by using classes} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{2}\PYG{p}{;} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{2}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// we fix also the number of electrons to be 2} - -\PYG{c+c1}{// declaration of functions} - -\PYG{c+c1}{// The Mc sampling for the variational Monte Carlo} -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{MonteCarloSampling}\PYG{p}{(}\PYG{k+kt}{int}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{);} - -\PYG{c+c1}{// The variational wave function} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{WaveFunction}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ 
}\PYG{o}{\PYGZam{}}\PYG{p}{);} - -\PYG{c+c1}{// The local energy} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{LocalEnergy}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{);} - -\PYG{c+c1}{// The quantum force} -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{QuantumForce}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{p}{);} - - -\PYG{c+c1}{// inline function for single\PYGZhy{}particle wave function} -\PYG{k+kr}{inline}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{SPwavefunction}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{alpha}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{exp}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{n}{alpha}\PYG{o}{*}\PYG{n}{r}\PYG{o}{*}\PYG{l+m+mf}{0.5}\PYG{p}{);} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// inline function for derivative of single\PYGZhy{}particle wave function} -\PYG{k+kr}{inline}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{DerivativeSPwavefunction}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{alpha}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{o}{*}\PYG{n}{alpha}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// function for absolute value of relative distance} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{RelativeDistance}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{r\PYGZus{}ij}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ 
}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{r\PYGZus{}ij}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{(}\PYG{n}{j}\PYG{p}{,}\PYG{n}{k}\PYG{p}{))}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{(}\PYG{n}{j}\PYG{p}{,}\PYG{n}{k}\PYG{p}{));} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{r\PYGZus{}ij}\PYG{p}{);} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// inline function for derivative of Jastrow factor} -\PYG{k+kr}{inline}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{JastrowDerivative}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{beta}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{k}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{(}\PYG{n}{j}\PYG{p}{,}\PYG{n}{k}\PYG{p}{))}\PYG{o}{/}\PYG{p}{(}\PYG{n}{RelativeDistance}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{)}\PYG{o}{*}\PYG{n}{pow}\PYG{p}{(}\PYG{l+m+mf}{1.0}\PYG{o}{+}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{RelativeDistance}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{),}\PYG{l+m+mi}{2}\PYG{p}{));} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// function for square of position of single particle} -\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{n+nf}{singleparticle\PYGZus{}pos2}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{r\PYGZus{}single\PYGZus{}particle}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{r\PYGZus{}single\PYGZus{}particle}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{*}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{r\PYGZus{}single\PYGZus{}particle}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{lnsrch}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{xold}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{fold}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{g}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{x}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{f}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{stpmax}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{check}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{));} - -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{dfpmin}\PYG{p}{(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ 
}\PYG{n}{n}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{gtol}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{iter}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{fret}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{),}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{dfunc}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{g}\PYG{p}{));} - -\PYG{k}{static}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{sqrarg}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)} - - -\PYG{k}{static}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{maxarg1}\PYG{p}{,}\PYG{n}{maxarg2}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) \PYGZgt{} (maxarg2) ?\PYGZbs{}} -\PYG{c+cp}{ (maxarg1) : (maxarg2))} - - -\PYG{c+c1}{// Begin of main program} - -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{argv}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} - -\PYG{+w}{ }\PYG{c+c1}{// MPI initializations} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{NumberProcesses}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MyRank}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{NumberMCsamples}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Init}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{argv}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}size}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{NumberProcesses}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}rank}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{MyRank}\PYG{p}{);} -\PYG{+w}{ 
}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{StartTime}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Wtime}\PYG{p}{();} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MyRank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{o}{\PYGZam{}\PYGZam{}}\PYG{+w}{ }\PYG{n}{argc}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Bad Usage: \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}} -\PYG{+w}{ }\PYG{l+s}{\PYGZdq{} Read also output file on same line and number of Monte Carlo cycles\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{c+c1}{// Read filename and number of Monte Carlo cycles from the command line} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MyRank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{o}{\PYGZam{}\PYGZam{}}\PYG{+w}{ }\PYG{n}{argc}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{l+m+mi}{2}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{string}\PYG{+w}{ }\PYG{n}{filename}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{];}\PYG{+w}{ }\PYG{c+c1}{// first command line argument after name of program} -\PYG{+w}{ }\PYG{n}{NumberMCsamples}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{atoi}\PYG{p}{(}\PYG{n}{argv}\PYG{p}{[}\PYG{l+m+mi}{2}\PYG{p}{]);} -\PYG{+w}{ }\PYG{n}{string}\PYG{+w}{ }\PYG{n}{fileout}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{filename}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{string}\PYG{+w}{ }\PYG{n}{argument}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{to\PYGZus{}string}\PYG{p}{(}\PYG{n}{NumberMCsamples}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// Final filename as filename+NumberMCsamples} -\PYG{+w}{ }\PYG{n}{fileout}\PYG{p}{.}\PYG{n}{append}\PYG{p}{(}\PYG{n}{argument}\PYG{p}{);} 
-\PYG{+w}{ }\PYG{n}{ofile}\PYG{p}{.}\PYG{n}{open}\PYG{p}{(}\PYG{n}{fileout}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{c+c1}{// broadcast the number of Monte Carlo samples} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Bcast}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{NumberMCsamples}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}INT}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// Two variational parameters only} -\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{2}\PYG{p}{);} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{TotalNumberMCsamples}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{NumberMCsamples}\PYG{o}{*}\PYG{n}{NumberProcesses}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// Loop over variational parameters} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{alpha}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.5}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{alpha}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{l+m+mf}{1.5}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{alpha}\PYG{+w}{ }\PYG{o}{+=}\PYG{l+m+mf}{0.1}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{beta}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{beta}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{l+m+mf}{0.5}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{beta}\PYG{+w}{ }\PYG{o}{+=}\PYG{l+m+mf}{0.05}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{alpha}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// value of alpha} -\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{beta}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// value of beta} -\PYG{+w}{ }\PYG{c+c1}{// Do the mc sampling and accumulate data with MPI\PYGZus{}Reduce} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{n}{TotalEnergy}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{TotalEnergySquared}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{LocalProcessEnergy}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{LocalProcessEnergy2}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{LocalProcessEnergy}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{LocalProcessEnergy2}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{MonteCarloSampling}\PYG{p}{(}\PYG{n}{NumberMCsamples}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{LocalProcessEnergy}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{LocalProcessEnergy2}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// Collect data in total averages} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Reduce}\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{LocalProcessEnergy}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{TotalEnergy}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}DOUBLE}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}SUM}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Reduce}\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{LocalProcessEnergy2}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{TotalEnergySquared}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}DOUBLE}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}SUM}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// Print out results in case of Master node, set to MyRank = 0} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{MyRank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{Energy}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{TotalEnergy}\PYG{o}{/}\PYG{p}{(}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{double}\PYG{p}{)}\PYG{n}{NumberProcesses}\PYG{p}{);} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{Variance}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ 
}\PYG{n}{TotalEnergySquared}\PYG{o}{/}\PYG{p}{(}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{double}\PYG{p}{)}\PYG{n}{NumberProcesses}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{Energy}\PYG{o}{*}\PYG{n}{Energy}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{StandardDeviation}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{Variance}\PYG{o}{/}\PYG{p}{((}\PYG{k+kt}{double}\PYG{p}{)}\PYG{n}{TotalNumberMCsamples}\PYG{p}{));}\PYG{+w}{ }\PYG{c+c1}{// over optimistic error} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setiosflags}\PYG{p}{(}\PYG{n}{ios}\PYG{o}{::}\PYG{n}{showpoint}\PYG{+w}{ }\PYG{o}{|}\PYG{+w}{ }\PYG{n}{ios}\PYG{o}{::}\PYG{n}{uppercase}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setw}\PYG{p}{(}\PYG{l+m+mi}{15}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setprecision}\PYG{p}{(}\PYG{l+m+mi}{8}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setw}\PYG{p}{(}\PYG{l+m+mi}{15}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setprecision}\PYG{p}{(}\PYG{l+m+mi}{8}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setw}\PYG{p}{(}\PYG{l+m+mi}{15}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setprecision}\PYG{p}{(}\PYG{l+m+mi}{8}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Energy}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setw}\PYG{p}{(}\PYG{l+m+mi}{15}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setprecision}\PYG{p}{(}\PYG{l+m+mi}{8}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ 
}\PYG{n}{Variance}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{ofile}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setw}\PYG{p}{(}\PYG{l+m+mi}{15}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{setprecision}\PYG{p}{(}\PYG{l+m+mi}{8}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{StandardDeviation}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{EndTime}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Wtime}\PYG{p}{();} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{TotalTime}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{EndTime}\PYG{o}{\PYGZhy{}}\PYG{n}{StartTime}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{MyRank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{p}{)}\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Time = \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{TotalTime}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{} on number of processors: \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberProcesses}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MyRank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{ofile}\PYG{p}{.}\PYG{n}{close}\PYG{p}{();}\PYG{+w}{ }\PYG{c+c1}{// close output file} -\PYG{+w}{ }\PYG{c+c1}{// End MPI} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Finalize}\PYG{+w}{ }\PYG{p}{();} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of main function} - - -\PYG{c+c1}{// Monte Carlo sampling with the Metropolis algorithm} - -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{MonteCarloSampling}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ 
}\PYG{n}{NumberMCsamples}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{cumulative\PYGZus{}e}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{cumulative\PYGZus{}e2}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{VariationalParameters}\PYG{p}{)} -\PYG{p}{\PYGZob{}} - -\PYG{+w}{ }\PYG{c+c1}{// Initialize the seed and call the Mersienne algo} -\PYG{+w}{ }\PYG{n}{std}\PYG{o}{::}\PYG{n}{random\PYGZus{}device}\PYG{+w}{ }\PYG{n}{rd}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{std}\PYG{o}{::}\PYG{n}{mt19937\PYGZus{}64}\PYG{+w}{ }\PYG{n}{gen}\PYG{p}{(}\PYG{n}{rd}\PYG{p}{());} -\PYG{+w}{ }\PYG{c+c1}{// Set up the uniform distribution for x \PYGZbs{}in [[0, 1]} -\PYG{+w}{ }\PYG{n}{std}\PYG{o}{::}\PYG{n}{uniform\PYGZus{}real\PYGZus{}distribution}\PYG{o}{\PYGZlt{}}\PYG{k+kt}{double}\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{UniformNumberGenerator}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{std}\PYG{o}{::}\PYG{n}{normal\PYGZus{}distribution}\PYG{o}{\PYGZlt{}}\PYG{k+kt}{double}\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{Normaldistribution}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// diffusion constant from Schroedinger equation} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{D}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.5}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{timestep}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.05}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// we fix the time step for the gaussian deviate} -\PYG{+w}{ }\PYG{c+c1}{// allocate matrices which contain the position of the particles} -\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ }\PYG{n}{OldPosition}\PYG{p}{(}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{NewPosition}\PYG{p}{(}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ 
}\PYG{n}{OldQuantumForce}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{NewQuantumForce}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{);} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{Energy}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{EnergySquared}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{DeltaE}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// initial trial positions} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{OldPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{Normaldistribution}\PYG{p}{(}\PYG{n}{gen}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{timestep}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{OldWaveFunction}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{OldPosition}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{OldPosition}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{OldQuantumForce}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// loop over monte carlo cycles} -\PYG{+w}{ 
}\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{cycles}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{cycles}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{n}{NumberMCsamples}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{cycles}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{c+c1}{// new position} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{c+c1}{// gaussian deviate to compute new positions using a given timestep} -\PYG{+w}{ }\PYG{n}{NewPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{OldPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{Normaldistribution}\PYG{p}{(}\PYG{n}{gen}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{timestep}\PYG{p}{)}\PYG{o}{+}\PYG{n}{OldQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{*}\PYG{n}{timestep}\PYG{o}{*}\PYG{n}{D}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// NewPosition(i,j) = OldPosition(i,j) + gaussian\PYGZus{}deviate(\PYGZam{}idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{c+c1}{// for the other particles we need to set the position to the old position since} -\PYG{+w}{ }\PYG{c+c1}{// we move only one particle at the time} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ 
}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{!=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{NewPosition}\PYG{p}{(}\PYG{n}{k}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{OldPosition}\PYG{p}{(}\PYG{n}{k}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{NewWaveFunction}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{NewPosition}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{NewPosition}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{NewQuantumForce}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// we compute the log of the ratio of the greens functions to be used in the} -\PYG{+w}{ }\PYG{c+c1}{// Metropolis\PYGZhy{}Hastings algorithm} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{GreensFunction}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{GreensFunction}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ 
}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{OldQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{+}\PYG{n}{NewQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{))}\PYG{o}{*} -\PYG{+w}{ }\PYG{p}{(}\PYG{n}{D}\PYG{o}{*}\PYG{n}{timestep}\PYG{o}{*}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{OldQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{NewQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{))}\PYG{o}{\PYGZhy{}}\PYG{n}{NewPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{+}\PYG{n}{OldPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{));} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{GreensFunction}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{exp}\PYG{p}{(}\PYG{n}{GreensFunction}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// The Metropolis test is performed by moving one particle at the time} -\PYG{+w}{ }\PYG{k}{if}\PYG{p}{(}\PYG{n}{UniformNumberGenerator}\PYG{p}{(}\PYG{n}{gen}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{n}{GreensFunction}\PYG{o}{*}\PYG{n}{NewWaveFunction}\PYG{o}{*}\PYG{n}{NewWaveFunction}\PYG{o}{/}\PYG{n}{OldWaveFunction}\PYG{o}{/}\PYG{n}{OldWaveFunction}\PYG{+w}{ }\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{OldPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{NewPosition}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{OldQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{NewQuantumForce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ 
}\PYG{n}{OldWaveFunction}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{NewWaveFunction}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of loop over particles} -\PYG{+w}{ }\PYG{c+c1}{// compute local energy} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{DeltaE}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{LocalEnergy}\PYG{p}{(}\PYG{n}{OldPosition}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// update energies} -\PYG{+w}{ }\PYG{n}{Energy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{DeltaE}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{EnergySquared}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{DeltaE}\PYG{o}{*}\PYG{n}{DeltaE}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of loop over MC trials} -\PYG{+w}{ }\PYG{c+c1}{// update the energy average and its squared} -\PYG{+w}{ }\PYG{n}{cumulative\PYGZus{}e}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{Energy}\PYG{o}{/}\PYG{n}{NumberMCsamples}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{cumulative\PYGZus{}e2}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{EnergySquared}\PYG{o}{/}\PYG{n}{NumberMCsamples}\PYG{p}{;} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end MonteCarloSampling function} - - -\PYG{c+c1}{// Function to compute the squared wave function and the quantum force} - -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{WaveFunction}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{VariationalParameters}\PYG{p}{)} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{wf}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// full Slater determinant for two particles, replace with Slater det for more particles} -\PYG{+w}{ }\PYG{n}{wf}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{SPwavefunction}\PYG{p}{(}\PYG{n}{singleparticle\PYGZus{}pos2}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{),}\PYG{+w}{ 
}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{))}\PYG{o}{*}\PYG{n}{SPwavefunction}\PYG{p}{(}\PYG{n}{singleparticle\PYGZus{}pos2}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{),}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{));} -\PYG{+w}{ }\PYG{c+c1}{// contribution from Jastrow factor} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{wf}\PYG{+w}{ }\PYG{o}{*=}\PYG{+w}{ }\PYG{n}{exp}\PYG{p}{(}\PYG{n}{RelativeDistance}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{)}\PYG{o}{/}\PYG{p}{((}\PYG{l+m+mf}{1.0}\PYG{o}{+}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{o}{*}\PYG{n}{RelativeDistance}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{))));} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{wf}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// Function to calculate the local energy without numerical derivation of kinetic energy} - -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{LocalEnergy}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{VariationalParameters}\PYG{p}{)} -\PYG{p}{\PYGZob{}} - -\PYG{+w}{ }\PYG{c+c1}{// compute the kinetic and potential energy from 
the single\PYGZhy{}particle part} -\PYG{+w}{ }\PYG{c+c1}{// for a many\PYGZhy{}electron system this has to be replaced by a Slater determinant} -\PYG{+w}{ }\PYG{c+c1}{// The absolute value of the interparticle length} -\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ }\PYG{n}{length}\PYG{p}{(}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{);} -\PYG{+w}{ }\PYG{c+c1}{// Set up interparticle distance} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{RelativeDistance}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{length}\PYG{p}{(}\PYG{n}{j}\PYG{p}{,}\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// Set up kinetic energy from Slater and Jastrow terms} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ 
}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{sum1}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{!=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{sum1}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{JastrowDerivative}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{k}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{sum1}\PYG{o}{+}\PYG{n}{DerivativeSPwavefunction}\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{),}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)))}\PYG{o}{*}\PYG{p}{(}\PYG{n}{sum1}\PYG{o}{+}\PYG{n}{DerivativeSPwavefunction}\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{),}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)));} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ 
}\PYG{l+m+mi}{\PYGZhy{}2}\PYG{o}{*}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{o}{*}\PYG{n}{NumberParticles}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{l+m+mf}{2.0}\PYG{o}{/}\PYG{p}{(}\PYG{n}{pow}\PYG{p}{(}\PYG{l+m+mf}{1.0}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{o}{*}\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{),}\PYG{l+m+mi}{2}\PYG{p}{))}\PYG{o}{*}\PYG{p}{(}\PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{l+m+mi}{\PYGZhy{}2}\PYG{o}{*}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{+}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{o}{*}\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{))}\PYG{+w}{ }\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{+w}{ }\PYG{o}{*=}\PYG{+w}{ }\PYG{l+m+mf}{\PYGZhy{}0.5}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// Set up potential energy, external potential + eventual electron\PYGZhy{}electron repulsion} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{PotentialEnergy}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{+w}{ 
}\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{DistanceSquared}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{singleparticle\PYGZus{}pos2}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{PotentialEnergy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{n}{DistanceSquared}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// sp energy HO part, note it has the oscillator frequency set to 1!} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{c+c1}{// Add the electron\PYGZhy{}electron repulsion} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{PotentialEnergy}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{length}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{LocalE}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{KineticEnergy}\PYG{o}{+}\PYG{n}{PotentialEnergy}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{LocalE}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\PYG{c+c1}{// Compute the analytical expression for the quantum 
force} -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{QuantumForce}\PYG{p}{(}\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{qforce}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{VariationalParameters}\PYG{p}{)} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{c+c1}{// compute the first derivative} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{Dimension}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{k}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{c+c1}{// single\PYGZhy{}particle part, replace with Slater det for larger systems} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{sppart}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{DerivativeSPwavefunction}\PYG{p}{(}\PYG{n}{r}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{),}\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{));} -\PYG{+w}{ }\PYG{c+c1}{// Jastrow factor contribution} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{Jsum}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{NumberParticles}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{!=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ 
}\PYG{n}{Jsum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{JastrowDerivative}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{VariationalParameters}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{k}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{qforce}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{k}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{2.0}\PYG{o}{*}\PYG{p}{(}\PYG{n}{Jsum}\PYG{o}{+}\PYG{n}{sppart}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of QuantumForce function} - - -\PYG{c+cp}{\PYGZsh{}define ITMAX 200} -\PYG{c+cp}{\PYGZsh{}define EPS 3.0e\PYGZhy{}8} -\PYG{c+cp}{\PYGZsh{}define TOLX (4*EPS)} -\PYG{c+cp}{\PYGZsh{}define STPMX 100.0} - -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{dfpmin}\PYG{p}{(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{gtol}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{iter}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{fret}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{),}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{dfunc}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{g}\PYG{p}{))} -\PYG{p}{\PYGZob{}} - -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{check}\PYG{p}{,}\PYG{n}{i}\PYG{p}{,}\PYG{n}{its}\PYG{p}{,}\PYG{n}{j}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{n}{den}\PYG{p}{,}\PYG{n}{fac}\PYG{p}{,}\PYG{n}{fad}\PYG{p}{,}\PYG{n}{fae}\PYG{p}{,}\PYG{n}{fp}\PYG{p}{,}\PYG{n}{stpmax}\PYG{p}{,}\PYG{n}{sum}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{n}{sumdg}\PYG{p}{,}\PYG{n}{sumxi}\PYG{p}{,}\PYG{n}{temp}\PYG{p}{,}\PYG{n}{test}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{n}{dg}\PYG{p}{(}\PYG{n}{n}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{g}\PYG{p}{(}\PYG{n}{n}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{n}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{pnew}\PYG{p}{(}\PYG{n}{n}\PYG{p}{),}\PYG{+w}{ }\PYG{n}{xi}\PYG{p}{(}\PYG{n}{n}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{Matrix}\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{n}\PYG{p}{,}\PYG{n}{n}\PYG{p}{);} - -\PYG{+w}{ }\PYG{n}{fp}\PYG{o}{=}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{p}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{dfunc}\PYG{p}{)(}\PYG{n}{p}\PYG{p}{,}\PYG{n}{g}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{l+m+mf}{1.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{g}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ 
}\PYG{n}{stpmax}\PYG{o}{=}\PYG{n}{STPMX}\PYG{o}{*}\PYG{n}{FMAX}\PYG{p}{(}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{sum}\PYG{p}{),(}\PYG{k+kt}{double}\PYG{p}{)}\PYG{n}{n}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{its}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{n}{its}\PYG{o}{\PYGZlt{}=}\PYG{n}{ITMAX}\PYG{p}{;}\PYG{n}{its}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{o}{*}\PYG{n}{iter}\PYG{o}{=}\PYG{n}{its}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{lnsrch}\PYG{p}{(}\PYG{n}{n}\PYG{p}{,}\PYG{n}{p}\PYG{p}{,}\PYG{n}{fp}\PYG{p}{,}\PYG{n}{g}\PYG{p}{,}\PYG{n}{xi}\PYG{p}{,}\PYG{n}{pnew}\PYG{p}{,}\PYG{n}{fret}\PYG{p}{,}\PYG{n}{stpmax}\PYG{p}{,}\PYG{o}{\PYGZam{}}\PYG{n}{check}\PYG{p}{,}\PYG{n}{func}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{fp}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{fret}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{pnew}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{pnew}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{test}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{temp}\PYG{o}{=}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{))}\PYG{o}{/}\PYG{n}{FMAX}\PYG{p}{(}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)),}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{temp}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ 
}\PYG{n}{test}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{test}\PYG{o}{=}\PYG{n}{temp}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{test}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{TOLX}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{g}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{dfunc}\PYG{p}{)(}\PYG{n}{p}\PYG{p}{,}\PYG{n}{g}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{test}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{den}\PYG{o}{=}\PYG{n}{FMAX}\PYG{p}{(}\PYG{o}{*}\PYG{n}{fret}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{temp}\PYG{o}{=}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{g}\PYG{p}{(}\PYG{n}{i}\PYG{p}{))}\PYG{o}{*}\PYG{n}{FMAX}\PYG{p}{(}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)),}\PYG{l+m+mf}{1.0}\PYG{p}{)}\PYG{o}{/}\PYG{n}{den}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{temp}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{test}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{test}\PYG{o}{=}\PYG{n}{temp}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{test}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{gtol}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{return}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ 
}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{g}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{*}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{fac}\PYG{o}{=}\PYG{n}{fae}\PYG{o}{=}\PYG{n}{sumdg}\PYG{o}{=}\PYG{n}{sumxi}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{fac}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{fae}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{sumdg}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{SQR}\PYG{p}{(}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{));} -\PYG{+w}{ }\PYG{n}{sumxi}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{SQR}\PYG{p}{(}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{));} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{fac}\PYG{o}{*}\PYG{n}{fac}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{EPS}\PYG{o}{*}\PYG{n}{sumdg}\PYG{o}{*}\PYG{n}{sumxi}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ 
}\PYG{n}{fac}\PYG{o}{=}\PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{fac}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{fad}\PYG{o}{=}\PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{fae}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{fac}\PYG{o}{*}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{fad}\PYG{o}{*}\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{fac}\PYG{o}{*}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{j}\PYG{p}{)} -\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{fad}\PYG{o}{*}\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{hdg}\PYG{p}{(}\PYG{n}{j}\PYG{p}{)}\PYG{o}{+}\PYG{n}{fae}\PYG{o}{*}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{dg}\PYG{p}{(}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ 
}\PYG{n}{xi}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{\PYGZhy{}=}\PYG{+w}{ }\PYG{n}{hessian}\PYG{p}{(}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{)}\PYG{o}{*}\PYG{n}{g}\PYG{p}{(}\PYG{n}{j}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}too many iterations in dfpmin\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} -\PYG{c+cp}{\PYGZsh{}undef ITMAX} -\PYG{c+cp}{\PYGZsh{}undef EPS} -\PYG{c+cp}{\PYGZsh{}undef TOLX} -\PYG{c+cp}{\PYGZsh{}undef STPMX} - -\PYG{c+cp}{\PYGZsh{}define ALF 1.0e\PYGZhy{}4} -\PYG{c+cp}{\PYGZsh{}define TOLX 1.0e\PYGZhy{}7} - -\PYG{k+kt}{void}\PYG{+w}{ }\PYG{n+nf}{lnsrch}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{xold}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{fold}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{g}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{x}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{f}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{stpmax}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{check}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{Vector}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{p}\PYG{p}{))} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{,}\PYG{n}{alam}\PYG{p}{,}\PYG{n}{alam2}\PYG{p}{,}\PYG{n}{alamin}\PYG{p}{,}\PYG{n}{b}\PYG{p}{,}\PYG{n}{disc}\PYG{p}{,}\PYG{n}{f2}\PYG{p}{,}\PYG{n}{fold2}\PYG{p}{,}\PYG{n}{rhs1}\PYG{p}{,}\PYG{n}{rhs2}\PYG{p}{,}\PYG{n}{slope}\PYG{p}{,}\PYG{n}{sum}\PYG{p}{,}\PYG{n}{temp}\PYG{p}{,} -\PYG{+w}{ 
}\PYG{n}{test}\PYG{p}{,}\PYG{n}{tmplam}\PYG{p}{;} - -\PYG{+w}{ }\PYG{o}{*}\PYG{n}{check}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{sum}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{sum}\PYG{o}{=}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{sum}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{stpmax}\PYG{p}{)} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{+w}{ }\PYG{o}{*=}\PYG{+w}{ }\PYG{n}{stpmax}\PYG{o}{/}\PYG{n}{sum}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{slope}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)} -\PYG{+w}{ }\PYG{n}{slope}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{g}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{test}\PYG{o}{=}\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{temp}\PYG{o}{=}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{))}\PYG{o}{/}\PYG{n}{FMAX}\PYG{p}{(}\PYG{n}{fabs}\PYG{p}{(}\PYG{n}{xold}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)),}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{temp}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{test}\PYG{p}{)}\PYG{+w}{ 
}\PYG{n}{test}\PYG{o}{=}\PYG{n}{temp}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{alamin}\PYG{o}{=}\PYG{n}{TOLX}\PYG{o}{/}\PYG{n}{test}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{alam}\PYG{o}{=}\PYG{l+m+mf}{1.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(;;)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{xold}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{+}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{p}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{o}{*}\PYG{n}{f}\PYG{o}{=}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{x}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{alam}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{alamin}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{=}\PYG{n}{xold}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{o}{*}\PYG{n}{check}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{return}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{n}{f}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{n}{fold}\PYG{o}{+}\PYG{n}{ALF}\PYG{o}{*}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{slope}\PYG{p}{)}\PYG{+w}{ }\PYG{k}{return}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{alam}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mf}{1.0}\PYG{p}{)} -\PYG{+w}{ }\PYG{n}{tmplam}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ 
}\PYG{o}{\PYGZhy{}}\PYG{n}{slope}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mf}{2.0}\PYG{o}{*}\PYG{p}{(}\PYG{o}{*}\PYG{n}{f}\PYG{o}{\PYGZhy{}}\PYG{n}{fold}\PYG{o}{\PYGZhy{}}\PYG{n}{slope}\PYG{p}{));} -\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{rhs1}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{f}\PYG{o}{\PYGZhy{}}\PYG{n}{fold}\PYG{o}{\PYGZhy{}}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{slope}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{rhs2}\PYG{o}{=}\PYG{n}{f2}\PYG{o}{\PYGZhy{}}\PYG{n}{fold2}\PYG{o}{\PYGZhy{}}\PYG{n}{alam2}\PYG{o}{*}\PYG{n}{slope}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{a}\PYG{o}{=}\PYG{p}{(}\PYG{n}{rhs1}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{alam}\PYG{p}{)}\PYG{o}{\PYGZhy{}}\PYG{n}{rhs2}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam2}\PYG{o}{*}\PYG{n}{alam2}\PYG{p}{))}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam}\PYG{o}{\PYGZhy{}}\PYG{n}{alam2}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{b}\PYG{o}{=}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{n}{alam2}\PYG{o}{*}\PYG{n}{rhs1}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{alam}\PYG{p}{)}\PYG{o}{+}\PYG{n}{alam}\PYG{o}{*}\PYG{n}{rhs2}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam2}\PYG{o}{*}\PYG{n}{alam2}\PYG{p}{))}\PYG{o}{/}\PYG{p}{(}\PYG{n}{alam}\PYG{o}{\PYGZhy{}}\PYG{n}{alam2}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{a}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{tmplam}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{slope}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mf}{2.0}\PYG{o}{*}\PYG{n}{b}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{disc}\PYG{o}{=}\PYG{n}{b}\PYG{o}{*}\PYG{n}{b}\PYG{l+m+mf}{\PYGZhy{}3.0}\PYG{o}{*}\PYG{n}{a}\PYG{o}{*}\PYG{n}{slope}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{disc}\PYG{o}{\PYGZlt{}}\PYG{l+m+mf}{0.0}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Roundoff problem in lnsrch.\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} 
-\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{n}{tmplam}\PYG{o}{=}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{n}{b}\PYG{o}{+}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{disc}\PYG{p}{))}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mf}{3.0}\PYG{o}{*}\PYG{n}{a}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{tmplam}\PYG{o}{\PYGZgt{}}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{n}{alam}\PYG{p}{)} -\PYG{+w}{ }\PYG{n}{tmplam}\PYG{o}{=}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{n}{alam}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{alam2}\PYG{o}{=}\PYG{n}{alam}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{f2}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{f}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{fold2}\PYG{o}{=}\PYG{n}{fold}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{alam}\PYG{o}{=}\PYG{n}{FMAX}\PYG{p}{(}\PYG{n}{tmplam}\PYG{p}{,}\PYG{l+m+mf}{0.1}\PYG{o}{*}\PYG{n}{alam}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} -\PYG{c+cp}{\PYGZsh{}undef ALF} -\PYG{c+cp}{\PYGZsh{}undef TOLX} - - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/3AC82BEB472EF31642CEE04E21F67ECC0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/3AC82BEB472EF31642CEE04E21F67ECC0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 9556c433..00000000 --- a/doc/src/week9/_minted-week9/3AC82BEB472EF31642CEE04E21F67ECC0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel \PYGZob{} ... 
\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/3B98EBB88E64CD2C81DFDB84AFC1629D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/3B98EBB88E64CD2C81DFDB84AFC1629D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 93dc503b..00000000 --- a/doc/src/week9/_minted-week9/3B98EBB88E64CD2C81DFDB84AFC1629D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,13 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp critical} -\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{maxval}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{maxloc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/3E0C0C0FDDCEAEC0D593AF05C8D9AEB70DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/3E0C0C0FDDCEAEC0D593AF05C8D9AEB70DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4c87bee4..00000000 --- a/doc/src/week9/_minted-week9/3E0C0C0FDDCEAEC0D593AF05C8D9AEB70DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{clang}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{novec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{vecexample}\PYG{p}{.}\PYG{n}{cpp} - -\end{Verbatim} diff --git 
a/doc/src/week9/_minted-week9/4236AEA215F4B524E5B6FFC80851B9600DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/4236AEA215F4B524E5B6FFC80851B9600DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 7b039d89..00000000 --- a/doc/src/week9/_minted-week9/4236AEA215F4B524E5B6FFC80851B9600DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp atomic} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/485A5796695A934830A2325AA99ECBAE0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/485A5796695A934830A2325AA99ECBAE0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 7dba5727..00000000 --- a/doc/src/week9/_minted-week9/485A5796695A934830A2325AA99ECBAE0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Recv}\PYG{p}{(}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{buf}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{count}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Datatype}\PYG{+w}{ }\PYG{n}{datatype}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{source}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{tag}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm}\PYG{+w}{ }\PYG{n}{comm}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Status}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{status}\PYG{+w}{ }\PYG{p}{)} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/49441808FB11013E51C438764FC0757E0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/49441808FB11013E51C438764FC0757E0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index ddbe743b..00000000 --- a/doc/src/week9/_minted-week9/49441808FB11013E51C438764FC0757E0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null 
@@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp ordered \PYGZob{} a block of codes \PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/4AE12D397D7F961596A3F89C28AC9B6D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/4AE12D397D7F961596A3F89C28AC9B6D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 560be046..00000000 --- a/doc/src/week9/_minted-week9/4AE12D397D7F961596A3F89C28AC9B6D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,17 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{maxloc}\PYG{p}{[}\PYG{n}{MAX\PYGZus{}THREADS}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{mloc}\PYG{p}{;} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{MAX\PYGZus{}THREADS}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{mval}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel shared(maxval,maxloc)} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}thread\PYGZus{}num}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{id}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{\PYGZhy{}1.0e30}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp for} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{id}\PYG{p}{])}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{maxloc}\PYG{p}{[}\PYG{n}{id}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{[}\PYG{n}{id}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ 
}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/4E8DC707963D22FD986FA69987A6F9360DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/4E8DC707963D22FD986FA69987A6F9360DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 3bab81b2..00000000 --- a/doc/src/week9/_minted-week9/4E8DC707963D22FD986FA69987A6F9360DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{c+cp}{\PYGZsh{} Compile and link} -\PYG{+w}{ }\PYG{n}{mpic}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{nameofprog}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{nameofprog}\PYG{p}{.}\PYG{n}{cpp} -\PYG{+w}{ }\PYG{c+cp}{\PYGZsh{} run code with for example 8 processes using mpirun/mpiexec} -\PYG{+w}{ }\PYG{n}{mpiexec}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{n}\PYG{+w}{ }\PYG{l+m+mi}{8}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{nameofprog}\PYG{p}{.}\PYG{n}{x} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/517C4E3BE1BFE4D4D8044EE7079745F90DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/517C4E3BE1BFE4D4D8044EE7079745F90DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 374903f7..00000000 --- a/doc/src/week9/_minted-week9/517C4E3BE1BFE4D4D8044EE7079745F90DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{sudo}\PYG{+w}{ }\PYG{n}{apt}\PYG{o}{\PYGZhy{}}\PYG{n}{get}\PYG{+w}{ }\PYG{n}{install}\PYG{+w}{ }\PYG{n}{libopenmpi}\PYG{o}{\PYGZhy{}}\PYG{n}{dev} -\PYG{+w}{ }\PYG{n}{sudo}\PYG{+w}{ }\PYG{n}{apt}\PYG{o}{\PYGZhy{}}\PYG{n}{get}\PYG{+w}{ }\PYG{n}{install}\PYG{+w}{ 
}\PYG{n}{openmpi}\PYG{o}{\PYGZhy{}}\PYG{n}{bin} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/521A0F94D0EF4F11AF58C10A6998F8FD0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/521A0F94D0EF4F11AF58C10A6998F8FD0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index b0e76ec4..00000000 --- a/doc/src/week9/_minted-week9/521A0F94D0EF4F11AF58C10A6998F8FD0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{c}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{cpp} -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{exe}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{o} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/52788122147F40FDF64E3D34A4D5C10EB0CAD346A13BD81D9AF720CF234DAE08.pygtex b/doc/src/week9/_minted-week9/52788122147F40FDF64E3D34A4D5C10EB0CAD346A13BD81D9AF720CF234DAE08.pygtex deleted file mode 100644 index ae5d2832..00000000 --- a/doc/src/week9/_minted-week9/52788122147F40FDF64E3D34A4D5C10EB0CAD346A13BD81D9AF720CF234DAE08.pygtex +++ /dev/null @@ -1,10 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] - clock\PYGZus{}t start, finish; - start = clock(); - for (int j = 0; j \PYGZlt{} i; j++) \PYGZob{} - a[j] = b[j]+b[j]*c[j]; - \PYGZcb{} - finish = clock(); - double timeused = (double) (finish \PYGZhy{} start)/(CLOCKS\PYGZus{}PER\PYGZus{}SEC ); - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/5358A6138325DBB0CB89E0A77CA092A50DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/5358A6138325DBB0CB89E0A77CA092A50DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 6362f3e3..00000000 --- 
a/doc/src/week9/_minted-week9/5358A6138325DBB0CB89E0A77CA092A50DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,9 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{+=}\PYG{l+m+mi}{4}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{2}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{2}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{2}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{3}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{3}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{o}{+}\PYG{l+m+mi}{3}\PYG{p}{];} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/5389DFE1B2AD5452E98B577558064F79B0CAD346A13BD81D9AF720CF234DAE08.pygtex b/doc/src/week9/_minted-week9/5389DFE1B2AD5452E98B577558064F79B0CAD346A13BD81D9AF720CF234DAE08.pygtex deleted file mode 100644 index af6725d4..00000000 --- a/doc/src/week9/_minted-week9/5389DFE1B2AD5452E98B577558064F79B0CAD346A13BD81D9AF720CF234DAE08.pygtex +++ /dev/null @@ -1,60 +0,0 @@ 
-\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -// OpenMP program to compute vector norm by adding two other vectors -\PYGZsh{}include \PYGZlt{}cstdlib\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iostream\PYGZgt{} -\PYGZsh{}include \PYGZlt{}cmath\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iomanip\PYGZgt{} -\PYGZsh{}include \PYGZlt{}omp.h\PYGZgt{} -\PYGZsh{} include \PYGZlt{}ctime\PYGZgt{} - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -\PYGZob{} - // read in dimension of vector - int n = atoi(argv[1]); - double *a, *b, *c; - int i; - int thread\PYGZus{}num; - double wtime, Norm2, s, angle; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} Perform addition of two vectors and compute the norm\PYGZhy{}2.\PYGZdq{} \PYGZlt{}\PYGZlt{} endl; - omp\PYGZus{}set\PYGZus{}num\PYGZus{}threads(4); - thread\PYGZus{}num = omp\PYGZus{}get\PYGZus{}max\PYGZus{}threads (); - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The number of processors available = \PYGZdq{} \PYGZlt{}\PYGZlt{} omp\PYGZus{}get\PYGZus{}num\PYGZus{}procs () \PYGZlt{}\PYGZlt{} endl ; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The number of threads available = \PYGZdq{} \PYGZlt{}\PYGZlt{} thread\PYGZus{}num \PYGZlt{}\PYGZlt{} endl; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The matrix order n = \PYGZdq{} \PYGZlt{}\PYGZlt{} n \PYGZlt{}\PYGZlt{} endl; - - s = 1.0/sqrt( (double) n); - wtime = omp\PYGZus{}get\PYGZus{}wtime ( ); - // Allocate space for the vectors to be used - a = new double [n]; b = new double [n]; c = new double [n]; - // Define parallel region -\PYGZsh{} pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2) - // Set up values for vectors a and b - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - angle = 2.0*M\PYGZus{}PI*i/ (( double ) n); - a[i] = s*(sin(angle) + cos(angle)); - b[i] = s*sin(2.0*angle); - c[i] = 0.0; - \PYGZcb{} - // Then perform the vector addition - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - c[i] += a[i]+b[i]; - \PYGZcb{} - // Compute 
now the norm\PYGZhy{}2 - Norm2 = 0.0; - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - Norm2 += c[i]*c[i]; - \PYGZcb{} -// end parallel region - wtime = omp\PYGZus{}get\PYGZus{}wtime ( ) \PYGZhy{} wtime; - cout \PYGZlt{}\PYGZlt{} setiosflags(ios::showpoint | ios::uppercase); - cout \PYGZlt{}\PYGZlt{} setprecision(10) \PYGZlt{}\PYGZlt{} setw(20) \PYGZlt{}\PYGZlt{} \PYGZdq{}Time used for norm\PYGZhy{}2 computation=\PYGZdq{} \PYGZlt{}\PYGZlt{} wtime \PYGZlt{}\PYGZlt{} endl; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} Norm\PYGZhy{}2 = \PYGZdq{} \PYGZlt{}\PYGZlt{} Norm2 \PYGZlt{}\PYGZlt{} endl; - // Free up space - delete[] a; - delete[] b; - delete[] c; - return 0; -\PYGZcb{} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/59BA9422B354E0D316E8A5D73BA20B4A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/59BA9422B354E0D316E8A5D73BA20B4A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4823bda4..00000000 --- a/doc/src/week9/_minted-week9/59BA9422B354E0D316E8A5D73BA20B4A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{2}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{3}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{2}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{l+m+mi}{3}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} - -\end{Verbatim} diff --git 
a/doc/src/week9/_minted-week9/5A6DAA7214BFCDCF8F733CF3810D47460DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/5A6DAA7214BFCDCF8F733CF3810D47460DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index b9503220..00000000 --- a/doc/src/week9/_minted-week9/5A6DAA7214BFCDCF8F733CF3810D47460DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/5B343579BCF9963505A1D3AA4E6149140DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/5B343579BCF9963505A1D3AA4E6149140DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index fc3be7b6..00000000 --- a/doc/src/week9/_minted-week9/5B343579BCF9963505A1D3AA4E6149140DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{MPI\PYGZus{}Allreduce}\PYG{p}{(}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{senddata}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{resultdata}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{count}\PYG{p}{,} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Datatype}\PYG{+w}{ }\PYG{n}{datatype}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Op}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm}\PYG{+w}{ }\PYG{n}{comm}\PYG{p}{)} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/5F0C24C76C78B09EE11739CC23F2BC800DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/5F0C24C76C78B09EE11739CC23F2BC800DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index c286f6d0..00000000 --- a/doc/src/week9/_minted-week9/5F0C24C76C78B09EE11739CC23F2BC800DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{man}\PYG{+w}{ }\PYG{n}{c}\PYG{o}{++} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/63FBD8276D325745F8C303CA3370CA700DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/63FBD8276D325745F8C303CA3370CA700DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index d36ca2e3..00000000 --- a/doc/src/week9/_minted-week9/63FBD8276D325745F8C303CA3370CA700DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,16 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}cstdio\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{argv}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{omp\PYGZus{}set\PYGZus{}num\PYGZus{}threads}\PYG{p}{(}\PYG{l+m+mi}{4}\PYG{p}{);} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel} -\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}thread\PYGZus{}num}\PYG{p}{();} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nproc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}num\PYGZus{}threads}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Hello world with id number and processes \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ 
}\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{nproc}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{k}{return}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/668D6DF261A6E3F3CCB0BC5FE288FE2F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/668D6DF261A6E3F3CCB0BC5FE288FE2F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 18d60bd1..00000000 --- a/doc/src/week9/_minted-week9/668D6DF261A6E3F3CCB0BC5FE288FE2F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/6745E0E5044DEC5EAF2A777B2FA0488D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/6745E0E5044DEC5EAF2A777B2FA0488D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4a0869a7..00000000 --- a/doc/src/week9/_minted-week9/6745E0E5044DEC5EAF2A777B2FA0488D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{gprof}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{exe}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{ProfileOutput} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/69B258D5AC69935C85BD91E993EC77030DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/69B258D5AC69935C85BD91E993EC77030DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 1d3a0af6..00000000 --- a/doc/src/week9/_minted-week9/69B258D5AC69935C85BD91E993EC77030DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{pg}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{c}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{cpp} -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{pg}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{exe}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{o} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/6E59941A3833E1FE1319E254324B7B260DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/6E59941A3833E1FE1319E254324B7B260DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 95810411..00000000 --- a/doc/src/week9/_minted-week9/6E59941A3833E1FE1319E254324B7B260DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,18 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}define CHUNKSIZE 100} -\PYG{c+cp}{\PYGZsh{}define N 1000} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{argv}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{chunk}\PYG{p}{;} -\PYG{k+kt}{float}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{N}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{N}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{N}\PYG{p}{];} -\PYG{k}{for}\PYG{+w}{ 
}\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{*}\PYG{+w}{ }\PYG{l+m+mf}{1.0}\PYG{p}{;} -\PYG{n}{chunk}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{CHUNKSIZE}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel shared(a,b,c,chunk) private(i)} -\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp for schedule(dynamic,chunk)} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+cm}{/* end of parallel region */} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/726CE244F7A189BFA217714F4A9629F30DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/726CE244F7A189BFA217714F4A9629F30DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 84e8f306..00000000 --- a/doc/src/week9/_minted-week9/726CE244F7A189BFA217714F4A9629F30DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,21 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// this function defines the trapezoidal rule} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{trapezoidal\PYGZus{}rule}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{k+kt}{double}\PYG{p}{))} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{trapez\PYGZus{}sum}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{fa}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{fb}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{step}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{step}\PYG{o}{=}\PYG{p}{(}\PYG{n}{b}\PYG{o}{\PYGZhy{}}\PYG{n}{a}\PYG{p}{)}\PYG{o}{/}\PYG{p}{((}\PYG{k+kt}{double}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{fa}\PYG{o}{=}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{a}\PYG{p}{)}\PYG{o}{/}\PYG{l+m+mf}{2.}\PYG{+w}{ }\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{fb}\PYG{o}{=}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{b}\PYG{p}{)}\PYG{o}{/}\PYG{l+m+mf}{2.}\PYG{+w}{ }\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{trapez\PYGZus{}sum}\PYG{o}{=}\PYG{l+m+mf}{0.}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}=}\PYG{+w}{ }\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{x}\PYG{o}{=}\PYG{n}{j}\PYG{o}{*}\PYG{n}{step}\PYG{o}{+}\PYG{n}{a}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{trapez\PYGZus{}sum}\PYG{o}{+=}\PYG{p}{(}\PYG{o}{*}\PYG{n}{func}\PYG{p}{)(}\PYG{n}{x}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{trapez\PYGZus{}sum}\PYG{o}{=}\PYG{p}{(}\PYG{n}{trapez\PYGZus{}sum}\PYG{o}{+}\PYG{n}{fb}\PYG{o}{+}\PYG{n}{fa}\PYG{p}{)}\PYG{o}{*}\PYG{n}{step}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{trapez\PYGZus{}sum}\PYG{p}{;} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end trapezoidal\PYGZus{}rule} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/76ED9E3D3A3215F846384023116EBB7A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/76ED9E3D3A3215F846384023116EBB7A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 
100644 index e8193659..00000000 --- a/doc/src/week9/_minted-week9/76ED9E3D3A3215F846384023116EBB7A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,17 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k}{using}\PYG{+w}{ }\PYG{k}{namespace}\PYG{+w}{ }\PYG{n+nn}{std}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}mpi.h\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}iostream\PYGZgt{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{args}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{p}{;} -\PYG{c+c1}{// MPI initializations} -\PYG{n}{MPI\PYGZus{}Init}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{args}\PYG{p}{);} -\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}size}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{numprocs}\PYG{p}{);} -\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}rank}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{my\PYGZus{}rank}\PYG{p}{);} -\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Hello world, I have rank \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{} out of \PYGZdq{}} -\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{c+c1}{// End MPI} -\PYG{n}{MPI\PYGZus{}Finalize}\PYG{+w}{ }\PYG{p}{();} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/7963ABE74028F8E391D17DA4CA04E05B0DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/7963ABE74028F8E391D17DA4CA04E05B0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index b1c1bc8d..00000000 --- a/doc/src/week9/_minted-week9/7963ABE74028F8E391D17DA4CA04E05B0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{brew}\PYG{+w}{ }\PYG{n}{install}\PYG{+w}{ }\PYG{n}{libomp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/7BACEDF5BB1AC2AD961BDBC4D1F9DA5D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/7BACEDF5BB1AC2AD961BDBC4D1F9DA5D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 2b7c546b..00000000 --- a/doc/src/week9/_minted-week9/7BACEDF5BB1AC2AD961BDBC4D1F9DA5D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{log10}\PYG{p}{(}\PYG{n}{i}\PYG{p}{)}\PYG{o}{*}\PYG{n}{cos}\PYG{p}{(}\PYG{n}{i}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/7F07B82F53C725F23B54C8A7B79CF5360DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/7F07B82F53C725F23B54C8A7B79CF5360DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 454f846c..00000000 --- a/doc/src/week9/_minted-week9/7F07B82F53C725F23B54C8A7B79CF5360DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,12 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] 
-\PYG{c+cp}{\PYGZsh{}pragma omp parallel} -\PYG{p}{\PYGZob{}} -\PYG{p}{...} -\PYG{c+cp}{\PYGZsh{}pragma omp critical} -\PYG{p}{\PYGZob{}} -\PYG{p}{...} -\PYG{c+cp}{\PYGZsh{}pragma omp barrier} -\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/81B54D900E1709D3194224A29260C12F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/81B54D900E1709D3194224A29260C12F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 312bae06..00000000 --- a/doc/src/week9/_minted-week9/81B54D900E1709D3194224A29260C12F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{cos}\PYG{p}{(}\PYG{n}{j}\PYG{o}{*}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{a}\PYG{p}{[}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{p}{)}\PYG{+w}{ }\PYG{k}{break}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/81D7D0361B0B45516A69DF03C150C67D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/81D7D0361B0B45516A69DF03C150C67D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index d3f1a86a..00000000 --- a/doc/src/week9/_minted-week9/81D7D0361B0B45516A69DF03C150C67D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,15 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ 
}\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{args}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Init}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{args}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}size}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{numprocs}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}rank}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{my\PYGZus{}rank}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Barrier}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Hello world, I have rank \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}} -\PYG{+w}{ }\PYG{l+s}{\PYGZdq{} out of \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;\PYGZcb{}} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Finalize}\PYG{+w}{ }\PYG{p}{();} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/824B5B818A4AD2013ED199E479D7A2400DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/824B5B818A4AD2013ED199E479D7A2400DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4952c6aa..00000000 --- a/doc/src/week9/_minted-week9/824B5B818A4AD2013ED199E479D7A2400DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,19 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// Trapezoidal rule and numerical integration usign MPI} -\PYG{k}{using}\PYG{+w}{ }\PYG{k}{namespace}\PYG{+w}{ }\PYG{n+nn}{std}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}mpi.h\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}iostream\PYGZgt{}} - -\PYG{c+c1}{// Here we define various functions called by the main program} - -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{int\PYGZus{}function}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{);} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n+nf}{trapezoidal\PYGZus{}rule}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{*}\PYG{p}{)(}\PYG{k+kt}{double}\PYG{p}{));} - -\PYG{c+c1}{// Main function begins here} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{args}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}n}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{h}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}a}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}b}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{total\PYGZus{}sum}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}sum}\PYG{p}{;} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{n}{time\PYGZus{}start}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{time\PYGZus{}end}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{total\PYGZus{}time}\PYG{p}{;} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/8755B283091CF6EFD1D61EA7DF38C18F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/8755B283091CF6EFD1D61EA7DF38C18F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 36aea8d1..00000000 --- a/doc/src/week9/_minted-week9/8755B283091CF6EFD1D61EA7DF38C18F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{n}{name}\PYG{+w}{ }\PYG{n}{executable}\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{n}{name}\PYG{+w}{ }\PYG{n}{program}\PYG{p}{.}\PYG{n}{cpp}\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{lomp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9178F05C3CEBD2807E143475DF5843F10DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/9178F05C3CEBD2807E143475DF5843F10DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 60297524..00000000 --- a/doc/src/week9/_minted-week9/9178F05C3CEBD2807E143475DF5843F10DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,22 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} -\PYG{n}{main}\PYG{+w}{ }\PYG{p}{()} -\PYG{p}{\PYGZob{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{var1}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{var2}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{var3}\PYG{p}{;} -\PYG{c+cm}{/* serial code */} -\PYG{c+cm}{/* ... */} -\PYG{c+cm}{/* start of a parallel region */} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel private(var1, var2) shared(var3)} -\PYG{p}{\PYGZob{}} -\PYG{c+cm}{/* ... 
*/} -\PYG{p}{\PYGZcb{}} -\PYG{c+cm}{/* more serial code */} -\PYG{c+cm}{/* ... */} -\PYG{c+cm}{/* another parallel region */} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel} -\PYG{p}{\PYGZob{}} -\PYG{c+cm}{/* ... */} -\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/919233BB70F39100D4BA0CFC73CC37F00DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/919233BB70F39100D4BA0CFC73CC37F00DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 90bd726f..00000000 --- a/doc/src/week9/_minted-week9/919233BB70F39100D4BA0CFC73CC37F00DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Send}\PYG{p}{(}\PYG{k+kt}{void}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{buf}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{count}\PYG{p}{,} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Datatype}\PYG{+w}{ }\PYG{n}{datatype}\PYG{p}{,} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{dest}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{tag}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm}\PYG{+w}{ }\PYG{n}{comm}\PYG{p}{)\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9342B1CB5D7A51D27802969C958F5D7A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/9342B1CB5D7A51D27802969C958F5D7A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 41306c1f..00000000 --- a/doc/src/week9/_minted-week9/9342B1CB5D7A51D27802969C958F5D7A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{brew}\PYG{+w}{ }\PYG{n}{install}\PYG{+w}{ }\PYG{n}{openmpi} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9674C8246A2FF2BB6157963C01DD59510DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/9674C8246A2FF2BB6157963C01DD59510DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 18eaeb47..00000000 --- a/doc/src/week9/_minted-week9/9674C8246A2FF2BB6157963C01DD59510DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp construct [ clause ...]} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9C742881A3D82D5F30B5060457BB7F190DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/9C742881A3D82D5F30B5060457BB7F190DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4e2e8c4d..00000000 --- a/doc/src/week9/_minted-week9/9C742881A3D82D5F30B5060457BB7F190DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel,} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9E7E1C037200BF7AFF999C0AAE9428890DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/9E7E1C037200BF7AFF999C0AAE9428890DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index a2c024ef..00000000 --- a/doc/src/week9/_minted-week9/9E7E1C037200BF7AFF999C0AAE9428890DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp critical \PYGZob{} a block of codes \PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/9FC2D2E5AAA3432653F93E80B14BCAF50DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/9FC2D2E5AAA3432653F93E80B14BCAF50DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index afc632f4..00000000 --- a/doc/src/week9/_minted-week9/9FC2D2E5AAA3432653F93E80B14BCAF50DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 
@@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/A2C942DDC1F9CF9BD256911EFAA895590DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/A2C942DDC1F9CF9BD256911EFAA895590DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index e1a0faa2..00000000 --- a/doc/src/week9/_minted-week9/A2C942DDC1F9CF9BD256911EFAA895590DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,19 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{total\PYGZus{}sum}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{local\PYGZus{}sum}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{trapezoidal\PYGZus{}rule}\PYG{p}{(}\PYG{n}{local\PYGZus{}a}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}b}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{local\PYGZus{}n}\PYG{p}{,} -\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{int\PYGZus{}function}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Reduce}\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{local\PYGZus{}sum}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{total\PYGZus{}sum}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}DOUBLE}\PYG{p}{,} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}SUM}\PYG{p}{,}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{time\PYGZus{}end}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Wtime}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{total\PYGZus{}time}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{time\PYGZus{}end}\PYG{o}{\PYGZhy{}}\PYG{n}{time\PYGZus{}start}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{+w}{ }\PYG{n}{my\PYGZus{}rank}\PYG{+w}{ }\PYG{o}{==}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Trapezoidal rule = 
\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{total\PYGZus{}sum}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}Time = \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{total\PYGZus{}time} -\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{} on number of processors: \PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{numprocs}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{c+c1}{// End MPI} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Finalize}\PYG{+w}{ }\PYG{p}{();} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of main program} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/A8E266067B144C7E384681ABF07B03350DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/A8E266067B144C7E384681ABF07B03350DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index e93e9190..00000000 --- a/doc/src/week9/_minted-week9/A8E266067B144C7E384681ABF07B03350DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/A9979A25D5212B32BF91E8AA0504BB810DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/A9979A25D5212B32BF91E8AA0504BB810DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 6dc23c18..00000000 --- a/doc/src/week9/_minted-week9/A9979A25D5212B32BF91E8AA0504BB810DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{clang}\PYG{o}{++}\PYG{+w}{ 
}\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{Rpass}\PYG{o}{\PYGZhy{}}\PYG{n}{analysis}\PYG{o}{=}\PYG{n}{loop}\PYG{o}{\PYGZhy{}}\PYG{n}{vectorize}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{vec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{vecexample}\PYG{p}{.}\PYG{n}{cpp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/AE088E29FB4E738F6B44D236504058980DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/AE088E29FB4E738F6B44D236504058980DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 71e1e73c..00000000 --- a/doc/src/week9/_minted-week9/AE088E29FB4E738F6B44D236504058980DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel private(id)} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}thread\PYGZus{}num}\PYG{p}{();} -\PYG{+w}{ }\PYG{n}{cout}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{l+s}{\PYGZdq{}My thread num\PYGZdq{}}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{id}\PYG{+w}{ }\PYG{o}{\PYGZlt{}\PYGZlt{}}\PYG{+w}{ }\PYG{n}{endl}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/AE87E97195FD5FF205CB2008603BC2825FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex b/doc/src/week9/_minted-week9/AE87E97195FD5FF205CB2008603BC2825FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex deleted file mode 100644 index 06d5871f..00000000 --- a/doc/src/week9/_minted-week9/AE87E97195FD5FF205CB2008603BC2825FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex +++ /dev/null @@ -1,219 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{\PYGZsh{} 2\PYGZhy{}electron VMC code for 2dim quantum dot with importance sampling} -\PYG{c+c1}{\PYGZsh{} Using gaussian rng for new positions and Metropolis\PYGZhy{} 
Hastings} -\PYG{c+c1}{\PYGZsh{} Added energy minimization} -\PYG{k+kn}{from} \PYG{n+nn}{math} \PYG{k+kn}{import} \PYG{n}{exp}\PYG{p}{,} \PYG{n}{sqrt} -\PYG{k+kn}{from} \PYG{n+nn}{random} \PYG{k+kn}{import} \PYG{n}{random}\PYG{p}{,} \PYG{n}{seed}\PYG{p}{,} \PYG{n}{normalvariate} -\PYG{k+kn}{import} \PYG{n+nn}{numpy} \PYG{k}{as} \PYG{n+nn}{np} -\PYG{k+kn}{import} \PYG{n+nn}{matplotlib.pyplot} \PYG{k}{as} \PYG{n+nn}{plt} -\PYG{k+kn}{from} \PYG{n+nn}{mpl\PYGZus{}toolkits.mplot3d} \PYG{k+kn}{import} \PYG{n}{Axes3D} -\PYG{k+kn}{from} \PYG{n+nn}{matplotlib} \PYG{k+kn}{import} \PYG{n}{cm} -\PYG{k+kn}{from} \PYG{n+nn}{matplotlib.ticker} \PYG{k+kn}{import} \PYG{n}{LinearLocator}\PYG{p}{,} \PYG{n}{FormatStrFormatter} -\PYG{k+kn}{from} \PYG{n+nn}{scipy.optimize} \PYG{k+kn}{import} \PYG{n}{minimize} -\PYG{k+kn}{import} \PYG{n+nn}{sys} -\PYG{k+kn}{import} \PYG{n+nn}{os} - -\PYG{c+c1}{\PYGZsh{} Where to save data files} -\PYG{n}{PROJECT\PYGZus{}ROOT\PYGZus{}DIR} \PYG{o}{=} \PYG{l+s+s2}{\PYGZdq{}Results\PYGZdq{}} -\PYG{n}{DATA\PYGZus{}ID} \PYG{o}{=} \PYG{l+s+s2}{\PYGZdq{}Results/EnergyMin\PYGZdq{}} - -\PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{PROJECT\PYGZus{}ROOT\PYGZus{}DIR}\PYG{p}{):} - \PYG{n}{os}\PYG{o}{.}\PYG{n}{mkdir}\PYG{p}{(}\PYG{n}{PROJECT\PYGZus{}ROOT\PYGZus{}DIR}\PYG{p}{)} - -\PYG{k}{if} \PYG{o+ow}{not} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{exists}\PYG{p}{(}\PYG{n}{DATA\PYGZus{}ID}\PYG{p}{):} - \PYG{n}{os}\PYG{o}{.}\PYG{n}{makedirs}\PYG{p}{(}\PYG{n}{DATA\PYGZus{}ID}\PYG{p}{)} - -\PYG{k}{def} \PYG{n+nf}{data\PYGZus{}path}\PYG{p}{(}\PYG{n}{dat\PYGZus{}id}\PYG{p}{):} - \PYG{k}{return} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{DATA\PYGZus{}ID}\PYG{p}{,} \PYG{n}{dat\PYGZus{}id}\PYG{p}{)} - -\PYG{n}{outfile} \PYG{o}{=} 
\PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{data\PYGZus{}path}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}Energies.dat\PYGZdq{}}\PYG{p}{),}\PYG{l+s+s1}{\PYGZsq{}w\PYGZsq{}}\PYG{p}{)} - - -\PYG{c+c1}{\PYGZsh{} Trial wave function for the 2\PYGZhy{}electron quantum dot in two dims} -\PYG{k}{def} \PYG{n+nf}{WaveFunction}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{):} - \PYG{n}{r1} \PYG{o}{=} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} - \PYG{n}{r2} \PYG{o}{=} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} - \PYG{n}{r12} \PYG{o}{=} \PYG{n}{sqrt}\PYG{p}{((}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{deno} \PYG{o}{=} \PYG{n}{r12}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{+}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{r12}\PYG{p}{)} - \PYG{k}{return} \PYG{n}{exp}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{n}{alpha}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r1}\PYG{o}{+}\PYG{n}{r2}\PYG{p}{)}\PYG{o}{+}\PYG{n}{deno}\PYG{p}{)} - -\PYG{c+c1}{\PYGZsh{} Local energy for the 2\PYGZhy{}electron quantum dot in two dims, using analytical local energy} -\PYG{k}{def} \PYG{n+nf}{LocalEnergy}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{):} - - \PYG{n}{r1} \PYG{o}{=} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} 
\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{r2} \PYG{o}{=} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{r12} \PYG{o}{=} \PYG{n}{sqrt}\PYG{p}{((}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{deno} \PYG{o}{=} \PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{+}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{r12}\PYG{p}{)} - \PYG{n}{deno2} \PYG{o}{=} \PYG{n}{deno}\PYG{o}{*}\PYG{n}{deno} - \PYG{k}{return} \PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{\PYGZhy{}}\PYG{n}{alpha}\PYG{o}{*}\PYG{n}{alpha}\PYG{p}{)}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r1} \PYG{o}{+} \PYG{n}{r2}\PYG{p}{)} \PYG{o}{+}\PYG{l+m+mf}{2.0}\PYG{o}{*}\PYG{n}{alpha} \PYG{o}{+} \PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{r12}\PYG{o}{+}\PYG{n}{deno2}\PYG{o}{*}\PYG{p}{(}\PYG{n}{alpha}\PYG{o}{*}\PYG{n}{r12}\PYG{o}{\PYGZhy{}}\PYG{n}{deno2}\PYG{o}{+}\PYG{l+m+mi}{2}\PYG{o}{*}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{deno}\PYG{o}{\PYGZhy{}}\PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{n}{r12}\PYG{p}{)} - -\PYG{c+c1}{\PYGZsh{} Derivate of wave function ansatz as function of variational parameters} -\PYG{k}{def} \PYG{n+nf}{DerivativeWFansatz}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{):} - - \PYG{n}{WfDer} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{l+m+mi}{2}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{r1} \PYG{o}{=} 
\PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{r2} \PYG{o}{=} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{r12} \PYG{o}{=} \PYG{n}{sqrt}\PYG{p}{((}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{deno} \PYG{o}{=} \PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{+}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{r12}\PYG{p}{)} - \PYG{n}{deno2} \PYG{o}{=} \PYG{n}{deno}\PYG{o}{*}\PYG{n}{deno} - \PYG{n}{WfDer}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r1}\PYG{o}{+}\PYG{n}{r2}\PYG{p}{)} - \PYG{n}{WfDer}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{n}{r12}\PYG{o}{*}\PYG{n}{r12}\PYG{o}{*}\PYG{n}{deno2} - \PYG{k}{return} \PYG{n}{WfDer} - -\PYG{c+c1}{\PYGZsh{} Setting up the quantum force for the two\PYGZhy{}electron quantum dot, recall that it is a vector} -\PYG{k}{def} \PYG{n+nf}{QuantumForce}\PYG{p}{(}\PYG{n}{r}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{):} - - \PYG{n}{qforce} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{r12} \PYG{o}{=} 
\PYG{n}{sqrt}\PYG{p}{((}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{0}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2} \PYG{o}{+} \PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{l+m+mi}{1}\PYG{p}{])}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{p}{)} - \PYG{n}{deno} \PYG{o}{=} \PYG{l+m+mf}{1.0}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{o}{+}\PYG{n}{beta}\PYG{o}{*}\PYG{n}{r12}\PYG{p}{)} - \PYG{n}{qforce}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,:]} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{2}\PYG{o}{*}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,:]}\PYG{o}{*}\PYG{n}{alpha}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,:]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,:])}\PYG{o}{*}\PYG{n}{deno}\PYG{o}{*}\PYG{n}{deno}\PYG{o}{/}\PYG{n}{r12} - \PYG{n}{qforce}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,:]} \PYG{o}{=} \PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{2}\PYG{o}{*}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,:]}\PYG{o}{*}\PYG{n}{alpha}\PYG{o}{*}\PYG{p}{(}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{,:]}\PYG{o}{\PYGZhy{}}\PYG{n}{r}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{,:])}\PYG{o}{*}\PYG{n}{deno}\PYG{o}{*}\PYG{n}{deno}\PYG{o}{/}\PYG{n}{r12} - \PYG{k}{return} \PYG{n}{qforce} - - -\PYG{c+c1}{\PYGZsh{} Computing the derivative of the energy and the energy} -\PYG{k}{def} \PYG{n+nf}{EnergyDerivative}\PYG{p}{(}\PYG{n}{x0}\PYG{p}{):} - - - \PYG{c+c1}{\PYGZsh{} Parameters in the Fokker\PYGZhy{}Planck simulation of the quantum force} - \PYG{n}{D} \PYG{o}{=} \PYG{l+m+mf}{0.5} - \PYG{n}{TimeStep} \PYG{o}{=} \PYG{l+m+mf}{0.05} - \PYG{c+c1}{\PYGZsh{} positions} - \PYG{n}{PositionOld} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{PositionNew} \PYG{o}{=} 
\PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{c+c1}{\PYGZsh{} Quantum force} - \PYG{n}{QuantumForceOld} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{QuantumForceNew} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - - \PYG{n}{energy} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{DeltaE} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{alpha} \PYG{o}{=} \PYG{n}{x0}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} - \PYG{n}{beta} \PYG{o}{=} \PYG{n}{x0}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} - \PYG{n}{EnergyDer} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{DeltaPsi} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{DerivativePsiE} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{c+c1}{\PYGZsh{}Initial position} - \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{):} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{normalvariate}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{TimeStep}\PYG{p}{)} - \PYG{n}{wfold} \PYG{o}{=} \PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{QuantumForceOld} \PYG{o}{=} \PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,} \PYG{n}{beta}\PYG{p}{)} - - \PYG{c+c1}{\PYGZsh{}Loop over MC MCcycles} - \PYG{k}{for} \PYG{n}{MCcycle} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberMCcycles}\PYG{p}{):} - \PYG{c+c1}{\PYGZsh{}Trial position moving one particle at the time} - \PYG{k}{for} \PYG{n}{i} 
\PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{):} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{normalvariate}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{TimeStep}\PYG{p}{)}\PYG{o}{+}\PYGZbs{} - \PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{*}\PYG{n}{TimeStep}\PYG{o}{*}\PYG{n}{D} - \PYG{n}{wfnew} \PYG{o}{=} \PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{PositionNew}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{QuantumForceNew} \PYG{o}{=} \PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{PositionNew}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,} \PYG{n}{beta}\PYG{p}{)} - \PYG{n}{GreensFunction} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{GreensFunction} \PYG{o}{+=} \PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])}\PYG{o}{*}\PYGZbs{} - \PYG{p}{(}\PYG{n}{D}\PYG{o}{*}\PYG{n}{TimeStep}\PYG{o}{*}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])}\PYG{o}{\PYGZhy{}}\PYGZbs{} - \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])} - - \PYG{n}{GreensFunction} \PYG{o}{=} \PYG{n}{exp}\PYG{p}{(}\PYG{n}{GreensFunction}\PYG{p}{)} - \PYG{n}{ProbabilityRatio} \PYG{o}{=} 
\PYG{n}{GreensFunction}\PYG{o}{*}\PYG{n}{wfnew}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{o}{/}\PYG{n}{wfold}\PYG{o}{**}\PYG{l+m+mi}{2} - \PYG{c+c1}{\PYGZsh{}Metropolis\PYGZhy{}Hastings test to see whether we accept the move} - \PYG{k}{if} \PYG{n}{random}\PYG{p}{()} \PYG{o}{\PYGZlt{}=} \PYG{n}{ProbabilityRatio}\PYG{p}{:} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} - \PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} - \PYG{n}{wfold} \PYG{o}{=} \PYG{n}{wfnew} - \PYG{n}{DeltaE} \PYG{o}{=} \PYG{n}{LocalEnergy}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{DerPsi} \PYG{o}{=} \PYG{n}{DerivativeWFansatz}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{DeltaPsi} \PYG{o}{+=} \PYG{n}{DerPsi} - \PYG{n}{energy} \PYG{o}{+=} \PYG{n}{DeltaE} - \PYG{n}{DerivativePsiE} \PYG{o}{+=} \PYG{n}{DerPsi}\PYG{o}{*}\PYG{n}{DeltaE} - - \PYG{c+c1}{\PYGZsh{} We calculate mean values} - \PYG{n}{energy} \PYG{o}{/=} \PYG{n}{NumberMCcycles} - \PYG{n}{DerivativePsiE} \PYG{o}{/=} \PYG{n}{NumberMCcycles} - \PYG{n}{DeltaPsi} \PYG{o}{/=} \PYG{n}{NumberMCcycles} - \PYG{n}{EnergyDer} \PYG{o}{=} \PYG{l+m+mi}{2}\PYG{o}{*}\PYG{p}{(}\PYG{n}{DerivativePsiE}\PYG{o}{\PYGZhy{}}\PYG{n}{DeltaPsi}\PYG{o}{*}\PYG{n}{energy}\PYG{p}{)} - \PYG{k}{return} \PYG{n}{EnergyDer} - - -\PYG{c+c1}{\PYGZsh{} Computing the expectation value of the local energy} -\PYG{k}{def} \PYG{n+nf}{Energy}\PYG{p}{(}\PYG{n}{x0}\PYG{p}{):} - \PYG{c+c1}{\PYGZsh{} Parameters in the Fokker\PYGZhy{}Planck simulation of the quantum force} - \PYG{n}{D} \PYG{o}{=} \PYG{l+m+mf}{0.5} - \PYG{n}{TimeStep} \PYG{o}{=} \PYG{l+m+mf}{0.05} - \PYG{c+c1}{\PYGZsh{} 
positions} - \PYG{n}{PositionOld} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{PositionNew} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{c+c1}{\PYGZsh{} Quantum force} - \PYG{n}{QuantumForceOld} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - \PYG{n}{QuantumForceNew} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{zeros}\PYG{p}{((}\PYG{n}{NumberParticles}\PYG{p}{,}\PYG{n}{Dimension}\PYG{p}{),} \PYG{n}{np}\PYG{o}{.}\PYG{n}{double}\PYG{p}{)} - - \PYG{n}{energy} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{DeltaE} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{n}{alpha} \PYG{o}{=} \PYG{n}{x0}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} - \PYG{n}{beta} \PYG{o}{=} \PYG{n}{x0}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} - \PYG{c+c1}{\PYGZsh{}Initial position} - \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{):} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{normalvariate}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{TimeStep}\PYG{p}{)} - \PYG{n}{wfold} \PYG{o}{=} \PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{QuantumForceOld} \PYG{o}{=} \PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,} \PYG{n}{beta}\PYG{p}{)} - - \PYG{c+c1}{\PYGZsh{}Loop over MC MCcycles} - \PYG{k}{for} \PYG{n}{MCcycle} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberMCcycles}\PYG{p}{):} - 
\PYG{c+c1}{\PYGZsh{}Trial position moving one particle at the time} - \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{NumberParticles}\PYG{p}{):} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{normalvariate}\PYG{p}{(}\PYG{l+m+mf}{0.0}\PYG{p}{,}\PYG{l+m+mf}{1.0}\PYG{p}{)}\PYG{o}{*}\PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{TimeStep}\PYG{p}{)}\PYG{o}{+}\PYGZbs{} - \PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{*}\PYG{n}{TimeStep}\PYG{o}{*}\PYG{n}{D} - \PYG{n}{wfnew} \PYG{o}{=} \PYG{n}{WaveFunction}\PYG{p}{(}\PYG{n}{PositionNew}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{QuantumForceNew} \PYG{o}{=} \PYG{n}{QuantumForce}\PYG{p}{(}\PYG{n}{PositionNew}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,} \PYG{n}{beta}\PYG{p}{)} - \PYG{n}{GreensFunction} \PYG{o}{=} \PYG{l+m+mf}{0.0} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{GreensFunction} \PYG{o}{+=} \PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])}\PYG{o}{*}\PYGZbs{} - \PYG{p}{(}\PYG{n}{D}\PYG{o}{*}\PYG{n}{TimeStep}\PYG{o}{*}\PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])}\PYG{o}{\PYGZhy{}}\PYGZbs{} - \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]}\PYG{o}{+}\PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{])} - - \PYG{n}{GreensFunction} \PYG{o}{=} \PYG{n}{exp}\PYG{p}{(}\PYG{n}{GreensFunction}\PYG{p}{)} - \PYG{n}{ProbabilityRatio} \PYG{o}{=} 
\PYG{n}{GreensFunction}\PYG{o}{*}\PYG{n}{wfnew}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{o}{/}\PYG{n}{wfold}\PYG{o}{**}\PYG{l+m+mi}{2} - \PYG{c+c1}{\PYGZsh{}Metropolis\PYGZhy{}Hastings test to see whether we accept the move} - \PYG{k}{if} \PYG{n}{random}\PYG{p}{()} \PYG{o}{\PYGZlt{}=} \PYG{n}{ProbabilityRatio}\PYG{p}{:} - \PYG{k}{for} \PYG{n}{j} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{n}{Dimension}\PYG{p}{):} - \PYG{n}{PositionOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{PositionNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} - \PYG{n}{QuantumForceOld}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} \PYG{o}{=} \PYG{n}{QuantumForceNew}\PYG{p}{[}\PYG{n}{i}\PYG{p}{,}\PYG{n}{j}\PYG{p}{]} - \PYG{n}{wfold} \PYG{o}{=} \PYG{n}{wfnew} - \PYG{n}{DeltaE} \PYG{o}{=} \PYG{n}{LocalEnergy}\PYG{p}{(}\PYG{n}{PositionOld}\PYG{p}{,}\PYG{n}{alpha}\PYG{p}{,}\PYG{n}{beta}\PYG{p}{)} - \PYG{n}{energy} \PYG{o}{+=} \PYG{n}{DeltaE} - \PYG{k}{if} \PYG{n}{Printout}\PYG{p}{:} - \PYG{n}{outfile}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+si}{\PYGZpc{}f}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+s1}{\PYGZsq{}} \PYG{o}{\PYGZpc{}}\PYG{p}{(}\PYG{n}{energy}\PYG{o}{/}\PYG{p}{(}\PYG{n}{MCcycle}\PYG{o}{+}\PYG{l+m+mf}{1.0}\PYG{p}{)))} - \PYG{c+c1}{\PYGZsh{} We calculate mean values} - \PYG{n}{energy} \PYG{o}{/=} \PYG{n}{NumberMCcycles} - \PYG{k}{return} \PYG{n}{energy} - -\PYG{c+c1}{\PYGZsh{}Here starts the main program with variable declarations} -\PYG{n}{NumberParticles} \PYG{o}{=} \PYG{l+m+mi}{2} -\PYG{n}{Dimension} \PYG{o}{=} \PYG{l+m+mi}{2} -\PYG{c+c1}{\PYGZsh{} seed for rng generator} -\PYG{n}{seed}\PYG{p}{()} -\PYG{c+c1}{\PYGZsh{} Monte Carlo cycles for parameter optimization} -\PYG{n}{Printout} \PYG{o}{=} \PYG{k+kc}{False} -\PYG{n}{NumberMCcycles}\PYG{o}{=} \PYG{l+m+mi}{10000} -\PYG{c+c1}{\PYGZsh{} guess for variational parameters} -\PYG{n}{x0} \PYG{o}{=} 
\PYG{n}{np}\PYG{o}{.}\PYG{n}{array}\PYG{p}{([}\PYG{l+m+mf}{0.9}\PYG{p}{,}\PYG{l+m+mf}{0.2}\PYG{p}{])} -\PYG{c+c1}{\PYGZsh{} Using Broydens method to find optimal parameters} -\PYG{n}{res} \PYG{o}{=} \PYG{n}{minimize}\PYG{p}{(}\PYG{n}{Energy}\PYG{p}{,} \PYG{n}{x0}\PYG{p}{,} \PYG{n}{method}\PYG{o}{=}\PYG{l+s+s1}{\PYGZsq{}BFGS\PYGZsq{}}\PYG{p}{,} \PYG{n}{jac}\PYG{o}{=}\PYG{n}{EnergyDerivative}\PYG{p}{,} \PYG{n}{options}\PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{l+s+s1}{\PYGZsq{}gtol\PYGZsq{}}\PYG{p}{:} \PYG{l+m+mf}{1e\PYGZhy{}4}\PYG{p}{,}\PYG{l+s+s1}{\PYGZsq{}disp\PYGZsq{}}\PYG{p}{:} \PYG{k+kc}{True}\PYG{p}{\PYGZcb{})} -\PYG{n}{x0} \PYG{o}{=} \PYG{n}{res}\PYG{o}{.}\PYG{n}{x} -\PYG{c+c1}{\PYGZsh{} Compute the energy again with the optimal parameters and increased number of Monte Cycles} -\PYG{n}{NumberMCcycles}\PYG{o}{=} \PYG{l+m+mi}{2}\PYG{o}{**}\PYG{l+m+mi}{19} -\PYG{n}{Printout} \PYG{o}{=} \PYG{k+kc}{True} -\PYG{n}{FinalEnergy} \PYG{o}{=} \PYG{n}{Energy}\PYG{p}{(}\PYG{n}{x0}\PYG{p}{)} -\PYG{n}{EResult} \PYG{o}{=} \PYG{n}{np}\PYG{o}{.}\PYG{n}{array}\PYG{p}{([}\PYG{n}{FinalEnergy}\PYG{p}{,}\PYG{n}{FinalEnergy}\PYG{p}{])} -\PYG{n}{outfile}\PYG{o}{.}\PYG{n}{close}\PYG{p}{()} -\PYG{c+c1}{\PYGZsh{}nice printout with Pandas} -\PYG{k+kn}{import} \PYG{n+nn}{pandas} \PYG{k}{as} \PYG{n+nn}{pd} -\PYG{k+kn}{from} \PYG{n+nn}{pandas} \PYG{k+kn}{import} \PYG{n}{DataFrame} -\PYG{n}{data} \PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{l+s+s1}{\PYGZsq{}Optimal Parameters\PYGZsq{}}\PYG{p}{:}\PYG{n}{x0}\PYG{p}{,} \PYG{l+s+s1}{\PYGZsq{}Final Energy\PYGZsq{}}\PYG{p}{:}\PYG{n}{EResult}\PYG{p}{\PYGZcb{}} -\PYG{n}{frame} \PYG{o}{=} \PYG{n}{pd}\PYG{o}{.}\PYG{n}{DataFrame}\PYG{p}{(}\PYG{n}{data}\PYG{p}{)} -\PYG{n+nb}{print}\PYG{p}{(}\PYG{n}{frame}\PYG{p}{)} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/B1072FF5A94C91CA0E7A2DEAC165673E0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/B1072FF5A94C91CA0E7A2DEAC165673E0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 
1b1af436..00000000 --- a/doc/src/week9/_minted-week9/B1072FF5A94C91CA0E7A2DEAC165673E0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,9 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{maxval}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{maxval}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{+w}{ }\PYG{n}{maxloc}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/B14B88CDFDEFB1B74580189DAD406D980DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/B14B88CDFDEFB1B74580189DAD406D980DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index f4e01b2d..00000000 --- a/doc/src/week9/_minted-week9/B14B88CDFDEFB1B74580189DAD406D980DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n+nl}{Compphys}\PYG{p}{:}\PYG{o}{\PYGZti{}}\PYG{+w}{ }\PYG{n}{hjensen\PYGZdl{}}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{vec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{l+m+mi}{10000000} -\PYG{n}{Time}\PYG{+w}{ }\PYG{n}{used}\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{n}{norm}\PYG{+w}{ }\PYG{n}{computation}\PYG{o}{=}\PYG{l+m+mf}{0.04720500000} -\PYG{n+nl}{Compphys}\PYG{p}{:}\PYG{o}{\PYGZti{}}\PYG{+w}{ }\PYG{n}{hjensen\PYGZdl{}}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{novec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{l+m+mi}{10000000} -\PYG{n}{Time}\PYG{+w}{ }\PYG{n}{used}\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{n}{norm}\PYG{+w}{ 
}\PYG{n}{computation}\PYG{o}{=}\PYG{l+m+mf}{0.03311700000} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/B3374F1FF8EA8D36202B633FCF5DDC440DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/B3374F1FF8EA8D36202B633FCF5DDC440DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 8bdf9244..00000000 --- a/doc/src/week9/_minted-week9/B3374F1FF8EA8D36202B633FCF5DDC440DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,9 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for private(j)} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{l+m+mi}{100}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{j}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{\PYGZlt{}}\PYG{l+m+mi}{100}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{][}\PYG{n}{j}\PYG{p}{];} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/B34F6855F3305E747885C0F16A6DE2355FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex b/doc/src/week9/_minted-week9/B34F6855F3305E747885C0F16A6DE2355FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex deleted file mode 100644 index e3a4eb9e..00000000 --- a/doc/src/week9/_minted-week9/B34F6855F3305E747885C0F16A6DE2355FCE5D6F17CA120A1ECEC9E69E1BF2E5.pygtex +++ /dev/null @@ -1,59 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{\PYGZsh{} Common imports} -\PYG{k+kn}{import} \PYG{n+nn}{os} - -\PYG{c+c1}{\PYGZsh{} Where to save the figures and data files} 
-\PYG{n}{DATA\PYGZus{}ID} \PYG{o}{=} \PYG{l+s+s2}{\PYGZdq{}Results/EnergyMin\PYGZdq{}} - -\PYG{k}{def} \PYG{n+nf}{data\PYGZus{}path}\PYG{p}{(}\PYG{n}{dat\PYGZus{}id}\PYG{p}{):} - \PYG{k}{return} \PYG{n}{os}\PYG{o}{.}\PYG{n}{path}\PYG{o}{.}\PYG{n}{join}\PYG{p}{(}\PYG{n}{DATA\PYGZus{}ID}\PYG{p}{,} \PYG{n}{dat\PYGZus{}id}\PYG{p}{)} - -\PYG{n}{infile} \PYG{o}{=} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{data\PYGZus{}path}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}Energies.dat\PYGZdq{}}\PYG{p}{),}\PYG{l+s+s1}{\PYGZsq{}r\PYGZsq{}}\PYG{p}{)} - -\PYG{k+kn}{from} \PYG{n+nn}{numpy} \PYG{k+kn}{import} \PYG{n}{log2}\PYG{p}{,} \PYG{n}{zeros}\PYG{p}{,} \PYG{n}{mean}\PYG{p}{,} \PYG{n}{var}\PYG{p}{,} \PYG{n+nb}{sum}\PYG{p}{,} \PYG{n}{loadtxt}\PYG{p}{,} \PYG{n}{arange}\PYG{p}{,} \PYG{n}{array}\PYG{p}{,} \PYG{n}{cumsum}\PYG{p}{,} \PYG{n}{dot}\PYG{p}{,} \PYG{n}{transpose}\PYG{p}{,} \PYG{n}{diagonal}\PYG{p}{,} \PYG{n}{sqrt} -\PYG{k+kn}{from} \PYG{n+nn}{numpy.linalg} \PYG{k+kn}{import} \PYG{n}{inv} - -\PYG{k}{def} \PYG{n+nf}{block}\PYG{p}{(}\PYG{n}{x}\PYG{p}{):} - \PYG{c+c1}{\PYGZsh{} preliminaries} - \PYG{n}{n} \PYG{o}{=} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} - \PYG{n}{d} \PYG{o}{=} \PYG{n+nb}{int}\PYG{p}{(}\PYG{n}{log2}\PYG{p}{(}\PYG{n}{n}\PYG{p}{))} - \PYG{n}{s}\PYG{p}{,} \PYG{n}{gamma} \PYG{o}{=} \PYG{n}{zeros}\PYG{p}{(}\PYG{n}{d}\PYG{p}{),} \PYG{n}{zeros}\PYG{p}{(}\PYG{n}{d}\PYG{p}{)} - \PYG{n}{mu} \PYG{o}{=} \PYG{n}{mean}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} - - \PYG{c+c1}{\PYGZsh{} estimate the auto\PYGZhy{}covariance and variances} - \PYG{c+c1}{\PYGZsh{} for each blocking transformation} - \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n}{arange}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{n}{d}\PYG{p}{):} - \PYG{n}{n} \PYG{o}{=} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} - \PYG{c+c1}{\PYGZsh{} estimate autocovariance of x} - \PYG{n}{gamma}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]} \PYG{o}{=} 
\PYG{p}{(}\PYG{n}{n}\PYG{p}{)}\PYG{o}{**}\PYG{p}{(}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)}\PYG{o}{*}\PYG{n+nb}{sum}\PYG{p}{(} \PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{:(}\PYG{n}{n}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{)]}\PYG{o}{\PYGZhy{}}\PYG{n}{mu}\PYG{p}{)}\PYG{o}{*}\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:}\PYG{n}{n}\PYG{p}{]}\PYG{o}{\PYGZhy{}}\PYG{n}{mu}\PYG{p}{)} \PYG{p}{)} - \PYG{c+c1}{\PYGZsh{} estimate variance of x} - \PYG{n}{s}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]} \PYG{o}{=} \PYG{n}{var}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} - \PYG{c+c1}{\PYGZsh{} perform blocking transformation} - \PYG{n}{x} \PYG{o}{=} \PYG{l+m+mf}{0.5}\PYG{o}{*}\PYG{p}{(}\PYG{n}{x}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{::}\PYG{l+m+mi}{2}\PYG{p}{]} \PYG{o}{+} \PYG{n}{x}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{::}\PYG{l+m+mi}{2}\PYG{p}{])} - - \PYG{c+c1}{\PYGZsh{} generate the test observator M\PYGZus{}k from the theorem} - \PYG{n}{M} \PYG{o}{=} \PYG{p}{(}\PYG{n}{cumsum}\PYG{p}{(} \PYG{p}{((}\PYG{n}{gamma}\PYG{o}{/}\PYG{n}{s}\PYG{p}{)}\PYG{o}{**}\PYG{l+m+mi}{2}\PYG{o}{*}\PYG{l+m+mi}{2}\PYG{o}{**}\PYG{n}{arange}\PYG{p}{(}\PYG{l+m+mi}{1}\PYG{p}{,}\PYG{n}{d}\PYG{o}{+}\PYG{l+m+mi}{1}\PYG{p}{)[::}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{])[::}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{p}{)} \PYG{p}{)[::}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{]} - - \PYG{c+c1}{\PYGZsh{} we need a list of magic numbers} - \PYG{n}{q} \PYG{o}{=}\PYG{n}{array}\PYG{p}{([}\PYG{l+m+mf}{6.634897}\PYG{p}{,}\PYG{l+m+mf}{9.210340}\PYG{p}{,} \PYG{l+m+mf}{11.344867}\PYG{p}{,} \PYG{l+m+mf}{13.276704}\PYG{p}{,} \PYG{l+m+mf}{15.086272}\PYG{p}{,} \PYG{l+m+mf}{16.811894}\PYG{p}{,} \PYG{l+m+mf}{18.475307}\PYG{p}{,} \PYG{l+m+mf}{20.090235}\PYG{p}{,} \PYG{l+m+mf}{21.665994}\PYG{p}{,} \PYG{l+m+mf}{23.209251}\PYG{p}{,} \PYG{l+m+mf}{24.724970}\PYG{p}{,} \PYG{l+m+mf}{26.216967}\PYG{p}{,} \PYG{l+m+mf}{27.688250}\PYG{p}{,} \PYG{l+m+mf}{29.141238}\PYG{p}{,} \PYG{l+m+mf}{30.577914}\PYG{p}{,} \PYG{l+m+mf}{31.999927}\PYG{p}{,} 
\PYG{l+m+mf}{33.408664}\PYG{p}{,} \PYG{l+m+mf}{34.805306}\PYG{p}{,} \PYG{l+m+mf}{36.190869}\PYG{p}{,} \PYG{l+m+mf}{37.566235}\PYG{p}{,} \PYG{l+m+mf}{38.932173}\PYG{p}{,} \PYG{l+m+mf}{40.289360}\PYG{p}{,} \PYG{l+m+mf}{41.638398}\PYG{p}{,} \PYG{l+m+mf}{42.979820}\PYG{p}{,} \PYG{l+m+mf}{44.314105}\PYG{p}{,} \PYG{l+m+mf}{45.641683}\PYG{p}{,} \PYG{l+m+mf}{46.962942}\PYG{p}{,} \PYG{l+m+mf}{48.278236}\PYG{p}{,} \PYG{l+m+mf}{49.587884}\PYG{p}{,} \PYG{l+m+mf}{50.892181}\PYG{p}{])} - - \PYG{c+c1}{\PYGZsh{} use magic to determine when we should have stopped blocking} - \PYG{k}{for} \PYG{n}{k} \PYG{o+ow}{in} \PYG{n}{arange}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{,}\PYG{n}{d}\PYG{p}{):} - \PYG{k}{if}\PYG{p}{(}\PYG{n}{M}\PYG{p}{[}\PYG{n}{k}\PYG{p}{]} \PYG{o}{\PYGZlt{}} \PYG{n}{q}\PYG{p}{[}\PYG{n}{k}\PYG{p}{]):} - \PYG{k}{break} - \PYG{k}{if} \PYG{p}{(}\PYG{n}{k} \PYG{o}{\PYGZgt{}=} \PYG{n}{d}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{):} - \PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}Warning: Use more data\PYGZdq{}}\PYG{p}{)} - \PYG{k}{return} \PYG{n}{mu}\PYG{p}{,} \PYG{n}{s}\PYG{p}{[}\PYG{n}{k}\PYG{p}{]}\PYG{o}{/}\PYG{l+m+mi}{2}\PYG{o}{**}\PYG{p}{(}\PYG{n}{d}\PYG{o}{\PYGZhy{}}\PYG{n}{k}\PYG{p}{)} - - -\PYG{n}{x} \PYG{o}{=} \PYG{n}{loadtxt}\PYG{p}{(}\PYG{n}{infile}\PYG{p}{)} -\PYG{p}{(}\PYG{n}{mean}\PYG{p}{,} \PYG{n}{var}\PYG{p}{)} \PYG{o}{=} \PYG{n}{block}\PYG{p}{(}\PYG{n}{x}\PYG{p}{)} -\PYG{n}{std} \PYG{o}{=} \PYG{n}{sqrt}\PYG{p}{(}\PYG{n}{var}\PYG{p}{)} -\PYG{k+kn}{import} \PYG{n+nn}{pandas} \PYG{k}{as} \PYG{n+nn}{pd} -\PYG{k+kn}{from} \PYG{n+nn}{pandas} \PYG{k+kn}{import} \PYG{n}{DataFrame} -\PYG{n}{data} \PYG{o}{=}\PYG{p}{\PYGZob{}}\PYG{l+s+s1}{\PYGZsq{}Mean\PYGZsq{}}\PYG{p}{:[}\PYG{n}{mean}\PYG{p}{],} \PYG{l+s+s1}{\PYGZsq{}STDev\PYGZsq{}}\PYG{p}{:[}\PYG{n}{std}\PYG{p}{]\PYGZcb{}} -\PYG{n}{frame} \PYG{o}{=} \PYG{n}{pd}\PYG{o}{.}\PYG{n}{DataFrame}\PYG{p}{(}\PYG{n}{data}\PYG{p}{,}\PYG{n}{index}\PYG{o}{=}\PYG{p}{[}\PYG{l+s+s1}{\PYGZsq{}Values\PYGZsq{}}\PYG{p}{])} 
-\PYG{n+nb}{print}\PYG{p}{(}\PYG{n}{frame}\PYG{p}{)} - - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/BA9032551FF35EDAFF90CD33F6B15E9C0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/BA9032551FF35EDAFF90CD33F6B15E9C0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 64fbeaf7..00000000 --- a/doc/src/week9/_minted-week9/BA9032551FF35EDAFF90CD33F6B15E9C0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n+nl}{Compphys}\PYG{p}{:}\PYG{o}{\PYGZti{}}\PYG{+w}{ }\PYG{n}{hjensen\PYGZdl{}}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{vec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{l+m+mi}{1000000000} -\PYG{n}{Time}\PYG{+w}{ }\PYG{n}{used}\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{n}{norm}\PYG{+w}{ }\PYG{n}{computation}\PYG{o}{=}\PYG{l+m+mf}{58.41391100} -\PYG{n+nl}{Compphys}\PYG{p}{:}\PYG{o}{\PYGZti{}}\PYG{+w}{ }\PYG{n}{hjensen\PYGZdl{}}\PYG{+w}{ }\PYG{p}{.}\PYG{o}{/}\PYG{n}{novec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{l+m+mi}{1000000000} -\PYG{n}{Time}\PYG{+w}{ }\PYG{n}{used}\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{n}{norm}\PYG{+w}{ }\PYG{n}{computation}\PYG{o}{=}\PYG{l+m+mf}{46.51295300} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C1059EB3F2FBA0A55E300ACD9ADA65600DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C1059EB3F2FBA0A55E300ACD9ADA65600DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index a58c4e12..00000000 --- a/doc/src/week9/_minted-week9/C1059EB3F2FBA0A55E300ACD9ADA65600DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,11 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp parallel num\PYGZus{}threads(4)} -\PYG{p}{\PYGZob{}} -\PYG{c+cm}{/* .... 
*/} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel num\PYGZus{}threads(2)} -\PYG{p}{\PYGZob{}} -\PYG{c+c1}{//} -\PYG{p}{\PYGZcb{}} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C381E2F8F93BD26F791AD665A0A8CD3C0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C381E2F8F93BD26F791AD665A0A8CD3C0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 7843a09e..00000000 --- a/doc/src/week9/_minted-week9/C381E2F8F93BD26F791AD665A0A8CD3C0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{temp}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{c}\PYG{o}{*}\PYG{n}{d}\PYG{p}{;} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{temp}\PYG{p}{;} -\PYG{p}{\PYGZcb{}} -\PYG{n}{e}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{g}\PYG{p}{[}\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{];} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C3D4CC4B811D0A33C89CB1F8C986D0AE0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C3D4CC4B811D0A33C89CB1F8C986D0AE0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 13b0aaf5..00000000 --- a/doc/src/week9/_minted-week9/C3D4CC4B811D0A33C89CB1F8C986D0AE0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{clang}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O3}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{Rpass}\PYG{o}{=}\PYG{n}{loop}\PYG{o}{\PYGZhy{}}\PYG{n}{vectorize}\PYG{+w}{ 
}\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{vec}\PYG{p}{.}\PYG{n}{x}\PYG{+w}{ }\PYG{n}{vecexample}\PYG{p}{.}\PYG{n}{cpp} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C4773C9EC90AFB10AA82F3F0707B2A7F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C4773C9EC90AFB10AA82F3F0707B2A7F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 39130767..00000000 --- a/doc/src/week9/_minted-week9/C4773C9EC90AFB10AA82F3F0707B2A7F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{nthreads}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel shared(nthreads)} -\PYG{p}{\PYGZob{}} -\PYG{n}{nthreads}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{omp\PYGZus{}get\PYGZus{}num\PYGZus{}threads}\PYG{p}{();} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C50BA650723AA066C821FCC7380308940DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C50BA650723AA066C821FCC7380308940DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 1ec8a22a..00000000 --- a/doc/src/week9/_minted-week9/C50BA650723AA066C821FCC7380308940DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,8 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{k}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{n}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{;} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{n}{c}\PYG{o}{*}\PYG{n}{d}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{e}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ 
}\PYG{n}{g}\PYG{p}{[}\PYG{n}{k}\PYG{p}{];} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/C59B9D50DA2EFBDC04B033DE762F84A10DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/C59B9D50DA2EFBDC04B033DE762F84A10DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 286f69d4..00000000 --- a/doc/src/week9/_minted-week9/C59B9D50DA2EFBDC04B033DE762F84A10DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// \PYGZsh{}pragma omp parallel and \PYGZsh{}pragma omp for} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/CAB640527971B6B57C1BE2A96BFCE9060DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/CAB640527971B6B57C1BE2A96BFCE9060DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 4762b09e..00000000 --- a/doc/src/week9/_minted-week9/CAB640527971B6B57C1BE2A96BFCE9060DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp for} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/CF4F99DF7158F68DB3D95830ADC84A6A0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/CF4F99DF7158F68DB3D95830ADC84A6A0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index eeb52f4f..00000000 --- a/doc/src/week9/_minted-week9/CF4F99DF7158F68DB3D95830ADC84A6A0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Command\PYGZus{}name} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/D260B7D3534A177583278E39A1E8B4540DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/D260B7D3534A177583278E39A1E8B4540DC076E8BF450B81976EF4AD1C19D937.pygtex 
deleted file mode 100644 index 42bb3d51..00000000 --- a/doc/src/week9/_minted-week9/D260B7D3534A177583278E39A1E8B4540DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,17 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{c+c1}{// MPI initializations} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Init}\PYG{+w}{ }\PYG{p}{(}\PYG{o}{\PYGZam{}}\PYG{n}{nargs}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{args}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}size}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{numprocs}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm\PYGZus{}rank}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD}\PYG{p}{,}\PYG{+w}{ }\PYG{o}{\PYGZam{}}\PYG{n}{my\PYGZus{}rank}\PYG{p}{);} -\PYG{+w}{ }\PYG{n}{time\PYGZus{}start}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Wtime}\PYG{p}{();} -\PYG{+w}{ }\PYG{c+c1}{// Fixed values for a, b and n} -\PYG{+w}{ }\PYG{n}{a}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{+w}{ }\PYG{p}{;}\PYG{+w}{ }\PYG{n}{b}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{1.0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{n}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{1000}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{h}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{b}\PYG{o}{\PYGZhy{}}\PYG{n}{a}\PYG{p}{)}\PYG{o}{/}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{c+c1}{// h is the same for all processes} -\PYG{+w}{ }\PYG{n}{local\PYGZus{}n}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{n}\PYG{o}{/}\PYG{n}{numprocs}\PYG{p}{;} -\PYG{+w}{ }\PYG{c+c1}{// make sure n \PYGZgt{} numprocs, else integer division gives zero} -\PYG{+w}{ }\PYG{c+c1}{// Length of each process\PYGZsq{} interval of} -\PYG{+w}{ }\PYG{c+c1}{// integration = local\PYGZus{}n*h.} -\PYG{+w}{ }\PYG{n}{local\PYGZus{}a}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ 
}\PYG{n}{my\PYGZus{}rank}\PYG{o}{*}\PYG{n}{local\PYGZus{}n}\PYG{o}{*}\PYG{n}{h}\PYG{p}{;} -\PYG{+w}{ }\PYG{n}{local\PYGZus{}b}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{local\PYGZus{}a}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{local\PYGZus{}n}\PYG{o}{*}\PYG{n}{h}\PYG{p}{;} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/D40FF8DF66AE7E75C9595A8D5BACAB640DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/D40FF8DF66AE7E75C9595A8D5BACAB640DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index e563a9ee..00000000 --- a/doc/src/week9/_minted-week9/D40FF8DF66AE7E75C9595A8D5BACAB640DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}COMM\PYGZus{}WORLD} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/D83A68DEA822CDD8B1F913727E09D71C0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/D83A68DEA822CDD8B1F913727E09D71C0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 5a75e871..00000000 --- a/doc/src/week9/_minted-week9/D83A68DEA822CDD8B1F913727E09D71C0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,6 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for reduction(+:sum)} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{o}{*}\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/DC0B57C0CB950072FDDB0AEC2171B7DC0DC076E8BF450B81976EF4AD1C19D937.pygtex 
b/doc/src/week9/_minted-week9/DC0B57C0CB950072FDDB0AEC2171B7DC0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 5dce4df3..00000000 --- a/doc/src/week9/_minted-week9/DC0B57C0CB950072FDDB0AEC2171B7DC0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,10 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{;} -\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.}\PYG{p}{;} -\PYG{c+cm}{/* allocating and initializing arrays */} -\PYG{c+cm}{/* ... */} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel for default(shared) private(i) reduction(+:sum)} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{sum}\PYG{+w}{ }\PYG{o}{+=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{o}{*}\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/DCC11AEB5F26E6AB9C552C2F9405813F0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/DCC11AEB5F26E6AB9C552C2F9405813F0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 0524f648..00000000 --- a/doc/src/week9/_minted-week9/DCC11AEB5F26E6AB9C552C2F9405813F0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp...} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/DDE70CA1C57A6186C2C2DC3C1B7D40B40DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/DDE70CA1C57A6186C2C2DC3C1B7D40B40DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 3a65b49c..00000000 --- a/doc/src/week9/_minted-week9/DDE70CA1C57A6186C2C2DC3C1B7D40B40DC076E8BF450B81976EF4AD1C19D937.pygtex +++ 
/dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp master \PYGZob{} ... \PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/E0955720563C64B02F95CDF882E4FC2C0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/E0955720563C64B02F95CDF882E4FC2C0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index ed3f01ee..00000000 --- a/doc/src/week9/_minted-week9/E0955720563C64B02F95CDF882E4FC2C0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp barrier} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/E181C2032ABBA4FD80D0BF5670AEE5F3B0CAD346A13BD81D9AF720CF234DAE08.pygtex b/doc/src/week9/_minted-week9/E181C2032ABBA4FD80D0BF5670AEE5F3B0CAD346A13BD81D9AF720CF234DAE08.pygtex deleted file mode 100644 index bc6944fb..00000000 --- a/doc/src/week9/_minted-week9/E181C2032ABBA4FD80D0BF5670AEE5F3B0CAD346A13BD81D9AF720CF234DAE08.pygtex +++ /dev/null @@ -1,81 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -// Matrix\PYGZhy{}matrix multiplication and Frobenius norm of a matrix with OpenMP -\PYGZsh{}include \PYGZlt{}cstdlib\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iostream\PYGZgt{} -\PYGZsh{}include \PYGZlt{}cmath\PYGZgt{} -\PYGZsh{}include \PYGZlt{}iomanip\PYGZgt{} -\PYGZsh{}include \PYGZlt{}omp.h\PYGZgt{} -\PYGZsh{} include \PYGZlt{}ctime\PYGZgt{} - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -\PYGZob{} - // read in dimension of square matrix - int n = atoi(argv[1]); - double **A, **B, **C; - int i, j, k; - int thread\PYGZus{}num; - double wtime, Fsum, s, angle; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} Compute matrix product C = A * B and Frobenius norm.\PYGZdq{} \PYGZlt{}\PYGZlt{} endl; - 
omp\PYGZus{}set\PYGZus{}num\PYGZus{}threads(4); - thread\PYGZus{}num = omp\PYGZus{}get\PYGZus{}max\PYGZus{}threads (); - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The number of processors available = \PYGZdq{} \PYGZlt{}\PYGZlt{} omp\PYGZus{}get\PYGZus{}num\PYGZus{}procs () \PYGZlt{}\PYGZlt{} endl ; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The number of threads available = \PYGZdq{} \PYGZlt{}\PYGZlt{} thread\PYGZus{}num \PYGZlt{}\PYGZlt{} endl; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} The matrix order n = \PYGZdq{} \PYGZlt{}\PYGZlt{} n \PYGZlt{}\PYGZlt{} endl; - - s = 1.0/sqrt( (double) n); - wtime = omp\PYGZus{}get\PYGZus{}wtime ( ); - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - \PYGZcb{} - // Define parallel region -\PYGZsh{} pragma omp parallel for default(shared) private (angle, i, j, k) reduction(+:Fsum) - // Set up values for matrix A and B and zero matrix C - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (j = 0; j \PYGZlt{} n; j++) \PYGZob{} - angle = 2.0*M\PYGZus{}PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - \PYGZcb{} - \PYGZcb{} - // Then perform the matrix\PYGZhy{}matrix multiplication - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (j = 0; j \PYGZlt{} n; j++) \PYGZob{} - C[i][j] = 0.0; - for (k = 0; k \PYGZlt{} n; k++) \PYGZob{} - C[i][j] += A[i][k]*B[k][j]; - \PYGZcb{} - \PYGZcb{} - \PYGZcb{} - // Compute now the Frobenius norm - Fsum = 0.0; - for (i = 0; i \PYGZlt{} n; i++)\PYGZob{} - for (j = 0; j \PYGZlt{} n; j++) \PYGZob{} - Fsum += C[i][j]*C[i][j]; - \PYGZcb{} - \PYGZcb{} - Fsum = sqrt(Fsum); -// end parallel region and letting only one thread perform I/O - wtime = omp\PYGZus{}get\PYGZus{}wtime ( ) \PYGZhy{} wtime; - cout \PYGZlt{}\PYGZlt{} setiosflags(ios::showpoint | ios::uppercase); - cout \PYGZlt{}\PYGZlt{} setprecision(10) 
\PYGZlt{}\PYGZlt{} setw(20) \PYGZlt{}\PYGZlt{} \PYGZdq{}Time used for matrix\PYGZhy{}matrix multiplication=\PYGZdq{} \PYGZlt{}\PYGZlt{} wtime \PYGZlt{}\PYGZlt{} endl; - cout \PYGZlt{}\PYGZlt{} \PYGZdq{} Frobenius norm = \PYGZdq{} \PYGZlt{}\PYGZlt{} Fsum \PYGZlt{}\PYGZlt{} endl; - // Free up space - for (int i = 0; i \PYGZlt{} n; i++)\PYGZob{} - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - \PYGZcb{} - delete[] A; - delete[] B; - delete[] C; - return 0; -\PYGZcb{} - - - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/EBF64C0559909635066A31CFCA48FB890DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/EBF64C0559909635066A31CFCA48FB890DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 6bbe939d..00000000 --- a/doc/src/week9/_minted-week9/EBF64C0559909635066A31CFCA48FB890DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,12 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{j}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{x}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{cos}\PYG{p}{(}\PYG{n}{j}\PYG{o}{*}\PYG{l+m+mf}{1.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{x}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{l+m+mi}{0}\PYG{+w}{ }\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{x}\PYG{o}{*}\PYG{n}{sin}\PYG{p}{(}\PYG{n}{j}\PYG{o}{*}\PYG{l+m+mf}{2.0}\PYG{p}{);} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} -\PYG{+w}{ }\PYG{k}{else}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{j}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{0.0}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} 
-\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/EEA4F347F2C750B4F63E1210ECFA01AF0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/EEA4F347F2C750B4F63E1210ECFA01AF0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index b924721f..00000000 --- a/doc/src/week9/_minted-week9/EEA4F347F2C750B4F63E1210ECFA01AF0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,7 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{b}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{15.}\PYG{p}{;} -\PYG{+w}{ }\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mi}{1}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{n}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{l+m+mi}{\PYGZhy{}1}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{;} -\PYG{+w}{ }\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/EF953C491D773131C133BD5EFEBF53E60DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/EF953C491D773131C133BD5EFEBF53E60DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index d512e605..00000000 --- a/doc/src/week9/_minted-week9/EF953C491D773131C133BD5EFEBF53E60DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,18 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}include}\PYG{+w}{ }\PYG{c+cpf}{\PYGZlt{}omp.h\PYGZgt{}} -\PYG{c+cp}{\PYGZsh{}define CHUNKSIZE 100} -\PYG{c+cp}{\PYGZsh{}define N 1000} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n+nf}{main}\PYG{+w}{ }\PYG{p}{(}\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{argc}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{char}\PYG{+w}{ 
}\PYG{o}{*}\PYG{n}{argv}\PYG{p}{[])} -\PYG{p}{\PYGZob{}} -\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{i}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{chunk}\PYG{p}{;} -\PYG{k+kt}{float}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{N}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{N}\PYG{p}{],}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{N}\PYG{p}{];} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{*}\PYG{+w}{ }\PYG{l+m+mf}{1.0}\PYG{p}{;} -\PYG{n}{chunk}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{CHUNKSIZE}\PYG{p}{;} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel shared(a,b,c,chunk) private(i)} -\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp for schedule(guided,chunk)} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{+w}{ }\PYG{o}{\PYGZlt{}}\PYG{+w}{ }\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{n}{c}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{a}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{+}\PYG{+w}{ }\PYG{n}{b}\PYG{p}{[}\PYG{n}{i}\PYG{p}{];} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+cm}{/* end of parallel region */} -\PYG{p}{\PYGZcb{}} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/F42E70E67DFBF66F590D1AB1C6F2A3A30DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/F42E70E67DFBF66F590D1AB1C6F2A3A30DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 59c99c1b..00000000 --- a/doc/src/week9/_minted-week9/F42E70E67DFBF66F590D1AB1C6F2A3A30DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,4 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] 
-\PYG{c+cp}{\PYGZsh{}pragma omp critical} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/F664E716A6DBB4386F9AE50DEF564DC00DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/F664E716A6DBB4386F9AE50DEF564DC00DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 368db391..00000000 --- a/doc/src/week9/_minted-week9/F664E716A6DBB4386F9AE50DEF564DC00DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,13 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+cp}{\PYGZsh{}pragma omp task} -\PYG{c+cp}{\PYGZsh{}pragma omp parallel shared(p\PYGZus{}vec) private(i)} -\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp single} -\PYG{p}{\PYGZob{}} -\PYG{k}{for}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{i}\PYG{o}{=}\PYG{l+m+mi}{0}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{\PYGZlt{}}\PYG{n}{N}\PYG{p}{;}\PYG{+w}{ }\PYG{n}{i}\PYG{o}{++}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{r}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{n}{random\PYGZus{}number}\PYG{p}{();} -\PYG{+w}{ }\PYG{k}{if}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{p\PYGZus{}vec}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]}\PYG{+w}{ }\PYG{o}{\PYGZgt{}}\PYG{+w}{ }\PYG{n}{r}\PYG{p}{)}\PYG{+w}{ }\PYG{p}{\PYGZob{}} -\PYG{c+cp}{\PYGZsh{}pragma omp task} -\PYG{+w}{ }\PYG{n}{do\PYGZus{}work}\PYG{+w}{ }\PYG{p}{(}\PYG{n}{p\PYGZus{}vec}\PYG{p}{[}\PYG{n}{i}\PYG{p}{]);} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/FB6759D3D69E9EFF4E66FDAAE43F5D4D0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/FB6759D3D69E9EFF4E66FDAAE43F5D4D0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 092941a5..00000000 --- a/doc/src/week9/_minted-week9/FB6759D3D69E9EFF4E66FDAAE43F5D4D0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{MPI\PYGZus{}reduce}\PYG{p}{(}\PYG{+w}{ 
}\PYG{k+kt}{void}\PYG{+w}{ }\PYG{o}{*}\PYG{n}{senddata}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{void}\PYG{o}{*}\PYG{+w}{ }\PYG{n}{resultdata}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{count}\PYG{p}{,} -\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Datatype}\PYG{+w}{ }\PYG{n}{datatype}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Op}\PYG{p}{,}\PYG{+w}{ }\PYG{k+kt}{int}\PYG{+w}{ }\PYG{n}{root}\PYG{p}{,}\PYG{+w}{ }\PYG{n}{MPI\PYGZus{}Comm}\PYG{+w}{ }\PYG{n}{comm}\PYG{p}{)} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/FCA2371BE7E05424E97D799D6024AE6E0DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/FCA2371BE7E05424E97D799D6024AE6E0DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 508b3086..00000000 --- a/doc/src/week9/_minted-week9/FCA2371BE7E05424E97D799D6024AE6E0DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,5 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{g}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O0}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{c}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{cpp} -\PYG{n}{c}\PYG{o}{++}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{g}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{O0}\PYG{+w}{ }\PYG{o}{\PYGZhy{}}\PYG{n}{o}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{exe}\PYG{+w}{ }\PYG{n}{mycode}\PYG{p}{.}\PYG{n}{o} - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/FDACD668132974D45A08A486A6FCF1930DC076E8BF450B81976EF4AD1C19D937.pygtex b/doc/src/week9/_minted-week9/FDACD668132974D45A08A486A6FCF1930DC076E8BF450B81976EF4AD1C19D937.pygtex deleted file mode 100644 index 1cafb6eb..00000000 --- a/doc/src/week9/_minted-week9/FDACD668132974D45A08A486A6FCF1930DC076E8BF450B81976EF4AD1C19D937.pygtex +++ /dev/null @@ -1,10 +0,0 @@ -\begin{Verbatim}[commandchars=\\\{\},codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8\relax}] -\PYG{c+c1}{// this function defines the function to integrate} -\PYG{k+kt}{double}\PYG{+w}{ 
}\PYG{n+nf}{int\PYGZus{}function}\PYG{p}{(}\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{x}\PYG{p}{)} -\PYG{p}{\PYGZob{}} -\PYG{+w}{ }\PYG{k+kt}{double}\PYG{+w}{ }\PYG{n}{value}\PYG{+w}{ }\PYG{o}{=}\PYG{+w}{ }\PYG{l+m+mf}{4.}\PYG{o}{/}\PYG{p}{(}\PYG{l+m+mf}{1.}\PYG{o}{+}\PYG{n}{x}\PYG{o}{*}\PYG{n}{x}\PYG{p}{);} -\PYG{+w}{ }\PYG{k}{return}\PYG{+w}{ }\PYG{n}{value}\PYG{p}{;} -\PYG{p}{\PYGZcb{}}\PYG{+w}{ }\PYG{c+c1}{// end of function to evaluate} - - -\end{Verbatim} diff --git a/doc/src/week9/_minted-week9/default.pygstyle b/doc/src/week9/_minted-week9/default.pygstyle deleted file mode 100644 index 211763d1..00000000 --- a/doc/src/week9/_minted-week9/default.pygstyle +++ /dev/null @@ -1,101 +0,0 @@ - -\makeatletter -\def\PYG@reset{\let\PYG@it=\relax \let\PYG@bf=\relax% - \let\PYG@ul=\relax \let\PYG@tc=\relax% - \let\PYG@bc=\relax \let\PYG@ff=\relax} -\def\PYG@tok#1{\csname PYG@tok@#1\endcsname} -\def\PYG@toks#1+{\ifx\relax#1\empty\else% - \PYG@tok{#1}\expandafter\PYG@toks\fi} -\def\PYG@do#1{\PYG@bc{\PYG@tc{\PYG@ul{% - \PYG@it{\PYG@bf{\PYG@ff{#1}}}}}}} -\def\PYG#1#2{\PYG@reset\PYG@toks#1+\relax+\PYG@do{#2}} - -\@namedef{PYG@tok@w}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}} -\@namedef{PYG@tok@c}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} -\@namedef{PYG@tok@cp}{\def\PYG@tc##1{\textcolor[rgb]{0.61,0.40,0.00}{##1}}} -\@namedef{PYG@tok@k}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@kp}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@kt}{\def\PYG@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}} -\@namedef{PYG@tok@o}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@ow}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} -\@namedef{PYG@tok@nb}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@nf}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} 
-\@namedef{PYG@tok@nc}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} -\@namedef{PYG@tok@nn}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} -\@namedef{PYG@tok@ne}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.80,0.25,0.22}{##1}}} -\@namedef{PYG@tok@nv}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@no}{\def\PYG@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}} -\@namedef{PYG@tok@nl}{\def\PYG@tc##1{\textcolor[rgb]{0.46,0.46,0.00}{##1}}} -\@namedef{PYG@tok@ni}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.44,0.44,0.44}{##1}}} -\@namedef{PYG@tok@na}{\def\PYG@tc##1{\textcolor[rgb]{0.41,0.47,0.13}{##1}}} -\@namedef{PYG@tok@nt}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@nd}{\def\PYG@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} -\@namedef{PYG@tok@s}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@sd}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@si}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.64,0.35,0.47}{##1}}} -\@namedef{PYG@tok@se}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.67,0.36,0.12}{##1}}} -\@namedef{PYG@tok@sr}{\def\PYG@tc##1{\textcolor[rgb]{0.64,0.35,0.47}{##1}}} -\@namedef{PYG@tok@ss}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@sx}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@m}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@gh}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} -\@namedef{PYG@tok@gu}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}} -\@namedef{PYG@tok@gd}{\def\PYG@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}} -\@namedef{PYG@tok@gi}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.52,0.00}{##1}}} -\@namedef{PYG@tok@gr}{\def\PYG@tc##1{\textcolor[rgb]{0.89,0.00,0.00}{##1}}} 
-\@namedef{PYG@tok@ge}{\let\PYG@it=\textit} -\@namedef{PYG@tok@gs}{\let\PYG@bf=\textbf} -\@namedef{PYG@tok@gp}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} -\@namedef{PYG@tok@go}{\def\PYG@tc##1{\textcolor[rgb]{0.44,0.44,0.44}{##1}}} -\@namedef{PYG@tok@gt}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}} -\@namedef{PYG@tok@err}{\def\PYG@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}} -\@namedef{PYG@tok@kc}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@kd}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@kn}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@kr}{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@bp}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} -\@namedef{PYG@tok@fm}{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} -\@namedef{PYG@tok@vc}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@vg}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@vi}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@vm}{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} -\@namedef{PYG@tok@sa}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@sb}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@sc}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@dl}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@s2}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@sh}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@s1}{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} -\@namedef{PYG@tok@mb}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} 
-\@namedef{PYG@tok@mf}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@mh}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@mi}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@il}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@mo}{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} -\@namedef{PYG@tok@ch}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} -\@namedef{PYG@tok@cm}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} -\@namedef{PYG@tok@cpf}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} -\@namedef{PYG@tok@c1}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} -\@namedef{PYG@tok@cs}{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.24,0.48,0.48}{##1}}} - -\def\PYGZbs{\char`\\} -\def\PYGZus{\char`\_} -\def\PYGZob{\char`\{} -\def\PYGZcb{\char`\}} -\def\PYGZca{\char`\^} -\def\PYGZam{\char`\&} -\def\PYGZlt{\char`\<} -\def\PYGZgt{\char`\>} -\def\PYGZsh{\char`\#} -\def\PYGZpc{\char`\%} -\def\PYGZdl{\char`\$} -\def\PYGZhy{\char`\-} -\def\PYGZsq{\char`\'} -\def\PYGZdq{\char`\"} -\def\PYGZti{\char`\~} -% for compatibility with earlier versions -\def\PYGZat{@} -\def\PYGZlb{[} -\def\PYGZrb{]} -\makeatother - diff --git a/doc/src/week9/ipynb-week9-src.tar.gz b/doc/src/week9/ipynb-week9-src.tar.gz deleted file mode 100644 index 622f1df5d6c58769d57edb8138b62981029b6649..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 192 zcmV;x06+g9iwFSLF7sso1MSaC3c@fD2H>uHia9|^(lo`TU>7a~5igL^)W+JRCMnw6 z+Xv`MaZ^OdxA_@n7-kOHdb7(ScXz>J5JHSn7&B$bl!(mr1Y-)ACMjhkjR^yU)\n' + - ' * http://lab.hakim.se/reveal-js\n' + - ' * MIT licensed\n' + - ' *\n' + - ' * Copyright (C) 2014 Hakim El Hattab, http://hakim.se\n' + - ' */' - }, - - qunit: { - files: [ 'test/*.html' ] - }, - - uglify: { - options: { - banner: '<%= meta.banner %>\n' - }, - build: { - src: 
'js/reveal.js', - dest: 'js/reveal.min.js' - } - }, - - cssmin: { - compress: { - files: { - 'css/reveal.min.css': [ 'css/reveal.css' ] - } - } - }, - - sass: { - main: { - files: { - 'css/theme/darkgray.css': 'css/theme/source/darkgray.scss', - 'css/theme/beigesmall.css': 'css/theme/source/beigesmall.scss', - 'css/theme/cbc.css': 'css/theme/source/cbc.scss', - 'css/theme/default.css': 'css/theme/source/default.scss', - 'css/theme/beige.css': 'css/theme/source/beige.scss', - 'css/theme/night.css': 'css/theme/source/night.scss', - 'css/theme/serif.css': 'css/theme/source/serif.scss', - 'css/theme/simple.css': 'css/theme/source/simple.scss', - 'css/theme/sky.css': 'css/theme/source/sky.scss', - 'css/theme/moon.css': 'css/theme/source/moon.scss', - 'css/theme/solarized.css': 'css/theme/source/solarized.scss', - 'css/theme/blood.css': 'css/theme/source/blood.scss' - } - } - }, - - jshint: { - options: { - curly: false, - eqeqeq: true, - immed: true, - latedef: true, - newcap: true, - noarg: true, - sub: true, - undef: true, - eqnull: true, - browser: true, - expr: true, - globals: { - head: false, - module: false, - console: false, - unescape: false - } - }, - files: [ 'Gruntfile.js', 'js/reveal.js' ] - }, - - connect: { - server: { - options: { - port: port, - base: '.' 
- } - } - }, - - zip: { - 'reveal-js-presentation.zip': [ - 'index.html', - 'css/**', - 'js/**', - 'lib/**', - 'images/**', - 'plugin/**' - ] - }, - - watch: { - main: { - files: [ 'Gruntfile.js', 'js/reveal.js', 'css/reveal.css' ], - tasks: 'default' - }, - theme: { - files: [ 'css/theme/source/*.scss', 'css/theme/template/*.scss' ], - tasks: 'themes' - } - } - - }); - - // Dependencies - grunt.loadNpmTasks( 'grunt-contrib-qunit' ); - grunt.loadNpmTasks( 'grunt-contrib-jshint' ); - grunt.loadNpmTasks( 'grunt-contrib-cssmin' ); - grunt.loadNpmTasks( 'grunt-contrib-uglify' ); - grunt.loadNpmTasks( 'grunt-contrib-watch' ); - grunt.loadNpmTasks( 'grunt-contrib-sass' ); - grunt.loadNpmTasks( 'grunt-contrib-connect' ); - grunt.loadNpmTasks( 'grunt-zip' ); - - // Default task - grunt.registerTask( 'default', [ 'jshint', 'cssmin', 'uglify', 'qunit' ] ); - - // Theme task - grunt.registerTask( 'themes', [ 'sass' ] ); - - // Package presentation to archive - grunt.registerTask( 'package', [ 'default', 'zip' ] ); - - // Serve presentation locally - grunt.registerTask( 'serve', [ 'connect', 'watch' ] ); - - // Run tests - grunt.registerTask( 'test', [ 'jshint', 'qunit' ] ); - -}; diff --git a/doc/src/week9/reveal.js/LICENSE b/doc/src/week9/reveal.js/LICENSE deleted file mode 100644 index 09623076..00000000 --- a/doc/src/week9/reveal.js/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (C) 2015 Hakim El Hattab, http://hakim.se - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of 
the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/doc/src/week9/reveal.js/README.md b/doc/src/week9/reveal.js/README.md deleted file mode 100644 index 573b1959..00000000 --- a/doc/src/week9/reveal.js/README.md +++ /dev/null @@ -1,1052 +0,0 @@ -# reveal.js [![Build Status](https://travis-ci.org/hakimel/reveal.js.svg?branch=master)](https://travis-ci.org/hakimel/reveal.js) - -A framework for easily creating beautiful presentations using HTML. [Check out the live demo](http://lab.hakim.se/reveal-js/). - -reveal.js comes with a broad range of features including [nested slides](https://github.com/hakimel/reveal.js#markup), [Markdown contents](https://github.com/hakimel/reveal.js#markdown), [PDF export](https://github.com/hakimel/reveal.js#pdf-export), [speaker notes](https://github.com/hakimel/reveal.js#speaker-notes) and a [JavaScript API](https://github.com/hakimel/reveal.js#api). It's best viewed in a modern browser but [fallbacks](https://github.com/hakimel/reveal.js/wiki/Browser-Support) are available to make sure your presentation can still be viewed elsewhere. - - -#### More reading: -- [Installation](#installation): Step-by-step instructions for getting reveal.js running on your computer. -- [Changelog](https://github.com/hakimel/reveal.js/releases): Up-to-date version history. -- [Examples](https://github.com/hakimel/reveal.js/wiki/Example-Presentations): Presentations created with reveal.js, add your own! 
-- [Browser Support](https://github.com/hakimel/reveal.js/wiki/Browser-Support): Explanation of browser support and fallbacks. -- [Plugins](https://github.com/hakimel/reveal.js/wiki/Plugins,-Tools-and-Hardware): A list of plugins that can be used to extend reveal.js. - -## Online Editor - -Presentations are written using HTML or Markdown but there's also an online editor for those of you who prefer a graphical interface. Give it a try at [http://slides.com](http://slides.com). - - -## Instructions - -### Markup - -Markup hierarchy needs to be ``
    `` where the ``
    `` represents one slide and can be repeated indefinitely. If you place multiple ``
    ``'s inside of another ``
    `` they will be shown as vertical slides. The first of the vertical slides is the "root" of the others (at the top), and it will be included in the horizontal sequence. For example: - -```html -
    -
    -
    Single Horizontal Slide
    -
    -
    Vertical Slide 1
    -
    Vertical Slide 2
    -
    -
    -
    -``` - -### Markdown - -It's possible to write your slides using Markdown. To enable Markdown, add the ```data-markdown``` attribute to your ```
    ``` elements and wrap the contents in a ``` -
    -``` - -#### External Markdown - -You can write your content as a separate file and have reveal.js load it at runtime. Note the separator arguments which determine how slides are delimited in the external file. The ```data-charset``` attribute is optional and specifies which charset to use when loading the external file. - -When used locally, this feature requires that reveal.js [runs from a local web server](#full-setup). - -```html -
    -
    -``` - -#### Element Attributes - -Special syntax (in html comment) is available for adding attributes to Markdown elements. This is useful for fragments, amongst other things. - -```html -
    - -
    -``` - -#### Slide Attributes - -Special syntax (in html comment) is available for adding attributes to the slide `
    ` elements generated by your Markdown. - -```html -
    - -
    -``` - - -### Configuration - -At the end of your page you need to initialize reveal by running the following code. Note that all config values are optional and will default as specified below. - -```javascript -Reveal.initialize({ - - // Display controls in the bottom right corner - controls: true, - - // Display a presentation progress bar - progress: true, - - // Display the page number of the current slide - slideNumber: false, - - // Push each slide change to the browser history - history: false, - - // Enable keyboard shortcuts for navigation - keyboard: true, - - // Enable the slide overview mode - overview: true, - - // Vertical centering of slides - center: true, - - // Enables touch navigation on devices with touch input - touch: true, - - // Loop the presentation - loop: false, - - // Change the presentation direction to be RTL - rtl: false, - - // Turns fragments on and off globally - fragments: true, - - // Flags if the presentation is running in an embedded mode, - // i.e. 
contained within a limited portion of the screen - embedded: false, - - // Flags if we should show a help overlay when the questionmark - // key is pressed - help: true, - - // Number of milliseconds between automatically proceeding to the - // next slide, disabled when set to 0, this value can be overwritten - // by using a data-autoslide attribute on your slides - autoSlide: 0, - - // Stop auto-sliding after user input - autoSlideStoppable: true, - - // Enable slide navigation via mouse wheel - mouseWheel: false, - - // Hides the address bar on mobile devices - hideAddressBar: true, - - // Opens links in an iframe preview overlay - previewLinks: false, - - // Transition style - transition: 'default', // none/fade/slide/convex/concave/zoom - - // Transition speed - transitionSpeed: 'default', // default/fast/slow - - // Transition style for full page slide backgrounds - backgroundTransition: 'default', // none/fade/slide/convex/concave/zoom - - // Number of slides away from the current that are visible - viewDistance: 3, - - // Parallax background image - parallaxBackgroundImage: '', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'" - - // Parallax background size - parallaxBackgroundSize: '', // CSS syntax, e.g. "2100px 900px" - - // Amount to move parallax background (horizontal and vertical) on slide change - // Number, e.g. 100 - parallaxBackgroundHorizontal: '', - parallaxBackgroundVertical: '' - -}); -``` - - -The configuration can be updated after initialization using the ```configure``` method: - -```javascript -// Turn autoSlide off -Reveal.configure({ autoSlide: 0 }); - -// Start auto-sliding every 5s -Reveal.configure({ autoSlide: 5000 }); -``` - - -### Dependencies - -Reveal.js doesn't _rely_ on any third party scripts to work but a few optional libraries are included by default. 
These libraries are loaded as dependencies in the order they appear, for example: - -```javascript -Reveal.initialize({ - dependencies: [ - // Cross-browser shim that fully implements classList - https://github.com/eligrey/classList.js/ - { src: 'lib/js/classList.js', condition: function() { return !document.body.classList; } }, - - // Interpret Markdown in
    elements - { src: 'plugin/markdown/marked.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } }, - { src: 'plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } }, - - // Syntax highlight for elements - { src: 'plugin/highlight/highlight.js', async: true, callback: function() { hljs.initHighlightingOnLoad(); } }, - - // Zoom in and out with Alt+click - { src: 'plugin/zoom-js/zoom.js', async: true }, - - // Speaker notes - { src: 'plugin/notes/notes.js', async: true }, - - // Remote control your reveal.js presentation using a touch device - { src: 'plugin/remotes/remotes.js', async: true }, - - // MathJax - { src: 'plugin/math/math.js', async: true } - ] -}); -``` - -You can add your own extensions using the same syntax. The following properties are available for each dependency object: -- **src**: Path to the script to load -- **async**: [optional] Flags if the script should load after reveal.js has started, defaults to false -- **callback**: [optional] Function to execute when the script has loaded -- **condition**: [optional] Function which must return true for the script to be loaded - - -### Ready Event - -A 'ready' event is fired when reveal.js has loaded all non-async dependencies and is ready to start navigating. To check if reveal.js is already 'ready' you can call `Reveal.isReady()`. - -```javascript -Reveal.addEventListener( 'ready', function( event ) { - // event.currentSlide, event.indexh, event.indexv -} ); -``` - - -### Presentation Size - -All presentations have a normal size, that is the resolution at which they are authored. The framework will automatically scale presentations uniformly based on this size to ensure that everything fits on any given display or viewport. - -See below for a list of configuration options related to sizing, including default values: - -```javascript -Reveal.initialize({ - - ... 
- - // The "normal" size of the presentation, aspect ratio will be preserved - // when the presentation is scaled to fit different resolutions. Can be - // specified using percentage units. - width: 960, - height: 700, - - // Factor of the display size that should remain empty around the content - margin: 0.1, - - // Bounds for smallest/largest possible scale to apply to content - minScale: 0.2, - maxScale: 1.5 - -}); -``` - - -### Auto-sliding - -Presentations can be configured to progress through slides automatically, without any user input. To enable this you will need to tell the framework how many milliseconds it should wait between slides: - -```javascript -// Slide every five seconds -Reveal.configure({ - autoSlide: 5000 -}); -``` -When this is turned on a control element will appear that enables users to pause and resume auto-sliding. Alternatively, sliding can be paused or resumed by pressing »a« on the keyboard. Sliding is paused automatically as soon as the user starts navigating. You can disable these controls by specifying ```autoSlideStoppable: false``` in your reveal.js config. - -You can also override the slide duration for individual slides and fragments by using the ```data-autoslide``` attribute: - -```html -
    -

    After 2 seconds the first fragment will be shown.

    -

    After 10 seconds the next fragment will be shown.

    -

    Now, the fragment is displayed for 2 seconds before the next slide is shown.

    -
    -``` - -Whenever the auto-slide mode is resumed or paused the ```autoslideresumed``` and ```autoslidepaused``` events are fired. - - -### Keyboard Bindings - -If you're unhappy with any of the default keyboard bindings you can override them using the ```keyboard``` config option: - -```javascript -Reveal.configure({ - keyboard: { - 13: 'next', // go to the next slide when the ENTER key is pressed - 27: function() {}, // do something custom when ESC is pressed - 32: null // don't do anything when SPACE is pressed (i.e. disable a reveal.js default binding) - } -}); -``` - -### Lazy Loading - -When working on presentation with a lot of media or iframe content it's important to load lazily. Lazy loading means that reveal.js will only load content for the few slides nearest to the current slide. The number of slides that are preloaded is determined by the `viewDistance` configuration option. - -To enable lazy loading all you need to do is change your "src" attributes to "data-src" as shown below. This is supported for image, video, audio and iframe elements. Lazy loaded iframes will also unload when the containing slide is no longer visible. - -```html -
    - - - -
    -``` - - -### API - -The ``Reveal`` object exposes a JavaScript API for controlling navigation and reading state: - -```javascript -// Navigation -Reveal.slide( indexh, indexv, indexf ); -Reveal.left(); -Reveal.right(); -Reveal.up(); -Reveal.down(); -Reveal.prev(); -Reveal.next(); -Reveal.prevFragment(); -Reveal.nextFragment(); - -// Toggle presentation states, optionally pass true/false to force on/off -Reveal.toggleOverview(); -Reveal.togglePause(); -Reveal.toggleAutoSlide(); - -// Change a config value at runtime -Reveal.configure({ controls: true }); - -// Returns the present configuration options -Reveal.getConfig(); - -// Fetch the current scale of the presentation -Reveal.getScale(); - -// Retrieves the previous and current slide elements -Reveal.getPreviousSlide(); -Reveal.getCurrentSlide(); - -Reveal.getIndices(); // { h: 0, v: 0 } } -Reveal.getProgress(); // 0-1 -Reveal.getTotalSlides(); - -// State checks -Reveal.isFirstSlide(); -Reveal.isLastSlide(); -Reveal.isOverview(); -Reveal.isPaused(); -Reveal.isAutoSliding(); -``` - -### Slide Changed Event - -A 'slidechanged' event is fired each time the slide is changed (regardless of state). The event object holds the index values of the current slide as well as a reference to the previous and current slide HTML nodes. - -Some libraries, like MathJax (see [#226](https://github.com/hakimel/reveal.js/issues/226#issuecomment-10261609)), get confused by the transforms and display states of slides. Often times, this can be fixed by calling their update or render function from this callback. - -```javascript -Reveal.addEventListener( 'slidechanged', function( event ) { - // event.previousSlide, event.currentSlide, event.indexh, event.indexv -} ); -``` - -### Presentation State - -The presentation's current state can be fetched by using the `getState` method. A state object contains all of the information required to put the presentation back as it was when `getState` was first called. Sort of like a snapshot. 
It's a simple object that can easily be stringified and persisted or sent over the wire. - -```javascript -Reveal.slide( 1 ); -// we're on slide 1 - -var state = Reveal.getState(); - -Reveal.slide( 3 ); -// we're on slide 3 - -Reveal.setState( state ); -// we're back on slide 1 -``` - -### Slide States - -If you set ``data-state="somestate"`` on a slide ``
    ``, "somestate" will be applied as a class on the document element when that slide is opened. This allows you to apply broad style changes to the page based on the active slide. - -Furthermore you can also listen to these changes in state via JavaScript: - -```javascript -Reveal.addEventListener( 'somestate', function() { - // TODO: Sprinkle magic -}, false ); -``` - -### Slide Backgrounds - -Slides are contained within a limited portion of the screen by default to allow them to fit any display and scale uniformly. You can apply full page backgrounds outside of the slide area by adding a ```data-background``` attribute to your ```
    ``` elements. Four different types of backgrounds are supported: color, image, video and iframe. Below are a few examples. - -```html -
    -

    All CSS color formats are supported, like rgba() or hsl().

    -
    -
    -

    This slide will have a full-size background image.

    -
    -
    -

    This background image will be sized to 100px and repeated.

    -
    -
    -

    Video. Multiple sources can be defined using a comma separated list. Video will loop when the data-background-video-loop attribute is provided.

    -
    -
    -

    Embeds a web page as a background. Note that the page won't be interactive.

    -
    -``` - -Backgrounds transition using a fade animation by default. This can be changed to a linear sliding transition by passing ```backgroundTransition: 'slide'``` to the ```Reveal.initialize()``` call. Alternatively you can set ```data-background-transition``` on any section with a background to override that specific transition. - - -### Parallax Background - -If you want to use a parallax scrolling background, set the first two config properties below when initializing reveal.js (the other two are optional). - -```javascript -Reveal.initialize({ - - // Parallax background image - parallaxBackgroundImage: '', // e.g. "https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg" - - // Parallax background size - parallaxBackgroundSize: '', // CSS syntax, e.g. "2100px 900px" - currently only pixels are supported (don't use % or auto) - - // Amount of pixels to move the parallax background per slide step, - // a value of 0 disables movement along the given axis - // These are optional, if they aren't specified they'll be calculated automatically - parallaxBackgroundHorizontal: 200, - parallaxBackgroundVertical: 50 - -}); -``` - -Make sure that the background size is much bigger than screen size to allow for some scrolling. [View example](http://lab.hakim.se/reveal-js/?parallaxBackgroundImage=https%3A%2F%2Fs3.amazonaws.com%2Fhakim-static%2Freveal-js%2Freveal-parallax-1.jpg&parallaxBackgroundSize=2100px%20900px). - - - -### Slide Transitions -The global presentation transition is set using the ```transition``` config value. You can override the global transition for a specific slide by using the ```data-transition``` attribute: - -```html -
    -

    This slide will override the presentation transition and zoom!

    -
    - -
    -

    Choose from three transition speeds: default, fast or slow!

    -
    -``` - -You can also use different in and out transitions for the same slide: - -```html -
    - The train goes on … -
    -
    - and on … -
    -
    - and stops. -
    -
    - (Passengers entering and leaving) -
    -
    - And it starts again. -
    -``` - - -Note that this does not work with the page and cube transitions. - - -### Internal links - -It's easy to link between slides. The first example below targets the index of another slide whereas the second targets a slide with an ID attribute (```
    ```): - -```html -Link -Link -``` - -You can also add relative navigation links, similar to the built in reveal.js controls, by appending one of the following classes on any element. Note that each element is automatically given an ```enabled``` class when it's a valid navigation route based on the current slide. - -```html - - - - - - -``` - - -### Fragments -Fragments are used to highlight individual elements on a slide. Every element with the class ```fragment``` will be stepped through before moving on to the next slide. Here's an example: http://lab.hakim.se/reveal-js/#/fragments - -The default fragment style is to start out invisible and fade in. This style can be changed by appending a different class to the fragment: - -```html -
    -

    grow

    -

    shrink

    -

    fade-out

    -

    visible only once

    -

    blue only once

    -

    highlight-red

    -

    highlight-green

    -

    highlight-blue

    -
    -``` - -Multiple fragments can be applied to the same element sequentially by wrapping it, this will fade in the text on the first step and fade it back out on the second. - -```html -
    - - I'll fade in, then out - -
    -``` - -The display order of fragments can be controlled using the ```data-fragment-index``` attribute. - -```html -
    -

    Appears last

    -

    Appears first

    -

    Appears second

    -
    -``` - -### Fragment events - -When a slide fragment is either shown or hidden reveal.js will dispatch an event. - -Some libraries, like MathJax (see #505), get confused by the initially hidden fragment elements. Often times this can be fixed by calling their update or render function from this callback. - -```javascript -Reveal.addEventListener( 'fragmentshown', function( event ) { - // event.fragment = the fragment DOM element -} ); -Reveal.addEventListener( 'fragmenthidden', function( event ) { - // event.fragment = the fragment DOM element -} ); -``` - -### Code syntax highlighting - -By default, Reveal is configured with [highlight.js](http://softwaremaniacs.org/soft/highlight/en/) for code syntax highlighting. Below is an example with clojure code that will be syntax highlighted. When the `data-trim` attribute is present surrounding whitespace is automatically removed. - -```html -
    -
    
    -(def lazy-fib
    -  (concat
    -   [0 1]
    -   ((fn rfib [a b]
    -        (lazy-cons (+ a b) (rfib b (+ a b)))) 0 1)))
    -	
    -
    -``` - -### Slide number -If you would like to display the page number of the current slide you can do so using the ```slideNumber``` configuration value. - -```javascript -// Shows the slide number using default formatting -Reveal.configure({ slideNumber: true }); - -// Slide number formatting can be configured using these variables: -// h: current slide's horizontal index -// v: current slide's vertical index -// c: current slide index (flattened) -// t: total number of slides (flattened) -Reveal.configure({ slideNumber: 'c / t' }); - -``` - - -### Overview mode - -Press "Esc" or "o" keys to toggle the overview mode on and off. While you're in this mode, you can still navigate between slides, -as if you were at 1,000 feet above your presentation. The overview mode comes with a few API hooks: - -```javascript -Reveal.addEventListener( 'overviewshown', function( event ) { /* ... */ } ); -Reveal.addEventListener( 'overviewhidden', function( event ) { /* ... */ } ); - -// Toggle the overview mode programmatically -Reveal.toggleOverview(); -``` - -### Fullscreen mode -Just press »F« on your keyboard to show your presentation in fullscreen mode. Press the »ESC« key to exit fullscreen mode. - - -### Embedded media -Embedded HTML5 `
    -
    -

     

     

     

    - - -
    -
    -

    Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking

    -
    - - -
    -Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no [1, 2] -
    - -
    -[1] Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway -
    -
    -[2] Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA -
    -
    -
    -

    March 11-15

    -
    -
    - - - -

    Read »

    - - -
    - -

    - -

    - -
    - - - - -
    - © 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license -
    - - - diff --git a/doc/src/week9/week9-plain.tex b/doc/src/week9/week9-plain.tex deleted file mode 100644 index 98b02c01..00000000 --- a/doc/src/week9/week9-plain.tex +++ /dev/null @@ -1,5936 +0,0 @@ -%% -%% Automatically generated file from DocOnce source -%% (https://github.com/doconce/doconce/) -%% doconce format latex week9.do.txt --minted_latex_style=trac --latex_admon=paragraph --no_mako -%% - - -%-------------------- begin preamble ---------------------- - -\documentclass[% -oneside, % oneside: electronic viewing, twoside: printing -final, % draft: marks overfull hboxes, figures with paths -10pt]{article} - -\listfiles % print all files needed to compile this document - -\usepackage{relsize,makeidx,color,setspace,amsmath,amsfonts,amssymb} -\usepackage[table]{xcolor} -\usepackage{bm,ltablex,microtype} - -\usepackage[pdftex]{graphicx} - -\usepackage{fancyvrb} % packages needed for verbatim environments -\usepackage{minted} -\usemintedstyle{default} - -\usepackage[T1]{fontenc} -%\usepackage[latin1]{inputenc} -\usepackage{ucs} -\usepackage[utf8x]{inputenc} - -\usepackage{lmodern} % Latin Modern fonts derived from Computer Modern - -% Hyperlinks in PDF: -\definecolor{linkcolor}{rgb}{0,0,0.4} -\usepackage{hyperref} -\hypersetup{ - breaklinks=true, - colorlinks=true, - linkcolor=linkcolor, - urlcolor=linkcolor, - citecolor=black, - filecolor=black, - %filecolor=blue, - pdfmenubar=true, - pdftoolbar=true, - bookmarksdepth=3 % Uncomment (and tweak) for PDF bookmarks with more levels than the TOC - } -%\hyperbaseurl{} % hyperlinks are relative to this root - -\setcounter{tocdepth}{2} % levels in table of contents - -% --- fancyhdr package for fancy headers --- -\usepackage{fancyhdr} -\fancyhf{} % sets both header and footer to nothing -\renewcommand{\headrulewidth}{0pt} -\fancyfoot[LE,RO]{\thepage} -% Ensure copyright on titlepage (article style) and chapter pages (book style) -\fancypagestyle{plain}{ - \fancyhf{} - \fancyfoot[C]{{\footnotesize \copyright\ 
1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license}} -% \renewcommand{\footrulewidth}{0mm} - \renewcommand{\headrulewidth}{0mm} -} -% Ensure copyright on titlepages with \thispagestyle{empty} -\fancypagestyle{empty}{ - \fancyhf{} - \fancyfoot[C]{{\footnotesize \copyright\ 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license}} - \renewcommand{\footrulewidth}{0mm} - \renewcommand{\headrulewidth}{0mm} -} - -\pagestyle{fancy} - - -\usepackage[framemethod=TikZ]{mdframed} - -% --- begin definitions of admonition environments --- - -% --- end of definitions of admonition environments --- - -% prevent orhpans and widows -\clubpenalty = 10000 -\widowpenalty = 10000 - -\newenvironment{doconceexercise}{}{} -\newcounter{doconceexercisecounter} - - -% ------ header in subexercises ------ -%\newcommand{\subex}[1]{\paragraph{#1}} -%\newcommand{\subex}[1]{\par\vspace{1.7mm}\noindent{\bf #1}\ \ } -\makeatletter -% 1.5ex is the spacing above the header, 0.5em the spacing after subex title -\newcommand\subex{\@startsection*{paragraph}{4}{\z@}% - {1.5ex\@plus1ex \@minus.2ex}% - {-0.5em}% - {\normalfont\normalsize\bfseries}} -\makeatother - - -% --- end of standard preamble for documents --- - - -% insert custom LaTeX commands... 
- -\raggedbottom -\makeindex -\usepackage[totoc]{idxlayout} % for index in the toc -\usepackage[nottoc]{tocbibind} % for references/bibliography in the toc - -%-------------------- end preamble ---------------------- - -\begin{document} - -% matching end for #ifdef PREAMBLE - -\newcommand{\exercisesection}[1]{\subsection*{#1}} - - -% ------------------- main content ---------------------- - - - -% ----------------- title ------------------------- - -\thispagestyle{empty} - -\begin{center} -{\LARGE\bf -\begin{spacing}{1.25} -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking -\end{spacing} -} -\end{center} - -% ----------------- author(s) ------------------------- - -\begin{center} -{\bf Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no${}^{1, 2}$} \\ [0mm] -\end{center} - -\begin{center} -% List of all institutions: -\centerline{{\small ${}^1$Department of Physics and Center fo Computing in Science Education, University of Oslo, Oslo, Norway}} -\centerline{{\small ${}^2$Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA}} -\end{center} - -% ----------------- end author(s) ------------------------- - -% --- begin date --- -\begin{center} -March 11-15 -\end{center} -% --- end date --- - -\vspace{1cm} - - -% !split -\subsection*{Overview of week 11, March 11-15} - -% --- begin paragraph admon --- -\paragraph{Topics.} -\begin{enumerate} -\item Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week - -\item Resampling Techniques, emphasis on Blocking - -\item Discussion of onebody densities (whiteboard notes) - -\item Start discussion on optimization and parallelization for Python and C++ -% * \href{{https://youtu.be/}}{Video of lecture TBA} -% * \href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/HandWrittenNotes/2024/NotesMarch22.pdf}}{Handwritten notes} -\end{enumerate} - 
-\noindent -% --- end paragraph admon --- - - - -Note, these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week. - -% !split -\subsection*{Why resampling methods ?} - -% --- begin paragraph admon --- -\paragraph{Statistical analysis.} -\begin{itemize} -\item Our simulations can be treated as \emph{computer experiments}. This is particularly the case for Monte Carlo methods - -\item The results can be analysed with the same statistical tools as we would use analysing experimental data. - -\item As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Statistical analysis} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item As in other experiments, many numerical experiments have two classes of errors: -\begin{enumerate} - -\item Statistical errors - -\item Systematical errors - -\end{enumerate} - -\noindent -\item Statistical errors can be estimated using standard tools from statistics - -\item Systematical errors are method specific and must be treated differently from case to case. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{And why do we use such methods?} - -As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and thereby -the standard deviation of a given expectation value. - -The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem. 
-% !split -\subsection*{Central limit theorem} - -Last week we derived the central limit theorem with the following assumptions: - - -% --- begin paragraph admon --- -\paragraph{Measurement $i$.} -We assumed that each individual measurement $x_{ij}$ is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment $i$ with $n$ samples as -\[ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}. -\] -and the sample variance -\[ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -\] -% --- end paragraph admon --- - - - -% !split -\subsection*{Further remarks} - -Note that we use $n$ instead of $n-1$ in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. - -% !split -\subsection*{Running many measurements} - - -% --- begin paragraph admon --- -\paragraph{Adding $m$ measurements $i$.} -With the assumption that the average measurements $i$ are also defined as iid stochastic variables and have the same probability function $p$, -we defined the total average over $m$ experiments as -\[ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}. -\] -and the total variance -\[ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -\] -% --- end paragraph admon --- - - -These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit $m\rightarrow \infty$, the distribution for $\overline{X}$ is given by a Gaussian distribution with variance $\sigma^2_m$. - -% !split -\subsection*{Adding more definitions} - -The total sample variance over the $mn$ measurements is defined as -\[ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2. 
-\] -We have from the equation for $\sigma_m^2$ -\[ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -\] -and introducing the centered value $\tilde{x}_{ij}=x_{ij}-\overline{X}$, we can rewrite $\sigma_m^2$ as -\[ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -\] - -% !split -\subsection*{Further rewriting} - -We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j1$ and $X_1,X_2,\cdots, X_n$ is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging $X_1,X_2,\cdots,X_n$ in an $n$-tuple. Define: -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} - -% !split -\subsection*{Why blocking?} - -The strength of the blocking method is when the number of -observations, $n$ is large. For large $n$, the complexity of dependent -bootstrapping scales poorly, but the blocking method does not, -moreover, it becomes more accurate the larger $n$ is. - -% !split -\subsection*{Blocking Transformations} - We now define the blocking transformations. The idea is to take the mean of subsequent -pair of elements from $\bm{X}$ and form a new vector -$\bm{X}_1$. Continuing in the same way by taking the mean of -subsequent pairs of elements of $\bm{X}_1$ we obtain $\bm{X}_2$, and -so on. 
-Define $\bm{X}_i$ recursively by: - -\begin{align} -(\bm{X}_0)_k &\equiv (\bm{X})_k \nonumber \\ -(\bm{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\bm{X}_i)_{2k-1} + -(\bm{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\end{align} - -% !split -\subsection*{Blocking transformations} - -The quantity $\bm{X}_k$ is -subject to $k$ \textbf{blocking transformations}. We now have $d$ vectors -$\bm{X}_0, \bm{X}_1,\cdots,\bm{X}_{d-1}$ containing the subsequent -averages of observations. It turns out that if the components of -$\bm{X}$ form a stationary time series, then the components of -$\bm{X}_i$ form a stationary time series for all $0 \leq i \leq d-1$ - -We can then compute the autocovariance, the variance, sample mean, and -number of observations for each $i$. -Let $\gamma_i, \sigma_i^2, -\overline{X}_i$ denote the covariance, variance and average of the -elements of $\bm{X}_i$ and let $n_i$ be the number of elements of -$\bm{X}_i$. It follows by induction that $n_i = n/2^i$. - -% !split -\subsection*{Blocking Transformations} - -Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since $h =|i-j|$ -we can define -\begin{align} -\gamma_{k+1}(h) &= cov\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}cov\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \hspace{0.1cm} \mathrm{h = 0} \\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \mathrm{else} -\end{align} - -Since the quantity $\hat{X}$ is asymptotically uncorrelated by assumption, $\hat{X}_k$ is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean $\mathrm{var}(\overline{X})$. 
- -% !split -\subsection*{Blocking Transformations, getting there} -We have -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\end{align} -The term $e_k$ is called the \textbf{truncation error}: -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\end{equation} -We can show that $\mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j)$ for all $0 \leq i \leq d-1$ and $0 \leq j \leq d-1$. - -% !split -\subsection*{Blocking Transformations, final expressions} - -We can then wrap up -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\end{align} -By repeated use of this equation we get $\mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X})$ for all $0 \leq i \leq d-1$. This has the consequence that -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \label{eq:convergence} -\end{align} - -% !split -\subsection*{More on the blocking method} - -Flyvbjerg and Petersen demonstrated that the sequence -$\{e_k\}_{k=0}^{d-1}$ is decreasing, and conjecture that the term -$e_k$ can be made as small as we would like by making $k$ (and hence -$d$) sufficiently large. The sequence is decreasing. -It means we can apply blocking transformations until -$e_k$ is sufficiently small, and then estimate $\mathrm{var}(\overline{X})$ by -$\widehat{\sigma}^2_k/n_k$. 
- -For an elegant solution and proof of the blocking method, see the recent article of \href{{https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304}}{Marius Jonsson (former MSc student of the Computational Physics group)}. - -% !split -\subsection*{Example code form last week} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{python} -# 2-electron VMC code for 2dim quantum dot with importance sampling -# Using gaussian rng for new positions and Metropolis- Hastings -# Added energy minimization -from math import exp, sqrt -from random import random, seed, normalvariate -import numpy as np -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D -from matplotlib import cm -from matplotlib.ticker import LinearLocator, FormatStrFormatter -from scipy.optimize import minimize -import sys -import os - -# Where to save data files -PROJECT_ROOT_DIR = "Results" -DATA_ID = "Results/EnergyMin" - -if not os.path.exists(PROJECT_ROOT_DIR): - os.mkdir(PROJECT_ROOT_DIR) - -if not os.path.exists(DATA_ID): - os.makedirs(DATA_ID) - -def data_path(dat_id): - return os.path.join(DATA_ID, dat_id) - -outfile = open(data_path("Energies.dat"),'w') - - -# Trial wave function for the 2-electron quantum dot in two dims -def WaveFunction(r,alpha,beta): - r1 = r[0,0]**2 + r[0,1]**2 - r2 = r[1,0]**2 + r[1,1]**2 - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = r12/(1+beta*r12) - return exp(-0.5*alpha*(r1+r2)+deno) - -# Local energy for the 2-electron quantum 
dot in two dims, using analytical local energy -def LocalEnergy(r,alpha,beta): - - r1 = (r[0,0]**2 + r[0,1]**2) - r2 = (r[1,0]**2 + r[1,1]**2) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - deno2 = deno*deno - return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12) - -# Derivate of wave function ansatz as function of variational parameters -def DerivativeWFansatz(r,alpha,beta): - - WfDer = np.zeros((2), np.double) - r1 = (r[0,0]**2 + r[0,1]**2) - r2 = (r[1,0]**2 + r[1,1]**2) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - deno2 = deno*deno - WfDer[0] = -0.5*(r1+r2) - WfDer[1] = -r12*r12*deno2 - return WfDer - -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector -def QuantumForce(r,alpha,beta): - - qforce = np.zeros((NumberParticles,Dimension), np.double) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12 - qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12 - return qforce - - -# Computing the derivative of the energy and the energy -def EnergyDerivative(x0): - - - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - EnergyDer = 0.0 - DeltaPsi = 0.0 - DerivativePsiE = 0.0 - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - 
- #Loop over MC MCcycles - for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - DerPsi = DerivativeWFansatz(PositionOld,alpha,beta) - DeltaPsi += DerPsi - energy += DeltaE - DerivativePsiE += DerPsi*DeltaE - - # We calculate mean values - energy /= NumberMCcycles - DerivativePsiE /= NumberMCcycles - DeltaPsi /= NumberMCcycles - EnergyDer = 2*(DerivativePsiE-DeltaPsi*energy) - return EnergyDer - - -# Computing the expectation value of the local energy -def Energy(x0): - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = 
WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - - #Loop over MC MCcycles - for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - energy += DeltaE - if Printout: - outfile.write('%f\n' %(energy/(MCcycle+1.0))) - # We calculate mean values - energy /= NumberMCcycles - return energy - -#Here starts the main program with variable declarations -NumberParticles = 2 -Dimension = 2 -# seed for rng generator -seed() -# Monte Carlo cycles for parameter optimization -Printout = False -NumberMCcycles= 10000 -# guess for variational parameters -x0 = np.array([0.9,0.2]) -# Using Broydens method to find optimal parameters -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True}) -x0 = res.x -# Compute the energy again with the optimal parameters and increased number of Monte Cycles -NumberMCcycles= 2**19 -Printout = True -FinalEnergy = Energy(x0) -EResult = np.array([FinalEnergy,FinalEnergy]) -outfile.close() -#nice printout with Pandas -import pandas as pd -from pandas import DataFrame -data 
={'Optimal Parameters':x0, 'Final Energy':EResult} -frame = pd.DataFrame(data) -print(frame) - -\end{minted} - - -% !split -\subsection*{Resampling analysis} - -The next step is then to use the above data sets and perform a -resampling analysis using the blocking method -The blocking code, based on the article of \href{{https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304}}{Marius Jonsson} is given here - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{python} -# Common imports -import os - -# Where to save the figures and data files -DATA_ID = "Results/EnergyMin" - -def data_path(dat_id): - return os.path.join(DATA_ID, dat_id) - -infile = open(data_path("Energies.dat"),'r') - -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt -from numpy.linalg import inv - -def block(x): - # preliminaries - n = len(x) - d = int(log2(n)) - s, gamma = zeros(d), zeros(d) - mu = mean(x) - - # estimate the auto-covariance and variances - # for each blocking transformation - for i in arange(0,d): - n = len(x) - # estimate autocovariance of x - gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) ) - # estimate variance of x - s[i] = var(x) - # perform blocking transformation - x = 0.5*(x[0::2] + x[1::2]) - - # generate the test observator M_k from the theorem - M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] ) )[::-1] - - # we need a list of magic numbers - q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181]) - - # use magic to determine when 
we should have stopped blocking - for k in arange(0,d): - if(M[k] < q[k]): - break - if (k >= d-1): - print("Warning: Use more data") - return mu, s[k]/2**(d-k) - - -x = loadtxt(infile) -(mean, var) = block(x) -std = sqrt(var) -import pandas as pd -from pandas import DataFrame -data ={'Mean':[mean], 'STDev':[std]} -frame = pd.DataFrame(data,index=['Values']) -print(frame) - - -\end{minted} - - -% !split -\subsection*{Content} -\begin{itemize} -\item Simple compiler options - -\item Tools to benchmark your code - -\item Machine architectures - -\item What is vectorization? - -\item How to measure code performance - -\item Parallelization with OpenMP - -\item Parallelization with MPI - -\item Vectorization and parallelization, examples -\end{itemize} - -\noindent -% !split -\subsection*{Optimization and profiling} - -% --- begin paragraph admon --- -\paragraph{} - -Till now we have not paid much attention to speed and possible optimization possibilities -inherent in the various compilers. We have compiled and linked as - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -c++ -c mycode.cpp -c++ -o mycode.exe mycode.o - -\end{minted} - -For Fortran replace with for example \textbf{gfortran} or \textbf{ifort}. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. 
- -It is instructive to look up the compiler manual for further instructions by writing - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -man c++ - -\end{minted} -% --- end paragraph admon --- - - -% !split -\subsection*{More on optimization} - -% --- begin paragraph admon --- -\paragraph{} -We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identify potential parallelism, include automatic vectorization or replace a division with a reciprocal -and a multiplication if this speeds up the code. - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -c++ -O3 -c mycode.cpp -c++ -O3 -o mycode.exe mycode.o - -\end{minted} - -This (other options are -O2 or -Ofast) is the recommended option. -% --- end paragraph admon --- - - -% !split -\subsection*{Optimization and profiling} - -% --- begin paragraph admon --- -\paragraph{} -It is also useful to profile your program under the development stage. -You would then compile with - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -c++ -pg -O3 -c mycode.cpp -c++ -pg -O3 -o mycode.exe mycode.o - -\end{minted} - -After you have run the code you can obtain the profiling information via - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -gprof mycode.exe > ProfileOutput - -\end{minted} - -When you have profiled properly your code, you must take out this option as it -slows down performance. -For memory tests use \href{{http://www.valgrind.org}}{valgrind}. An excellent environment for all these aspects, and much more, is Qt creator. 
-% --- end paragraph admon --- - - - -% !split -\subsection*{Optimization and debugging} - -% --- begin paragraph admon --- -\paragraph{} -Adding debugging options is a very useful alternative under the development stage of a program. -You would then compile with - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -c++ -g -O0 -c mycode.cpp -c++ -g -O0 -o mycode.exe mycode.o - -\end{minted} - -This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option \textbf{-O0}. -% --- end paragraph admon --- - - - -% --- begin paragraph admon --- -\paragraph{Other optimization flags.} -Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag \textbf{-ftrapv} does this for the CLANG compiler on OS X operating systems. -% --- end paragraph admon --- - - - -% !split -\subsection*{Other hints} - -% --- begin paragraph admon --- -\paragraph{} -In general, irrespective of compiler options, it is useful to -\begin{itemize} -\item avoid if tests or call to functions inside loops, if possible. - -\item avoid multiplication with constants inside loops if possible -\end{itemize} - -\noindent -Here is an example of a part of a program where specific operations lead to a slower code - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -k = n-1; -for (i = 0; i < n; i++){ - a[i] = b[i] +c*d; - e = g[k]; -} - -\end{minted} - -A better code is - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -temp = c*d; -for (i = 0; i < n; i++){ - a[i] = b[i] + temp; -} -e = g[n-1]; - -\end{minted} - -Here we avoid a repeated multiplication inside a loop. 
-Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -% --- end paragraph admon --- - - - -% !split -\subsection*{Vectorization and the basic idea behind parallel computing} - -% --- begin paragraph admon --- -\paragraph{} -Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements. -\begin{itemize} -\item Pursuit of shorter computation time and larger simulation size gives rise to parallel computing. - -\item Multiple processors are involved to solve a global problem. - -\item The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer. -\end{itemize} - -\noindent -Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models. -% --- end paragraph admon --- - - - -% !split -\subsection*{A rough classification of hardware models} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Conventional single-processor computers are named SISD (single-instruction-single-data) machines. - -\item SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data. - -\item Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data. 
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - -% !split -\subsection*{Shared memory and distributed memory} - -% --- begin paragraph admon --- -\paragraph{} -One way of categorizing modern parallel computers is to look at the memory configuration. -\begin{itemize} -\item In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory. - -\item In distributed memory systems each CPU has its own memory. -\end{itemize} - -\noindent -The CPUs are connected by some network and may exchange messages. -% --- end paragraph admon --- - - - -% !split -\subsection*{Different parallel programming paradigms} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Task parallelism}: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use. - -\item \textbf{Data parallelism}: use of multiple threads (e.g.~one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - -% !split -\subsection*{Different parallel programming paradigms} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Message passing}: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization. 
- -\item This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{What is vectorization?} -Vectorization is a special -case of \textbf{Single Instructions Multiple Data} (SIMD) to denote a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied with SIMD instructions. - -Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at the time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\end{minted} - -If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at the time. - -% !split -\subsection*{Number of elements that can acted upon} -A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. - -The number of elements which can be operated on in parallel -range from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. 
Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. - -In summary, our instructions operate on 128 bit (16 byte) operands -\begin{itemize} -\item 4 floats or ints - -\item 2 doubles - -\item Data paths 128 bits wide for vector unit -\end{itemize} - -\noindent -% !split -\subsection*{Number of elements that can be acted upon, examples} -We start with the simple scalar operations given by - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\end{minted} - -If the code is not vectorized and we have a 128-bit register to store a 32-bit floating point number, -it means that we have $3\times 32$ bits that are not used. - -We have thus unused space in our SIMD registers. These registers could hold three additional 32-bit numbers. - -% !split -\subsection*{Operation counts for scalar operation} -The code - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\end{minted} - -has for $n$ repeats -\begin{enumerate} -\item one load for $c[i]$ in address 1 - -\item one load for $b[i]$ in address 2 - -\item add $c[i]$ and $b[i]$ to give $a[i]$ - -\item store $a[i]$ in address 3 -\end{enumerate} - -\noindent -% !split -\subsection*{Number of elements that can be acted upon, examples} -If we vectorize the code, we can perform, with a 128-bit register, four simultaneous operations, that is -we have - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -for (i = 0; i < n; i+=4){ - a[i] = b[i] + c[i]; - a[i+1] = b[i+1] + c[i+1]; - a[i+2] = b[i+2] + c[i+2]; - a[i+3] = b[i+3] + c[i+3]; -} - -\end{minted} - - -Four additions are now done in a single step.
- -% !split -\subsection*{Number of operations when vectorized} -For $n/4$ repeats assuming floats or integers -\begin{enumerate} -\item one vector load for $c[i]$ in address 1 - -\item one load for $b[i]$ in address 2 - -\item add $c[i]$ and $b[i]$ to give $a[i]$ - -\item store $a[i]$ in address 2 -\end{enumerate} - -\noindent -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp}}{A simple test case with and without vectorization}} -We implement these operations in a simple c++ program that computes at the end the norm of a vector. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] -#include -#include -#include -#include -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double s = 1.0/sqrt( (double) n); - double *a, *b, *c; - // Start timing - clock_t start, finish; - start = clock(); -// Allocate space for the vectors to be used - a = new double [n]; b = new double [n]; c = new double [n]; - // Define parallel region - // Set up values for vectors a and b - for (int i = 0; i < n; i++){ - double angle = 2.0*M_PI*i/ (( double ) n); - a[i] = s*(sin(angle) + cos(angle)); - b[i] = s*sin(2.0*angle); - c[i] = 0.0; - } - // Then perform the vector addition - for (int i = 0; i < n; i++){ - c[i] += a[i]+b[i]; - } - // Compute now the norm-2 - double Norm2 = 0.0; - for (int i = 0; i < n; i++){ - Norm2 += c[i]*c[i]; - } - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for norm computation=" << timeused << endl; - cout << " Norm-2 = " << Norm2 << endl; - // Free up space - 
delete[] a; - delete[] b; - delete[] c; - return 0; -} - - - - - -\end{Verbatim} - - -% !split -\subsection*{Compiling with and without vectorization} -We can compile and link without vectorization using the clang c++ compiler - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang -o novec.x vecexample.cpp - -\end{minted} - -and with vectorization (and additional optimizations) - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp - -\end{minted} - -The speedup depends on the size of the vectors. In the example here we have run with $10^7$ elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -Compphys:~ hjensen$ ./vec.x 10000000 -Time used for norm computation=0.04720500000 -Compphys:~ hjensen$ ./novec.x 10000000 -Time used for norm computation=0.03311700000 - -\end{minted} - -This particular C++ compiler speeds up the above loop operations with a factor of 1.5 -Performing the same operations for $10^9$ elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -Compphys:~ hjensen$ ./vec.x 1000000000 -Time used for norm computation=58.41391100 -Compphys:~ hjensen$ ./novec.x 1000000000 -Time used for norm computation=46.51295300 - -\end{minted} - -We will discuss these issues further in the next slides. 
- -% !split -\subsection*{Compiling with and without vectorization using clang} -We can compile and link without vectorization with the clang compiler - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang++ -o -fno-vectorize novec.x vecexample.cpp - -\end{minted} - -and with vectorization - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp - -\end{minted} - -We can also add vectorization analysis, see for example - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang++ -O3 -Rpass-analysis=loop-vectorize -o vec.x vecexample.cpp - -\end{minted} - -or figure out if vectorization was missed - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -clang++ -O3 -Rpass-missed=loop-vectorize -o vec.x vecexample.cpp - -\end{minted} - - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, criteria} - -Not all loops can be vectorized, as discussed in \href{{https://software.intel.com/en-us/articles/a-guide-to-auto-vectorization-with-intel-c-compilers}}{Intel's guide to vectorization}. - -An important criterion is that the loop counter $n$ is known at the entry of the loop. - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int j = 0; j < n; j++) { - a[j] = cos(j*1.0); - } - -\end{minted} - -The variable $n$ does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.
- -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, exit criteria} - -An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int j = 0; j < n; j++) { - a[j] = cos(j*1.0); - if (a[j] < 0 ) break; - } - -\end{minted} - -Avoid data-dependent loop termination conditions and opt for a single loop bound $n$ that is known at the entry of the loop. The lower and upper bounds have to be kept fixed within the loop. - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, straight-line code} - -SIMD instructions perform the same type of operations multiple times. -A \textbf{switch} statement thus leads to a non-vectorizable loop, since different iterations are not allowed to follow different control-flow paths. -The following code can however be vectorized since the \textbf{if} statement is implemented as a masked assignment. - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int j = 0; j < n; j++) { - double x = cos(j*1.0); - if (x > 0 ) { - a[j] = x*sin(j*2.0); - } - else { - a[j] = 0.0; - } - } - -\end{minted} - -These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as \textbf{switch}, \textbf{goto}, or \textbf{return} statements or \textbf{if} constructs that cannot be treated as masked assignments.
- -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, nested loops} - -Only the innermost loop of the following example is vectorized - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - a[i][j] += b[i][j]; - } - } - -\end{minted} - -The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations. - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, function calls} - -Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -$\sin{x}$, $\cos{x}$, $\exp{x}$ etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int i = 0; i < n; i++) { - a[i] = log10(i)*cos(i); - } - -\end{minted} - -Similarly, \textbf{inline} functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called. - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, data dependencies} - -One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - double b = 15.; - for (int i = 1; i < n; i++) { - a[i] = a[i-1] + b; - } - -\end{minted} - -is an example of flow dependency and results in wrong numerical results if vectorized. 
For a scalar operation, the value $a[i-1]$ computed during the previous iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the old values of $a[0]$, $a[1]$, $a[2]$ and $a[3]$ (as they were before these iterations were executed) will be loaded into the right-hand side and produce wrong results. That is, we have - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - a[1] = a[0] + b; - a[2] = a[1] + b; - a[3] = a[2] + b; - a[4] = a[3] + b; - -\end{minted} - -and if the two first iterations are executed at the same time by the SIMD instruction, the value of say $a[1]$ could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results. - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, more data dependencies} - -On the other hand, a so-called -write-after-read statement can be vectorized. The following code - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - double b = 15.; - for (int i = 1; i < n; i++) { - a[i-1] = a[i] + b; - } - -\end{minted} - -is an example of an anti-dependency (write-after-read) that can be vectorized since no iteration with a higher value of $i$ -can complete before an iteration with a lower value of $i$. However, such code leads to problems with parallelization. - -% !split -\subsection*{Automatic vectorization and vectorization inhibitors, memory stride} - -For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. With pointers, the compiler can often not tell if it is safe to vectorize the code. - -When dealing with arrays, you should also avoid non-unit memory stride, since this considerably slows down vectorized code.
When you access array element, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - a[i][j] += b[i][j]; - } - } - -\end{minted} - - -% !split -\subsection*{Memory management} -The main memory contains the program data -\begin{enumerate} -\item Cache memory contains a copy of the main memory data - -\item Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory - -\item Registers contain working data only -\begin{itemize} - - \item Modern CPUs perform most or all operations only on data in register - -\end{itemize} - -\noindent -\item Multiple Cache memories contain a copy of the main memory data -\begin{itemize} - - \item Cache items accessed by their address in main memory - - \item L1 cache is the fastest but has the least capacity - - \item L2, L3 provide intermediate performance/size tradeoffs -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -Loads and stores to memory can be as important as floating point operations when we measure performance. - -% !split -\subsection*{Memory and communication} - -\begin{enumerate} -\item Most communication in a computer is carried out in chunks, blocks of bytes of data that move together - -\item In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines -\begin{itemize} - - \item Lines are typically 64-128 bytes, or 8-16 double precision words - - \item Even if you do not use the data, it is moved and occupies space in the cache -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -Many of these performance features are not captured in most programming languages. 
- -% !split -\subsection*{Measuring performance} - -How do we measure performance? What is wrong with this code to time a loop? - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{text} - clock_t start, finish; - start = clock(); - for (int j = 0; j < i; j++) { - a[j] = b[j]+b[j]*c[j]; - } - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - -\end{minted} - - -% !split -\subsection*{Problems with measuring time} -\begin{enumerate} -\item Timers are not infinitely accurate - -\item All clocks have a granularity, the minimum time that they can measure - -\item The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick) - -\item Always know what your clock granularity is - -\item Ensure that your measurement is for a long enough duration (say 100 times the \textbf{tick}) -\end{enumerate} - -\noindent -% !split -\subsection*{Problems with cold start} - -What happens when the code is executed? The assumption is that the code is ready to -execute. But -\begin{enumerate} -\item Code may still be on disk, and not even read into memory. - -\item Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring) - -\item Multiple tests often necessary to ensure that cold start effects are not present - -\item Special effort often required to ensure data in the intended part of the memory hierarchy. -\end{enumerate} - -\noindent -% !split -\subsection*{Problems with smart compilers} - -\begin{enumerate} -\item If the result of the computation is not used, the compiler may eliminate the code - -\item Performance will look impossibly fantastic - -\item Even worse, eliminate some of the code so the performance looks plausible - -\item Ensure that the results are (or may be) used. 
-\end{enumerate} - -\noindent -% !split -\subsection*{Problems with interference} -\begin{enumerate} -\item Other activities are sharing your processor -\begin{itemize} - - \item Operating system, system demons, other users - - \item Some parts of the hardware do not always perform with exactly the same performance - -\end{itemize} - -\noindent -\item Make multiple tests and report - -\item Easy choices include -\begin{itemize} - - \item Average tests represent what users might observe over time -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -% !split -\subsection*{Problems with measuring performance} -\begin{enumerate} -\item Accurate, reproducible performance measurement is hard - -\item Think carefully about your experiment: - -\item What is it, precisely, that you want to measure? - -\item How representative is your test to the situation that you are trying to measure? -\end{enumerate} - -\noindent -% !split -\subsection*{Thomas algorithm for tridiagonal linear algebra equations} - -% --- begin paragraph admon --- -\paragraph{} -\[ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -\] -% --- end paragraph admon --- - - - -% !split -\subsection*{Thomas algorithm, forward substitution} - -% --- begin paragraph admon --- -\paragraph{} -The first step is to multiply the first row by $a_0/b_0$ and subtract it from the second row. This is known as the forward substitution step. We obtain then -\[ - a_i = 0, -\] - -\[ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -\] -and -\[ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. 
-\] -At this point the simplified equation, with only an upper triangular matrix, takes the form -\[ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -\] -% --- end paragraph admon --- - - - -% !split -\subsection*{Thomas algorithm, backward substitution} - -% --- begin paragraph admon --- -\paragraph{} -The next step is the backward substitution step. The last row is multiplied by $c_{m-2}/b_{m-1}$ and subtracted from the second to last row, thus eliminating $c_{m-2}$ from the second to last row. The general backward substitution procedure is -\[ - c_i = 0, -\] -and -\[ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i -\] -All that remains to be computed is the solution, which is the very straightforward process of -\[ -x_i = \frac{f_i}{b_i} -\] -% --- end paragraph admon --- - - - -% !split -\subsection*{Thomas algorithm and counting of operations (floating point and memory)} - -% --- begin paragraph admon --- -\paragraph{} - -In this specific case we have the following counts of memory and floating point operations - -\begin{itemize} -\item Memory Reads: $14(N-2)$; - -\item Memory Writes: $4(N-2)$; - -\item Subtractions: $3(N-2)$; - -\item Multiplications: $3(N-2)$; - -\item Divisions: $4(N-2)$.
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - - - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// Forward substitution -// Note that we can simplify by precalculating a[i-1]/b[i-1] - for (int i=1; i < n; i++) { - b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1]; - f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1]; - } - x[n-1] = f[n-1] / b[n-1]; - // Backwards substitution - for (int i = n-2; i >= 0; i--) { - f[i] = f[i] - c[i]*f[i+1]/b[i+1]; - x[i] = f[i]/b[i]; - } - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp}}{Example: Transpose of a matrix}} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] -#include -#include -#include -#include -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double **A, **B; - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; - for (int i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - } - // Set up values for matrix A - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - A[i][j] = cos(i*1.0)*sin(j*3.0); - } - } - clock_t start, finish; - start = clock(); - // Then compute the transpose - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - B[i][j]= A[j][i]; - } - } - - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for setting up transpose of matrix=" << timeused << 
endl; - - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - } - delete[] A; - delete[] B; - return 0; -} - - -\end{Verbatim} - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp}}{Matrix-matrix multiplication}} -This the matrix-matrix multiplication code with plain c++ memory allocation. It computes at the end the Frobenius norm. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{text} -#include -#include -#include -#include -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double s = 1.0/sqrt( (double) n); - double **A, **B, **C; - // Start timing - clock_t start, finish; - start = clock(); - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (int i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - } - // Set up values for matrix A and B and zero matrix C - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - double angle = 2.0*M_PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - } - } - // Then perform the matrix-matrix multiplication - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - double sum = 0.0; - for (int k = 0; k < n; k++) { - sum += B[i][k]*A[k][j]; - } - C[i][j] = sum; - } - } - // Compute now the Frobenius norm - double Fsum = 0.0; - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - Fsum += C[i][j]*C[i][j]; - } - } - Fsum = sqrt(Fsum); - finish = clock(); - double timeused = (double) (finish - 
start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for matrix-matrix multiplication=" << timeused << endl; - cout << " Frobenius norm = " << Fsum << endl; - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - } - delete[] A; - delete[] B; - delete[] C; - return 0; -} - -\end{minted} - - -% !split -\subsection*{How do we define speedup? Simplest form} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Speedup measures the ratio of performance between two objects - -\item Versions of same code, with different number of processors - -\item Serial and vector versions - -\item Try different programing languages, c++ and Fortran - -\item Two algorithms computing the \textbf{same} result -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{How do we define speedup? Correct baseline} - -% --- begin paragraph admon --- -\paragraph{} -The key is choosing the correct baseline for comparison -\begin{itemize} -\item For our serial vs.~vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off -\begin{itemize} - - \item For parallel applications, this is much harder: -\begin{itemize} - - \item Choice of algorithm, decomposition, performance of baseline case etc. -\end{itemize} - -\noindent -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Parallel speedup} - -% --- begin paragraph admon --- -\paragraph{} -For parallel applications, speedup is typically defined as -\begin{itemize} -\item Speedup $=T_1/T_p$ -\end{itemize} - -\noindent -Here $T_1$ is the time on one processor and $T_p$ is the time using $p$ processors. -\begin{itemize} - \item Can the speedup become larger than $p$? 
That means using $p$ processors is more than $p$ times faster than using one processor. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Speedup and memory} - -% --- begin paragraph admon --- -\paragraph{} -The speedup on $p$ processors can -be greater than $p$ if memory usage is optimal! -Consider the case of a memory-bound computation with $M$ words of memory -\begin{itemize} - \item If $M/p$ fits into cache while $M$ does not, the time to access memory will be different in the two cases: - - \item $T_1$ uses the main memory bandwidth - - \item $T_p$ uses the appropriate cache bandwidth -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Upper bounds on speedup} - -% --- begin paragraph admon --- -\paragraph{} -Assume that almost all parts of a code are perfectly -parallelizable (fraction $f$). The remainder, -fraction $(1-f)$, cannot be parallelized at all. - -That is, there is work that takes time $W$ on one process; a fraction $f$ of that work will take -time $Wf/p$ on $p$ processors. -\begin{itemize} -\item What is the maximum possible speedup as a function of $f$? -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Amdahl's law} - -% --- begin paragraph admon --- -\paragraph{} -On one processor we have -\[ -T_1 = (1-f)W + fW = W -\] -On $p$ processors we have -\[ -T_p = (1-f)W + \frac{fW}{p}, -\] -resulting in a speedup of -\[ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -\] - -As $p$ goes to infinity, $fW/p$ goes to zero, and the maximum speedup is -\[ -\frac{1}{1-f}, -\] -meaning that -if $f = 0.99$ (all but $1\%$ parallelizable), the maximum speedup -is $1/(1-.99)=100$! -% --- end paragraph admon --- - - - -% !split -\subsection*{How much is parallelizable} - -% --- begin paragraph admon --- -\paragraph{} -If any non-parallel code slips into the -application, the parallel -performance is limited.
- -In many simulations, however, the fraction of non-parallelizable work -is $10^{-6}$ or less due to large arrays or objects that are perfectly parallelizable. -% --- end paragraph admon --- - - - -% !split -\subsection*{Today's situation of parallel computing} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective. - -\item Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems. - -\item Modern nodes nowadays have several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This often leads to codes which use both MPI and OpenMP. -\end{itemize} - -\noindent -Our lectures will focus on both MPI and OpenMP. -% --- end paragraph admon --- - - - -% !split -\subsection*{Overhead present in parallel computing} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Uneven load balance}: not all the processors can perform useful work at all times. - -\item \textbf{Overhead of synchronization} - -\item \textbf{Overhead of communication} - -\item \textbf{Extra computation due to parallelization} -\end{itemize} - -\noindent -Due to the above overhead and the fact that certain parts of a sequential -algorithm cannot be parallelized, we may not achieve an optimal parallelization. -% --- end paragraph admon --- - - - -% !split -\subsection*{Parallelizing a sequential algorithm} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Identify the part(s) of a sequential algorithm that can be executed in parallel.
This is the difficult part, - -\item Distribute the global work and data among $P$ processors. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Strategies} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC. - -\item When you are convinced that your codes run correctly, you can start your production runs on available supercomputers. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{How do I run MPI on a PC/Laptop? MPI} - -% --- begin paragraph admon --- -\paragraph{} -To install MPI is rather easy on hardware running unix/linux as operating systems, follow simply the instructions from the \href{{https://www.open-mpi.org/}}{OpenMPI website}. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -\begin{itemize} -\item Compile with mpicxx/mpic++ or mpif90 -\end{itemize} - -\noindent - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - # Compile and link - mpic++ -O3 -o nameofprog.x nameofprog.cpp - # run code with for example 8 processes using mpirun/mpiexec - mpiexec -n 8 ./nameofprog.x - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Can I do it on my own PC/laptop? OpenMP installation} - -% --- begin paragraph admon --- -\paragraph{} -If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: - -\begin{itemize} -\item For OpenMP, the compile option \textbf{-fopenmp} is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below. 
- -\item For OS X users however, install \textbf{libomp} -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - brew install libomp - -\end{minted} - -and compile and link as - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -c++ -o -lomp - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Installing MPI} - -% --- begin paragraph admon --- -\paragraph{} -For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager) - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - sudo apt-get install libopenmpi-dev - sudo apt-get install openmpi-bin - -\end{minted} - -For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - brew install openmpi - -\end{minted} - -When running an executable (code.x), run as - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - mpirun -n 10 ./code.x - -\end{minted} - -where we indicate that we want the number of processes to be 10. -% --- end paragraph admon --- - - - -% !split -\subsection*{Installing MPI and using Qt} - -% --- begin paragraph admon --- -\paragraph{} -With openmpi installed, when using Qt, add to your .pro file the instructions \href{{http://dragly.org/2012/03/14/developing-mpi-applications-in-qt-creator/}}{here} - -You may need to tell Qt where openmpi is stored. 
-% --- end paragraph admon --- - - - -% !split -\subsection*{What is Message Passing Interface (MPI)?} - -% --- begin paragraph admon --- -\paragraph{} - -\textbf{MPI} is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. - -MPI programs should be able to run -on all possible machines and run with all MPI implementations without change. - -An MPI computation is a collection of processes communicating with messages. -% --- end paragraph admon --- - - -% !split -\subsection*{Going Parallel with MPI} - -% --- begin paragraph admon --- -\paragraph{} -\textbf{Task parallelism}: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. - -MPI is a message-passing library where all the routines -have corresponding C/C++-binding - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - MPI_Command_name - -\end{minted} - -and Fortran-binding (routine names are in uppercase, but can also be in lower case) - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] - MPI_COMMAND_NAME - -\end{Verbatim} -% --- end paragraph admon --- - - - -% !split -\subsection*{MPI is a library} - -% --- begin paragraph admon --- -\paragraph{} -MPI is a library specification for the message passing interface, -proposed as a standard. - -\begin{itemize} -\item independent of hardware; - -\item not a language or compiler specification; - -\item not a specific implementation or product. -\end{itemize} - -\noindent -A message passing standard for portability and ease-of-use.
-Designed for high performance. - -Insert communication and synchronization functions where necessary. -% --- end paragraph admon --- - - - -% !split -\subsection*{Bindings to MPI routines} - -% --- begin paragraph admon --- -\paragraph{} - -MPI is a message-passing library where all the routines -have corresponding C/C++-binding - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - MPI_Command_name - -\end{minted} - -and Fortran-binding (routine names are in uppercase, but can also be in lower case) - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] - MPI_COMMAND_NAME - -\end{Verbatim} - -The discussion in these slides focuses on the C++ binding. -% --- end paragraph admon --- - - - -% !split -\subsection*{Communicator} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item A group of MPI processes with a name (context). - -\item Any process is identified by its rank. The rank is only meaningful within a particular communicator. - -\item By default the communicator contains all the MPI processes. -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - MPI_COMM_WORLD - -\end{minted} - -\begin{itemize} -\item Mechanism to identify subset of processes. - -\item Promotes modular design of parallel libraries. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Some of the most important MPI functions} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item $MPI\_Init$ - initiate an MPI computation - -\item $MPI\_Finalize$ - terminate the MPI computation and clean up - -\item $MPI\_Comm\_size$ - how many processes participate in a given MPI communicator? - -\item $MPI\_Comm\_rank$ - which one am I? (A number between 0 and size-1.) 
- -\item $MPI\_Send$ - send a message to a particular process within an MPI communicator - -\item $MPI\_Recv$ - receive a message from a particular process within an MPI communicator - -\item $MPI\_reduce$ or $MPI\_Allreduce$, send and receive messages -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp}}{The first MPI C/C++ program}} - -% --- begin paragraph admon --- -\paragraph{} - -Let every process write "Hello world" (oh not this program again!!) on the standard output. - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -using namespace std; -#include -#include -int main (int nargs, char* args[]) -{ -int numprocs, my_rank; -// MPI initializations -MPI_Init (&nargs, &args); -MPI_Comm_size (MPI_COMM_WORLD, &numprocs); -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); -cout << "Hello world, I have rank " << my_rank << " out of " - << numprocs << endl; -// End MPI -MPI_Finalize (); - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{The Fortran program} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] -PROGRAM hello -INCLUDE "mpif.h" -INTEGER:: size, my_rank, ierr - -CALL MPI_INIT(ierr) -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr) -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size -CALL MPI_FINALIZE(ierr) - -END PROGRAM hello - -\end{Verbatim} -% --- end paragraph admon --- - - - -% !split -\subsection*{Note 1} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item The output to screen is not ordered since all processes are trying to write to screen simultaneously. 
- -\item It is the operating system which opts for an ordering. - -\item If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp}}{Ordered output with MPIBarrier}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -int main (int nargs, char* args[]) -{ - int numprocs, my_rank, i; - MPI_Init (&nargs, &args); - MPI_Comm_size (MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); - for (i = 0; i < numprocs; i++) {} - MPI_Barrier (MPI_COMM_WORLD); - if (i == my_rank) { - cout << "Hello world, I have rank " << my_rank << - " out of " << numprocs << endl;} - MPI_Finalize (); - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Note 2} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Here we have used the $MPI\_Barrier$ function to ensure that every process has completed its set of instructions in a particular order. - -\item A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here $MPI\_COMM\_WORLD$) have called $MPI\_Barrier$. - -\item The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like $MPI\_ALLREDUCE$ to be discussed later, have the same property; that is, no process can exit the operation until all processes have started. -\end{itemize} - -\noindent -However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes.
In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp}}{Ordered output}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - -\begin{Verbatim}[numbers=none,fontsize=\fontsize{9pt}{9pt},baselinestretch=0.95] -..... -int numprocs, my_rank, flag; -MPI_Status status; -MPI_Init (&nargs, &args); -MPI_Comm_size (MPI_COMM_WORLD, &numprocs); -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); -if (my_rank > 0) -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, - MPI_COMM_WORLD, &status); -cout << "Hello world, I have rank " << my_rank << " out of " -<< numprocs << endl; -if (my_rank < numprocs-1) -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, - 100, MPI_COMM_WORLD); -MPI_Finalize (); - -\end{Verbatim} -% --- end paragraph admon --- - - - -% !split -\subsection*{Note 3} - -% --- begin paragraph admon --- -\paragraph{} - -The basic sending of messages is given by the function $MPI\_SEND$, which in C/C++ -is defined as - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -int MPI_Send(void *buf, int count, - MPI_Datatype datatype, - int dest, int tag, MPI_Comm comm)} - -\end{minted} - -This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable \textbf{buf} is the variable we wish to send while \textbf{count} -is the number of variables we are passing. If we are passing only a single value, this should be 1. - -If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be $10\times 10=100$ -since we are actually passing 100 values.
-% --- end paragraph admon --- - - - -% !split -\subsection*{Note 4} - -% --- begin paragraph admon --- -\paragraph{} - -Once you have sent a message, you must receive it on another task. The function $MPI\_RECV$ -is similar to the send call. - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -int MPI_Recv( void *buf, int count, MPI_Datatype datatype, - int source, - int tag, MPI_Comm comm, MPI_Status *status ) - -\end{minted} - - -The arguments that are different from those in MPI\_SEND are -\textbf{buf} which is the name of the variable where you will be storing the received data, -\textbf{source} which replaces the destination in the send command. This is the rank (ID) of the sender. - -Finally, we have used the $MPI\_Status$ argument \textbf{status}, -where one can check if the receive was completed. - -The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp}}{Numerical integration in parallel}} - -% --- begin paragraph admon --- -\paragraph{Integrating $\pi$.} - -\begin{itemize} -\item The code example computes $\pi$ using the trapezoidal rule. - -\item The trapezoidal rule -\end{itemize} - -\noindent -\[ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -\] -Click \href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp}}{on this link} for the full program.
-% --- end paragraph admon --- - - - -% !split -\subsection*{Dissection of trapezoidal rule with $MPI\_reduce$} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// Trapezoidal rule and numerical integration usign MPI -using namespace std; -#include -#include - -// Here we define various functions called by the main program - -double int_function(double ); -double trapezoidal_rule(double , double , int , double (*)(double)); - -// Main function begins here -int main (int nargs, char* args[]) -{ - int n, local_n, numprocs, my_rank; - double a, b, h, local_a, local_b, total_sum, local_sum; - double time_start, time_end, total_time; - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - // MPI initializations - MPI_Init (&nargs, &args); - MPI_Comm_size (MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); - time_start = MPI_Wtime(); - // Fixed values for a, b and n - a = 0.0 ; b = 1.0; n = 1000; - h = (b-a)/n; // h is the same for all processes - local_n = n/numprocs; - // make sure n > numprocs, else integer division gives zero - // Length of each process' interval of - // integration = local_n*h. 
- local_a = a + my_rank*local_n*h; - local_b = local_a + local_n*h; - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Integrating with \textbf{MPI}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} - total_sum = 0.0; - local_sum = trapezoidal_rule(local_a, local_b, local_n, - &int_function); - MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, - MPI_SUM, 0, MPI_COMM_WORLD); - time_end = MPI_Wtime(); - total_time = time_end-time_start; - if ( my_rank == 0) { - cout << "Trapezoidal rule = " << total_sum << endl; - cout << "Time = " << total_time - << " on number of processors: " << numprocs << endl; - } - // End MPI - MPI_Finalize (); - return 0; -} // end of main program - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{How do I use $MPI\_reduce$?} - -% --- begin paragraph admon --- -\paragraph{} - -Here we have used - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -MPI_reduce( void *senddata, void* resultdata, int count, - MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm) - -\end{minted} - - -The two variables $senddata$ and $resultdata$ are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable $count$ represents the total dimensionality, 1 in case of just one variable, -while $MPI\_Datatype$ -defines the type of variable which is sent and received. - -The new feature is $MPI\_Op$. It defines the type -of operation we want to do. 
-% --- end paragraph admon --- - - - -% !split -\subsection*{More on $MPI\_Reduce$} - -% --- begin paragraph admon --- -\paragraph{} -In our case, since we are summing -the rectangle contributions from every process we define $MPI\_Op = MPI\_SUM$. -If we have an array or matrix we can search for the largest or smallest element by sending either $MPI\_MAX$ or -$MPI\_MIN$. If we want the location as well (which array element) we simply transfer -$MPI\_MAXLOC$ or $MPI\_MINLOC$. If we want the product we write $MPI\_PROD$. - -$MPI\_Allreduce$ is defined as - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -MPI_Allreduce( void *senddata, void* resultdata, int count, - MPI_Datatype datatype, MPI_Op, MPI_Comm comm) - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - -We use $MPI\_Reduce$ to collect data from each process. Note also the use of the function -$MPI\_Wtime$. - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// this function defines the function to integrate -double int_function(double x) -{ - double value = 4./(1.+x*x); - return value; -} // end of function to evaluate - - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// this function defines the trapezoidal rule -double trapezoidal_rule(double a, double b, int n, - double (*func)(double)) -{ - double trapez_sum; - double fa, fb, x, step; - int j; - step=(b-a)/((double) n); - fa=(*func)(a)/2. ; - fb=(*func)(b)/2.
; - trapez_sum=0.; - for (j=1; j <= n-1; j++){ - x=j*step+a; - trapez_sum+=(*func)(x); - } - trapez_sum=(trapez_sum+fb+fa)*step; - return trapez_sum; -} // end trapezoidal_rule - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp}}{The quantum dot program for two electrons}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// Variational Monte Carlo for atoms with importance sampling, slater det -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG -#include "mpi.h" -#include -#include -#include -#include -#include -#include -#include "vectormatrixclass.h" - -using namespace std; -// output file as global variable -ofstream ofile; -// the step length and its squared inverse for the second derivative -// Here we define global variables used in various functions -// These can be 
changed by using classes -int Dimension = 2; -int NumberParticles = 2; // we fix also the number of electrons to be 2 - -// declaration of functions - -// The Mc sampling for the variational Monte Carlo -void MonteCarloSampling(int, double &, double &, Vector &); - -// The variational wave function -double WaveFunction(Matrix &, Vector &); - -// The local energy -double LocalEnergy(Matrix &, Vector &); - -// The quantum force -void QuantumForce(Matrix &, Matrix &, Vector &); - - -// inline function for single-particle wave function -inline double SPwavefunction(double r, double alpha) { - return exp(-alpha*r*0.5); -} - -// inline function for derivative of single-particle wave function -inline double DerivativeSPwavefunction(double r, double alpha) { - return -r*alpha; -} - -// function for absolute value of relative distance -double RelativeDistance(Matrix &r, int i, int j) { - double r_ij = 0; - for (int k = 0; k < Dimension; k++) { - r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k)); - } - return sqrt(r_ij); -} - -// inline function for derivative of Jastrow factor -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){ - return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2)); -} - -// function for square of position of single particle -double singleparticle_pos2(Matrix &r, int i) { - double r_single_particle = 0; - for (int j = 0; j < Dimension; j++) { - r_single_particle += r(i,j)*r(i,j); - } - return r_single_particle; -} - -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x, - double *f, double stpmax, int *check, double (*func)(Vector &p)); - -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret, - double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g)); - -static double sqrarg; -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 
0.0 : sqrarg*sqrarg) - - -static double maxarg1,maxarg2; -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\ - (maxarg1) : (maxarg2)) - - -// Begin of main program - -int main(int argc, char* argv[]) -{ - - // MPI initializations - int NumberProcesses, MyRank, NumberMCsamples; - MPI_Init (&argc, &argv); - MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses); - MPI_Comm_rank (MPI_COMM_WORLD, &MyRank); - double StartTime = MPI_Wtime(); - if (MyRank == 0 && argc <= 1) { - cout << "Bad Usage: " << argv[0] << - " Read also output file on same line and number of Monte Carlo cycles" << endl; - } - // Read filename and number of Monte Carlo cycles from the command line - if (MyRank == 0 && argc > 2) { - string filename = argv[1]; // first command line argument after name of program - NumberMCsamples = atoi(argv[2]); - string fileout = filename; - string argument = to_string(NumberMCsamples); - // Final filename as filename+NumberMCsamples - fileout.append(argument); - ofile.open(fileout); - } - // broadcast the number of Monte Carlo samples - MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD); - // Two variational parameters only - Vector VariationalParameters(2); - int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; - // Loop over variational parameters - for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){ - for (double beta = 0.1; beta <= 0.5; beta +=0.05){ - VariationalParameters(0) = alpha; // value of alpha - VariationalParameters(1) = beta; // value of beta - // Do the mc sampling and accumulate data with MPI_Reduce - double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2; - LocalProcessEnergy = LocalProcessEnergy2 = 0.0; - MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters); - // Collect data in total averages - MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, 
MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - // Print out results in case of Master node, set to MyRank = 0 - if ( MyRank == 0) { - double Energy = TotalEnergy/( (double)NumberProcesses); - double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy; - double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error - ofile << setiosflags(ios::showpoint | ios::uppercase); - ofile << setw(15) << setprecision(8) << VariationalParameters(0); - ofile << setw(15) << setprecision(8) << VariationalParameters(1); - ofile << setw(15) << setprecision(8) << Energy; - ofile << setw(15) << setprecision(8) << Variance; - ofile << setw(15) << setprecision(8) << StandardDeviation << endl; - } - } - } - double EndTime = MPI_Wtime(); - double TotalTime = EndTime-StartTime; - if ( MyRank == 0 ) cout << "Time = " << TotalTime << " on number of processors: " << NumberProcesses << endl; - if (MyRank == 0) ofile.close(); // close output file - // End MPI - MPI_Finalize (); - return 0; -} // end of main function - - -// Monte Carlo sampling with the Metropolis algorithm - -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters) -{ - - // Initialize the seed and call the Mersienne algo - std::random_device rd; - std::mt19937_64 gen(rd()); - // Set up the uniform distribution for x \in [[0, 1] - std::uniform_real_distribution UniformNumberGenerator(0.0,1.0); - std::normal_distribution Normaldistribution(0.0,1.0); - // diffusion constant from Schroedinger equation - double D = 0.5; - double timestep = 0.05; // we fix the time step for the gaussian deviate - // allocate matrices which contain the position of the particles - Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension); - Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension); - double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 
0.0; - // initial trial positions - for (int i = 0; i < NumberParticles; i++) { - for (int j = 0; j < Dimension; j++) { - OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep); - } - } - double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters); - QuantumForce(OldPosition, OldQuantumForce, VariationalParameters); - // loop over monte carlo cycles - for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ - // new position - for (int i = 0; i < NumberParticles; i++) { - for (int j = 0; j < Dimension; j++) { - // gaussian deviate to compute new positions using a given timestep - NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D; - // NewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D; - } - // for the other particles we need to set the position to the old position since - // we move only one particle at the time - for (int k = 0; k < NumberParticles; k++) { - if ( k != i) { - for (int j = 0; j < Dimension; j++) { - NewPosition(k,j) = OldPosition(k,j); - } - } - } - double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); - QuantumForce(NewPosition, NewQuantumForce, VariationalParameters); - // we compute the log of the ratio of the greens functions to be used in the - // Metropolis-Hastings algorithm - double GreensFunction = 0.0; - for (int j = 0; j < Dimension; j++) { - GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))* - (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j)); - } - GreensFunction = exp(GreensFunction); - // The Metropolis test is performed by moving one particle at the time - if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { - for (int j = 0; j < Dimension; j++) { - OldPosition(i,j) = NewPosition(i,j); - OldQuantumForce(i,j) = NewQuantumForce(i,j); - } - OldWaveFunction = 
NewWaveFunction; - } - } // end of loop over particles - // compute local energy - double DeltaE = LocalEnergy(OldPosition, VariationalParameters); - // update energies - Energy += DeltaE; - EnergySquared += DeltaE*DeltaE; - } // end of loop over MC trials - // update the energy average and its squared - cumulative_e = Energy/NumberMCsamples; - cumulative_e2 = EnergySquared/NumberMCsamples; -} // end MonteCarloSampling function - - -// Function to compute the squared wave function and the quantum force - -double WaveFunction(Matrix &r, Vector &VariationalParameters) -{ - double wf = 0.0; - // full Slater determinant for two particles, replace with Slater det for more particles - wf = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0)); - // contribution from Jastrow factor - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j)))); - } - } - return wf; -} - -// Function to calculate the local energy without numerical derivation of kinetic energy - -double LocalEnergy(Matrix &r, Vector &VariationalParameters) -{ - - // compute the kinetic and potential energy from the single-particle part - // for a many-electron system this has to be replaced by a Slater determinant - // The absolute value of the interparticle length - Matrix length( NumberParticles, NumberParticles); - // Set up interparticle distance - for (int i = 0; i < NumberParticles-1; i++) { - for(int j = i+1; j < NumberParticles; j++){ - length(i,j) = RelativeDistance(r, i, j); - length(j,i) = length(i,j); - } - } - double KineticEnergy = 0.0; - // Set up kinetic energy from Slater and Jastrow terms - for (int i = 0; i < NumberParticles; i++) { - for (int k = 0; k < Dimension; k++) { - double sum1 = 0.0; - for(int j = 0; j < NumberParticles; j++){ - if ( j != i) { - sum1 += 
JastrowDerivative(r, VariationalParameters(1), i, j, k); - } - } - KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0))); - } - } - KineticEnergy += -2*VariationalParameters(0)*NumberParticles; - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) ); - } - } - KineticEnergy *= -0.5; - // Set up potential energy, external potential + eventual electron-electron repulsion - double PotentialEnergy = 0; - for (int i = 0; i < NumberParticles; i++) { - double DistanceSquared = singleparticle_pos2(r, i); - PotentialEnergy += 0.5*DistanceSquared; // sp energy HO part, note it has the oscillator frequency set to 1! - } - // Add the electron-electron repulsion - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - PotentialEnergy += 1.0/length(i,j); - } - } - double LocalE = KineticEnergy+PotentialEnergy; - return LocalE; -} - -// Compute the analytical expression for the quantum force -void QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters) -{ - // compute the first derivative - for (int i = 0; i < NumberParticles; i++) { - for (int k = 0; k < Dimension; k++) { - // single-particle part, replace with Slater det for larger systems - double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0)); - // Jastrow factor contribution - double Jsum = 0.0; - for (int j = 0; j < NumberParticles; j++) { - if ( j != i) { - Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k); - } - } - qforce(i,k) = 2.0*(Jsum+sppart); - } - } -} // end of QuantumForce function - - -#define ITMAX 200 -#define EPS 3.0e-8 -#define TOLX (4*EPS) -#define STPMX 100.0 - -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret, 
- double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g)) -{ - - int check,i,its,j; - double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test; - Vector dg(n), g(n), hdg(n), pnew(n), xi(n); - Matrix hessian(n,n); - - fp=(*func)(p); - (*dfunc)(p,g); - for (i = 0;i < n;i++) { - for (j = 0; j< n;j++) hessian(i,j)=0.0; - hessian(i,i)=1.0; - xi(i) = -g(i); - sum += p(i)*p(i); - } - stpmax=STPMX*FMAX(sqrt(sum),(double)n); - for (its=1;its<=ITMAX;its++) { - *iter=its; - lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func); - fp = *fret; - for (i = 0; i< n;i++) { - xi(i)=pnew(i)-p(i); - p(i)=pnew(i); - } - test=0.0; - for (i = 0;i< n;i++) { - temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0); - if (temp > test) test=temp; - } - if (test < TOLX) { - return; - } - for (i=0;i test) test=temp; - } - if (test < gtol) { - return; - } - for (i=0;i EPS*sumdg*sumxi) { - fac=1.0/fac; - fad=1.0/fae; - for (i=0;i stpmax) - for (i=0;i test) test=temp; - } - alamin=TOLX/test; - alam=1.0; - for (;;) { - for (i=0;i0.5*alam) - tmplam=0.5*alam; - } - } - alam2=alam; - f2 = *f; - fold2=fold; - alam=FMAX(tmplam,0.1*alam); - } -} -#undef ALF -#undef TOLX - - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{What is OpenMP} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item OpenMP provides high-level thread programming - -\item Multiple cooperating threads are allowed to run simultaneously - -\item Threads are created and destroyed dynamically in a fork-join pattern -\begin{itemize} - - \item An OpenMP program consists of a number of parallel regions - - \item Between two parallel regions there is only one master thread - - \item In the beginning of a parallel region, a team of new threads is spawned - -\end{itemize} - -\noindent - \item The newly spawned threads work simultaneously with the master thread - - \item At the end of a parallel region, the new threads are destroyed -\end{itemize} - -\noindent -Many good tutorials online and excellent 
textbook -\begin{enumerate} -\item \href{{http://mitpress.mit.edu/books/using-openmp}}{Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas} - -\item Many tutorials online like \href{{http://www.openmp.org}}{OpenMP official site} -\end{enumerate} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Getting started, things to remember} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} - \item Remember the header file -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include - -\end{minted} - -\begin{itemize} - \item Insert compiler directives in C++ syntax as -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp... - -\end{minted} - -\begin{itemize} -\item Compile with for example \emph{c++ -fopenmp code.cpp} - -\item Execute -\begin{itemize} - - \item Remember to assign the environment variable \textbf{OMP NUM THREADS} - - \item It specifies the total number of threads inside a parallel region, if not otherwise overwritten -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{OpenMP syntax} -\begin{itemize} -\item Mostly directives -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp construct [ clause ...] 
- -\end{minted} - -\begin{itemize} - \item Some functions and types -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include - -\end{minted} - -\begin{itemize} - \item Most apply to a block of code - - \item Specifically, a \textbf{structured block} - - \item Enter at top, exit at bottom only, exit(), abort() permitted -\end{itemize} - -\noindent -% !split -\subsection*{Different OpenMP styles of parallelism} -OpenMP supports several different ways to specify thread parallelism - -\begin{itemize} -\item General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code - -\item Parallel loops: Special case for loops, simplifies data parallel code - -\item Task parallelism, new in OpenMP 3 - -\item Several ways to manage thread coordination, including Master regions and Locks - -\item Memory model for shared data -\end{itemize} - -\noindent -% !split -\subsection*{General code structure} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include -main () -{ -int var1, var2, var3; -/* serial code */ -/* ... */ -/* start of a parallel region */ -#pragma omp parallel private(var1, var2) shared(var3) -{ -/* ... */ -} -/* more serial code */ -/* ... */ -/* another parallel region */ -#pragma omp parallel -{ -/* ... 
*/ -} -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Parallel region} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item A parallel region is a block of code that is executed by a team of threads - -\item The following compiler directive creates a parallel region -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel { ... } - -\end{minted} - -\begin{itemize} -\item Clauses can be added at the end of the directive - -\item Most often used clauses: -\begin{itemize} - - \item \textbf{default(shared)} or \textbf{default(none)} - - \item \textbf{public(list of variables)} - - \item \textbf{private(list of variables)} -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Hello world, not again, please!} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include -#include -int main (int argc, char *argv[]) -{ -int th_id, nthreads; -#pragma omp parallel private(th_id) shared(nthreads) -{ -th_id = omp_get_thread_num(); -printf("Hello World from thread %d\n", th_id); -#pragma omp barrier -if ( th_id == 0 ) { -nthreads = omp_get_num_threads(); -printf("There are %d threads\n",nthreads); -} -} -return 0; -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Hello world, yet another variant} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include -#include -int main(int argc, char *argv[]) -{ - omp_set_num_threads(4); -#pragma omp parallel - { - int id = omp_get_thread_num(); - int nproc = 
omp_get_num_threads(); - cout << "Hello world with id number and processes " << id << nproc << endl; - } -return 0; -} - -\end{minted} - -Variables declared outside of the parallel region are shared by all threads -If a variable like \textbf{id} is declared outside of the - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel, - -\end{minted} - -it would have been shared by various the threads, possibly causing erroneous output -\begin{itemize} - \item Why? What would go wrong? Why do we add possibly? -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Important OpenMP library routines} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{int omp get num threads ()}, returns the number of threads inside a parallel region - -\item \textbf{int omp get thread num ()}, returns the a thread for each thread inside a parallel region - -\item \textbf{void omp set num threads (int)}, sets the number of threads to be used - -\item \textbf{void omp set nested (int)}, turns nested parallelism on/off -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Private variables} - -% --- begin paragraph admon --- -\paragraph{} -Private clause can be used to make thread- private versions of such variables: - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel private(id) -{ - int id = omp_get_thread_num(); - cout << "My thread num" << id << endl; -} - -\end{minted} - -\begin{itemize} -\item What is their value on entry? Exit? 
- -\item OpenMP provides ways to control that - -\item Can use default(none) to require the sharing of each variable to be described -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Master region} - -% --- begin paragraph admon --- -\paragraph{} -It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel -{ - #pragma omp master - { - int id = omp_get_thread_num(); - cout << "My thread num" << id << endl; - } -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Parallel for loop} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} - \item Inside a parallel region, the following compiler directive can be used to parallelize a for-loop: -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp for - -\end{minted} - -\begin{itemize} -\item Clauses can be added, such as -\begin{itemize} - - \item \textbf{schedule(static, chunk size)} - - \item \textbf{schedule(dynamic, chunk size)} - - \item \textbf{schedule(guided, chunk size)} (non-deterministic allocation) - - \item \textbf{schedule(runtime)} - - \item \textbf{private(list of variables)} - - \item \textbf{reduction(operator:variable)} - - \item \textbf{nowait} -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection*{Parallel computations and loops} - - -% --- begin paragraph admon --- -\paragraph{} -OpenMP provides an easy way to parallelize a loop - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel for - for (i=0; i -#define CHUNKSIZE 100 
-#define N 1000 -int main (int argc, char *argv[]) -{ -int i, chunk; -float a[N], b[N], c[N]; -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; -chunk = CHUNKSIZE; -#pragma omp parallel shared(a,b,c,chunk) private(i) -{ -#pragma omp for schedule(dynamic,chunk) -for (i=0; i < N; i++) c[i] = a[i] + b[i]; -} /* end of parallel region */ -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Example code for loop scheduling, guided instead of dynamic} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#include -#define CHUNKSIZE 100 -#define N 1000 -int main (int argc, char *argv[]) -{ -int i, chunk; -float a[N], b[N], c[N]; -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; -chunk = CHUNKSIZE; -#pragma omp parallel shared(a,b,c,chunk) private(i) -{ -#pragma omp for schedule(guided,chunk) -for (i=0; i < N; i++) c[i] = a[i] + b[i]; -} /* end of parallel region */ -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{More on Parallel for loop} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item The number of loop iterations cannot be non-deterministic; break, return, exit, goto not allowed inside the for-loop - -\item The loop index is private to each thread - -\item A reduction variable is special -\begin{itemize} - - \item During the for-loop there is a local private copy in each thread - - \item At the end of the for-loop, all the local copies are combined together by the reduction operation - -\end{itemize} - -\noindent -\item Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler -\end{itemize} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -// #pragma omp parallel and #pragma omp for - -\end{minted} 
- -can be combined into - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel for - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{What can happen with this loop?} - - -% --- begin paragraph admon --- -\paragraph{} -What happens with code like this - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel for -for (i=0; i r) { -#pragma omp task - do_work (p_vec[i]); - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Common mistakes} - -% --- begin paragraph admon --- -\paragraph{} -Race condition - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -int nthreads; -#pragma omp parallel shared(nthreads) -{ -nthreads = omp_get_num_threads(); -} - -\end{minted} - -Deadlock - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel -{ -... -#pragma omp critical -{ -... 
-#pragma omp barrier -} -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Not all computations are simple} - -% --- begin paragraph admon --- -\paragraph{} -Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads - -An example is finding the location and value of the largest element in an array - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Not all computations are simple, competing threads} - -% --- begin paragraph admon --- -\paragraph{} -All threads are potentially accessing and changing the same values, \textbf{maxloc} and \textbf{maxval}. -\begin{enumerate} -\item OpenMP provides several ways to coordinate access to shared values -\end{enumerate} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp atomic - -\end{minted} - -\begin{enumerate} -\item Only one thread at a time can execute the following statement (not block). We can use the critical option -\end{enumerate} - -\noindent - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp critical - -\end{minted} - -\begin{enumerate} -\item Only one thread at a time can execute the following block -\end{enumerate} - -\noindent -Atomic may be faster than critical but depends on hardware -% --- end paragraph admon --- - - - -% !split -\subsection*{How to find the max value using OpenMP} - -% --- begin paragraph admon --- -\paragraph{} -Write down the simplest algorithm and look carefully for race conditions. How would you handle them? 
-The first step would be to parallelize as - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel for - for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Then deal with the race conditions} - -% --- begin paragraph admon --- -\paragraph{} -Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp parallel for - for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } - } -} - -\end{minted} - - -Exercise: write a code which implements this and give an estimate on performance. Perform several runs, -with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different archictectures if you can. -% --- end paragraph admon --- - - -% !split -\subsection*{What can slow down OpenMP performance?} -Give it a thought! - -% !split -\subsection*{What can slow down OpenMP performance?} - -% --- begin paragraph admon --- -\paragraph{} -Performance poor because we insisted on keeping track of the maxval and location during the execution of the loop. -\begin{itemize} - \item We do not care about the value during the execution of the loop, just the value at the end. 
-\end{itemize} - -\noindent -This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties - -\textbf{Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread} -% --- end paragraph admon --- - - - -% !split -\subsection*{Find the max location for each thread} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -int maxloc[MAX_THREADS], mloc; -double maxval[MAX_THREADS], mval; -#pragma omp parallel shared(maxval,maxloc) -{ - int id = omp_get_thread_num(); - maxval[id] = -1.0e30; -#pragma omp for - for (int i=0; i maxval[id]) { - maxloc[id] = i; - maxval[id] = x[i]; - } - } -} - -\end{minted} -% --- end paragraph admon --- - - - -% !split -\subsection*{Combine the values from each thread} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{c++} -#pragma omp flush (maxloc,maxval) -#pragma omp master - { - int nt = omp_get_num_threads(); - mloc = maxloc[0]; - mval = maxval[0]; - for (int i=1; i mval) { - mval = maxval[i]; - mloc = maxloc[i]; - } - } - } - -\end{minted} - -Note that we let the master process perform the last operation. 
-% --- end paragraph admon --- - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp}}{Matrix-matrix multiplication}} -This code computes the norm of a vector using OpenMp - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{text} -// OpenMP program to compute vector norm by adding two other vectors -#include -#include -#include -#include -#include -# include - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of vector - int n = atoi(argv[1]); - double *a, *b, *c; - int i; - int thread_num; - double wtime, Norm2, s, angle; - cout << " Perform addition of two vectors and compute the norm-2." << endl; - omp_set_num_threads(4); - thread_num = omp_get_max_threads (); - cout << " The number of processors available = " << omp_get_num_procs () << endl ; - cout << " The number of threads available = " << thread_num << endl; - cout << " The matrix order n = " << n << endl; - - s = 1.0/sqrt( (double) n); - wtime = omp_get_wtime ( ); - // Allocate space for the vectors to be used - a = new double [n]; b = new double [n]; c = new double [n]; - // Define parallel region -# pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2) - // Set up values for vectors a and b - for (i = 0; i < n; i++){ - angle = 2.0*M_PI*i/ (( double ) n); - a[i] = s*(sin(angle) + cos(angle)); - b[i] = s*sin(2.0*angle); - c[i] = 0.0; - } - // Then perform the vector addition - for (i = 0; i < n; i++){ - c[i] += a[i]+b[i]; - } - // Compute now the norm-2 - Norm2 = 0.0; - for (i = 0; i < n; i++){ - Norm2 += c[i]*c[i]; - } -// end parallel region - wtime = omp_get_wtime ( ) - wtime; - cout << setiosflags(ios::showpoint | ios::uppercase); - cout 
<< setprecision(10) << setw(20) << "Time used for norm-2 computation=" << wtime << endl; - cout << " Norm-2 = " << Norm2 << endl; - // Free up space - delete[] a; - delete[] b; - delete[] c; - return 0; -} - -\end{minted} - - -% !split -\subsection*{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp}}{Matrix-matrix multiplication}} -This the matrix-matrix multiplication code with plain c++ memory allocation using OpenMP - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\begin{minted}[fontsize=\fontsize{9pt}{9pt},linenos=false,mathescape,baselinestretch=1.0,fontfamily=tt,xleftmargin=7mm]{text} -// Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP -#include -#include -#include -#include -#include -# include - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double **A, **B, **C; - int i, j, k; - int thread_num; - double wtime, Fsum, s, angle; - cout << " Compute matrix product C = A * B and Frobenius norm." 
<< endl; - omp_set_num_threads(4); - thread_num = omp_get_max_threads (); - cout << " The number of processors available = " << omp_get_num_procs () << endl ; - cout << " The number of threads available = " << thread_num << endl; - cout << " The matrix order n = " << n << endl; - - s = 1.0/sqrt( (double) n); - wtime = omp_get_wtime ( ); - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - } - // Define parallel region -# pragma omp parallel for default(shared) private (angle, i, j, k) reduction(+:Fsum) - // Set up values for matrix A and B and zero matrix C - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - angle = 2.0*M_PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - } - } - // Then perform the matrix-matrix multiplication - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - C[i][j] = 0.0; - for (k = 0; k < n; k++) { - C[i][j] += A[i][k]*B[k][j]; - } - } - } - // Compute now the Frobenius norm - Fsum = 0.0; - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - Fsum += C[i][j]*C[i][j]; - } - } - Fsum = sqrt(Fsum); -// end parallel region and letting only one thread perform I/O - wtime = omp_get_wtime ( ) - wtime; - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for matrix-matrix multiplication=" << wtime << endl; - cout << " Frobenius norm = " << Fsum << endl; - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - } - delete[] A; - delete[] B; - delete[] C; - return 0; -} - - - -\end{minted} - - - -% ------------------- end of main content --------------- - -\end{document} - diff --git a/doc/src/week9/week9-reveal.html b/doc/src/week9/week9-reveal.html deleted file mode 100644 index d5f52da2..00000000 --- a/doc/src/week9/week9-reveal.html +++ 
/dev/null @@ -1,6265 +0,0 @@ - - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - - - - - - - - -
    - -
    -

    Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking

    -
    - - -
    -Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no [1, 2] -
    - -
    -[1] Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway -
    -
    -[2] Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA -
    -
    -
    -

    March 11-15

    -
    -
    - - -
    - © 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license -
    -
    - -
    -

    Overview of week 11, March 11-15

    -
    -Topics -

    -

      -

    1. Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week
    2. -

    3. Resampling Techniques, emphasis on Blocking
    4. -

    5. Discussion of onebody densities (whiteboard notes)
    6. -

    7. Start discussion on optimization and parallelization for Python and C++ - -
    8. -
    -
    - -

    Note, these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week.

    -
    - -
    -

    Why resampling methods ?

    -
    -Statistical analysis -

    -

      -

    • Our simulations can be treated as computer experiments. This is particularly the case for Monte Carlo methods
    • -

    • The results can be analysed with the same statistical tools as we would use analysing experimental data.
    • -

    • As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors.
    • -
    -
    -
    - -
    -

    Statistical analysis

    -
    - -

    -

      -

    • As in other experiments, many numerical experiments have two classes of errors: -
        -

      1. Statistical errors
      2. -

      3. Systematical errors
      4. -
      -

      -

    • Statistical errors can be estimated using standard tools from statistics
    • -

    • Systematical errors are method specific and must be treated differently from case to case.
    • -
    -
    -
    - -
    -

    And why do we use such methods?

    - -

    As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and thereby -the standard deviation of a given expectation value. -

    - -

    The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem. -

    -
    - -
    -

    Central limit theorem

    - -

    Last week we derived the central limit theorem with the following assumptions:

    - -
    -Measurement \( i \) -

    -

    We assumed that each individual measurement \( x_{ij} \) is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment \( i \) with \( n \) samples as -

    -

     
    -$$ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}. -$$ -

     
    - -

    and the sample variance

    -

     
    -$$ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -$$ -

     
    -

    -
    - -
    -

    Further remarks

    - -

    Note that we use \( n \) instead of \( n-1 \) in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. -

    -
    - -
    -

    Running many measurements

    - -
    -Adding \( m \) measurements \( i \) -

    -

    With the assumption that the average measurements \( i \) are also defined as iid stochastic variables and have the same probability function \( p \), -we defined the total average over \( m \) experiments as -

    -

     
    -$$ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}. -$$ -

     
    - -

    and the total variance

    -

     
    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -$$ -

     
    -

    - -

    These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit \( m\rightarrow \infty \), the distribution for \( \overline{X} \) is given by a Gaussian distribution with variance \( \sigma^2_m \).

    -
    - -
    -

    Adding more definitions

    - -

    The total sample variance over the \( mn \) measurements is defined as

    -

     
    -$$ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2. -$$ -

     
    - -

    We have from the equation for \( \sigma_m^2 \)

    -

     
    -$$ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -$$ -

     
    - -

    and introducing the centered value \( \tilde{x}_{ij}=x_{ij}-\overline{X} \), we can rewrite \( \sigma_m^2 \) as

    -

     
    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -$$ -

     
    -

    - -
    -

    Further rewriting

    - -

    We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements

    -

     
    -$$ -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ -

     
    - -

    The first term on the last rhs is nothing but the total sample variance \( \sigma^2 \) divided by \( n \), since \( \sigma^2=\frac{1}{mn}\sum_{i=1}^{m}\sum_{j=1}^{n}\tilde{x}_{ij}^2 \). The second term represents the covariance.

    -
    - -
    -

    The covariance term

    - -

    Using the definition of the total sample variance we have

    -

     
    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{n}+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ -

     
    - -

    The first term is what we have used till now in order to estimate the -standard deviation. However, the second term, which gives us a measure -of the correlations between different stochastic events, can result in -contributions which give rise to a larger standard deviation and -variance \( \sigma_m^2 \). Note also that the evaluation of the second term -leads to a double sum over all events. If we run a VMC calculation -with say \( 10^9 \) Monte Carlo samples, the latter term would lead to -\( 10^{18} \) function evaluations. For obvious reasons, we do not want to venture into that many evaluations. -

    - -

    Note also that if our stochastic events are iid then the covariance term is zero.

    -
    - -
    -

    Rewriting the covariance term

    - -

    We introduce now a variable \( d=\vert j-k\vert \) and rewrite

    -

     
    -$$ -\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}, -$$ -

     
    - -

    in terms of a function

    -

     
    -$$ -f_d=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n-d}\tilde{x}_{ik}\tilde{x}_{i(k+d)}. -$$ -

     
    - -

    We note that for \( d=0 \) we have

    -

     
    -$$ -f_0=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n}\tilde{x}_{ik}\tilde{x}_{ik}=\sigma^2. -$$ -

     
    -

    - -
    -

    Introducing the correlation function

    - -

    We introduce then a correlation function \( \kappa_d=f_d/\sigma^2 \). Note that \( \kappa_0 =1 \). We rewrite the variance \( \sigma_m^2 \) as

    -

     
    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{n}\left[1+2\sum_{d=1}^{n-1} \kappa_d\right]. -\end{align*} -$$ -

     
    - -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    - -

    Note: code will be inserted here later.

    -
    - -
    -

    Resampling methods: Blocking

    - -

    The blocking method was made popular by Flyvbjerg and Petersen (1989) -and has become one of the standard ways to estimate the variance -\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely -\( \widehat{\theta} = \overline{X} \), the mean value. -

    - -

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: -

    -

     
    -$$ -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} -$$ -

     
    -

    - -
    -

    Why blocking?

    - -

    The strength of the blocking method is when the number of -observations, \( n \) is large. For large \( n \), the complexity of dependent -bootstrapping scales poorly, but the blocking method does not, -moreover, it becomes more accurate the larger \( n \) is. -

    -
    - -
    -

    Blocking Transformations

    -

    We now define the blocking transformations. The idea is to take the mean of subsequent -pairs of elements from \( \boldsymbol{X} \) and form a new vector -\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of -subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and -so on. -Define \( \boldsymbol{X}_i \) recursively by: -

    - -

     
    -$$ -\begin{align} -(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ -(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + -(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 0 \leq i \leq d-2 -\tag{1} -\end{align} -$$ -

     
    -

    - -
    -

    Blocking transformations

    - -

    The quantity \( \boldsymbol{X}_k \) is -subject to \( k \) blocking transformations. We now have \( d \) vectors -\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\boldsymbol{X}_{d-1} \) containing the subsequent -averages of observations. It turns out that if the components of -\( \boldsymbol{X} \) form a stationary time series, then the components of -\( \boldsymbol{X}_i \) form a stationary time series for all \( 0 \leq i \leq d-1 \). -

    - -

    We can then compute the autocovariance, the variance, sample mean, and -number of observations for each \( i \). -Let \( \gamma_i, \sigma_i^2, -\overline{X}_i \) denote the covariance, variance and average of the -elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of -\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \). -

    -
    - -
    -

    Blocking Transformations

    - -

    Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since \( h =|i-j| \) -we can define -

    -

     
    -$$ -\begin{align} -\gamma_{k+1}(h) &= \mathrm{cov}\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}\mathrm{cov}\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \quad \text{if } h = 0 -\tag{2}\\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \text{else} -\tag{3} -\end{align} -$$ -

     
    - -

    Since the quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, \( \hat{X}_k \) is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean \( \mathrm{var}(\overline{X}) \). -

    -
    - -
    -

    Blocking Transformations, getting there

    -

    We have

    -

     
    -$$ -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\tag{4} -\end{align} -$$ -

     
    - -

    The term \( e_k \) is called the truncation error:

    -

     
    -$$ -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\tag{5} -\end{equation} -$$ -

     
    - -

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    -
    - -
    -

    Blocking Transformations, final expressions

    - -

    We can then wrap up

    -

     
    -$$ -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} \left[ (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \right] \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\tag{6} -\end{align} -$$ -

     
    - -

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    -

     
    -$$ -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \tag{7} -\end{align} -$$ -

     
    -

    - -
    -

    More on the blocking method

    - -

    Flyvbjerg and Petersen demonstrated that the sequence -\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjectured that the term -\( e_k \) can be made as small as we would like by making \( k \) (and hence -\( d \)) sufficiently large. Since the sequence is decreasing, -we can apply blocking transformations until -\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by -\( \widehat{\sigma}^2_k/n_k \). -

    - -

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    -
    - -
    -

    Example code from last week

    - - -
    -
    -
    -
    -
    -
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    -# Using gaussian rng for new positions and Metropolis- Hastings 
    -# Added energy minimization
    -from math import exp, sqrt
    -from random import random, seed, normalvariate
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from mpl_toolkits.mplot3d import Axes3D
    -from matplotlib import cm
    -from matplotlib.ticker import LinearLocator, FormatStrFormatter
    -from scipy.optimize import minimize
    -import sys
    -import os
    -
    -# Where to save data files
    -PROJECT_ROOT_DIR = "Results"
    -DATA_ID = "Results/EnergyMin"
    -
    -if not os.path.exists(PROJECT_ROOT_DIR):
    -    os.mkdir(PROJECT_ROOT_DIR)
    -
    -if not os.path.exists(DATA_ID):
    -    os.makedirs(DATA_ID)
    -
    -def data_path(dat_id):
    -    return os.path.join(DATA_ID, dat_id)
    -
    -outfile = open(data_path("Energies.dat"),'w')
    -
    -
    -# Trial wave function for the 2-electron quantum dot in two dims
    -def WaveFunction(r,alpha,beta):
    -    r1 = r[0,0]**2 + r[0,1]**2
    -    r2 = r[1,0]**2 + r[1,1]**2
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = r12/(1+beta*r12)
    -    return exp(-0.5*alpha*(r1+r2)+deno)
    -
    -# Local energy  for the 2-electron quantum dot in two dims, using analytical local energy
    -def LocalEnergy(r,alpha,beta):
    -    
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12)
    -
    -# Derivate of wave function ansatz as function of variational parameters
    -def DerivativeWFansatz(r,alpha,beta):
    -    
    -    WfDer  = np.zeros((2), np.double)
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    WfDer[0] = -0.5*(r1+r2)
    -    WfDer[1] = -r12*r12*deno2
    -    return  WfDer
    -
    -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector
    -def QuantumForce(r,alpha,beta):
    -
    -    qforce = np.zeros((NumberParticles,Dimension), np.double)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12
    -    qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12
    -    return qforce
    -    
    -
    -# Computing the derivative of the energy and the energy 
    -def EnergyDerivative(x0):
    -
    -    
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    EnergyDer = 0.0
    -    DeltaPsi = 0.0
    -    DerivativePsiE = 0.0 
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)
    -        DeltaPsi += DerPsi
    -        energy += DeltaE
    -        DerivativePsiE += DerPsi*DeltaE
    -            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    DerivativePsiE /= NumberMCcycles
    -    DeltaPsi /= NumberMCcycles
    -    EnergyDer  = 2*(DerivativePsiE-DeltaPsi*energy)
    -    return EnergyDer
    -
    -
    -# Computing the expectation value of the local energy 
    -def Energy(x0):
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        energy += DeltaE
    -        if Printout: 
    -           outfile.write('%f\n' %(energy/(MCcycle+1.0)))            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    return energy
    -
    -#Here starts the main program with variable declarations
    -NumberParticles = 2
    -Dimension = 2
    -# seed for rng generator 
    -seed()
    -# Monte Carlo cycles for parameter optimization
    -Printout = False
    -NumberMCcycles= 10000
    -# guess for variational parameters
    -x0 = np.array([0.9,0.2])
    -# Using Broydens method to find optimal parameters
    -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True})
    -x0 = res.x
    -# Compute the energy again with the optimal parameters and increased number of Monte Cycles
    -NumberMCcycles= 2**19
    -Printout = True
    -FinalEnergy = Energy(x0)
    -EResult = np.array([FinalEnergy,FinalEnergy])
    -outfile.close()
    -#nice printout with Pandas
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Optimal Parameters':x0, 'Final Energy':EResult}
    -frame = pd.DataFrame(data)
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Resampling analysis

    - -

    The next step is then to use the above data sets and perform a -resampling analysis using the blocking method. -The blocking code, based on the article of Marius Jonsson, is given here -

    - - - -
    -
    -
    -
    -
    -
    # Common imports
    -import os
    -
    -# Where to save the figures and data files
    -DATA_ID = "Results/EnergyMin"
    -
    -def data_path(dat_id):
    -    return os.path.join(DATA_ID, dat_id)
    -
    -infile = open(data_path("Energies.dat"),'r')
    -
    -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    -from numpy.linalg import inv
    -
    -def block(x):
    -    # preliminaries
    -    n = len(x)
    -    d = int(log2(n))
    -    s, gamma = zeros(d), zeros(d)
    -    mu = mean(x)
    -
    -    # estimate the auto-covariance and variances 
    -    # for each blocking transformation
    -    for i in arange(0,d):
    -        n = len(x)
    -        # estimate autocovariance of x
    -        gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )
    -        # estimate variance of x
    -        s[i] = var(x)
    -        # perform blocking transformation
    -        x = 0.5*(x[0::2] + x[1::2])
    -   
    -    # generate the test observator M_k from the theorem
    -    M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] )  )[::-1]
    -
    -    # we need a list of magic numbers
    -    q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])
    -
    -    # use magic to determine when we should have stopped blocking
    -    for k in arange(0,d):
    -        if(M[k] < q[k]):
    -            break
    -    if (k >= d-1):
    -        print("Warning: Use more data")
    -    return mu, s[k]/2**(d-k)
    -
    -
    -x = loadtxt(infile)
    -(mean, var) = block(x) 
    -std = sqrt(var)
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Mean':[mean], 'STDev':[std]}
    -frame = pd.DataFrame(data,index=['Values'])
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Content

    -
      -

    • Simple compiler options
    • -

    • Tools to benchmark your code
    • -

    • Machine architectures
    • -

    • What is vectorization?
    • -

    • How to measure code performance
    • -

    • Parallelization with OpenMP
    • -

    • Parallelization with MPI
    • -

    • Vectorization and parallelization, examples
    • -
    -
    - -
    -

    Optimization and profiling

    -
    - -

    - -

    Till now we have not paid much attention to speed and the optimization possibilities -inherent in the various compilers. We have compiled and linked as -

    - - -
    -
    -
    -
    -
    -
    c++  -c  mycode.cpp
    -c++  -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For Fortran replace with for example gfortran or ifort. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. -

    - -

    It is instructive to look up the compiler manual for further instructions by writing

    - - -
    -
    -
    -
    -
    -
    man c++
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    More on optimization

    -
    - -

    -

    We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving loop-invariant constants out of loops, -identifying potential parallelism, automatic vectorization, or replacing a division with a reciprocal -and a multiplication if this speeds up the code. -

    - - -
    -
    -
    -
    -
    -
    c++  -O3 -c  mycode.cpp
    -c++  -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This (other options are -O2 or -Ofast) is the recommended option.

    -
    -
    - -
    -

    Optimization and profiling

    -
    - -

    -

    It is also useful to profile your program under the development stage. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -pg -O3 -c  mycode.cpp
    -c++  -pg -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    After you have run the code you can obtain the profiling information via

    - - -
    -
    -
    -
    -
    -
    gprof mycode.exe >  ProfileOutput
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When you have properly profiled your code, you must take out this option as it -slows down performance. -For memory tests use valgrind. An excellent environment for all these aspects, and much more, is Qt creator. -

    -
    -
    - -
    -

    Optimization and debugging

    -
    - -

    -

    Adding debugging options is a very useful alternative under the development stage of a program. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -g -O0 -c  mycode.cpp
    -c++  -g -O0 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    -
    - -
    -Other optimization flags -

    -

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag -ftrapv does this for the CLANG compiler on OS X operating systems. -

    -
    -
    - -
    -

    Other hints

    -
    - -

    -

    In general, irrespective of compiler options, it is useful to

    -
      -

    • avoid if tests or calls to functions inside loops, if possible.
    • -

    • avoid multiplication with constants inside loops if possible
    • -
    -

    -

    Here is an example of a part of a program where specific operations lead to a slower code

    - - -
    -
    -
    -
    -
    -
    k = n-1;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] +c*d;
    -    e = g[k];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    A better code is

    - - -
    -
    -
    -
    -
    -
    temp = c*d;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] + temp;
    -}
    -e = g[n-1];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -

    -
    -
    - -
    -

    Vectorization and the basic idea behind parallel computing

    -
    - -

    -

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

    -
      -

    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • -

    • Multiple processors are involved to solve a global problem.
    • -

    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    • -
    -

    -

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    -
    -
    - -
    -

    A rough classification of hardware models

    -
    - -

    - -

      -

    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • -

    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • -

    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    • -
    -
    -
    - -
    -

    Shared memory and distributed memory

    -
    - -

    -

    One way of categorizing modern parallel computers is to look at the memory configuration.

    -
      -

    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • -

    • In distributed memory systems each CPU has its own memory.
    • -
    -

    -

    The CPUs are connected by some network and may exchange messages.

    -
    -
    - -
    -

    Different parallel programming paradigms

    -
    - -

    - -

      -

    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • -

    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    • -
    -
    -
    - -
    -

    Different parallel programming paradigms

    -
    - -

    - -

      -

    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • -

    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • -
    -
    -
    - -
    -

    What is vectorization?

    -

    Vectorization is a special -case of Single Instruction Multiple Data (SIMD) to denote a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied with SIMD instructions. -

    - -

    Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at a time. -

    -
    - -
    -

    Number of elements that can be acted upon

    -

    A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. -

    - -

    The number of elements which can be operated on in parallel -range from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. -

    - -

    In summary, our instructions operate on 128 bit (16 byte) operands

    -
      -

    • 4 floats or ints
    • -

    • 2 doubles
    • -

    • Data paths 128 bits wide for vector unit
    • -
    -
    - -
    -

    Number of elements that can be acted upon, examples

    -

    We start with the simple scalar operations given by

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized and we have a 128-bit register to store a 32 bits floating point number, -it means that we have \( 3\times 32 \) bits that are not used. -

    - -

    We have thus unused space in our SIMD registers. These registers could hold three additional integers.

    -
    - -
    -

    Operation counts for scalar operation

    -

    The code

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    has for \( n \) repeats

    -
      -

    1. one load for \( c[i] \) in address 1
    2. -

    3. one load for \( b[i] \) in address 2
    4. -

    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -

    7. store \( a[i] \) in address 3
    8. -
    -
    - -
    -

    Number of elements that can be acted upon, examples

    -

    If we vectorize the code, we can perform, with a 128-bit register four simultaneous operations, that is -we have -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i+=4){
    -    a[i] = b[i] + c[i];
    -    a[i+1] = b[i+1] + c[i+1];
    -    a[i+2] = b[i+2] + c[i+2];
    -    a[i+3] = b[i+3] + c[i+3];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Four additions are now done in a single step.

    -
    - -
    -

    Number of operations when vectorized

    -

    For \( n/4 \) repeats assuming floats or integers

    -
      -

    1. one vector load for \( c[i] \) in address 1
    2. -

    3. one vector load for \( b[i] \) in address 2
    4. -

    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -

    7. store \( a[i] \) in address 3
    8. -
    -
    - -
    -

    A simple test case with and without vectorization

    -

    We implement these operations in a simple c++ program that computes at the end the norm of a vector.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double *a, *b, *c;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -// Allocate space for the vectors to be used
    -    a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -  // Set up values for vectors  a and b
    -  for (int i = 0; i < n; i++){
    -    double angle = 2.0*M_PI*i/ (( double ) n);
    -    a[i] = s*(sin(angle) + cos(angle));
    -    b[i] =  s*sin(2.0*angle);
    -    c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (int i = 0; i < n; i++){
    -    c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  double Norm2 = 0.0;
    -  for (int i = 0; i < n; i++){
    -    Norm2  += c[i]*c[i];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
    -  cout << "  Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Compiling with and without vectorization

    -

    We can compile and link without vectorization using the clang c++ compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization (and additional optimizations)

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 10000000
    -Time used  for norm computation=0.04720500000
    -Compphys:~ hjensen$ ./novec.x 10000000
    -Time used  for norm computation=0.03311700000
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This particular C++ compiler speeds up the above loop operations by a factor of 1.5. -Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 1000000000
    -Time used  for norm computation=58.41391100
    -Compphys:~ hjensen$ ./novec.x 1000000000
    -Time used  for norm computation=46.51295300
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We will discuss these issues further in the next slides.

    -
    - -
    -

    Compiling with and without vectorization using clang

    -

    We can compile and link without vectorization with clang compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -fno-vectorize -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We can also add vectorization analysis, see for example

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    or figure out if vectorization was missed

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, criteria

    - -

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    - -

    An important criterion is that the loop counter \( n \) is known at the entry of the loop.

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The variable \( n \) does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, exit criteria

    - -

    An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -    if (a[j] < 0 ) break;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, straight-line code

    - -

    SIMD instructions perform the same type of operations multiple times. -A switch statement leads thus to a non-vectorizable loop since different statements cannot branch. -The following code can however be vectorized since the if statement is implemented as a masked assignment. -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    double x  = cos(j*1.0);
    -    if (x > 0 ) {
    -       a[j] =  x*sin(j*2.0); 
    -    }
    -    else {
    -       a[j] = 0.0;
    -    }
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as switch, go to, or return statements or if constructs that cannot be treated as masked assignments.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, nested loops

    - -

    Only the innermost loop of the following example is vectorized

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, function calls

    - -

    Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable -

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      a[i] = log10(i)*cos(i);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Similarly, inline functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, data dependencies

    - -

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i] = a[i-1] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value \( a[i-1] \) computed during the iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values \( a[0] \), \( a[1] \), \( a[2] \) and \( a[3] \) from the previous loop will be loaded into the right-hand side and produce wrong results. That is, we have

    - - -
    -
    -
    -
    -
    -
       a[1] = a[0] + b;
    -   a[2] = a[1] + b;
    -   a[3] = a[2] + b;
    -   a[4] = a[3] + b;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and if the two first iterations are executed at the same time by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, more data dependencies

    - -

    On the other hand, a so-called -write-after-read statement can be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i-1] = a[i] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of an anti-dependency that can be vectorized since no iteration with a higher value of \( i \) -can complete before an iteration with a lower value of \( i \). However, such code leads to problems with parallelization. -

    -
    - -
    -

    Automatic vectorization and vectorization inhibitors, memory stride

    - -

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    - -

    When dealing with arrays, you should also avoid memory stride, since this considerably slows down vectorization. When you access array elements, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Memory management

    -

    The main memory contains the program data

    -
      -

    1. Cache memory contains a copy of the main memory data
    2. -

    3. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    4. -

    5. Registers contain working data only
    6. -
        -

      • Modern CPUs perform most or all operations only on data in register
      • -
      -

      -

    7. Multiple Cache memories contain a copy of the main memory data
    8. -
        -

      • Cache items accessed by their address in main memory
      • -

      • L1 cache is the fastest but has the least capacity
      • -

      • L2, L3 provide intermediate performance/size tradeoffs
      • -
      -

      -

    -

    -

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    -
    - -
    -

    Memory and communication

    - -
      -

    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. -

    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. -
        -

      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • -

      • Even if you do not use the data, it is moved and occupies space in the cache
      • -
      -

      -

    -

    -

    Many of these performance features are not captured in most programming languages.

    -
    - -
    -

    Measuring performance

    - -

    How do we measure performance? What is wrong with this code to time a loop?

    - - -
    -
    -
    -
    -
    -
      clock_t start, finish;
    -  start = clock();
    -  for (int j = 0; j < i; j++) {
    -    a[j] = b[j]+b[j]*c[j];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Problems with measuring time

    -
      -

    1. Timers are not infinitely accurate
    2. -

    3. All clocks have a granularity, the minimum time that they can measure
    4. -

    5. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    6. -

    7. Always know what your clock granularity is
    8. -

    9. Ensure that your measurement is for a long enough duration (say 100 times the tick)
    10. -
    -
    - -
    -

    Problems with cold start

    - -

    What happens when the code is executed? The assumption is that the code is ready to -execute. But -

    -
      -

    1. Code may still be on disk, and not even read into memory.
    2. -

    3. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    4. -

    5. Multiple tests often necessary to ensure that cold start effects are not present
    6. -

    7. Special effort often required to ensure data in the intended part of the memory hierarchy.
    8. -
    -
    - -
    -

    Problems with smart compilers

    - -
      -

    1. If the result of the computation is not used, the compiler may eliminate the code
    2. -

    3. Performance will look impossibly fantastic
    4. -

    5. Even worse, eliminate some of the code so the performance looks plausible
    6. -

    7. Ensure that the results are (or may be) used.
    8. -
    -
    - -
    -

    Problems with interference

    -
      -

    1. Other activities are sharing your processor
    2. -
        - -

      • Operating system, system daemons, other users
      • - -

      • Some parts of the hardware do not always perform with exactly the same performance
      • -
      -

      -

    3. Make multiple tests and report
    4. -

    5. Easy choices include
    6. -
        - -

      • Average tests represent what users might observe over time
      • -
      -

      -

    -
    - -
    -

    Problems with measuring performance

    -
      -

    1. Accurate, reproducible performance measurement is hard
    2. -

    3. Think carefully about your experiment:
    4. -

    5. What is it, precisely, that you want to measure?
    6. -

    7. How representative is your test to the situation that you are trying to measure?
    8. -
    -
    - -
    -

    Thomas algorithm for tridiagonal linear algebra equations

    -
    - -

    -

     
    -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -

     
    -

    -
    - -
    -

    Thomas algorithm, forward substitution

    -
    - -

    -

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    -

     
    -$$ - a_i = 0, -$$ -

     
    - -

     
    -$$ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -$$ -

     
    - -

    and

    -

     
    -$$ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. -$$ -

     
    - -

    At this point the simplified equation, with only an upper triangular matrix takes the form

    -

     
    -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -

     
    -

    -
    - -
    -

    Thomas algorithm, backward substitution

    -
    - -

    -

    The next step is the backward substitution step. The last row is multiplied by \( c_{m-2}/b_{m-1} \) and subtracted from the second to last row, thus eliminating \( c_{m-2} \) from the second to last row. The general backward substitution procedure is

    -

     
    -$$ - c_i = 0, -$$ -

     
    - -

    and

    -

     
    -$$ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i -$$ -

     
    - -

    All that remains to be computed is the solution, which is the very straightforward process of

    -

     
    -$$ -x_i = \frac{f_i}{b_i} -$$ -

     
    -

    -
    - -
    -

    Thomas algorithm and counting of operations (floating point and memory)

    -
    - -

    - -

    We have in this specific case the following memory and floating point operations

    - -
      -

    • Memory Reads: \( 14(N-2) \);
    • -

    • Memory Writes: \( 4(N-2) \);
    • -

    • Subtractions: \( 3(N-2) \);
    • -

    • Multiplications: \( 3(N-2) \);
    • -

    • Divisions: \( 4(N-2) \).
    • -
    -
    - - -
    - -

    - - -

    -
    -
    -
    -
    -
    // Forward substitution    
    -// Note that we can simplify by precalculating a[i-1]/b[i-1]
    -  for (int i=1; i < n; i++) {
    -     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    -     f[i] = f[i] - (a[i-1]*f[i-1])/b[i-1];
    -  }
    -  x[n-1] = f[n-1] / b[n-1];
    -  // Backwards substitution                                                           
    -  for (int i = n-2; i >= 0; i--) {
    -     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    -     x[i] = f[i]/b[i];
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Example: Transpose of a matrix

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double **A, **B;
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -  }
    -  // Set up values for matrix A
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      A[i][j] =  cos(i*1.0)*sin(j*3.0);
    -    }
    -  }
    -  clock_t start, finish;
    -  start = clock();
    -  // Then compute the transpose
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      B[i][j]= A[j][i];
    -    }
    -  }
    -
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for setting up transpose of matrix=" << timeused  << endl;
    -
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain c++ memory allocation. It computes at the end the Frobenius norm.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double **A, **B, **C;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Set up values for matrix A and B and zero matrix C
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double angle = 2.0*M_PI*i*j/ (( double ) n);
    -      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -      B[j][i] =  A[i][j];
    -    }
    -  }
    -  // Then perform the matrix-matrix multiplication
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double sum = 0.0;
    -       for (int k = 0; k < n; k++) {
    -           sum += B[i][k]*A[k][j];
    -       }
    -       C[i][j] = sum;
    -    }
    -  }
    -  // Compute now the Frobenius norm
    -  double Fsum = 0.0;
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      Fsum += C[i][j]*C[i][j];
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << timeused  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    How do we define speedup? Simplest form

    -
    - -

    -

      -

    • Speedup measures the ratio of performance between two objects
    • -

    • Versions of same code, with different number of processors
    • -

    • Serial and vector versions
    • -

    • Try different programming languages, c++ and Fortran
    • -

    • Two algorithms computing the same result
    • -
    -
    -
    - -
    -

    How do we define speedup? Correct baseline

    -
    - -

    -

    The key is choosing the correct baseline for comparison

    -
      -

    • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
    • -
        -

      • For parallel applications, this is much harder:
      • -
          - -

        • Choice of algorithm, decomposition, performance of baseline case etc.
        • -
        -

        -

      -

      -

    -
    -
    - -
    -

    Parallel speedup

    -
    - -

    -

    For parallel applications, speedup is typically defined as

    -
      -

    • Speedup \( =T_1/T_p \)
    • -
    -

    -

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

    -
      -

    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    • -
    -
    -
    - -
    -

    Speedup and memory

    -
    - -

    -

    The speedup on \( p \) processors can -be greater than \( p \) if memory usage is optimal! -Consider the case of a memory-bound computation with \( M \) words of memory -

    -
      -

    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • -

    • \( T_1 \) uses the main memory bandwidth
    • -

    • \( T_p \) uses the appropriate cache bandwidth
    • -
    -
    -
    - -
    -

    Upper bounds on speedup

    -
    - -

    -

    Assume that almost all parts of a code are perfectly -parallelizable (fraction \( f \)). The remainder, -fraction \( (1-f) \) cannot be parallelized at all. -

    - -

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take -time \( Wf/p \) on \( p \) processors. -

    -
      -

    • What is the maximum possible speedup as a function of \( f \)?
    • -
    -
    -
    - -
    -

    Amdahl's law

    -
    - -

    -

    On one processor we have

    -

     
    -$$ -T_1 = (1-f)W + fW = W -$$ -

     
    - -

    On \( p \) processors we have

    -

     
    -$$ -T_p = (1-f)W + \frac{fW}{p}, -$$ -

     
    - -

    resulting in a speedup of

    -

     
    -$$ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -$$ -

     
    - -

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    -

     
    -$$ -\frac{1}{1-f}, -$$ -

     
    - -

    meaning that -if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup -is \( 1/(1-.99)=100 \)! -

    -
    -
    - -
    -

    How much is parallelizable

    -
    - -

    -

    If any non-parallel code slips into the -application, the parallel -performance is limited. -

    - -

    In many simulations, however, the fraction of non-parallelizable work -is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. -

    -
    -
    - -
    -

    Today's situation of parallel computing

    -
    - -

    - -

      -

    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • -

    • Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.
    • -

    • Modern nodes have nowadays several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This leads often to codes which use both MPI and OpenMP.
    • -
    -

    -

    Our lectures will focus on both MPI and OpenMP.

    -
    -
    - -
    -

    Overhead present in parallel computing

    -
    - -

    - -

      -

    • Uneven load balance: not all the processors can perform useful work at all time.
    • -

    • Overhead of synchronization
    • -

    • Overhead of communication
    • -

    • Extra computation due to parallelization
    • -
    -

    -

    Due to the above overhead and that certain parts of a sequential -algorithm cannot be parallelized we may not achieve an optimal parallelization. -

    -
    -
    - -
    -

    Parallelizing a sequential algorithm

    -
    - -

    - -

      -

    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part,
    • -

    • Distribute the global work and data among \( P \) processors.
    • -
    -
    -
    - -
    -

    Strategies

    -
    - -

    -

      -

    • Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • -

    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    • -
    -
    -
    - -
    -

    How do I run MPI on a PC/Laptop? MPI

    -
    - -

    -

    Installing MPI is rather easy on hardware running unix/linux as operating systems; simply follow the instructions from the OpenMPI website. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -

    -
      -

    • Compile with mpicxx/mpic++ or mpif90
    • -
    -

    - - -

    -
    -
    -
    -
    -
      # Compile and link
    -  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    -  #  run code with for example 8 processes using mpirun/mpiexec
    -  mpiexec -n 8 ./nameofprog.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Can I do it on my own PC/laptop? OpenMP installation

    -
    - -

    -

    If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: -

    - -
      -

    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • -

    • For OS X users however, install libomp
    • -
    -

    - - -

    -
    -
    -
    -
    -
      brew install libomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and compile and link as

    - - -
    -
    -
    -
    -
    -
    c++ -o <name executable> <name program.cpp>  -lomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Installing MPI

    -
    - -

    -

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    - - -
    -
    -
    -
    -
    -
      sudo apt-get install libopenmpi-dev
    -  sudo apt-get install openmpi-bin
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew -

    - - -
    -
    -
    -
    -
    -
       brew install openmpi
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When running an executable (code.x), run as

    - - -
    -
    -
    -
    -
    -
      mpirun -n 10 ./code.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    where we indicate that we want the number of processes to be 10.

    -
    -
    - -
    -

    Installing MPI and using Qt

    -
    - -

    -

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    - -

    You may need to tell Qt where openmpi is stored.

    -
    -
    - -
    -

    What is Message Passing Interface (MPI)?

    -
    - -

    - -

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. -

    - -

    MPI programs should be able to run -on all possible machines and with all MPI implementations without change. -

    - -

    An MPI computation is a collection of processes communicating with messages.

    -
    -
    - -
    -

    Going Parallel with MPI

    -
    - -

    -

    Task parallelism: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    MPI is a library

    -
    - -

    -

    MPI is a library specification for the message passing interface, -proposed as a standard. -

    - -
      -

    • independent of hardware;
    • -

    • not a language or compiler specification;
    • -

    • not a specific implementation or product.
    • -
    -

    -

    A message passing standard for portability and ease-of-use. -Designed for high performance. -

    - -

    Insert communication and synchronization functions where necessary.

    -
    -
    - -
    -

    Bindings to MPI routines

    -
    - -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The discussion in these slides focuses on the C++ binding.

    -
    -
    - -
    -

    Communicator

    -
    - -

    -

      -

    • A group of MPI processes with a name (context).
    • -

    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • -

    • By default the communicator contains all the MPI processes.
    • -
    -

    - - -

    -
    -
    -
    -
    -
      MPI_COMM_WORLD 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Mechanism to identify subset of processes.
    • -

    • Promotes modular design of parallel libraries.
    • -
    -
    -
    - -
    -

    Some of the most important MPI functions

    -
    - -

    - -

      -

    • \( MPI\_Init \) - initiate an MPI computation
    • -

    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • -

    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • -

    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • -

    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • -

    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • -

    • \( MPI\_Reduce \) or \( MPI\_Allreduce \) - send and receive messages
    • -
    -
    -
    - -
    -

    The first MPI C/C++ program

    -
    - -

    - -

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    - - -
    -
    -
    -
    -
    -
    using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -int main (int nargs, char* args[])
    -{
    -int numprocs, my_rank;
    -//   MPI initializations
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -     << numprocs << endl;
    -//  End MPI
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    The Fortran program

    -
    - -

    - - -

    -
    -
    -
    -
    -
    PROGRAM hello
    -INCLUDE "mpif.h"
    -INTEGER:: size, my_rank, ierr
    -
    -CALL  MPI_INIT(ierr)
    -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    -CALL MPI_FINALIZE(ierr)
    -
    -END PROGRAM hello
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Note 1

    -
    - -

    - -

      -

    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • -

    • It is the operating system which opts for an ordering.
    • - -

    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • -
    -
    -
    - -
    -

    Ordered output with \( MPI\_Barrier \)

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    int main (int nargs, char* args[])
    -{
    - int numprocs, my_rank, i;
    - MPI_Init (&nargs, &args);
    - MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    - for (i = 0; i < numprocs; i++) {}
    - MPI_Barrier (MPI_COMM_WORLD);
    - if (i == my_rank) {
    - cout << "Hello world, I have  rank " << my_rank << 
    -        " out of " << numprocs << endl;}
    -      MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Note 2

    -
    - -

    -

      -

    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • -

    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • -

    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • -
    -

    -

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -

    -
    -
    - -
    -

    Ordered output

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    .....
    -int numprocs, my_rank, flag;
    -MPI_Status status;
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -if (my_rank > 0)
    -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    -           MPI_COMM_WORLD, &status);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -<< numprocs << endl;
    -if (my_rank < numprocs-1)
    -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    -          100, MPI_COMM_WORLD);
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Note 3

    -
    - -

    - -

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ -is defined as -

    - - -
    -
    -
    -
    -
    -
    int MPI_Send(void *buf, int count, 
    -             MPI_Datatype datatype, 
    -             int dest, int tag, MPI_Comm comm)}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable buf is the variable we wish to send while count -is the number of variables we are passing. If we are passing only a single value, this should be 1. -

    - -

    If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) -since we are actually passing 100 values. -

    -
    -
    - -
    -

    Note 4

    -
    - -

    - -

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) -is similar to the send call. -

    - - -
    -
    -
    -
    -
    -
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    -            int source, 
    -            int tag, MPI_Comm comm, MPI_Status *status )
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The arguments that are different from those in MPI\_SEND are -buf which is the name of the variable where you will be storing the received data, -source which replaces the destination in the send command. This is the return ID of the sender. -

    - -

    Finally, we have used \( MPI\_Status\ status \), -where one can check if the receive was completed. -

    - -

    The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -

    -
    -
    - -
    -

    Numerical integration in parallel

    -
    -Integrating \( \pi \) -

    - -

      -

    • The code example computes \( \pi \) using the trapezoidal rule.
    • -

    • The trapezoidal rule
    • -
    -

    -

     
    -$$ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -$$ -

     
    - -

    Click on this link for the full program.

    -
    -
    - -
    -

    Dissection of trapezoidal rule with \( MPI\_Reduce \)

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    //    Trapezoidal rule and numerical integration usign MPI
    -using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -
    -//     Here we define various functions called by the main program
    -
    -double int_function(double );
    -double trapezoidal_rule(double , double , int , double (*)(double));
    -
    -//   Main function begins here
    -int main (int nargs, char* args[])
    -{
    -  int n, local_n, numprocs, my_rank; 
    -  double a, b, h, local_a, local_b, total_sum, local_sum;   
    -  double  time_start, time_end, total_time;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Dissection of trapezoidal rule

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      //  MPI initializations
    -  MPI_Init (&nargs, &args);
    -  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -  time_start = MPI_Wtime();
    -  //  Fixed values for a, b and n 
    -  a = 0.0 ; b = 1.0;  n = 1000;
    -  h = (b-a)/n;    // h is the same for all processes 
    -  local_n = n/numprocs;  
    -  // make sure n > numprocs, else integer division gives zero
    -  // Length of each process' interval of
    -  // integration = local_n*h.  
    -  local_a = a + my_rank*local_n*h;
    -  local_b = local_a + local_n*h;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Integrating with MPI

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      total_sum = 0.0;
    -  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    -                               &int_function); 
    -  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    -              MPI_SUM, 0, MPI_COMM_WORLD);
    -  time_end = MPI_Wtime();
    -  total_time = time_end-time_start;
    -  if ( my_rank == 0) {
    -    cout << "Trapezoidal rule = " <<  total_sum << endl;
    -    cout << "Time = " <<  total_time  
    -         << " on number of processors: "  << numprocs  << endl;
    -  }
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  // end of main program
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    How do I use \( MPI\_Reduce \)?

    -
    - -

    - -

    Here we have used

    - - -
    -
    -
    -
    -
    -
    MPI_reduce( void *senddata, void* resultdata, int count, 
    -     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable \( count \) represents the total dimensionality, 1 in case of just one variable, -while \( MPI\_Datatype \) -defines the type of variable which is sent and received. -

    - -

    The new feature is \( MPI\_Op \). It defines the type -of operation we want to do. -

    -
    -
    - -
    -

    More on \( MPI\_Reduce \)

    -
    - -

    -

    In our case, since we are summing -the rectangle contributions from every process, we define \( MPI\_Op = MPI\_SUM \). -If we have an array or matrix we can search for the largest or smallest element by sending either \( MPI\_MAX \) or -\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer -\( MPI\_MAXLOC \) or \( MPI\_MINLOC \). If we want the product we write \( MPI\_PROD \). -

    - -

    \( MPI\_Allreduce \) is defined as

    - - -
    -
    -
    -
    -
    -
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    -          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Dissection of trapezoidal rule

    -
    - -

    - -

    We use \( MPI\_Reduce \) to collect data from each process. Note also the use of the function -\( MPI\_Wtime \). -

    - - -
    -
    -
    -
    -
    -
    //  this function defines the function to integrate
    -double int_function(double x)
    -{
    -  double value = 4./(1.+x*x);
    -  return value;
    -} // end of function to evaluate
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Dissection of trapezoidal rule

    -
    - -

    - - -

    -
    -
    -
    -
    -
    //  this function defines the trapezoidal rule
    -double trapezoidal_rule(double a, double b, int n, 
    -                         double (*func)(double))
    -{
    -  double trapez_sum;
    -  double fa, fb, x, step;
    -  int    j;
    -  step=(b-a)/((double) n);
    -  fa=(*func)(a)/2. ;
    -  fb=(*func)(b)/2. ;
    -  trapez_sum=0.;
    -  for (j=1; j <= n-1; j++){
    -    x=j*step+a;
    -    trapez_sum+=(*func)(x);
    -  }
    -  trapez_sum=(trapez_sum+fb+fa)*step;
    -  return trapez_sum;
    -}  // end trapezoidal_rule 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    The quantum dot program for two electrons

    -
    - -

    - - -

    -
    -
    -
    -
    -
    // Variational Monte Carlo for atoms with importance sampling, slater det
    -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    -#include "mpi.h"
    -#include <cmath>
    -#include <random>
    -#include <string>
    -#include <iostream>
    -#include <fstream>
    -#include <iomanip>
    -#include "vectormatrixclass.h"
    -
    -using namespace  std;
    -// output file as global variable
    -ofstream ofile;  
    -// the step length and its squared inverse for the second derivative 
    -//  Here we define global variables  used in various functions
    -//  These can be changed by using classes
    -int Dimension = 2; 
    -int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    -
    -// declaration of functions 
    -
    -// The Mc sampling for the variational Monte Carlo 
    -void  MonteCarloSampling(int, double &, double &, Vector &);
    -
    -// The variational wave function
    -double  WaveFunction(Matrix &, Vector &);
    -
    -// The local energy 
    -double  LocalEnergy(Matrix &, Vector &);
    -
    -// The quantum force
    -void  QuantumForce(Matrix &, Matrix &, Vector &);
    -
    -
    -// inline function for single-particle wave function
    -inline double SPwavefunction(double r, double alpha) { 
    -   return exp(-alpha*r*0.5);
    -}
    -
    -// inline function for derivative of single-particle wave function
    -inline double DerivativeSPwavefunction(double r, double alpha) { 
    -  return -r*alpha;
    -}
    -
    -// function for absolute value of relative distance
    -double RelativeDistance(Matrix &r, int i, int j) { 
    -      double r_ij = 0;  
    -      for (int k = 0; k < Dimension; k++) { 
    -	r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));
    -      }
    -      return sqrt(r_ij); 
    -}
    -
    -// inline function for derivative of Jastrow factor
    -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    -  return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));
    -}
    -
    -// function for square of position of single particle
    -double singleparticle_pos2(Matrix &r, int i) { 
    -    double r_single_particle = 0;
    -    for (int j = 0; j < Dimension; j++) { 
    -      r_single_particle  += r(i,j)*r(i,j);
    -    }
    -    return r_single_particle;
    -}
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    -
    -static double sqrarg;
    -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    -
    -
    -static double maxarg1,maxarg2;
    -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    -        (maxarg1) : (maxarg2))
    -
    -
    -// Begin of main program   
    -
    -int main(int argc, char* argv[])
    -{
    -
    -  //  MPI initializations
    -  int NumberProcesses, MyRank, NumberMCsamples;
    -  MPI_Init (&argc, &argv);
    -  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    -  double StartTime = MPI_Wtime();
    -  if (MyRank == 0 && argc <= 1) {
    -    cout << "Bad Usage: " << argv[0] << 
    -      " Read also output file on same line and number of Monte Carlo cycles" << endl;
    -  }
    -  // Read filename and number of Monte Carlo cycles from the command line
    -  if (MyRank == 0 && argc > 2) {
    -    string filename = argv[1]; // first command line argument after name of program
    -    NumberMCsamples  = atoi(argv[2]);
    -    string fileout = filename;
    -    string argument = to_string(NumberMCsamples);
    -    // Final filename as filename+NumberMCsamples
    -    fileout.append(argument);
    -    ofile.open(fileout);
    -  }
    -  // broadcast the number of  Monte Carlo samples
    -  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    -  // Two variational parameters only
    -  Vector VariationalParameters(2);
    -  int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; 
    -  // Loop over variational parameters
    -  for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){
    -    for (double beta = 0.1; beta <= 0.5; beta +=0.05){
    -      VariationalParameters(0) = alpha;  // value of alpha
    -      VariationalParameters(1) = beta;  // value of beta
    -      //  Do the mc sampling  and accumulate data with MPI_Reduce
    -      double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2;
    -      LocalProcessEnergy = LocalProcessEnergy2 = 0.0;
    -      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    -      //  Collect data in total averages
    -      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      // Print out results  in case of Master node, set to MyRank = 0
    -      if ( MyRank == 0) {
    -	double Energy = TotalEnergy/( (double)NumberProcesses);
    -	double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    -	double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error
    -	ofile << setiosflags(ios::showpoint | ios::uppercase);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    -	ofile << setw(15) << setprecision(8) << Energy;
    -	ofile << setw(15) << setprecision(8) << Variance;
    -	ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    -      }
    -    }
    -  }
    -  double EndTime = MPI_Wtime();
    -  double TotalTime = EndTime-StartTime;
    -  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    -  if (MyRank == 0)  ofile.close();  // close output file
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  //  end of main function
    -
    -
    -// Monte Carlo sampling with the Metropolis algorithm  
    -
    -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    -{
    -
    - // Initialize the seed and call the Mersienne algo
    -  std::random_device rd;
    -  std::mt19937_64 gen(rd());
    -  // Set up the uniform distribution for x \in [[0, 1]
    -  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    -  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    -  // diffusion constant from Schroedinger equation
    -  double D = 0.5; 
    -  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    -  // allocate matrices which contain the position of the particles  
    -  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    -  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    -  double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 0.0;
    -  //  initial trial positions
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int j = 0; j < Dimension; j++) {
    -      OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep);
    -    }
    -  }
    -  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    -  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    -  // loop over monte carlo cycles 
    -  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ 
    -    // new position 
    -    for (int i = 0; i < NumberParticles; i++) { 
    -      for (int j = 0; j < Dimension; j++) {
    -	// gaussian deviate to compute new positions using a given timestep
    -	NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -	//	NewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -      }  
    -      //  for the other particles we need to set the position to the old position since
    -      //  we move only one particle at the time
    -      for (int k = 0; k < NumberParticles; k++) {
    -	if ( k != i) {
    -	  for (int j = 0; j < Dimension; j++) {
    -	    NewPosition(k,j) = OldPosition(k,j);
    -	  }
    -	} 
    -      }
    -      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); 
    -      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    -      //  we compute the log of the ratio of the greens functions to be used in the 
    -      //  Metropolis-Hastings algorithm
    -      double GreensFunction = 0.0;            
    -      for (int j = 0; j < Dimension; j++) {
    -	GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    -	  (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    -      }
    -      GreensFunction = exp(GreensFunction);
    -      // The Metropolis test is performed by moving one particle at the time
    -      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { 
    -	for (int  j = 0; j < Dimension; j++) {
    -	  OldPosition(i,j) = NewPosition(i,j);
    -	  OldQuantumForce(i,j) = NewQuantumForce(i,j);
    -	}
    -	OldWaveFunction = NewWaveFunction;
    -      }
    -    }  //  end of loop over particles
    -    // compute local energy  
    -    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    -    // update energies
    -    Energy += DeltaE;
    -    EnergySquared += DeltaE*DeltaE;
    -  }   // end of loop over MC trials   
    -  // update the energy average and its squared 
    -  cumulative_e = Energy/NumberMCsamples;
    -  cumulative_e2 = EnergySquared/NumberMCsamples;
    -}   // end MonteCarloSampling function  
    -
    -
    -// Function to compute the squared wave function and the quantum force
    -
    -double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    -{
    -  double wf = 0.0;
    -  // full Slater determinant for two particles, replace with Slater det for more particles 
    -  wf  = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    -  // contribution from Jastrow factor
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j))));
    -    }
    -  }
    -  return wf;
    -}
    -
    -// Function to calculate the local energy without numerical derivation of kinetic energy
    -
    -double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    -{
    -
    -  // compute the kinetic and potential energy from the single-particle part
    -  // for a many-electron system this has to be replaced by a Slater determinant
    -  // The absolute value of the interparticle length
    -  Matrix length( NumberParticles, NumberParticles);
    -  // Set up interparticle distance
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for(int j = i+1; j < NumberParticles; j++){
    -      length(i,j) = RelativeDistance(r, i, j);
    -      length(j,i) =  length(i,j);
    -    }
    -  }
    -  double KineticEnergy = 0.0;
    -  // Set up kinetic energy from Slater and Jastrow terms
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int k = 0; k < Dimension; k++) {
    -      double sum1 = 0.0; 
    -      for(int j = 0; j < NumberParticles; j++){
    -	if ( j != i) {
    -	  sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));
    -    }
    -  }
    -  KineticEnergy += -2*VariationalParameters(0)*NumberParticles;
    -  for (int i = 0; i < NumberParticles-1; i++) {
    -      for (int j = i+1; j < NumberParticles; j++) {
    -        KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) );
    -      }
    -  }
    -  KineticEnergy *= -0.5;
    -  // Set up potential energy, external potential + eventual electron-electron repulsion
    -  double PotentialEnergy = 0;
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    double DistanceSquared = singleparticle_pos2(r, i);
    -    PotentialEnergy += 0.5*DistanceSquared;  // sp energy HO part, note it has the oscillator frequency set to 1!
    -  }
    -  // Add the electron-electron repulsion
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      PotentialEnergy += 1.0/length(i,j);          
    -    }
    -  }
    -  double LocalE = KineticEnergy+PotentialEnergy;
    -  return LocalE;
    -}
    -
    -// Compute the analytical expression for the quantum force: fills qforce(i,k) for every particle i and dimension k
    -void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    -{
    -  // compute the first derivative; this function itself writes only to qforce
    -  for (int i = 0; i < NumberParticles; i++) {
    -    for (int k = 0; k < Dimension; k++) {
    -      // single-particle part, replace with Slater det for larger systems
    -      double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0)); // depends on variational parameter 0 only
    -      //  Jastrow factor contribution, accumulated over all other particles j != i
    -      double Jsum = 0.0;
    -      for (int j = 0; j < NumberParticles; j++) {
    -	if ( j != i) {
    -	  Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k); // depends on variational parameter 1 only
    -	}
    -      }
    -      qforce(i,k) = 2.0*(Jsum+sppart); // twice the sum of both derivative terms; presumably 2*grad(psi)/psi - confirm against the helpers
    -    }
    -  }
    -} // end of QuantumForce function
    -
    -
    -#define ITMAX 200
    -#define EPS 3.0e-8
    -#define TOLX (4*EPS)
    -#define STPMX 100.0
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    -{ // Quasi-Newton minimization of func starting from p (n components); dfunc fills g with the gradient at p.
    -
    -  int check,i,its,j; // check is filled by lnsrch but never inspected in this function
    -  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    -  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    -  Matrix hessian(n,n); // multiplied directly onto the gradient below, i.e. it plays the role of an inverse-Hessian approximation
    -
    -  fp=(*func)(p); // function value at the starting point
    -  (*dfunc)(p,g); // gradient at the starting point
    -  for (i = 0;i < n;i++) {
    -    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    -    hessian(i,i)=1.0; // start from the identity matrix
    -    xi(i) = -g(i); // so the first search direction is the negative gradient
    -    sum += p(i)*p(i);
    -  }
    -  stpmax=STPMX*FMAX(sqrt(sum),(double)n); // cap on the step length handed to lnsrch
    -  for (its=1;its<=ITMAX;its++) {
    -    *iter=its; // report the current iteration number to the caller
    -    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func); // search from p along xi; result in pnew and *fret
    -    fp = *fret;
    -    for (i = 0; i< n;i++) {
    -      xi(i)=pnew(i)-p(i); // xi now holds the step actually taken
    -      p(i)=pnew(i); // p is updated in place, so the caller sees the latest point
    -    }
    -    test=0.0;
    -    for (i = 0;i< n;i++) {
    -      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0); // relative size of the step, component by component
    -      if (temp > test) test=temp;
    -    }
    -    if (test < TOLX) {
    -      return; // converged: the last step was negligible
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i); // keep the old gradient
    -    (*dfunc)(p,g); // gradient at the new point
    -    test=0.0;
    -    den=FMAX(*fret,1.0);
    -    for (i=0;i<n;i++) {
    -      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den; // scaled gradient component
    -      if (temp > test) test=temp;
    -    }
    -    if (test < gtol) {
    -      return; // converged: the scaled gradient is below gtol
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i)-dg(i); // dg = change in the gradient over the step
    -    for (i=0;i<n;i++) {
    -      hdg(i)=0.0;
    -      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j); // hdg = hessian * dg
    -    }
    -    fac=fae=sumdg=sumxi=0.0;
    -    for (i=0;i<n;i++) {
    -      fac += dg(i)*xi(i); // dg . xi
    -      fae += dg(i)*hdg(i); // dg . hdg
    -      sumdg += SQR(dg(i));
    -      sumxi += SQR(xi(i));
    -    }
    -    if (fac*fac > EPS*sumdg*sumxi) { // skip the update when dg.xi is tiny; NOTE(review): the squared test also passes for negative fac - confirm that is intended
    -      fac=1.0/fac;
    -      fad=1.0/fae;
    -      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i); // dg is reused as scratch for the last term of the update
    -      for (i=0;i<n;i++) {
    -	for (j=0;j<n;j++) {
    -	  hessian(i,j) += fac*xi(i)*xi(j)
    -	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j); // three outer-product corrections added to the matrix
    -	}
    -      }
    -    }
    -    for (i=0;i<n;i++) {
    -      xi(i)=0.0;
    -      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j); // next search direction: xi = -hessian * g
    -    }
    -  }
    -  cout << "too many iterations in dfpmin" << endl; // reached only when ITMAX iterations ran without meeting either test
    -}
    -#undef ITMAX
    -#undef EPS
    -#undef TOLX
    -#undef STPMX
    -
    -#define ALF 1.0e-4
    -#define TOLX 1.0e-7
    -
    -// Backtracking line search used by dfpmin.
    -// Starting at xold (function value fold, gradient g) it searches along direction p for
    -// x = xold + alam*p, 0 < alam <= 1, such that *f = func(x) <= fold + ALF*alam*slope.
    -//   n       number of components used in xold, g, p and x
    -//   p       search direction; rescaled in place when its length exceeds stpmax
    -//   x, *f   output: the accepted point and the function value computed there
    -//   *check  output: 0 on a normal exit, 1 when alam dropped below alamin (x is then reset
    -//           to xold, while *f still holds the value at the last trial point)
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -	    double *f, double stpmax, int *check, double (*func)(Vector &p))
    -{
    -  int i;
    -  double a,alam,alam2,alamin,b,disc,f2,fold2,rhs1,rhs2,slope,sum,temp,
    -    test,tmplam;
    -
    -  *check=0;
    -  // limit the length of the attempted step to stpmax
    -  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    -  sum=sqrt(sum);
    -  if (sum > stpmax)
    -    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    -  // directional derivative of func along p at xold
    -  for (slope=0.0,i=0;i<n;i++)
    -    slope += g(i)*p(i);
    -  // smallest step length worth trying, from the largest relative component of p
    -  test=0.0;
    -  for (i=0;i<n;i++) {
    -    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    -    if (temp > test) test=temp;
    -  }
    -  alamin=TOLX/test;
    -  alam=1.0; // always try the full step first
    -  for (;;) {
    -    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    -    *f=(*func)(x);
    -    if (alam < alamin) {
    -      // step became too small: give up and hand back the starting point
    -      for (i=0;i<n;i++) x(i)=xold(i);
    -      *check=1;
    -      return;
    -    } else if (*f <= fold+ALF*alam*slope) return; // sufficient decrease reached
    -    else {
    -      if (alam == 1.0)
    -	tmplam = -slope/(2.0*(*f-fold-slope)); // first backtrack: minimizer of a quadratic model
    -      else {
    -	// later backtracks: cubic model through the two most recent trial values
    -	rhs1 = *f-fold-alam*slope;
    -	rhs2=f2-fold2-alam2*slope;
    -	a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    -	b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    -	if (a == 0.0) tmplam = -slope/(2.0*b);
    -	else {
    -	  disc=b*b-3.0*a*slope;
    -	  if (disc<0.0) {
    -	    // The cubic model has no real minimizer. Report it and halve the step;
    -	    // previously tmplam was left uninitialized here and then read by FMAX below.
    -	    cout << "Roundoff problem in lnsrch." << endl;
    -	    tmplam=0.5*alam;
    -	  }
    -	  else tmplam=(-b+sqrt(disc))/(3.0*a);
    -	}
    -	if (tmplam>0.5*alam)
    -	  tmplam=0.5*alam; // never shrink by less than a factor of two
    -      }
    -    }
    -    // remember this trial for the cubic model of the next pass
    -    alam2=alam;
    -    f2 = *f;
    -    fold2=fold;
    -    alam=FMAX(tmplam,0.1*alam); // and never shrink by more than a factor of ten
    -  }
    -}
    -#undef ALF
    -#undef TOLX
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    What is OpenMP

    -
    - -

    -

      -

    • OpenMP provides high-level thread programming
    • -

    • Multiple cooperating threads are allowed to run simultaneously
    • -

    • Threads are created and destroyed dynamically in a fork-join pattern
    • -
        - -

      • An OpenMP program consists of a number of parallel regions
      • - -

      • Between two parallel regions there is only one master thread
      • - -

      • In the beginning of a parallel region, a team of new threads is spawned
      • -
      -

      - -

    • The newly spawned threads work simultaneously with the master thread
    • - -

    • At the end of a parallel region, the new threads are destroyed
    • -
    -

    -

    Many good tutorials online and excellent textbook

    -
      -

    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. -

    3. Many tutorials online like OpenMP official site
    4. -
    -
    -
    - -
    -

    Getting started, things to remember

    -
    - -

    -

      -

    • Remember the header file
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Insert compiler directives in C++ syntax as
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp...
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Compile with for example c++ -fopenmp code.cpp
    • -

    • Execute
    • -
        - -

      • Remember to assign the environment variable OMP_NUM_THREADS
      • - -

      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      • -
      -

      -

    -
    -
    - -
    -

    OpenMP syntax

    -
      -

    • Mostly directives
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp construct [ clause ...]
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Some functions and types
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Most apply to a block of code
    • -

    • Specifically, a structured block
    • -

    • Enter at top, exit at bottom only, exit(), abort() permitted
    • -
    -
    - -
    -

    Different OpenMP styles of parallelism

    -

    OpenMP supports several different ways to specify thread parallelism

    - -
      -

    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • -

    • Parallel loops: Special case for loops, simplifies data parallel code
    • -

    • Task parallelism, new in OpenMP 3
    • -

    • Several ways to manage thread coordination, including Master regions and Locks
    • -

    • Memory model for shared data
    • -
    -
    - -
    -

    General code structure

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -int main ()
    -{
    -int var1, var2, var3;
    -/* serial code */
    -/* ... */
    -/* start of a parallel region: every thread gets its own var1 and var2, var3 is shared */
    -#pragma omp parallel private(var1, var2) shared(var3)
    -{
    -/* ... */
    -}
    -/* more serial code */
    -/* ... */
    -/* another parallel region, this time with the default data-sharing rules */
    -#pragma omp parallel
    -{
    -/* ... */
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Parallel region

    -
    - -

    -

      -

    • A parallel region is a block of code that is executed by a team of threads
    • -

    • The following compiler directive creates a parallel region
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp parallel { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Clauses can be added at the end of the directive
    • -

    • Most often used clauses:
    • -
        -

      • default(shared) or default(none)
      • -

      • shared(list of variables)
      • -

      • private(list of variables)
      • -
      -

      -

    -
    -
    - -
    -

    Hello world, not again, please!

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#include <cstdio>
    -int main (int argc, char *argv[])
    -{
    -int th_id, nthreads; // declared outside the region; the clauses below decide how each one is shared
    -#pragma omp parallel private(th_id) shared(nthreads)
    -{
    -th_id = omp_get_thread_num(); // each thread stores its own number in its private th_id
    -printf("Hello World from thread %d\n", th_id);
    -#pragma omp barrier
    -if ( th_id == 0 ) { // only the thread numbered 0 writes the shared nthreads and prints it
    -nthreads = omp_get_num_threads();
    -printf("There are %d threads\n",nthreads);
    -}
    -}
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Hello world, yet another variant

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <cstdio>
    -#include <omp.h>
    -int main(int argc, char *argv[])
    -{
    - omp_set_num_threads(4); // request four threads for the parallel region below
    -#pragma omp parallel
    - {
    -   // id and nproc are declared inside the region, so every thread has its own copy
    -   int id = omp_get_thread_num();
    -   int nproc = omp_get_num_threads();
    -   // printf comes from the <cstdio> header this listing includes (cout was used without <iostream>);
    -   // the two numbers are now separated by a blank instead of being printed as one run of digits
    -   printf("Hello world with id number and processes %d %d\n", id, nproc);
    - }
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Variables declared outside of the parallel region are shared by all threads -If a variable like id is declared outside of the -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel, 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    it would have been shared by the various threads, possibly causing erroneous output

    -
      -

    • Why? What would go wrong? Why do we add possibly?
    • -
    -
    -
    - -
    -

    Important OpenMP library routines

    -
    - -

    - -

      -

    • int omp_get_num_threads(), returns the number of threads inside a parallel region
    • -

    • int omp_get_thread_num(), returns the number (ID) of the calling thread inside a parallel region
    • -

    • void omp_set_num_threads(int), sets the number of threads to be used
    • -

    • void omp_set_nested(int), turns nested parallelism on/off
    • -
    -
    -
    - -
    -

    Private variables

    -
    - -

    -

    Private clause can be used to make thread- private versions of such variables:

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel private(id)
    -{
    - int id = omp_get_thread_num();
    - cout << "My thread num" << id << endl; 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • What is their value on entry? Exit?
    • -

    • OpenMP provides ways to control that
    • -

    • Can use default(none) to require the sharing of each variable to be described
    • -
    -
    -
    - -
    -

    Master region

    -
    - -

    -

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel 
    -{
    -  #pragma omp master
    -   {
    -      int id = omp_get_thread_num();
    -      cout << "My thread num" << id << endl; 
    -   } 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Parallel for loop

    -
    - -

    -

      -

    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    • Clauses can be added, such as
    • -
        - -

      • schedule(static, chunk size)
      • - -

      • schedule(dynamic, chunk size)
      • - -

      • schedule(guided, chunk size) (non-deterministic allocation)
      • - -

      • schedule(runtime)
      • - -

      • private(list of variables)
      • - -

      • reduction(operator:variable)
      • - -

      • nowait
      • -
      -

      -

    -
    -
    - -
    -

    Parallel computations and loops

    - -
    - -

    -

    OpenMP provides an easy way to parallelize a loop

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -  for (i=0; i<n; i++) c[i] = a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    OpenMP handles index variable (no need to declare in for loop or make private)

    - -

    Which thread does which values? Several options.

    -
    -
    - -
    -

    Scheduling of loop computations

    - -
    - -

    -

    We can let the OpenMP runtime decide. The decision is about how the loop iterations are scheduled -and OpenMP defines three choices of loop scheduling: -

    -
      -

    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. -

    3. Dynamic: Selection made at runtime
    4. -

    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. -
    -
    -
    - -
    -

    Example code for loop scheduling

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; // serial initialization of both input arrays
    -chunk = CHUNKSIZE; // chunk size passed to the schedule clause below
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(dynamic,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i]; // iterations are handed to the threads in pieces of chunk iterations, on demand
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Example code for loop scheduling, guided instead of dynamic

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; // serial initialization of both input arrays
    -chunk = CHUNKSIZE; // with schedule(guided,...) this is the smallest piece size, see the OpenMP specification
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(guided,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i]; // same loop as the dynamic example, only the schedule kind differs
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    More on Parallel for loop

    -
    - -

    -

      -

    • The number of loop iterations must be known on entry to the loop (it cannot be non-deterministic); break, return, exit, goto not allowed inside the for-loop
    • -

    • The loop index is private to each thread
    • -

    • A reduction variable is special
    • -
        - -

      • During the for-loop there is a local private copy in each thread
      • - -

      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • -
      -

      -

    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • -
    -

    - - -

    -
    -
    -
    -
    -
    // #pragma omp parallel and #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    can be combined into

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    What can happen with this loop?

    - -
    - -

    -

    What happens with code like this

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    - - -
    -
    -
    -
    -
    -
    sum = 0.0;
    -#pragma omp parallel for reduction(+:sum)
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Inner product

    -
    - -

    -

     
    -$$ -\sum_{i=0}^{n-1} a_ib_i -$$ -

     
    - - - -

    -
    -
    -
    -
    -
    int i;
    -double sum = 0.;
    -/* allocating and initializing arrays */
    -/* ... */
    -/* every thread accumulates a private partial sum; the partial sums are added into sum when the loop ends */
    -#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    - for (i=0; i<N; i++) sum += a[i]*b[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Different threads do different tasks

    -
    - -

    - -

    Different threads do different tasks independently, each section is executed by one thread.

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -#pragma omp sections
    -{
    -#pragma omp section
    -funcA ();
    -#pragma omp section
    -funcB ();
    -#pragma omp section
    -funcC ();
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Single execution

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp single { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The code is executed by one thread only, no guarantee which thread

    - -

    There is an implicit barrier at the end of a single construct (unless the nowait clause is given)

    - - -
    -
    -
    -
    -
    -
    #pragma omp master { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    -
    -
    - -
    -

    Coordination and synchronization

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp barrier
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Synchronization, must be encountered by all threads in a team (or none)

    - - -
    -
    -
    -
    -
    -
    #pragma omp ordered { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is another form of synchronization (in sequential order). -The form -

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and

    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic { single assignment statement }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is more efficient than

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Data scope

    -
    - -

    -

      -

    • OpenMP data scope attribute clauses:
    • -
        -

      • shared
      • -

      • private
      • -

      • firstprivate
      • -

      • lastprivate
      • -

      • reduction
      • -
      -

      -

    -

    -

    What are the purposes of these attributes

    -
      -

    • define how and which variables are transferred to a parallel region (and back)
    • -

    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    • -
    -
    -
    - -
    -

    Some remarks

    -
    - -

    - -

      -

    • When entering a parallel region, the private clause ensures each thread having its own new variable instances. The new variables are assumed to be uninitialized.
    • -

    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • -

    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • -

    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    • -
    -
    -
    - -
    -

    Parallelizing nested for-loops

    -
    - -

    - -

      -

    • Serial code
    • -
    -

    - - -

    -
    -
    -
    -
    -
    for (i=0; i<100; i++) {
    -    for (j=0; j<100; j++) {
    -        a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -

    • Parallelization
    • -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp parallel for private(j)
    -for (i=0; i<100; i++) {
    -    /* j is listed as private so that every thread runs its own inner loop counter */
    -    for (j=0; j<100; j++) {
    -       a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -

    • Why not parallelize the inner loop? to save overhead of repeated thread forks-joins
    • -

    • Why must j be private? To avoid race condition among the threads
    • -
    -
    -
    - -
    -

    Nested parallelism

    -
    - -

    -

    When a thread in a parallel region encounters another parallel construct, it -may create a new team of threads and become the master of the new -team. -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel num_threads(4)
    -{
    -/* .... */
    -#pragma omp parallel num_threads(2)
    -{
    -//  
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Parallel tasks

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp task 
    -#pragma omp parallel shared(p_vec) private(i)
    -{
    -#pragma omp single
    -{
    -/* one thread walks the loop and creates the tasks */
    -for (i=0; i<N; i++) {
    -  double r = random_number();
    -  if (p_vec[i] > r) {
    -#pragma omp task
    -   do_work (p_vec[i]);
    -  }
    -}
    -} /* end of single */
    -} /* end of parallel region */
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Common mistakes

    -
    - -

    -

    Race condition

    - - -
    -
    -
    -
    -
    -
    int nthreads;
    -#pragma omp parallel shared(nthreads)
    -{
    -nthreads = omp_get_num_threads();
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Deadlock

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -...
    -#pragma omp critical
    -{
    -...
    -#pragma omp barrier
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Not all computations are simple

    -
    - -

    -

    Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads -

    - -

    An example is finding the location and value of the largest element in an array

    - - -
    -
    -
    -
    -
    -
    for (i=0; i<n; i++) { 
    -   if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -   }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Not all computations are simple, competing threads

    -
    - -

    -

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    -
      -

    1. OpenMP provides several ways to coordinate access to shared values
    2. -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp atomic
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. -
    -

    - - -

    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -

    1. Only one thread at a time can execute the following block
    2. -
    -

    -

    Atomic may be faster than critical but depends on hardware

    -
    -
    - -
    -

    How to find the max value using OpenMP

    -
    - -

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -    if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Then deal with the race conditions

    -
    - -

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -#pragma omp critical
    -  {
    -     if (x[i] > maxval) {
    -       maxval = x[i];
    -       maxloc = i; 
    -     }
    -  }
    -} 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Exercise: write a code which implements this and give an estimate on performance. Perform several runs, -with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different architectures if you can. -

    -
    -
    - -
    -

    What can slow down OpenMP performance?

    -

    Give it a thought!

    -
    - -
    -

    What can slow down OpenMP performance?

    -
    - -

    -

    Performance poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    -
      -

    • We do not care about the value during the execution of the loop, just the value at the end.
    • -
    -

    -

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties

    - -Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread -
    -
    - -
    -

    Find the max location for each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    int maxloc[MAX_THREADS], mloc;
    -double maxval[MAX_THREADS], mval; 
    -#pragma omp parallel shared(maxval,maxloc)
    -{
    -  int id = omp_get_thread_num(); 
    -  maxval[id] = -1.0e30;
    -#pragma omp for
    -   for (int i=0; i<n; i++) {
    -       if (x[i] > maxval[id]) { 
    -           maxloc[id] = i;
    -           maxval[id] = x[i]; 
    -       }
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Combine the values from each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp flush (maxloc,maxval)
    -#pragma omp master
    -  {
    -    int nt = omp_get_num_threads(); 
    -    mloc = maxloc[0]; 
    -    mval = maxval[0]; 
    -    for (int i=1; i<nt; i++) {
    -        if (maxval[i] > mval) { 
    -           mval = maxval[i]; 
    -           mloc = maxloc[i];
    -        } 
    -     }
    -   }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Note that we let the master thread perform the last operation.

    -
    -
    - -
    -

    Matrix-matrix multiplication

    -

    This code computes the norm of a vector using OpenMP

    - - -
    -
    -
    -
    -
    -
    //  OpenMP program to compute vector norm by adding two other vectors
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of vector; stop with a usage message instead of reading a missing argv[1]
    -  if (argc < 2 || atoi(argv[1]) <= 0) {
    -    cout << "  Usage: " << argv[0] << " n   (n > 0)" << endl;
    -    return 1;
    -  }
    -  int n = atoi(argv[1]);
    -  double *a, *b, *c;
    -  int i;
    -  int thread_num;
    -  // Norm2 starts at zero because the reduction adds the per-thread sums to its value on entry
    -  double wtime, Norm2 = 0.0, s, angle;
    -  cout << "  Perform addition of two vectors and compute the norm-2." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region: one team of threads works through all three loops.
    -  // (Before, the directive was a "parallel for" and therefore covered the first loop only.)
    -# pragma omp parallel default(shared) private (angle, i)
    -  {
    -    // Set up values for vectors  a and b
    -#   pragma omp for
    -    for (i = 0; i < n; i++){
    -        angle = 2.0*M_PI*i/ (( double ) n);
    -        a[i] = s*(sin(angle) + cos(angle));
    -        b[i] =  s*sin(2.0*angle);
    -        c[i] = 0.0;
    -    }
    -    // Then perform the vector addition (the loop above ends with an implicit barrier)
    -#   pragma omp for
    -    for (i = 0; i < n; i++){
    -       c[i] += a[i]+b[i];
    -    }
    -    // Compute now the norm-2; note that the sum of squares is reported, no square root is taken
    -#   pragma omp for reduction(+:Norm2)
    -    for (i = 0; i < n; i++){
    -       Norm2  += c[i]*c[i];
    -    }
    -  }
    -// end parallel region
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm-2 computation=" << wtime  << endl;
    -  cout << " Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain C++ memory allocation using OpenMP

    - - - -
    -
    -
    -
    -
    -
    //  Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix; stop with a usage message instead of reading a missing argv[1]
    -  if (argc < 2 || atoi(argv[1]) <= 0) {
    -    cout << "  Usage: " << argv[0] << " n   (n > 0)" << endl;
    -    return 1;
    -  }
    -  int n = atoi(argv[1]);
    -  double **A, **B, **C;
    -  int i, j, k;
    -  int thread_num;
    -  // Fsum starts at zero because the reduction adds the per-thread sums to its value on entry
    -  double wtime, Fsum = 0.0, s, angle;
    -  cout << "  Compute matrix product C = A * B and Frobenius norm." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Define parallel region: one team of threads works through all three loop nests.
    -  // (Before, the directive was a "parallel for" and therefore covered the first nest only.)
    -# pragma omp parallel default(shared) private (angle, i, j, k)
    -  {
    -    // Set up values for matrix A and B and zero matrix C
    -    // (row i of A and column i of B are written by the thread that owns iteration i)
    -#   pragma omp for
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -        angle = 2.0*M_PI*i*j/ (( double ) n);
    -        A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -        B[j][i] =  A[i][j];
    -      }
    -    }
    -    // Then perform the matrix-matrix multiplication (the loop above ends with an implicit barrier)
    -#   pragma omp for
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -         C[i][j] =  0.0;    
    -         for (k = 0; k < n; k++) {
    -              C[i][j] += A[i][k]*B[k][j];
    -         }
    -      }
    -    }
    -    // Compute now the Frobenius norm
    -#   pragma omp for reduction(+:Fsum)
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -        Fsum += C[i][j]*C[i][j];
    -      }
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -// end parallel region and letting only one thread perform I/O
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << wtime  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -
    -
    - - - - - - - - - - - - - diff --git a/doc/src/week9/week9-solarized.html b/doc/src/week9/week9-solarized.html deleted file mode 100644 index 354c9093..00000000 --- a/doc/src/week9/week9-solarized.html +++ /dev/null @@ -1,6276 +0,0 @@ - - - - - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - - - - - -
    -

    Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking

    -
    - - -
    -Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no [1, 2] -
    - -
    -[1] Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway -
    -
    -[2] Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA -
    -
    -
    -

    March 11-15

    -
    -
    - -









    -

    Overview of week 11, March 11-15

    -
    -Topics -

    -

      -
    1. Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week
    2. -
    3. Resampling Techniques, emphasis on Blocking
    4. -
    5. Discussion of onebody densities (whiteboard notes)
    6. -
    7. Start discussion on optimization and parallelization for Python and C++ - -
    8. -
    -
    - - -

    Note, these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week.

    - -









    -

    Why resampling methods ?

    -
    -Statistical analysis -

    -

      -
    • Our simulations can be treated as computer experiments. This is particularly the case for Monte Carlo methods
    • -
    • The results can be analysed with the same statistical tools as we would use analysing experimental data.
    • -
    • As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors.
    • -
    -
    - - -









    -

    Statistical analysis

    -
    - -

    -

      -
    • As in other experiments, many numerical experiments have two classes of errors: -
        -
      1. Statistical errors
      2. -
      3. Systematical errors
      4. -
      -
    • Statistical errors can be estimated using standard tools from statistics
    • -
    • Systematical errors are method specific and must be treated differently from case to case.
    • -
    -
    - - -









    -

    And why do we use such methods?

    - -

    As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and thereby -the standard deviation of a given expectation value. -

    - -

    The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem. -

    - -









    -

    Central limit theorem

    - -

    Last week we derived the central limit theorem with the following assumptions:

    - -
    -Measurement \( i \) -

    -

    We assumed that each individual measurement \( x_{ij} \) is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment \( i \) with \( n \) samples as -

    -$$ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}. -$$ - -

    and the sample variance

    -$$ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -$$ -
    - - -









    -

    Further remarks

    - -

    Note that we use \( n \) instead of \( n-1 \) in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. -

    - -









    -

    Running many measurements

    - -
    -Adding \( m \) measurements \( i \) -

    -

    With the assumption that the average measurements \( i \) are also defined as iid stochastic variables and have the same probability function \( p \), -we defined the total average over \( m \) experiments as -

    -$$ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}. -$$ - -

    and the total variance

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -$$ -
    - -

    These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit \( m\rightarrow \infty \), the distribution for \( \overline{X} \) is given by a Gaussian distribution with variance \( \sigma^2_m \).

    - -









    -

    Adding more definitions

    - -

    The total sample variance over the \( mn \) measurements is defined as

    -$$ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2. -$$ - -

    We have from the equation for \( \sigma_m^2 \)

    -$$ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -$$ - -

    and introducing the centered value \( \tilde{x}_{ij}=x_{ij}-\overline{X} \), we can rewrite \( \sigma_m^2 \) as

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -$$ - - -









    -

    Further rewriting

    - -

    We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements

    -$$ -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term on the last rhs is nothing but the total sample variance \( \sigma^2 \) divided by \( n \). The second term represents the covariance.

    - -









    -

    The covariance term

    - -

    Using the definition of the total sample variance we have

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{n}+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term is what we have used till now in order to estimate the -standard deviation. However, the second term, which gives us a measure -of the correlations between different stochastic events, can result in -contributions which give rise to a larger standard deviation and -variance \( \sigma_m^2 \). Note also that the evaluation of the second term -leads to a double sum over all events. If we run a VMC calculation -with say \( 10^9 \) Monte Carlo samples, the latter term would lead to -\( 10^{18} \) function evaluations. For obvious reasons, we do not want to venture into that many evaluations. -

    - -

    Note also that if our stochastic events are iid then the covariance term is zero.

    - -









    -

    Rewriting the covariance term

    - -

    We introduce now a variable \( d=\vert j-k\vert \) and rewrite

    -$$ -\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}, -$$ - -

    in terms of a function

    -$$ -f_d=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n-d}\tilde{x}_{ik}\tilde{x}_{i(k+d)}. -$$ - -

    We note that for \( d=0 \) we have

    -$$ -f_0=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n}\tilde{x}_{ik}\tilde{x}_{ik}=\sigma^2. -$$ - - -









    -

    Introducing the correlation function

    - -

    We introduce then a correlation function \( \kappa_d=f_d/\sigma^2 \). Note that \( \kappa_0 =1 \). We rewrite the variance \( \sigma_m^2 \) as

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{n}\left[1+2\sum_{d=1}^{n-1} \kappa_d\right]. -\end{align*} -$$ - -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    - -

    Note: code will be inserted here later.

    - -









    -

    Resampling methods: Blocking

    - -

    The blocking method was made popular by Flyvbjerg and Petersen (1989) -and has become one of the standard ways to estimate the variance -\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely -\( \widehat{\theta} = \overline{X} \), the mean value. -

    - -

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: -

    -$$ -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} -$$ - - -









    -

    Why blocking?

    - -

    The blocking method is at its strongest when the number of -observations \( n \) is large. For large \( n \), the complexity of dependent -bootstrapping scales poorly, but that of the blocking method does not; -moreover, blocking becomes more accurate the larger \( n \) is. -

    - -









    -

    Blocking Transformations

    -

    We now define the blocking transformations. The idea is to take the mean of subsequent -pairs of elements from \( \boldsymbol{X} \) and form a new vector -\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of -subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and -so on. -Define \( \boldsymbol{X}_i \) recursively by: -

    - -$$ -\begin{align} -(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ -(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + -(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\label{_auto1} -\end{align} -$$ - - -









    -

    Blocking transformations

    - -

    The quantity \( \boldsymbol{X}_k \) has been -subjected to \( k \) blocking transformations. We now have \( d \) vectors -\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\boldsymbol{X}_{d-1} \) containing the subsequent -averages of observations. It turns out that if the components of -\( \boldsymbol{X} \) form a stationary time series, then the components of -\( \boldsymbol{X}_i \) form a stationary time series for all \( 0 \leq i \leq d-1 \). -

    - -

    We can then compute the autocovariance, the variance, sample mean, and -number of observations for each \( i \). -Let \( \gamma_i, \sigma_i^2, -\overline{X}_i \) denote the covariance, variance and average of the -elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of -\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \). -

    - -









    -

    Blocking Transformations

    - -

    Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since \( h =|i-j| \) -we can define -

    -$$ -\begin{align} -\gamma_{k+1}(h) &= cov\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}cov\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \hspace{0.1cm} \mathrm{h = 0} -\label{_auto2}\\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \mathrm{else} -\label{_auto3} -\end{align} -$$ - -

    The quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, and \( \hat{X}_k \) is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean \( \mathrm{var}(\overline{X}) \). -

    - -









    -

    Blocking Transformations, getting there

    -

    We have

    -$$ -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\label{_auto4} -\end{align} -$$ - -

    The term \( e_k \) is called the truncation error:

    -$$ -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\label{_auto5} -\end{equation} -$$ - -

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    - -









    -

    Blocking Transformations, final expressions

    - -

    We can then wrap up

    -$$ -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\label{_auto6} -\end{align} -$$ - -

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    -$$ -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \label{eq:convergence} -\end{align} -$$ - - -









    -

    More on the blocking method

    - -

    Flyvbjerg and Petersen demonstrated that the sequence -\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjectured that the term -\( e_k \) can be made as small as we would like by making \( k \) (and hence -\( d \)) sufficiently large. Since the sequence is decreasing, -we can apply blocking transformations until -\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by -\( \widehat{\sigma}^2_k/n_k \). -

    - -

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    - -









    -

    Example code from last week

    - - -
    -
    -
    -
    -
    -
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    -# Using gaussian rng for new positions and Metropolis- Hastings 
    -# Added energy minimization
    -from math import exp, sqrt
    -from random import random, seed, normalvariate
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from mpl_toolkits.mplot3d import Axes3D
    -from matplotlib import cm
    -from matplotlib.ticker import LinearLocator, FormatStrFormatter
    -from scipy.optimize import minimize
    -import sys
    -import os
    -
    # Where to save data files
    PROJECT_ROOT_DIR = "Results"
    DATA_ID = "Results/EnergyMin"

    # Create the output directory (and any missing parent) in one call.
    # exist_ok=True makes this a no-op when the directory is already there and
    # avoids the check-then-create race of os.path.exists() followed by mkdir.
    os.makedirs(PROJECT_ROOT_DIR, exist_ok=True)
    os.makedirs(DATA_ID, exist_ok=True)

    def data_path(dat_id):
        """Return the path of the data file named dat_id inside DATA_ID."""
        return os.path.join(DATA_ID, dat_id)

    # File receiving the running energy averages; it is closed by the main
    # program once the production run has finished.
    outfile = open(data_path("Energies.dat"),'w')
    -
    -
    def WaveFunction(r,alpha,beta):
        """Trial wave function for the 2-electron quantum dot in two dimensions.

        r holds the coordinates as r[particle, dimension]; alpha scales the
        Gaussian (oscillator) part and beta enters the Jastrow exponent
        r12/(1+beta*r12).
        """
        # squared distances of each electron from the origin
        rsq_first = r[0,0]**2 + r[0,1]**2
        rsq_second = r[1,0]**2 + r[1,1]**2
        # inter-electron distance
        dx = r[0,0]-r[1,0]
        dy = r[0,1]-r[1,1]
        r12 = sqrt(dx**2 + dy**2)
        jastrow_exponent = r12/(1+beta*r12)
        return exp(-0.5*alpha*(rsq_first+rsq_second)+jastrow_exponent)
    -
    def LocalEnergy(r,alpha,beta):
        """Analytical local energy of the 2-electron quantum dot in two dimensions.

        r holds the coordinates as r[particle, dimension]; alpha and beta are
        the variational parameters of the trial wave function.
        """
        # squared distances from the origin and the inter-electron distance
        rsq_first = (r[0,0]**2 + r[0,1]**2)
        rsq_second = (r[1,0]**2 + r[1,1]**2)
        r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
        # powers of 1/(1+beta*r12) coming from the Jastrow factor
        inv = 1.0/(1+beta*r12)
        inv_sq = inv*inv
        jastrow_part = inv_sq*(alpha*r12-inv_sq+2*beta*inv-1.0/r12)
        return 0.5*(1-alpha*alpha)*(rsq_first + rsq_second) +2.0*alpha + 1.0/r12+jastrow_part
    -
    def DerivativeWFansatz(r,alpha,beta):
        """Derivatives of the wave-function ansatz with respect to (alpha, beta).

        Returns a length-2 array: element 0 is -0.5*(r1^2+r2^2) and element 1
        is -r12^2/(1+beta*r12)^2. alpha is accepted for a uniform call
        signature but does not enter either expression.
        """
        rsq_sum = (r[0,0]**2 + r[0,1]**2) + (r[1,0]**2 + r[1,1]**2)
        r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
        inv = 1.0/(1+beta*r12)
        inv_sq = inv*inv
        return np.array([-0.5*rsq_sum, -r12*r12*inv_sq], dtype=np.double)
    -
    def QuantumForce(r,alpha,beta):
        """Quantum force F = 2*grad(psi)/psi for the two-electron quantum dot.

        With ln(psi) = -0.5*alpha*(r1^2+r2^2) + r12/(1+beta*r12), the force on
        electron 1 is the SUM of the oscillator and Jastrow contributions,

            F_1 = -2*alpha*r_1 + 2*(r_1-r_2)/(r12*(1+beta*r12)^2),

        and correspondingly for electron 2 with the two labels swapped.

        r holds the coordinates as r[particle, dimension]; the returned array
        has the same shape as r.
        """
        qforce = np.zeros(r.shape, np.double)
        r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
        deno = 1.0/(1+beta*r12)
        # Jastrow contribution on electron 1; electron 2 feels the opposite
        jastrow = 2*(r[0,:]-r[1,:])*deno*deno/r12
        qforce[0,:] = -2*alpha*r[0,:] + jastrow
        qforce[1,:] = -2*alpha*r[1,:] - jastrow
        return qforce
    -    
    -
    def EnergyDerivative(x0):
        """Monte Carlo estimate of the energy gradient w.r.t. (alpha, beta).

        x0[0] is alpha and x0[1] is beta. The configurations are sampled with
        importance sampling (Metropolis-Hastings with a drift term), moving one
        particle at a time. Returns 2*(<dlnpsi*E_L> - <dlnpsi><E_L>) as an
        array with one entry per variational parameter.

        Uses the module-level NumberParticles, Dimension and NumberMCcycles.
        """
        # Parameters in the Fokker-Planck simulation of the quantum force
        D = 0.5
        TimeStep = 0.05
        alpha = x0[0]
        beta = x0[1]

        energy = 0.0
        DeltaPsi = 0.0
        DerivativePsiE = 0.0

        # Initial position
        PositionOld = np.zeros((NumberParticles,Dimension), np.double)
        for i in range(NumberParticles):
            for j in range(Dimension):
                PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
        wfold = WaveFunction(PositionOld,alpha,beta)
        QuantumForceOld = QuantumForce(PositionOld,alpha, beta)

        # Loop over MC cycles
        for MCcycle in range(NumberMCcycles):
            # Trial position moving one particle at the time
            for i in range(NumberParticles):
                # Start from the current configuration so that the trial state
                # differs from it in particle i only; otherwise the wave
                # function would be evaluated with the other particle at a
                # stale (or never initialised) position.
                PositionNew = PositionOld.copy()
                for j in range(Dimension):
                    PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
                                           QuantumForceOld[i,j]*TimeStep*D
                wfnew = WaveFunction(PositionNew,alpha,beta)
                QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
                # Logarithm of the ratio of the Green's functions for the move
                GreensFunction = 0.0
                for j in range(Dimension):
                    GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
                                          (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
                                          PositionNew[i,j]+PositionOld[i,j])

                GreensFunction = exp(GreensFunction)
                ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
                # Metropolis-Hastings test to see whether we accept the move
                if random() <= ProbabilityRatio:
                    # Accept: the force on every particle depends on the moved
                    # particle, so the whole force array is replaced.
                    PositionOld = PositionNew
                    QuantumForceOld = QuantumForceNew
                    wfold = wfnew
            DeltaE = LocalEnergy(PositionOld,alpha,beta)
            DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)
            DeltaPsi += DerPsi
            energy += DeltaE
            DerivativePsiE += DerPsi*DeltaE

        # We calculate mean values
        energy /= NumberMCcycles
        DerivativePsiE /= NumberMCcycles
        DeltaPsi /= NumberMCcycles
        EnergyDer  = 2*(DerivativePsiE-DeltaPsi*energy)
        return EnergyDer
    -
    -
    def Energy(x0):
        """Monte Carlo estimate of the expectation value of the local energy.

        x0[0] is alpha and x0[1] is beta. The configurations are sampled with
        importance sampling (Metropolis-Hastings with a drift term), moving one
        particle at a time. When the module-level flag Printout is true, the
        running average of the energy is written to outfile after every cycle.

        Uses the module-level NumberParticles, Dimension and NumberMCcycles.
        """
        # Parameters in the Fokker-Planck simulation of the quantum force
        D = 0.5
        TimeStep = 0.05
        alpha = x0[0]
        beta = x0[1]

        energy = 0.0

        # Initial position
        PositionOld = np.zeros((NumberParticles,Dimension), np.double)
        for i in range(NumberParticles):
            for j in range(Dimension):
                PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
        wfold = WaveFunction(PositionOld,alpha,beta)
        QuantumForceOld = QuantumForce(PositionOld,alpha, beta)

        # Loop over MC cycles
        for MCcycle in range(NumberMCcycles):
            # Trial position moving one particle at the time
            for i in range(NumberParticles):
                # Start from the current configuration so that the trial state
                # differs from it in particle i only; otherwise the wave
                # function would be evaluated with the other particle at a
                # stale (or never initialised) position.
                PositionNew = PositionOld.copy()
                for j in range(Dimension):
                    PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
                                           QuantumForceOld[i,j]*TimeStep*D
                wfnew = WaveFunction(PositionNew,alpha,beta)
                QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
                # Logarithm of the ratio of the Green's functions for the move
                GreensFunction = 0.0
                for j in range(Dimension):
                    GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
                                          (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
                                          PositionNew[i,j]+PositionOld[i,j])

                GreensFunction = exp(GreensFunction)
                ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
                # Metropolis-Hastings test to see whether we accept the move
                if random() <= ProbabilityRatio:
                    # Accept: the force on every particle depends on the moved
                    # particle, so the whole force array is replaced.
                    PositionOld = PositionNew
                    QuantumForceOld = QuantumForceNew
                    wfold = wfnew
            DeltaE = LocalEnergy(PositionOld,alpha,beta)
            energy += DeltaE
            if Printout:
               outfile.write('%f\n' %(energy/(MCcycle+1.0)))
        # We calculate mean values
        energy /= NumberMCcycles
        return energy
    -
    # Main program: optimise (alpha, beta) with short runs, then do one long
    # production run at the optimum and report the result.
    NumberParticles = 2
    Dimension = 2
    # seed the random number generator
    seed()
    # During the optimisation: few Monte Carlo cycles and no output to file
    Printout = False
    NumberMCcycles= 10000
    # initial guess for the variational parameters (alpha, beta)
    x0 = np.array([0.9,0.2])
    # Quasi-Newton (BFGS) minimisation, with the gradient supplied by EnergyDerivative
    res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative,
                   options=dict(gtol=1e-4, disp=True))
    x0 = res.x
    # Production run with the optimal parameters and many more Monte Carlo
    # cycles; the running energy averages are now written to outfile
    NumberMCcycles= 2**19
    Printout = True
    FinalEnergy = Energy(x0)
    # the energy is repeated so that its column matches the two parameters
    EResult = np.array([FinalEnergy,FinalEnergy])
    outfile.close()
    # nice printout with Pandas
    import pandas as pd
    from pandas import DataFrame
    data ={'Optimal Parameters':x0, 'Final Energy':EResult}
    frame = pd.DataFrame(data)
    print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Resampling analysis

    - -

    The next step is then to use the above data sets and perform a -resampling analysis using the blocking method. -The blocking code, based on the article of Marius Jonsson, is given here. -

    - - - -
    -
    -
    -
    -
    -
    # Common imports
    -import os
    -
    # Directory holding the data files written by the VMC run
    DATA_ID = "Results/EnergyMin"

    def data_path(dat_id):
        """Return the path of the data file named dat_id inside DATA_ID."""
        full_path = os.path.join(DATA_ID, dat_id)
        return full_path

    # Energies produced by the VMC run, one running average per line
    infile = open(data_path("Energies.dat"),'r')
    -
    -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    -from numpy.linalg import inv
    -
    def block(x):
        """Estimate the variance of the sample mean with the automated blocking method.

        x is a one-dimensional series of (possibly correlated) observations.
        Returns the tuple (mu, variance), where mu is the mean of all
        observations and variance is the blocking estimate of var(mean).

        The blocking transformations need 2**d observations, so when len(x) is
        not a power of two only the first 2**d observations, with
        d = floor(log2(len(x))), enter the variance estimate.

        Raises ValueError when fewer than two observations are given.
        """
        # Local import: the function must not depend on module-level names such
        # as mean/var, which the calling script may rebind to plain numbers.
        import numpy as np

        # preliminaries
        x = np.asarray(x, dtype=float)
        n = len(x)
        if n < 2:
            raise ValueError("block() needs at least two observations")
        d = n.bit_length() - 1          # floor(log2(n)), exact for integers
        mu = np.mean(x)
        # keep exactly 2**d observations so that every transformation pairs
        # elements one-to-one (no silent broadcasting of unequal halves)
        x = x[:2**d]
        centre = np.mean(x)             # equals mu when n is a power of two
        s, gamma = np.zeros(d), np.zeros(d)

        # estimate the auto-covariance and variances
        # for each blocking transformation
        for i in range(d):
            n = len(x)
            # estimate autocovariance of x
            gamma[i] = (n)**(-1)*np.sum( (x[0:(n-1)]-centre)*(x[1:n]-centre) )
            # estimate variance of x
            s[i] = np.var(x)
            # perform blocking transformation
            x = 0.5*(x[0::2] + x[1::2])

        # generate the test observator M_k from the theorem
        M = (np.cumsum( ((gamma/s)**2*2**np.arange(1,d+1)[::-1])[::-1] )  )[::-1]

        # thresholds q[k] that M_k is tested against, one per blocking level
        q = np.array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])

        # find the first level at which the test passes; fall back to the last
        # level when it never does (only as many levels as q has entries can
        # be tested)
        k = d-1
        for level in range(min(d, len(q))):
            if(M[level] < q[level]):
                k = level
                break
        if (k >= d-1):
            print("Warning: Use more data")
        return mu, s[k]/2**(d-k)
    -
    -
    # Read the energies and release the file handle as soon as they are in memory
    x = loadtxt(infile)
    infile.close()
    # Distinct names are used for the results so that the mean and var
    # functions imported from numpy are not overwritten by plain numbers.
    (mean_value, variance) = block(x)
    std = sqrt(variance)
    import pandas as pd
    from pandas import DataFrame
    data ={'Mean':[mean_value], 'STDev':[std]}
    frame = pd.DataFrame(data,index=['Values'])
    print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Content

    -
      -
    • Simple compiler options
    • -
    • Tools to benchmark your code
    • -
    • Machine architectures
    • -
    • What is vectorization?
    • -
    • How to measure code performance
    • -
    • Parallelization with OpenMP
    • -
    • Parallelization with MPI
    • -
    • Vectorization and parallelization, examples
    • -
    -









    -

    Optimization and profiling

    -
    - -

    - -

    Till now we have not paid much attention to speed and the optimization possibilities -inherent in the various compilers. We have compiled and linked as -

    - - -
    -
    -
    -
    -
    -
    c++  -c  mycode.cpp
    -c++  -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For Fortran replace with for example gfortran or ifort. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. -

    - -

    It is instructive to look up the compiler manual for further instructions by writing

    - - -
    -
    -
    -
    -
    -
    man c++
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -









    -

    More on optimization

    -
    - -

    -

    We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identifying potential parallelism, automatic vectorization, or replacing a division with a reciprocal -and a multiplication if this speeds up the code. -

    - - -
    -
    -
    -
    -
    -
    c++  -O3 -c  mycode.cpp
    -c++  -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This (other options are -O2 or -Ofast) is the recommended option.

    -
    - -









    -

    Optimization and profiling

    -
    - -

    -

    It is also useful to profile your program during the development stage. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -pg -O3 -c  mycode.cpp
    -c++  -pg -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    After you have run the code you can obtain the profiling information via

    - - -
    -
    -
    -
    -
    -
    gprof mycode.exe >  ProfileOutput
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When you have properly profiled your code, you must remove this option, as it -slows down performance. -For memory tests use valgrind. An excellent environment for all these aspects, and much more, is Qt Creator. -

    -
    - - -









    -

    Optimization and debugging

    -
    - -

    -

    Adding debugging options is a very useful alternative during the development stage of a program. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -g -O0 -c  mycode.cpp
    -c++  -g -O0 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    -
    - -
    -Other optimization flags -

    -

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag -ftrapv does this for the CLANG compiler on OS X operating systems. -

    -
    - - -









    -

    Other hints

    -
    - -

    -

    In general, irrespective of compiler options, it is useful to

    -
      -
    • avoid if tests or calls to functions inside loops, if possible.
    • -
    • avoid multiplication with constants inside loops if possible
    • -
    -

    Here is an example of a part of a program where specific operations lead to a slower code

    - - -
    -
    -
    -
    -
    -
    k = n-1;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] +c*d;
    -    e = g[k];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    A better code is

    - - -
    -
    -
    -
    -
    -
    temp = c*d;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] + temp;
    -}
    -e = g[n-1];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -

    -
    - - -









    -

    Vectorization and the basic idea behind parallel computing

    -
    - -

    -

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

    -
      -
    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • -
    • Multiple processors are involved to solve a global problem.
    • -
    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    • -
    -

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    -
    - - -









    -

    A rough classification of hardware models

    -
    - -

    - -

      -
    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • -
    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • -
    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    • -
    -
    - -









    -

    Shared memory and distributed memory

    -
    - -

    -

    One way of categorizing modern parallel computers is to look at the memory configuration.

    -
      -
    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • -
    • In distributed memory systems each CPU has its own memory.
    • -
    -

    The CPUs are connected by some network and may exchange messages.

    -
    - - -









    -

    Different parallel programming paradigms

    -
    - -

    - -

      -
    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • -
    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    • -
    -
    - -









    -

    Different parallel programming paradigms

    -
    - -

    - -

      -
    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • -
    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • -
    -
    - - - -

    What is vectorization?

    -

    Vectorization is a special -case of Single Instruction Multiple Data (SIMD) to denote a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied with SIMD instructions. -

    - -

    Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at a time. -

    - - -

    Number of elements that can be acted upon

    -

    A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. -

    - -

    The number of elements which can be operated on in parallel -range from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. -

    - -

    In summary, our instructions operate on 128 bit (16 byte) operands

    -
      -
    • 4 floats or ints
    • -
    • 2 doubles
    • -
    • Data paths 128 bits wide for vector unit
    • -
    - -

    Number of elements that can be acted upon, examples

    -

    We start with the simple scalar operations given by

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized and we have a 128-bit register to store a 32 bits floating point number, -it means that we have \( 3\times 32 \) bits that are not used. -

    - -

    We have thus unused space in our SIMD registers. These registers could hold three additional integers.

    - - -

    Operation counts for scalar operation

    -

    The code

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    has for \( n \) repeats

    -
      -
    1. one load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    - -

    Number of elements that can be acted upon, examples

    -

    If we vectorize the code, we can perform, with a 128-bit register four simultaneous operations, that is -we have -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i+=4){
    -    a[i] = b[i] + c[i];
    -    a[i+1] = b[i+1] + c[i+1];
    -    a[i+2] = b[i+2] + c[i+2];
    -    a[i+3] = b[i+3] + c[i+3];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Four additions are now done in a single step.

    - - -

    Number of operations when vectorized

    -

    For \( n/4 \) repeats assuming floats or integers

    -
      -
    1. one vector load for \( c[i] \) in address 1
    2. -
    3. one vector load for \( b[i] \) in address 2
    4. -
    5. vector add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. vector store \( a[i] \) in address 2
    8. -
    -









    -

    A simple test case with and without vectorization

    -

    We implement these operations in a simple c++ program that computes at the end the norm of a vector.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double *a, *b, *c;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -// Allocate space for the vectors to be used
    -    a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -  // Set up values for vectors  a and b
    -  for (int i = 0; i < n; i++){
    -    double angle = 2.0*M_PI*i/ (( double ) n);
    -    a[i] = s*(sin(angle) + cos(angle));
    -    b[i] =  s*sin(2.0*angle);
    -    c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (int i = 0; i < n; i++){
    -    c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  double Norm2 = 0.0;
    -  for (int i = 0; i < n; i++){
    -    Norm2  += c[i]*c[i];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
    -  cout << "  Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Compiling with and without vectorization

    -

    We can compile and link without vectorization using the clang c++ compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization (and additional optimizations)

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 10000000
    -Time used  for norm computation=0.04720500000
    -Compphys:~ hjensen$ ./novec.x 10000000
    -Time used  for norm computation=0.03311700000
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This particular C++ compiler speeds up the above loop operations by a factor of 1.5. -Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 1000000000
    -Time used  for norm computation=58.41391100
    -Compphys:~ hjensen$ ./novec.x 1000000000
    -Time used  for norm computation=46.51295300
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We will discuss these issues further in the next slides.

    - - -

    Compiling with and without vectorization using clang

    -

    We can compile and link without vectorization with clang compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -fno-vectorize -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We can also add vectorization analysis, see for example

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    or figure out if vectorization was missed

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Automatic vectorization and vectorization inhibitors, criteria

    - -

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    - -

    An important criterion is that the loop count \( n \) is known at the entry of the loop.

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The variable \( n \) does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.

    - -









    -

    Automatic vectorization and vectorization inhibitors, exit criteria

    - -

    An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -    if (a[j] < 0 ) break;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    - -









    -

    Automatic vectorization and vectorization inhibitors, straight-line code

    - -

    SIMD instructions perform the same type of operations multiple times. -A switch statement thus leads to a non-vectorizable loop since different statements cannot branch. -The following code can however be vectorized since the if statement is implemented as a masked assignment. -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    double x  = cos(j*1.0);
    -    if (x > 0 ) {
    -       a[j] =  x*sin(j*2.0); 
    -    }
    -    else {
    -       a[j] = 0.0;
    -    }
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as switch, go to, or return statements or if constructs that cannot be treated as masked assignments.

    - -









    -

    Automatic vectorization and vectorization inhibitors, nested loops

    - -

    Only the innermost loop of the following example is vectorized

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    - -









    -

    Automatic vectorization and vectorization inhibitors, function calls

    - -

    Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable -

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      a[i] = log10(i)*cos(i);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Similarly, inline functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called.

    - -









    -

    Automatic vectorization and vectorization inhibitors, data dependencies

    - -

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i] = a[i-1] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value \( a[i-1] \) computed during the iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values \( a[0] \), \( a[1] \), \( a[2] \) and \( a[3] \) from the previous loop will be loaded into the right-hand side and produce wrong results. That is, we have

    - - -
    -
    -
    -
    -
    -
       a[1] = a[0] + b;
    -   a[2] = a[1] + b;
    -   a[3] = a[2] + b;
    -   a[4] = a[3] + b;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and if the first two iterations are executed at the same time by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    - -









    -

    Automatic vectorization and vectorization inhibitors, more data dependencies

    - -

    On the other hand, a so-called -write-after-read statement can be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i-1] = a[i] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of an anti-dependency (write-after-read) that can be vectorized since no iteration with a higher value of \( i \) -can complete before an iteration with a lower value of \( i \). However, such code leads to problems with parallelization. -

    - -









    -

    Automatic vectorization and vectorization inhibitors, memory stride

    - -

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    - -

    When dealing with arrays, you should also avoid memory stride, since this considerably slows down vectorized code. When you access array elements, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Memory management

    -

    The main memory contains the program data

    -
      -
    1. Cache memory contains a copy of the main memory data
    2. -
    3. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    4. -
    5. Registers contain working data only
    6. -
        -
      • Modern CPUs perform most or all operations only on data in register
      • -
      -
    7. Multiple Cache memories contain a copy of the main memory data
    8. -
        -
      • Cache items accessed by their address in main memory
      • -
      • L1 cache is the fastest but has the least capacity
      • -
      • L2, L3 provide intermediate performance/size tradeoffs
      • -
      -
    -

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    - -









    -

    Memory and communication

    - -
      -
    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. -
    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. -
        -
      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • -
      • Even if you do not use the data, it is moved and occupies space in the cache
      • -
      -
    -

    Many of these performance features are not captured in most programming languages.

    - -









    -

    Measuring performance

    - -

    How do we measure performance? What is wrong with this code to time a loop?

    - - -
    -
    -
    -
    -
    -
      clock_t start, finish;
    -  start = clock();
    -  for (int j = 0; j < i; j++) {
    -    a[j] = b[j]+b[j]*c[j];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Problems with measuring time

    -
      -
    1. Timers are not infinitely accurate
    2. -
    3. All clocks have a granularity, the minimum time that they can measure
    4. -
    5. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    6. -
    7. Always know what your clock granularity is
    8. -
    9. Ensure that your measurement is for a long enough duration (say 100 times the tick)
    10. -
    -









    -

    Problems with cold start

    - -

    What happens when the code is executed? The assumption is that the code is ready to -execute. But -

    -
      -
    1. Code may still be on disk, and not even read into memory.
    2. -
    3. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    4. -
    5. Multiple tests often necessary to ensure that cold start effects are not present
    6. -
    7. Special effort often required to ensure data in the intended part of the memory hierarchy.
    8. -
    -









    -

    Problems with smart compilers

    - -
      -
    1. If the result of the computation is not used, the compiler may eliminate the code
    2. -
    3. Performance will look impossibly fantastic
    4. -
    5. Even worse, eliminate some of the code so the performance looks plausible
    6. -
    7. Ensure that the results are (or may be) used.
    8. -
    -









    -

    Problems with interference

    -
      -
    1. Other activities are sharing your processor
    2. -
        -
      • Operating system, system daemons, other users
      • -
      • Some parts of the hardware do not always perform with exactly the same performance
      • -
      -
    3. Make multiple tests and report
    4. -
    5. Easy choices include
    6. -
        -
      • Average tests represent what users might observe over time
      • -
      -
    -









    -

    Problems with measuring performance

    -
      -
    1. Accurate, reproducible performance measurement is hard
    2. -
    3. Think carefully about your experiment:
    4. -
    5. What is it, precisely, that you want to measure?
    6. -
    7. How representative is your test to the situation that you are trying to measure?
    8. -
    -









    -

    Thomas algorithm for tridiagonal linear algebra equations

    -
    - -

    -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -

    - - -









    -

    Thomas algorithm, forward substitution

    -
    - -

    -

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    -$$ - a_i = 0, -$$ - - -$$ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -$$ - -

    and

    -$$ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. -$$ - -

    At this point the simplified equation, with only an upper triangular matrix takes the form

    -$$ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -$$ -
    - - -









    -

    Thomas algorithm, backward substitution

    -
    - -

    -

    The next step is the backward substitution step. The last row is multiplied by \( c_{m-2}/b_{m-1} \) and subtracted from the second to last row, thus eliminating \( c_{m-2} \) from the second to last row. The general backward substitution procedure is

    -$$ - c_i = 0, -$$ - -

    and

    -$$ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i -$$ - -

    All that remains to be computed is the solution, which is the very straightforward process of

    -$$ -x_i = \frac{f_i}{b_i} -$$ -
    - - -









    -

    Thomas algorithm and counting of operations (floating point and memory)

    -
    - -

    - -

    In this specific case we have the following counts of memory and floating point operations

    - -
      -
    • Memory Reads: \( 14(N-2) \);
    • -
    • Memory Writes: \( 4(N-2) \);
    • -
    • Subtractions: \( 3(N-2) \);
    • -
    • Multiplications: \( 3(N-2) \);
    • -
    • Divisions: \( 4(N-2) \).
    • -
    -
    - - -
    - -

    - - -

    -
    -
    -
    -
    -
    // Forward substitution    
    -// Note that we can simplify by precalculating a[i-1]/b[i-1]
    -  for (int i=1; i < n; i++) {
    -     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    -     f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1];
    -  }
    -  x[n-1] = f[n-1] / b[n-1];
    -  // Backwards substitution                                                           
    -  for (int i = n-2; i >= 0; i--) {
    -     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    -     x[i] = f[i]/b[i];
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Example: Transpose of a matrix

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double **A, **B;
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -  }
    -  // Set up values for matrix A
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      A[i][j] =  cos(i*1.0)*sin(j*3.0);
    -    }
    -  }
    -  clock_t start, finish;
    -  start = clock();
    -  // Then compute the transpose
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      B[i][j]= A[j][i];
    -    }
    -  }
    -
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for setting up transpose of matrix=" << timeused  << endl;
    -
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain c++ memory allocation. It computes at the end the Frobenius norm.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double **A, **B, **C;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Set up values for matrix A and B and zero matrix C
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double angle = 2.0*M_PI*i*j/ (( double ) n);
    -      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -      B[j][i] =  A[i][j];
    -    }
    -  }
    -  // Then perform the matrix-matrix multiplication
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double sum = 0.0;
    -       for (int k = 0; k < n; k++) {
    -           sum += B[i][k]*A[k][j];
    -       }
    -       C[i][j] = sum;
    -    }
    -  }
    -  // Compute now the Frobenius norm
    -  double Fsum = 0.0;
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      Fsum += C[i][j]*C[i][j];
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << timeused  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    How do we define speedup? Simplest form

    -
    - -

    -

      -
    • Speedup measures the ratio of performance between two objects
    • -
    • Versions of same code, with different number of processors
    • -
    • Serial and vector versions
    • -
    • Try different programming languages, c++ and Fortran
    • -
    • Two algorithms computing the same result
    • -
    -
    - - -









    -

    How do we define speedup? Correct baseline

    -
    - -

    -

    The key is choosing the correct baseline for comparison

    -
      -
    • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
    • -
        -
      • For parallel applications, this is much harder:
      • -
          -
        • Choice of algorithm, decomposition, performance of baseline case etc.
        • -
        -
      -
    -
    - - -









    -

    Parallel speedup

    -
    - -

    -

    For parallel applications, speedup is typically defined as

    -
      -
    • Speedup \( =T_1/T_p \)
    • -
    -

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

    -
      -
    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    • -
    -
    - - -









    -

    Speedup and memory

    -
    - -

    -

    The speedup on \( p \) processors can -be greater than \( p \) if memory usage is optimal! -Consider the case of a memory-bound computation with \( M \) words of memory -

    -
      -
    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • -
    • \( T_1 \) uses the main memory bandwidth
    • -
    • \( T_p \) uses the appropriate cache bandwidth
    • -
    -
    - - -









    -

    Upper bounds on speedup

    -
    - -

    -

    Assume that almost all parts of a code are perfectly -parallelizable (fraction \( f \)). The remainder, -fraction \( (1-f) \) cannot be parallelized at all. -

    - -

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take -time \( Wf/p \) on \( p \) processors. -

    -
      -
    • What is the maximum possible speedup as a function of \( f \)?
    • -
    -
    - - -









    -

    Amdahl's law

    -
    - -

    -

    On one processor we have

    -$$ -T_1 = (1-f)W + fW = W -$$ - -

    On \( p \) processors we have

    -$$ -T_p = (1-f)W + \frac{fW}{p}, -$$ - -

    resulting in a speedup of

    -$$ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -$$ - -

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    -$$ -\frac{1}{1-f}, -$$ - -

    meaning that -if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup -is \( 1/(1-0.99)=100 \)! -

    -
    - - -









    -

    How much is parallelizable

    -
    - -

    -

    If any non-parallel code slips into the -application, the parallel -performance is limited. -

    - -

    In many simulations, however, the fraction of non-parallelizable work -is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. -

    -
    - - -









    -

    Today's situation of parallel computing

    -
    - -

    - -

      -
    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • -
    • Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.
    • -
    • Modern nodes nowadays have several cores, which makes it interesting to use both shared memory (within a given node) and distributed memory (several nodes with communication). This often leads to codes which use both MPI and OpenMP.
    • -
    -

    Our lectures will focus on both MPI and OpenMP.

    -
    - - -









    -

    Overhead present in parallel computing

    -
    - -

    - -

      -
    • Uneven load balance: not all the processors can perform useful work at all times.
    • -
    • Overhead of synchronization
    • -
    • Overhead of communication
    • -
    • Extra computation due to parallelization
    • -
    -

    Due to the above overhead, and because certain parts of a sequential -algorithm cannot be parallelized, we may not achieve an optimal parallelization. -

    -
    - - -









    -

    Parallelizing a sequential algorithm

    -
    - -

    - -

      -
    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part.
    • -
    • Distribute the global work and data among \( P \) processors.
    • -
    -
    - - -









    -

    Strategies

    -
    - -

    -

      -
    • Develop codes locally, run with a few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • -
    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    • -
    -
    - - -









    -

    How do I run MPI on a PC/Laptop? MPI

    -
    - -

    -

    Installing MPI is rather easy on hardware running Unix/Linux as operating system; simply follow the instructions from the OpenMPI website. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -

    -
      -
    • Compile with mpicxx/mpic++ or mpif90
    • -
    - - -
    -
    -
    -
    -
    -
      # Compile and link
    -  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    -  #  run code with for example 8 processes using mpirun/mpiexec
    -  mpiexec -n 8 ./nameofprog.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Can I do it on my own PC/laptop? OpenMP installation

    -
    - -

    -

    If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: -

    - -
      -
    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • -
    • For OS X users however, install libomp
    • -
    - - -
    -
    -
    -
    -
    -
      brew install libomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and compile and link as

    - - -
    -
    -
    -
    -
    -
    c++ -o <name executable> <name program.cpp>  -lomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Installing MPI

    -
    - -

    -

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    - - -
    -
    -
    -
    -
    -
      sudo apt-get install libopenmpi-dev
    -  sudo apt-get install openmpi-bin
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew -

    - - -
    -
    -
    -
    -
    -
       brew install openmpi
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When running an executable (code.x), run as

    - - -
    -
    -
    -
    -
    -
      mpirun -n 10 ./code.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    where we indicate that we want the number of processes to be 10.

    -
    - - -









    -

    Installing MPI and using Qt

    -
    - -

    -

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    - -

    You may need to tell Qt where openmpi is stored.

    -
    - - -









    -

    What is Message Passing Interface (MPI)?

    -
    - -

    - -

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. -

    - -

    MPI programs should be able to run -on all possible machines and with all MPI implementations without change. -

    - -

    An MPI computation is a collection of processes communicating with messages.

    -
    - -









    -

    Going Parallel with MPI

    -
    - -

    -

    Task parallelism: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    MPI is a library

    -
    - -

    -

    MPI is a library specification for the message passing interface, -proposed as a standard. -

    - -
      -
    • independent of hardware;
    • -
    • not a language or compiler specification;
    • -
    • not a specific implementation or product.
    • -
    -

    A message passing standard for portability and ease-of-use. -Designed for high performance. -

    - -

    Insert communication and synchronization functions where necessary.

    -
    - - -









    -

    Bindings to MPI routines

    -
    - -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The discussion in these slides focuses on the C++ binding.

    -
    - - -









    -

    Communicator

    -
    - -

    -

      -
    • A group of MPI processes with a name (context).
    • -
    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • -
    • By default the communicator contains all the MPI processes.
    • -
    - - -
    -
    -
    -
    -
    -
      MPI_COMM_WORLD 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Mechanism to identify subset of processes.
    • -
    • Promotes modular design of parallel libraries.
    • -
    -
    - - -









    -

    Some of the most important MPI functions

    -
    - -

    - -

      -
    • \( MPI\_Init \) - initiate an MPI computation
    • -
    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • -
    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • -
    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • -
    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • -
    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • -
    • \( MPI\_Reduce \) or \( MPI\_Allreduce \), send and receive messages
    • -
    -
    - - -









    -

    The first MPI C/C++ program

    -
    - -

    - -

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    - - -
    -
    -
    -
    -
    -
    using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -int main (int nargs, char* args[])
    -{
    -int numprocs, my_rank;
    -//   MPI initializations
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -     << numprocs << endl;
    -//  End MPI
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    The Fortran program

    -
    - -

    - - -

    -
    -
    -
    -
    -
    PROGRAM hello
    -INCLUDE "mpif.h"
    -INTEGER:: size, my_rank, ierr
    -
    -CALL  MPI_INIT(ierr)
    -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    -CALL MPI_FINALIZE(ierr)
    -
    -END PROGRAM hello
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 1

    -
    - -

    - -

      -
    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • -
    • It is the operating system which opts for an ordering.
    • -
    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • -
    -
    - - -









    -

    Ordered output with \( MPI\_Barrier \)

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    int main (int nargs, char* args[])
    -{
    - int numprocs, my_rank, i;
    - MPI_Init (&nargs, &args);
    - MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    - for (i = 0; i < numprocs; i++) {}
    - MPI_Barrier (MPI_COMM_WORLD);
    - if (i == my_rank) {
    - cout << "Hello world, I have  rank " << my_rank << 
    -        " out of " << numprocs << endl;}
    -      MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 2

    -
    - -

    -

      -
    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • -
    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • -
    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • -
    -

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -

    -
    - - -









    -

    Ordered output

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    .....
    -int numprocs, my_rank, flag;
    -MPI_Status status;
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -if (my_rank > 0)
    -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    -           MPI_COMM_WORLD, &status);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -<< numprocs << endl;
    -if (my_rank < numprocs-1)
    -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    -          100, MPI_COMM_WORLD);
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 3

    -
    - -

    - -

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ -is defined as -

    - - -
    -
    -
    -
    -
    -
    int MPI_Send(void *buf, int count, 
    -             MPI_Datatype datatype, 
    -             int dest, int tag, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable buf is the variable we wish to send while count -is the number of variables we are passing. If we are passing only a single value, this should be 1. -

    - -

    If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) -since we are actually passing 100 values. -

    -
    - - -









    -

    Note 4

    -
    - -

    - -

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) -is similar to the send call. -

    - - -
    -
    -
    -
    -
    -
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    -            int source, 
    -            int tag, MPI_Comm comm, MPI_Status *status )
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The arguments that are different from those in \( MPI\_SEND \) are -buf, which is the name of the variable where you will be storing the received data, and -source, which replaces the destination in the send command. This is the rank (ID) of the sender. -

    - -

    Finally, we have used the \( MPI\_Status \) variable status, -where one can check if the receive was completed. -

    - -

    The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -

    -
    - - -









    -

    Numerical integration in parallel

    -
    -Integrating \( \pi \) -

    - -

      -
    • The code example computes \( \pi \) using the trapezoidal rule.
    • -
    • The trapezoidal rule
    • -
    -$$ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -$$ - -

    Click on this link for the full program.

    -
    - - -









    -

    Dissection of trapezoidal rule with \( MPI\_Reduce \)

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    //    Trapezoidal rule and numerical integration using MPI
    -using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -
    -//     Here we define various functions called by the main program
    -
    -double int_function(double );
    -double trapezoidal_rule(double , double , int , double (*)(double));
    -
    -//   Main function begins here
    -int main (int nargs, char* args[])
    -{
    -  int n, local_n, numprocs, my_rank; 
    -  double a, b, h, local_a, local_b, total_sum, local_sum;   
    -  double  time_start, time_end, total_time;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      //  MPI initializations
    -  MPI_Init (&nargs, &args);
    -  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -  time_start = MPI_Wtime();
    -  //  Fixed values for a, b and n 
    -  a = 0.0 ; b = 1.0;  n = 1000;
    -  h = (b-a)/n;    // h is the same for all processes 
    -  local_n = n/numprocs;  
    -  // make sure n > numprocs, else integer division gives zero
    -  // Length of each process' interval of
    -  // integration = local_n*h.  
    -  local_a = a + my_rank*local_n*h;
    -  local_b = local_a + local_n*h;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Integrating with MPI

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      total_sum = 0.0;
    -  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    -                               &int_function); 
    -  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    -              MPI_SUM, 0, MPI_COMM_WORLD);
    -  time_end = MPI_Wtime();
    -  total_time = time_end-time_start;
    -  if ( my_rank == 0) {
    -    cout << "Trapezoidal rule = " <<  total_sum << endl;
    -    cout << "Time = " <<  total_time  
    -         << " on number of processors: "  << numprocs  << endl;
    -  }
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  // end of main program
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    How do I use \( MPI\_Reduce \)?

    -
    - -

    - -

    Here we have used

    - - -
    -
    -
    -
    -
    -
    MPI_Reduce( void *senddata, void* resultdata, int count, 
    -     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable \( count \) represents the total dimensionality, 1 in case of just one variable, -while \( MPI\_Datatype \) -defines the type of variable which is sent and received. -

    - -

    The new feature is \( MPI\_Op \). It defines the type -of operation we want to do. -

    -
    - - -









    -

    More on \( MPI\_Reduce \)

    -
    - -

    -

    In our case, since we are summing -the rectangle contributions from every process we define \( MPI\_Op = MPI\_SUM \). -If we have an array or matrix we can search for the largest or smallest element by sending either \( MPI\_MAX \) or -\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer -\( MPI\_MAXLOC \) or \( MPI\_MINLOC \). If we want the product we write \( MPI\_PROD \). -

    - -

    \( MPI\_Allreduce \) is defined as

    - - -
    -
    -
    -
    -
    -
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    -          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - -

    We use \( MPI\_Reduce \) to collect data from each process. Note also the use of the function -\( MPI\_Wtime \). -

    - - -
    -
    -
    -
    -
    -
    //  this function defines the function to integrate
    -double int_function(double x)
    -{
    -  double value = 4./(1.+x*x);
    -  return value;
    -} // end of function to evaluate
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - - -

    -
    -
    -
    -
    -
    //  this function defines the trapezoidal rule
    -double trapezoidal_rule(double a, double b, int n, 
    -                         double (*func)(double))
    -{
    -  double trapez_sum;
    -  double fa, fb, x, step;
    -  int    j;
    -  step=(b-a)/((double) n);
    -  fa=(*func)(a)/2. ;
    -  fb=(*func)(b)/2. ;
    -  trapez_sum=0.;
    -  for (j=1; j <= n-1; j++){
    -    x=j*step+a;
    -    trapez_sum+=(*func)(x);
    -  }
    -  trapez_sum=(trapez_sum+fb+fa)*step;
    -  return trapez_sum;
    -}  // end trapezoidal_rule 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    The quantum dot program for two electrons

    -
    - -

    - - -

    -
    -
    -
    -
    -
    // Variational Monte Carlo for atoms with importance sampling, slater det
    -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    -#include "mpi.h"
    -#include <cmath>
    -#include <random>
    -#include <string>
    -#include <iostream>
    -#include <fstream>
    -#include <iomanip>
    -#include "vectormatrixclass.h"
    -
    -using namespace  std;
    -// output file as global variable
    -ofstream ofile;  
    -// the step length and its squared inverse for the second derivative 
    -//  Here we define global variables  used in various functions
    -//  These can be changed by using classes
    -int Dimension = 2; 
    -int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    -
    -// declaration of functions 
    -
    -// The Mc sampling for the variational Monte Carlo 
    -void  MonteCarloSampling(int, double &, double &, Vector &);
    -
    -// The variational wave function
    -double  WaveFunction(Matrix &, Vector &);
    -
    -// The local energy 
    -double  LocalEnergy(Matrix &, Vector &);
    -
    -// The quantum force
    -void  QuantumForce(Matrix &, Matrix &, Vector &);
    -
    -
    -// inline function for single-particle wave function
    -inline double SPwavefunction(double r, double alpha) { 
    -   return exp(-alpha*r*0.5);
    -}
    -
    -// inline function for derivative of single-particle wave function
    -inline double DerivativeSPwavefunction(double r, double alpha) { 
    -  return -r*alpha;
    -}
    -
    -// function for absolute value of relative distance
    -double RelativeDistance(Matrix &r, int i, int j) { 
    -      double r_ij = 0;  
    -      for (int k = 0; k < Dimension; k++) { 
    -	r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));
    -      }
    -      return sqrt(r_ij); 
    -}
    -
    -// inline function for derivative of Jastrow factor
    -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    -  return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));
    -}
    -
    -// function for square of position of single particle
    -double singleparticle_pos2(Matrix &r, int i) { 
    -    double r_single_particle = 0;
    -    for (int j = 0; j < Dimension; j++) { 
    -      r_single_particle  += r(i,j)*r(i,j);
    -    }
    -    return r_single_particle;
    -}
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    -
    -static double sqrarg;
    -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    -
    -
    -static double maxarg1,maxarg2;
    -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    -        (maxarg1) : (maxarg2))
    -
    -
    -// Begin of main program   
    -
    -int main(int argc, char* argv[])
    -{
    -
    -  //  MPI initializations
    -  int NumberProcesses, MyRank, NumberMCsamples;
    -  MPI_Init (&argc, &argv);
    -  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    -  double StartTime = MPI_Wtime();
    -  if (MyRank == 0 && argc <= 1) {
    -    cout << "Bad Usage: " << argv[0] << 
    -      " Read also output file on same line and number of Monte Carlo cycles" << endl;
    -  }
    -  // Read filename and number of Monte Carlo cycles from the command line
    -  if (MyRank == 0 && argc > 2) {
    -    string filename = argv[1]; // first command line argument after name of program
    -    NumberMCsamples  = atoi(argv[2]);
    -    string fileout = filename;
    -    string argument = to_string(NumberMCsamples);
    -    // Final filename as filename+NumberMCsamples
    -    fileout.append(argument);
    -    ofile.open(fileout);
    -  }
    -  // broadcast the number of  Monte Carlo samples
    -  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    -  // Two variational parameters only
    -  Vector VariationalParameters(2);
    -  int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; 
    -  // Loop over variational parameters
    -  for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){
    -    for (double beta = 0.1; beta <= 0.5; beta +=0.05){
    -      VariationalParameters(0) = alpha;  // value of alpha
    -      VariationalParameters(1) = beta;  // value of beta
    -      //  Do the mc sampling  and accumulate data with MPI_Reduce
    -      double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2;
    -      LocalProcessEnergy = LocalProcessEnergy2 = 0.0;
    -      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    -      //  Collect data in total averages
    -      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      // Print out results  in case of Master node, set to MyRank = 0
    -      if ( MyRank == 0) {
    -	double Energy = TotalEnergy/( (double)NumberProcesses);
    -	double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    -	double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error
    -	ofile << setiosflags(ios::showpoint | ios::uppercase);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    -	ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    -	ofile << setw(15) << setprecision(8) << Energy;
    -	ofile << setw(15) << setprecision(8) << Variance;
    -	ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    -      }
    -    }
    -  }
    -  double EndTime = MPI_Wtime();
    -  double TotalTime = EndTime-StartTime;
    -  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    -  if (MyRank == 0)  ofile.close();  // close output file
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  //  end of main function
    -
    -
    -// Monte Carlo sampling with the Metropolis algorithm  
    -
    -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    -{
    -
    - // Initialize the seed and call the Mersienne algo
    -  std::random_device rd;
    -  std::mt19937_64 gen(rd());
    -  // Set up the uniform distribution for x \in [[0, 1]
    -  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    -  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    -  // diffusion constant from Schroedinger equation
    -  double D = 0.5; 
    -  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    -  // allocate matrices which contain the position of the particles  
    -  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    -  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    -  double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 0.0;
    -  //  initial trial positions
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int j = 0; j < Dimension; j++) {
    -      OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep);
    -    }
    -  }
    -  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    -  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    -  // loop over monte carlo cycles 
    -  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ 
    -    // new position 
    -    for (int i = 0; i < NumberParticles; i++) { 
    -      for (int j = 0; j < Dimension; j++) {
    -	// gaussian deviate to compute new positions using a given timestep
    -	NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -	//	NewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;
    -      }  
    -      //  for the other particles we need to set the position to the old position since
    -      //  we move only one particle at the time
    -      for (int k = 0; k < NumberParticles; k++) {
    -	if ( k != i) {
    -	  for (int j = 0; j < Dimension; j++) {
    -	    NewPosition(k,j) = OldPosition(k,j);
    -	  }
    -	} 
    -      }
    -      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); 
    -      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    -      //  we compute the log of the ratio of the greens functions to be used in the 
    -      //  Metropolis-Hastings algorithm
    -      double GreensFunction = 0.0;            
    -      for (int j = 0; j < Dimension; j++) {
    -	GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    -	  (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    -      }
    -      GreensFunction = exp(GreensFunction);
    -      // The Metropolis test is performed by moving one particle at the time
    -      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { 
    -	for (int  j = 0; j < Dimension; j++) {
    -	  OldPosition(i,j) = NewPosition(i,j);
    -	  OldQuantumForce(i,j) = NewQuantumForce(i,j);
    -	}
    -	OldWaveFunction = NewWaveFunction;
    -      }
    -    }  //  end of loop over particles
    -    // compute local energy  
    -    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    -    // update energies
    -    Energy += DeltaE;
    -    EnergySquared += DeltaE*DeltaE;
    -  }   // end of loop over MC trials   
    -  // update the energy average and its squared 
    -  cumulative_e = Energy/NumberMCsamples;
    -  cumulative_e2 = EnergySquared/NumberMCsamples;
    -}   // end MonteCarloSampling function  
    -
    -
    -// Trial wave function: returns its value (not its square) at the particle configuration r; the quantum force is computed separately in QuantumForce
    -// r is indexed as r(particle, dimension) elsewhere in this file; VariationalParameters(0) enters the single-particle factors, VariationalParameters(1) the Jastrow factor
    -double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    -{
    -  double wf = 0.0;
    -  // product of the single-particle functions of particles 0 and 1 only (the author's "full Slater determinant for two particles"), replace with Slater det for more particles 
    -  wf  = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    -  // contribution from Jastrow factor: one factor exp(d/(1+VariationalParameters(1)*d)), d = RelativeDistance(r, i, j), per distinct pair i<j
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j))));
    -    }
    -  }
    -  return wf;
    -}
    -
    -// Function to calculate the local energy without numerical derivation of kinetic energy
    -// Returns KineticEnergy + PotentialEnergy evaluated at configuration r; VariationalParameters(0) is passed to the single-particle derivative, VariationalParameters(1) to the Jastrow terms
    -double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    -{
    -
    -  // compute the kinetic and potential energy from the single-particle part
    -  // for a many-electron system this has to be replaced by a Slater determinant
    -  // The absolute value of the interparticle length
    -  Matrix length( NumberParticles, NumberParticles);
    -  // Set up interparticle distance (filled symmetrically for i != j; diagonal entries are not assigned in this function)
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for(int j = i+1; j < NumberParticles; j++){
    -      length(i,j) = RelativeDistance(r, i, j);
    -      length(j,i) =  length(i,j);
    -    }
    -  }
    -  double KineticEnergy = 0.0;
    -  // Set up kinetic energy from Slater and Jastrow terms: first the squared first-derivative term for every particle i and coordinate k
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int k = 0; k < Dimension; k++) {
    -      double sum1 = 0.0; // accumulates JastrowDerivative over all partners j != i
    -      for(int j = 0; j < NumberParticles; j++){
    -	if ( j != i) {
    -	  sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));
    -    }
    -  }
    -  KineticEnergy += -2*VariationalParameters(0)*NumberParticles; // NOTE(review): the literal 2 is presumably the spatial dimension (i.e. assumes Dimension == 2) - confirm
    -  for (int i = 0; i < NumberParticles-1; i++) {
    -      for (int j = i+1; j < NumberParticles; j++) {
    -        KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) ); // pair term; NOTE(review): presumably the second-derivative contribution of the Jastrow factor - confirm
    -      }
    -  }
    -  KineticEnergy *= -0.5; // overall factor -1/2 applied to all derivative terms accumulated above
    -  // Set up potential energy, external potential + eventual electron-electron repulsion
    -  double PotentialEnergy = 0;
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    double DistanceSquared = singleparticle_pos2(r, i);
    -    PotentialEnergy += 0.5*DistanceSquared;  // sp energy HO part, note it has the oscillator frequency set to 1!
    -  }
    -  // Add the electron-electron repulsion, 1/length(i,j) once per distinct pair
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      PotentialEnergy += 1.0/length(i,j);          
    -    }
    -  }
    -  double LocalE = KineticEnergy+PotentialEnergy;
    -  return LocalE;
    -}
    -
    -// Compute the analytical expression for the quantum force; overwrites qforce(i,k) for every particle i and coordinate k
    -void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    -{
    -  // compute the first derivative 
    -  for (int i = 0; i < NumberParticles; i++) {
    -    for (int k = 0; k < Dimension; k++) {
    -      // single-particle part, replace with Slater det for larger systems
    -      double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0));
    -      //  Jastrow factor contribution, summed over all partners j != i
    -      double Jsum = 0.0;
    -      for (int j = 0; j < NumberParticles; j++) {
    -	if ( j != i) {
    -	  Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      qforce(i,k) = 2.0*(Jsum+sppart); // twice the sum of the Jastrow and single-particle derivative terms
    -    }
    -  }
    -} // end of QuantumForce function
    -
    -
    -#define ITMAX 200 // maximum number of passes through the main loop of dfpmin
    -#define EPS 3.0e-8 // factor in the fac*fac > EPS*sumdg*sumxi test that guards the matrix update; also defines TOLX
    -#define TOLX (4*EPS) // convergence tolerance on the relative change of p between iterations
    -#define STPMX 100.0 // scale factor for the maximum step length handed to lnsrch
    -// Quasi-Newton minimisation of func, started from p (n components). On return p holds the last accepted point, *fret the function value reported by lnsrch and *iter the iteration count; gtol is the tolerance of the gradient test. func evaluates the function, dfunc writes its gradient into g.
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    -{
    -// note: check is filled in by lnsrch but never examined in this function
    -  int check,i,its,j;
    -  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    -  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    -  Matrix hessian(n,n);
    -
    -  fp=(*func)(p);
    -  (*dfunc)(p,g);
    -  for (i = 0;i < n;i++) { // start from the identity matrix and the direction xi = -g
    -    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    -    hessian(i,i)=1.0;
    -    xi(i) = -g(i);
    -    sum += p(i)*p(i);
    -  }
    -  stpmax=STPMX*FMAX(sqrt(sum),(double)n);
    -  for (its=1;its<=ITMAX;its++) {
    -    *iter=its;
    -    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func); // line search along xi; new point in pnew, its function value in *fret
    -    fp = *fret;
    -    for (i = 0; i< n;i++) { // xi becomes the step actually taken, p the new point
    -      xi(i)=pnew(i)-p(i);
    -      p(i)=pnew(i);
    -    }
    -    test=0.0; // first exit test: largest relative change in any component of p
    -    for (i = 0;i< n;i++) {
    -      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);
    -      if (temp > test) test=temp;
    -    }
    -    if (test < TOLX) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i); // keep the old gradient before recomputing it
    -    (*dfunc)(p,g);
    -    test=0.0; // second exit test: scaled size of the new gradient
    -    den=FMAX(*fret,1.0);
    -    for (i=0;i<n;i++) {
    -      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den;
    -      if (temp > test) test=temp;
    -    }
    -    if (test < gtol) {
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i)-dg(i); // dg is now the change in the gradient
    -    for (i=0;i<n;i++) { // hdg = hessian * dg
    -      hdg(i)=0.0;
    -      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j);
    -    }
    -    fac=fae=sumdg=sumxi=0.0;
    -    for (i=0;i<n;i++) {
    -      fac += dg(i)*xi(i);
    -      fae += dg(i)*hdg(i);
    -      sumdg += SQR(dg(i));
    -      sumxi += SQR(xi(i));
    -    }
    -    if (fac*fac > EPS*sumdg*sumxi) { // the matrix update is skipped when this test fails
    -      fac=1.0/fac;
    -      fad=1.0/fae;
    -      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i);
    -      for (i=0;i<n;i++) {
    -	for (j=0;j<n;j++) { // NOTE(review): update has the form of the BFGS inverse-Hessian formula as in Numerical Recipes' dfpmin - confirm
    -	  hessian(i,j) += fac*xi(i)*xi(j)
    -	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j);
    -	}
    -      }
    -    }
    -    for (i=0;i<n;i++) { // next search direction: xi = -hessian * g
    -      xi(i)=0.0;
    -      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j);
    -    }
    -  }
    -  cout << "too many iterations in dfpmin" << endl; // reached only when ITMAX passes did not meet either exit test; the function then returns normally
    -}
    -#undef ITMAX
    -#undef EPS
    -#undef TOLX
    -#undef STPMX
    -
    -#define ALF 1.0e-4
    -#define TOLX 1.0e-7
    -
    -// Backtracking line search used by dfpmin.
    -//   n       number of components in the vectors
    -//   xold    starting point, fold the function value there, g the gradient there
    -//   p       search direction; rescaled in place if its length exceeds stpmax
    -//   x       output: the accepted point xold + alam*p (or xold itself if the step became too small)
    -//   f       output: function value at the last trial point
    -//   check   output: 1 if the step length fell below the minimum alamin, 0 otherwise
    -//   func    function to minimise
    -// A step is accepted as soon as *f <= fold + ALF*alam*slope.
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -            double *f, double stpmax, int *check, double (*func)(Vector &p))
    -{
    -  int i;
    -  // alam2, f2 and fold2 carry the previous trial over to the next pass; they are
    -  // only read after the first pass has set them, but are initialised anyway.
    -  double a,alam,alam2=0.0,alamin,b,disc,f2=0.0,fold2=0.0,rhs1,rhs2,slope,sum,temp,
    -    test,tmplam=0.0;
    -
    -  *check=0;
    -  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    -  sum=sqrt(sum);
    -  // limit the length of the attempted step
    -  if (sum > stpmax)
    -    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    -  for (slope=0.0,i=0;i<n;i++)
    -    slope += g(i)*p(i);
    -  test=0.0;
    -  for (i=0;i<n;i++) {
    -    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    -    if (temp > test) test=temp;
    -  }
    -  alamin=TOLX/test;
    -  alam=1.0;   // always try the full step first
    -  for (;;) {
    -    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    -    *f=(*func)(x);
    -    if (alam < alamin) {
    -      // step too small: give back the starting point and flag it
    -      for (i=0;i<n;i++) x(i)=xold(i);
    -      *check=1;
    -      return;
    -    } else if (*f <= fold+ALF*alam*slope) return;
    -    else {
    -      if (alam == 1.0)
    -        // first backtrack: minimiser of the quadratic model
    -        tmplam = -slope/(2.0*(*f-fold-slope));
    -      else {
    -        // later backtracks: minimiser of the cubic model through the last two trials
    -        rhs1 = *f-fold-alam*slope;
    -        rhs2=f2-fold2-alam2*slope;
    -        a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    -        b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    -        if (a == 0.0) tmplam = -slope/(2.0*b);
    -        else {
    -          disc=b*b-3.0*a*slope;
    -          if (disc<0.0) {
    -            cout << "Roundoff problem in lnsrch." << endl;
    -            // previously tmplam kept its stale value from the preceding pass;
    -            // fall back to halving the step instead
    -            tmplam=0.5*alam;
    -          }
    -          else tmplam=(-b+sqrt(disc))/(3.0*a);
    -        }
    -        if (tmplam>0.5*alam)
    -          tmplam=0.5*alam;
    -      }
    -    }
    -    alam2=alam;
    -    f2 = *f;
    -    fold2=fold;
    -    alam=FMAX(tmplam,0.1*alam);   // never shrink by more than a factor of ten
    -  }
    -}
    -#undef ALF
    -#undef TOLX
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    What is OpenMP

    -
    - -

    -

      -
    • OpenMP provides high-level thread programming
    • -
    • Multiple cooperating threads are allowed to run simultaneously
    • -
    • Threads are created and destroyed dynamically in a fork-join pattern
    • -
        -
      • An OpenMP program consists of a number of parallel regions
      • -
      • Between two parallel regions there is only one master thread
      • -
      • In the beginning of a parallel region, a team of new threads is spawned
      • -
      -
    • The newly spawned threads work simultaneously with the master thread
    • -
    • At the end of a parallel region, the new threads are destroyed
    • -
    -

    Many good tutorials are available online, as well as an excellent textbook

    -
      -
    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. -
    3. Many tutorials online like OpenMP official site
    4. -
    -
    - - -









    -

    Getting started, things to remember

    -
    - -

    -

      -
    • Remember the header file
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Insert compiler directives in C++ syntax as
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp...
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Compile with for example c++ -fopenmp code.cpp
    • -
    • Execute
    • -
        -
      • Remember to assign the environment variable OMP_NUM_THREADS
      • -
      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      • -
      -
    -
    - - -









    -

    OpenMP syntax

    -
      -
    • Mostly directives
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp construct [ clause ...]
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Some functions and types
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Most apply to a block of code
    • -
    • Specifically, a structured block
    • -
    • Enter at top, exit at bottom only, exit(), abort() permitted
    • -
    -









    -

    Different OpenMP styles of parallelism

    -

    OpenMP supports several different ways to specify thread parallelism

    - -
      -
    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • -
    • Parallel loops: Special case for loops, simplifies data parallel code
    • -
    • Task parallelism, new in OpenMP 3
    • -
    • Several ways to manage thread coordination, including Master regions and Locks
    • -
    • Memory model for shared data
    • -
    -









    -

    General code structure

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -/* skeleton of an OpenMP program: serial code interleaved with parallel regions */
    -int main ()
    -{
    -int var1, var2, var3;
    -/* serial code */
    -/* ... */
    -/* start of a parallel region: every thread gets its own var1 and var2, var3 is shared */
    -#pragma omp parallel private(var1, var2) shared(var3)
    -{
    -/* ... */
    -}
    -/* more serial code */
    -/* ... */
    -/* another parallel region */
    -#pragma omp parallel
    -{
    -/* ... */
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel region

    -
    - -

    -

      -
    • A parallel region is a block of code that is executed by a team of threads
    • -
    • The following compiler directive creates a parallel region
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added at the end of the directive
    • -
    • Most often used clauses:
    • -
        -
      • default(shared) or default(none)
      • -
      • shared(list of variables)
      • -
      • private(list of variables)
      • -
      -
    -
    - - -









    -

    Hello world, not again, please!

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#include <cstdio>
    -int main (int argc, char *argv[])
    -{
    -int th_id, nthreads;
    -#pragma omp parallel private(th_id) shared(nthreads)
    -{
    -th_id = omp_get_thread_num();
    -printf("Hello World from thread %d\n", th_id);
    -#pragma omp barrier
    -if ( th_id == 0 ) {
    -nthreads = omp_get_num_threads();
    -printf("There are %d threads\n",nthreads);
    -}
    -}
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Hello world, yet another variant

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <cstdio>
    -#include <iostream>
    -#include <omp.h>
    -using namespace std;
    -// Each thread of a team of four prints its own thread id followed by the team size
    -int main(int argc, char *argv[]) 
    -{
    - omp_set_num_threads(4); 
    -#pragma omp parallel
    - {
    -   // declared inside the parallel region, so every thread has its own copy
    -   int id = omp_get_thread_num();
    -   int nproc = omp_get_num_threads(); 
    -   cout << "Hello world with id number and processes " <<  id << " " <<  nproc << endl;
    - } 
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Variables declared outside of the parallel region are shared by all threads. -If a variable like id is declared outside of the -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel, 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    it would have been shared by the various threads, possibly causing erroneous output

    -
      -
    • Why? What would go wrong? Why do we add possibly?
    • -
    -
    - - -









    -

    Important OpenMP library routines

    -
    - -

    - -

      -
    • int omp_get_num_threads(), returns the number of threads inside a parallel region
    • -
    • int omp_get_thread_num(), returns the thread number (ID) of the calling thread inside a parallel region
    • -
    • void omp_set_num_threads(int), sets the number of threads to be used
    • -
    • void omp_set_nested(int), turns nested parallelism on/off
    • -
    -
    - - -









    -

    Private variables

    -
    - -

    -

    The private clause can be used to make thread-private versions of such variables:

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel private(id)
    -{
    - int id = omp_get_thread_num();
    - cout << "My thread num" << id << endl; 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • What is their value on entry? Exit?
    • -
    • OpenMP provides ways to control that
    • -
    • Can use default(none) to require the sharing of each variable to be described
    • -
    -
    - - -









    -

    Master region

    -
    - -

    -

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel 
    -{
    -  #pragma omp master
    -   {
    -      int id = omp_get_thread_num();
    -      cout << "My thread num" << id << endl; 
    -   } 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel for loop

    -
    - -

    -

      -
    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added, such as
    • -
        -
      • schedule(static, chunk size)
      • -
      • schedule(dynamic, chunk size)
      • -
      • schedule(guided, chunk size) (non-deterministic allocation)
      • -
      • schedule(runtime)
      • -
      • private(list of variables)
      • -
      • reduction(operator:variable)
      • -
      • nowait
      • -
      -
    -
    - - -









    -

    Parallel computations and loops

    - -
    - -

    -

    OpenMP provides an easy way to parallelize a loop

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -  for (i=0; i<n; i++) c[i] = a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    OpenMP makes the loop index variable private automatically (no need to declare it in the for loop or list it in a private clause)

    - -

    Which thread does which values? Several options.

    -
    - - -









    -

    Scheduling of loop computations

    - -
    - -

    -

    We can let the OpenMP runtime decide. The decision is about how the loop iterations are scheduled -and OpenMP defines three choices of loop scheduling: -

    -
      -
    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. -
    3. Dynamic: Selection made at runtime
    4. -
    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. -
    -
    - - -









    -

    Example code for loop scheduling

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    -chunk = CHUNKSIZE;
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(dynamic,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Example code for loop scheduling, guided instead of dynamic

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;
    -chunk = CHUNKSIZE;
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(guided,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    More on Parallel for loop

    -
    - -

    -

      -
    • The number of loop iterations cannot be non-deterministic; break, return, exit, goto not allowed inside the for-loop
    • -
    • The loop index is private to each thread
    • -
    • A reduction variable is special
    • -
        -
      • During the for-loop there is a local private copy in each thread
      • -
      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • -
      -
    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • -
    - - -
    -
    -
    -
    -
    -
    // #pragma omp parallel and #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    can be combined into

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    What can happen with this loop?

    - -
    - -

    -

    What happens with code like this

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    - - -
    -
    -
    -
    -
    -
    sum = 0.0;
    -#pragma omp parallel for reduction(+:sum)
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Inner product

    -
    - -

    -$$ -\sum_{i=0}^{n-1} a_ib_i -$$ - - - -

    -
    -
    -
    -
    -
    int i;
    -double sum = 0.;
    -/* allocating and initializing arrays */
    -/* ... */
    -/* every thread accumulates into a private copy of sum; the copies are added together at the end of the loop */
    -#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    - for (i=0; i<N; i++) sum += a[i]*b[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Different threads do different tasks

    -
    - -

    - -

    Different threads do different tasks independently, each section is executed by one thread.

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -#pragma omp sections
    -{
    -#pragma omp section
    -funcA ();
    -#pragma omp section
    -funcB ();
    -#pragma omp section
    -funcC ();
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Single execution

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp single { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The code is executed by one thread only, no guarantee which thread

    - -

    There is an implicit barrier at the end (unless the nowait clause is used)

    - - -
    -
    -
    -
    -
    -
    #pragma omp master { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    -
    - - -









    -

    Coordination and synchronization

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp barrier
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Synchronization, must be encountered by all threads in a team (or none)

    - - -
    -
    -
    -
    -
    -
    #pragma omp ordered { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is another form of synchronization (in sequential order). -The form -

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and

    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic { single assignment statement }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is more efficient than

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Data scope

    -
    - -

    -

      -
    • OpenMP data scope attribute clauses:
    • -
        -
      • shared
      • -
      • private
      • -
      • firstprivate
      • -
      • lastprivate
      • -
      • reduction
      • -
      -
    -

    What are the purposes of these attributes

    -
      -
    • define how and which variables are transferred to a parallel region (and back)
    • -
    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    • -
    -
    - - -









    -

    Some remarks

    -
    - -

    - -

      -
    • When entering a parallel region, the private clause ensures that each thread has its own new instance of each listed variable. The new variables are assumed to be uninitialized.
    • -
    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • -
    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • -
    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    • -
    -
    - - -









    -

    Parallelizing nested for-loops

    -
    - -

    - -

      -
    • Serial code
    • -
    - - -
    -
    -
    -
    -
    -
    for (i=0; i<100; i++) {
    -    for (j=0; j<100; j++) {
    -        a[i][j] = b[i][j] + c[i][j];   /* element-wise sum of b and c */
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Parallelization
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for private(j)
    -for (i=0; i<100; i++) {   /* rows are distributed among the threads */
    -    for (j=0; j<100; j++) {   /* j is private, so each thread runs its own inner loop */
    -       a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Why not parallelize the inner loop? To save the overhead of repeated thread fork-joins
    • -
    • Why must j be private? To avoid race condition among the threads
    • -
    -
    - - -









    -

    Nested parallelism

    -
    - -

    -

    When a thread in a parallel region encounters another parallel construct, it -may create a new team of threads and become the master of the new -team. -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel num_threads(4)
    -{
    -/* .... */
    -#pragma omp parallel num_threads(2)
    -{
    -//  
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel tasks

    -
    - -

    - - -

    -
    -
    -
    -
    -
    // #pragma omp task: one thread generates the tasks, the whole team executes them
    -#pragma omp parallel shared(p_vec) private(i)
    -{
    -#pragma omp single
    -{
    -for (i=0; i<N; i++) {
    -  double r = random_number();
    -  if (p_vec[i] > r) {
    -#pragma omp task
    -   do_work (p_vec[i]);
    -  }
    -}
    -} // end of single region
    -} // end of parallel region
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Common mistakes

    -
    - -

    -

    Race condition

    - - -
    -
    -
    -
    -
    -
    int nthreads;
    -#pragma omp parallel shared(nthreads)
    -{
    -nthreads = omp_get_num_threads();
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Deadlock

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -...
    -#pragma omp critical
    -{
    -...
    -#pragma omp barrier
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Not all computations are simple

    -
    - -

    -

    Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads -

    - -

    An example is finding the location and value of the largest element in an array

    - - -
    -
    -
    -
    -
    -
    for (i=0; i<n; i++) { 
    -   if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -   }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Not all computations are simple, competing threads

    -
    - -

    -

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    -
      -
    1. OpenMP provides several ways to coordinate access to shared values
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following block
    2. -
    -

    Atomic may be faster than critical but depends on hardware

    -
    - - -









    -

    How to find the max value using OpenMP

    -
    - -

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -    if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Then deal with the race conditions

    -
    - -

    -

    The parallel loop from the previous step has a race condition: several threads may compare and update maxval and maxloc at the same time. -A first remedy is to protect the comparison and the update with a critical section -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -#pragma omp critical
    -  {
    -     if (x[i] > maxval) {
    -       maxval = x[i];
    -       maxloc = i; 
    -     }
    -  }
    -} 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Exercise: write a code which implements this and give an estimate on performance. Perform several runs, -with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different architectures if you can. -

    -
    - -









    -

    What can slow down OpenMP performance?

    -

    Give it a thought!

    - -









    -

    What can slow down OpenMP performance?

    -
    - -

    -

    Performance is poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    -
      -
    • We do not care about the value during the execution of the loop, just the value at the end.
    • -
    -

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties

    - -Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread -
    - - -









    -

    Find the max location for each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    int maxloc[MAX_THREADS], mloc;
    -double maxval[MAX_THREADS], mval; 
    -// Each thread searches its share of the loop iterations and records the
    -// largest value it has seen, and where, in its own slot of maxval/maxloc.
    -// No critical section is needed since no two threads write the same slot.
    -#pragma omp parallel shared(maxval,maxloc)
    -{
    -  int id = omp_get_thread_num(); 
    -  maxval[id] = -1.0e30;
    -  // -1 marks "nothing found", so a thread that is handed no iterations
    -  // does not leave an uninitialized index behind
    -  maxloc[id] = -1;
    -#pragma omp for
    -   for (int i=0; i<n; i++) {
    -       if (x[i] > maxval[id]) { 
    -           maxloc[id] = i;
    -           maxval[id] = x[i]; 
    -       }
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Combine the values from each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp flush (maxloc,maxval)
    -// Combine the per-thread results: only the master thread scans the
    -// per-thread slots and keeps the overall largest value and its location.
    -// NOTE(review): omp_get_num_threads() returns the size of the team of the
    -// enclosing parallel region (1 when called outside of one), so this block
    -// presumably belongs inside the parallel region that filled maxval/maxloc,
    -// after the worksharing loop has completed - confirm the placement.
    -#pragma omp master
    -  {
    -    int nt = omp_get_num_threads(); 
    -    // start from the result of thread 0 ...
    -    mloc = maxloc[0]; 
    -    mval = maxval[0]; 
    -    // ... and replace it whenever another thread found a larger value
    -    for (int i=1; i<nt; i++) {
    -        if (maxval[i] > mval) { 
    -           mval = maxval[i]; 
    -           mloc = maxloc[i];
    -        } 
    -     }
    -   }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Note that we let the master thread perform the last operation.

    -
    - -









    -

    Matrix-matrix multiplication

    -

    This code computes the norm of a vector using OpenMP

    - - -
    -
    -
    -
    -
    -
    //  OpenMP program: add two vectors, c = a + b, and compute the norm-2 of c
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of vector, refusing a missing or non-positive value
    -  if (argc < 2) {
    -    cerr << "Usage: " << argv[0] << " n" << endl;
    -    return 1;
    -  }
    -  int n = atoi(argv[1]);
    -  if (n <= 0) {
    -    cerr << "  The dimension n must be a positive integer." << endl;
    -    return 1;
    -  }
    -  double *a, *b, *c;
    -  int i;
    -  int thread_num;
    -  double wtime, Norm2, s, angle;
    -  cout << "  Perform addition of two vectors and compute the norm-2." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // The reduction variable must hold a defined value before the region starts
    -  Norm2 = 0.0;
    -  // Define parallel region; each of the three loops below is a worksharing
    -  // loop, and the implicit barrier at the end of each loop guarantees that
    -  // a loop only starts when the previous one has completed on all threads
    -# pragma omp parallel default(shared) private (angle, i)
    -  {
    -    // Set up values for vectors  a and b
    -# pragma omp for
    -    for (i = 0; i < n; i++){
    -        angle = 2.0*M_PI*i/ (( double ) n);
    -        a[i] = s*(sin(angle) + cos(angle));
    -        b[i] =  s*sin(2.0*angle);
    -        c[i] = 0.0;
    -    }
    -    // Then perform the vector addition
    -# pragma omp for
    -    for (i = 0; i < n; i++){
    -       c[i] += a[i]+b[i];
    -    }
    -    // Compute now the sum of squares, accumulated per thread and then combined
    -# pragma omp for reduction(+:Norm2)
    -    for (i = 0; i < n; i++){
    -       Norm2  += c[i]*c[i];
    -    }
    -  }
    -// end parallel region
    -  // the norm-2 is the square root of the sum of squares
    -  Norm2 = sqrt(Norm2);
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm-2 computation=" << wtime  << endl;
    -  cout << " Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain C++ memory allocation using OpenMP

    - - - -
    -
    -
    -
    -
    -
    //  Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix, refusing a missing or non-positive value
    -  if (argc < 2) {
    -    cerr << "Usage: " << argv[0] << " n" << endl;
    -    return 1;
    -  }
    -  int n = atoi(argv[1]);
    -  if (n <= 0) {
    -    cerr << "  The matrix order n must be a positive integer." << endl;
    -    return 1;
    -  }
    -  double **A, **B, **C;
    -  int i, j, k;
    -  int thread_num;
    -  double wtime, Fsum, s, angle;
    -  cout << "  Compute matrix product C = A * B and Frobenius norm." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // The reduction variable must hold a defined value before the region starts
    -  Fsum = 0.0;
    -  // Define parallel region; the outer loop over rows of each of the three
    -  // loop nests is shared among the threads, and the implicit barrier at the
    -  // end of each worksharing loop guarantees that A and B are complete
    -  // before the multiplication, and C before the norm
    -# pragma omp parallel default(shared) private (angle, i, j, k)
    -  {
    -    // Set up values for matrix A and B and zero matrix C
    -# pragma omp for
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -        angle = 2.0*M_PI*i*j/ (( double ) n);
    -        A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -        B[j][i] =  A[i][j];
    -      }
    -    }
    -    // Then perform the matrix-matrix multiplication
    -# pragma omp for
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -         C[i][j] =  0.0;
    -         for (k = 0; k < n; k++) {
    -              C[i][j] += A[i][k]*B[k][j];
    -         }
    -      }
    -    }
    -    // Compute now the Frobenius norm, summing the squares per thread
    -# pragma omp for reduction(+:Fsum)
    -    for (i = 0; i < n; i++){
    -      for (j = 0; j < n; j++) {
    -        Fsum += C[i][j]*C[i][j];
    -      }
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -// end parallel region and letting only one thread perform I/O
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << wtime  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -
    - © 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license -
    - - - diff --git a/doc/src/week9/week9.dlog b/doc/src/week9/week9.dlog deleted file mode 100644 index e45f9847..00000000 --- a/doc/src/week9/week9.dlog +++ /dev/null @@ -1,175 +0,0 @@ -Translating doconce text in week9.do.txt to html -*** replacing \bm{...} by \boldsymbol{...} (\bm is not supported by MathJax) -*** warning: math block in HTML must have space around <: -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{i}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j - - - -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking - - - - - - - - - - - - - - -
    -

    Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking

    -
    - - -
    -Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no [1, 2] -
    - -
    -[1] Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway -
    -
    -[2] Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA -
    -
    -
    -

    March 11-15

    -
    -
    - -









    -

    Overview of week 11, March 11-15

    -
    -Topics -

    -

      -
    1. Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week
    2. -
    3. Resampling Techniques, emphasis on Blocking
    4. -
    5. Discussion of onebody densities (whiteboard notes)
    6. -
    7. Start discussion on optimization and parallelization for Python and C++ - -
    8. -
    -
    - - -

    Note, these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week.

    - -









    -

    Why resampling methods ?

    -
    -Statistical analysis -

    -

      -
    • Our simulations can be treated as computer experiments. This is particularly the case for Monte Carlo methods
    • -
    • The results can be analysed with the same statistical tools as we would use analysing experimental data.
    • -
    • As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors.
    • -
    -
    - - -









    -

    Statistical analysis

    -
    - -

    -

      -
    • As in other experiments, many numerical experiments have two classes of errors: -
        -
      1. Statistical errors
      2. -
      3. Systematical errors
      4. -
      -
    • Statistical errors can be estimated using standard tools from statistics
    • -
    • Systematical errors are method specific and must be treated differently from case to case.
    • -
    -
    - - -









    -

    And why do we use such methods?

    - -

    As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and the thereby -the standard deviation of a given expectation value. -

    - -

    The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem. -

    - -









    -

    Central limit theorem

    - -

    Last week we derived the central limit theorem with the following assumptions:

    - -
    -Measurement \( i \) -

    -

    We assumed that each individual measurement \( x_{ij} \) is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment \( i \) with \( n \) samples as -

    -$$ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}. -$$ - -

    and the sample variance

    -$$ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -$$ -
    - - -









    -

    Further remarks

    - -

    Note that we use \( n \) instead of \( n-1 \) in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. -

    - -









    -

    Running many measurements

    - -
    -Adding \( m \) measurements \( i \) -

    -

    With the assumption that the average measurements \( i \) are also defined as iid stochastic variables and have the same probability function \( p \), -we defined the total average over \( m \) experiments as -

    -$$ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}. -$$ - -

    and the total variance

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -$$ -
    - -

    These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit \( m\rightarrow \infty \), the distribution for \( \overline{X} \) is given by a Gaussian distribution with variance \( \sigma^2_m \).

    - -









    -

    Adding more definitions

    - -

    The total sample variance over the \( mn \) measurements is defined as

    -$$ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2. -$$ - -

    We have from the equation for \( \sigma_m^2 \)

    -$$ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -$$ - -

    and introducing the centered value \( \tilde{x}_{ij}=x_{ij}-\overline{X} \), we can rewrite \( \sigma_m^2 \) as

    -$$ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -$$ - - -









    -

    Further rewriting

    - -

    We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements

    -$$ -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term on the last rhs is nothing but the total sample variance \( \sigma^2 \) divided by \( m \). The second term represents the covariance.

    - -









    -

    The covariance term

    - -

    Using the definition of the total sample variance we have

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{m}+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} -$$ - -

    The first term is what we have used till now in order to estimate the -standard deviation. However, the second term, which gives us a measure -of the correlations between different stochastic events, can result in -contributions which give rise to a larger standard deviation and -variance \( \sigma_m^2 \). Note also that the evaluation of the second term -leads to a double sum over all events. If we run a VMC calculation -with say \( 10^9 \) Monte Carlo samples, the latter term would lead to -\( 10^{18} \) function evaluations. For obvious reasons, we do not want to venture into that many evaluations. -

    - -

    Note also that if our stochastic events are iid then the covariance term is zero.

    - -









    -

    Rewriting the covariance term

    - -

    We introduce now a variable \( d=\vert j-k\vert \) and rewrite

    -$$ -\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j < k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}, -$$ - -

    in terms of a function

    -$$ -f_d=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n-d}\tilde{x}_{ik}\tilde{x}_{i(k+d)}. -$$ - -

    We note that for \( d=0 \) we have

    -$$ -f_0=\frac{1}{mn}\sum_{i=1}^{m} \sum_{k=1}^{n}\tilde{x}_{ik}\tilde{x}_{i(k)}=\sigma^2! -$$ - - -









    -

    Introducing the correlation function

    - -

    We introduce then a correlation function \( \kappa_d=f_d/\sigma^2 \). Note that \( \kappa_0 =1 \). We rewrite the variance \( \sigma_m^2 \) as

    -$$ -\begin{align*} -\sigma^2_{m}& = \frac{\sigma^2}{m}\left[1+2\sum_{d=1}^{n-1} \kappa_d\right]. -\end{align*} -$$ - -

    The code here shows the evolution of \( \kappa_d \) as a function of \( d \) for a series of random numbers. We see that the function \( \kappa_d \) approaches \( 0 \) as \( d\rightarrow \infty \).

    - -

    Note: code will be inserted here later.

    - -









    -

    Resampling methods: Blocking

    - -

    The blocking method was made popular by Flyvbjerg and Petersen (1989) -and has become one of the standard ways to estimate the variance -\( \mathrm{var}(\widehat{\theta}) \) for exactly one estimator \( \widehat{\theta} \), namely -\( \widehat{\theta} = \overline{X} \), the mean value. -

    - -

    Assume \( n = 2^d \) for some integer \( d>1 \) and \( X_1,X_2,\cdots, X_n \) is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging \( X_1,X_2,\cdots,X_n \) in an \( n \)-tuple. Define: -

    -$$ -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} -$$ - - -









    -

    Why blocking?

    - -

    The strength of the blocking method is when the number of -observations, \( n \) is large. For large \( n \), the complexity of dependent -bootstrapping scales poorly, but the blocking method does not, -moreover, it becomes more accurate the larger \( n \) is. -

    - -









    -

    Blocking Transformations

    -

    We now define the blocking transformations. The idea is to take the mean of subsequent -pairs of elements from \( \boldsymbol{X} \) and form a new vector -\( \boldsymbol{X}_1 \). Continuing in the same way by taking the mean of -subsequent pairs of elements of \( \boldsymbol{X}_1 \) we obtain \( \boldsymbol{X}_2 \), and -so on. -Define \( \boldsymbol{X}_i \) recursively by: -

    - -$$ -\begin{align} -(\boldsymbol{X}_0)_k &\equiv (\boldsymbol{X})_k \nonumber \\ -(\boldsymbol{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\boldsymbol{X}_i)_{2k-1} + -(\boldsymbol{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\label{_auto1} -\end{align} -$$ - - -









    -

    Blocking transformations

    - -

    The quantity \( \boldsymbol{X}_k \) is -subject to \( k \) blocking transformations. We now have \( d \) vectors -\( \boldsymbol{X}_0, \boldsymbol{X}_1,\cdots,\boldsymbol{X}_{d-1} \) containing the subsequent -averages of observations. It turns out that if the components of -\( \boldsymbol{X} \) form a stationary time series, then the components of -\( \boldsymbol{X}_i \) form a stationary time series for all \( 0 \leq i \leq d-1 \). -

    - -

    We can then compute the autocovariance, the variance, sample mean, and -number of observations for each \( i \). -Let \( \gamma_i, \sigma_i^2, -\overline{X}_i \) denote the covariance, variance and average of the -elements of \( \boldsymbol{X}_i \) and let \( n_i \) be the number of elements of -\( \boldsymbol{X}_i \). It follows by induction that \( n_i = n/2^i \). -

    - -









    -

    Blocking Transformations

    - -

    Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since \( h =|i-j| \) -we can define -

    -$$ -\begin{align} -\gamma_{k+1}(h) &= cov\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}cov\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \hspace{0.1cm} \mathrm{h = 0} -\label{_auto2}\\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \mathrm{else} -\label{_auto3} -\end{align} -$$ - -

    Since the quantity \( \hat{X} \) is asymptotically uncorrelated by assumption, \( \hat{X}_k \) is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean \( \mathrm{var}(\overline{X}) \). -

    - -









    -

    Blocking Transformations, getting there

    -

    We have

    -$$ -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\label{_auto4} -\end{align} -$$ - -

    The term \( e_k \) is called the truncation error:

    -$$ -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\label{_auto5} -\end{equation} -$$ - -

    We can show that \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j) \) for all \( 0 \leq i \leq d-1 \) and \( 0 \leq j \leq d-1 \).

    - -









    -

    Blocking Transformations, final expressions

    - -

    We can then wrap up

    -$$ -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\label{_auto6} -\end{align} -$$ - -

    By repeated use of this equation we get \( \mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X}) \) for all \( 0 \leq i \leq d-1 \). This has the consequence that

    -$$ -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \label{eq:convergence} -\end{align} -$$ - - -









    -

    More on the blocking method

    - -

    Flyvbjerg and Petersen demonstrated that the sequence -\( \{e_k\}_{k=0}^{d-1} \) is decreasing, and conjectured that the term -\( e_k \) can be made as small as we would like by making \( k \) (and hence -\( d \)) sufficiently large. Since the sequence is decreasing, -we can apply blocking transformations until -\( e_k \) is sufficiently small, and then estimate \( \mathrm{var}(\overline{X}) \) by -\( \widehat{\sigma}^2_k/n_k \). -

    - -

    For an elegant solution and proof of the blocking method, see the recent article of Marius Jonsson (former MSc student of the Computational Physics group).

    - -









    -

    Example code from last week

    - - -
    -
    -
    -
    -
    -
    # 2-electron VMC code for 2dim quantum dot with importance sampling
    -# Using gaussian rng for new positions and Metropolis- Hastings 
    -# Added energy minimization
    -from math import exp, sqrt
    -from random import random, seed, normalvariate
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from mpl_toolkits.mplot3d import Axes3D
    -from matplotlib import cm
    -from matplotlib.ticker import LinearLocator, FormatStrFormatter
    -from scipy.optimize import minimize
    -import sys
    -import os
    -
    -# Where to save data files
    -PROJECT_ROOT_DIR = "Results"
    -DATA_ID = "Results/EnergyMin"
    -
    -if not os.path.exists(PROJECT_ROOT_DIR):
    -    os.mkdir(PROJECT_ROOT_DIR)
    -
    -if not os.path.exists(DATA_ID):
    -    os.makedirs(DATA_ID)
    -
    -def data_path(dat_id):
    -    return os.path.join(DATA_ID, dat_id)
    -
    -outfile = open(data_path("Energies.dat"),'w')
    -
    -
    -# Trial wave function for the 2-electron quantum dot in two dims
    -def WaveFunction(r,alpha,beta):
    -    r1 = r[0,0]**2 + r[0,1]**2
    -    r2 = r[1,0]**2 + r[1,1]**2
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = r12/(1+beta*r12)
    -    return exp(-0.5*alpha*(r1+r2)+deno)
    -
    -# Local energy  for the 2-electron quantum dot in two dims, using analytical local energy
    -def LocalEnergy(r,alpha,beta):
    -    
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12)
    -
    -# Derivate of wave function ansatz as function of variational parameters
    -def DerivativeWFansatz(r,alpha,beta):
    -    
    -    WfDer  = np.zeros((2), np.double)
    -    r1 = (r[0,0]**2 + r[0,1]**2)
    -    r2 = (r[1,0]**2 + r[1,1]**2)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    deno2 = deno*deno
    -    WfDer[0] = -0.5*(r1+r2)
    -    WfDer[1] = -r12*r12*deno2
    -    return  WfDer
    -
    -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector
    -def QuantumForce(r,alpha,beta):
    -
    -    qforce = np.zeros((NumberParticles,Dimension), np.double)
    -    r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)
    -    deno = 1.0/(1+beta*r12)
    -    qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12
    -    qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12
    -    return qforce
    -    
    -
    -# Computing the derivative of the energy and the energy 
    -def EnergyDerivative(x0):
    -
    -    
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    EnergyDer = 0.0
    -    DeltaPsi = 0.0
    -    DerivativePsiE = 0.0 
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)
    -        DeltaPsi += DerPsi
    -        energy += DeltaE
    -        DerivativePsiE += DerPsi*DeltaE
    -            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    DerivativePsiE /= NumberMCcycles
    -    DeltaPsi /= NumberMCcycles
    -    EnergyDer  = 2*(DerivativePsiE-DeltaPsi*energy)
    -    return EnergyDer
    -
    -
    -# Computing the expectation value of the local energy 
    -def Energy(x0):
    -    # Parameters in the Fokker-Planck simulation of the quantum force
    -    D = 0.5
    -    TimeStep = 0.05
    -    # positions
    -    PositionOld = np.zeros((NumberParticles,Dimension), np.double)
    -    PositionNew = np.zeros((NumberParticles,Dimension), np.double)
    -    # Quantum force
    -    QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)
    -    QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)
    -
    -    energy = 0.0
    -    DeltaE = 0.0
    -    alpha = x0[0]
    -    beta = x0[1]
    -    #Initial position
    -    for i in range(NumberParticles):
    -        for j in range(Dimension):
    -            PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)
    -    wfold = WaveFunction(PositionOld,alpha,beta)
    -    QuantumForceOld = QuantumForce(PositionOld,alpha, beta)
    -
    -    #Loop over MC MCcycles
    -    for MCcycle in range(NumberMCcycles):
    -        #Trial position moving one particle at the time
    -        for i in range(NumberParticles):
    -            for j in range(Dimension):
    -                PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\
    -                                       QuantumForceOld[i,j]*TimeStep*D
    -            wfnew = WaveFunction(PositionNew,alpha,beta)
    -            QuantumForceNew = QuantumForce(PositionNew,alpha, beta)
    -            GreensFunction = 0.0
    -            for j in range(Dimension):
    -                GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\
    -	                              (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\
    -                                      PositionNew[i,j]+PositionOld[i,j])
    -      
    -            GreensFunction = exp(GreensFunction)
    -            ProbabilityRatio = GreensFunction*wfnew**2/wfold**2
    -            #Metropolis-Hastings test to see whether we accept the move
    -            if random() <= ProbabilityRatio:
    -                for j in range(Dimension):
    -                    PositionOld[i,j] = PositionNew[i,j]
    -                    QuantumForceOld[i,j] = QuantumForceNew[i,j]
    -                wfold = wfnew
    -        DeltaE = LocalEnergy(PositionOld,alpha,beta)
    -        energy += DeltaE
    -        if Printout: 
    -           outfile.write('%f\n' %(energy/(MCcycle+1.0)))            
    -    # We calculate mean values
    -    energy /= NumberMCcycles
    -    return energy
    -
    -#Here starts the main program with variable declarations
    -# These module-level names are read by the functions defined above
    -NumberParticles = 2
    -Dimension = 2
    -# seed for rng generator 
    -seed()
    -# Monte Carlo cycles for parameter optimization
    -# (no output file is written during the optimization)
    -Printout = False
    -NumberMCcycles= 10000
    -# guess for variational parameters
    -x0 = np.array([0.9,0.2])
    -# Using the BFGS quasi-Newton method to find optimal parameters,
    -# with the gradient supplied by EnergyDerivative
    -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True})
    -x0 = res.x
    -# Compute the energy again with the optimal parameters and increased number of Monte Carlo cycles
    -# The number of cycles is a power of two; Energy writes one line per cycle to outfile
    -NumberMCcycles= 2**19
    -Printout = True
    -FinalEnergy = Energy(x0)
    -# The energy is repeated so that this column has as many rows as x0 in the table below
    -EResult = np.array([FinalEnergy,FinalEnergy])
    -outfile.close()
    -#nice printout with Pandas
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Optimal Parameters':x0, 'Final Energy':EResult}
    -frame = pd.DataFrame(data)
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Resampling analysis

    - -

    The next step is then to use the above data sets and perform a -resampling analysis using the blocking method. -The blocking code, based on the article of Marius Jonsson, is given here -

    - - - -
    -
    -
    -
    -
    -
    # Common imports
    -import os
    -
    -# Where to save the figures and data files
    -DATA_ID = "Results/EnergyMin"
    -
    -def data_path(dat_id):
    -    """Return the path of the data file named dat_id inside DATA_ID."""
    -    return os.path.join(DATA_ID, dat_id)
    -
    -# Input file holding the data series to be analysed, opened for reading
    -infile = open(data_path("Energies.dat"),'r')
    -
    -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt
    -from numpy.linalg import inv
    -
    -def block(x):
    -    # preliminaries
    -    n = len(x)
    -    d = int(log2(n))
    -    s, gamma = zeros(d), zeros(d)
    -    mu = mean(x)
    -
    -    # estimate the auto-covariance and variances 
    -    # for each blocking transformation
    -    for i in arange(0,d):
    -        n = len(x)
    -        # estimate autocovariance of x
    -        gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )
    -        # estimate variance of x
    -        s[i] = var(x)
    -        # perform blocking transformation
    -        x = 0.5*(x[0::2] + x[1::2])
    -   
    -    # generate the test observator M_k from the theorem
    -    M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] )  )[::-1]
    -
    -    # we need a list of magic numbers
    -    q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])
    -
    -    # use magic to determine when we should have stopped blocking
    -    for k in arange(0,d):
    -        if(M[k] < q[k]):
    -            break
    -    if (k >= d-1):
    -        print("Warning: Use more data")
    -    return mu, s[k]/2**(d-k)
    -
    -
    -x = loadtxt(infile)
    -(mean, var) = block(x) 
    -std = sqrt(var)
    -import pandas as pd
    -from pandas import DataFrame
    -data ={'Mean':[mean], 'STDev':[std]}
    -frame = pd.DataFrame(data,index=['Values'])
    -print(frame)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Content

    -
      -
    • Simple compiler options
    • -
    • Tools to benchmark your code
    • -
    • Machine architectures
    • -
    • What is vectorization?
    • -
    • How to measure code performance
    • -
    • Parallelization with OpenMP
    • -
    • Parallelization with MPI
    • -
    • Vectorization and parallelization, examples
    • -
    -









    -

    Optimization and profiling

    -
    - -

    - -

    Till now we have not paid much attention to speed and the optimization possibilities inherent in the various compilers. We have compiled and linked as

    - - -
    -
    -
    -
    -
    -
    c++  -c  mycode.cpp
    -c++  -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For Fortran replace with for example gfortran or ifort. This is what we call a flat compiler option and should be used when we develop the code. It normally produces very large and slow code when translated to machine instructions. We use this option for debugging and for establishing the correct program output because every operation is done precisely as the user specified it.

    - -

    It is instructive to look up the compiler manual for further instructions by writing

    - - -
    -
    -
    -
    -
    -
    man c++
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -









    -

    More on optimization

    -
    - -

    -

    We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identify potential parallelism, include automatic vectorization or replace a division with a reciprocal -and a multiplication if this speeds up the code. -

    - - -
    -
    -
    -
    -
    -
    c++  -O3 -c  mycode.cpp
    -c++  -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This (other options are -O2 or -Ofast) is the recommended option.

    -
    - -









    -

    Optimization and profiling

    -
    - -

    -

    It is also useful to profile your program under the development stage. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -pg -O3 -c  mycode.cpp
    -c++  -pg -O3 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    After you have run the code you can obtain the profiling information via

    - - -
    -
    -
    -
    -
    -
    gprof mycode.exe >  ProfileOutput
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When you have properly profiled your code, you must take out this option as it slows down performance. For memory tests use valgrind. An excellent environment for all these aspects, and much more, is Qt Creator.

    -
    - - -









    -

    Optimization and debugging

    -
    - -

    -

    Adding debugging options is a very useful alternative under the development stage of a program. -You would then compile with -

    - - -
    -
    -
    -
    -
    -
    c++  -g -O0 -c  mycode.cpp
    -c++  -g -O0 -o  mycode.exe  mycode.o
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option -O0.

    -
    - -
    -Other optimization flags -

    -

    Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag -ftrapv does this for the CLANG compiler on OS X operating systems. -

    -
    - - -









    -

    Other hints

    -
    - -

    -

    In general, irrespective of compiler options, it is useful to

    -
      -
    • avoid if tests or calls to functions inside loops, if possible.
    • -
    • avoid multiplication with constants inside loops if possible
    • -
    -

    Here is an example of a part of a program where specific operations lead to a slower code

    - - -
    -
    -
    -
    -
    -
    k = n-1;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] +c*d;
    -    e = g[k];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    A better code is

    - - -
    -
    -
    -
    -
    -
    temp = c*d;
    -for (i = 0; i < n; i++){
    -    a[i] = b[i] + temp;
    -}
    -e = g[n-1];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -

    -
    - - -









    -

    Vectorization and the basic idea behind parallel computing

    -
    - -

    -

    Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.

    -
      -
    • Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.
    • -
    • Multiple processors are involved to solve a global problem.
    • -
    • The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer.
    • -
    -

    Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models.

    -
    - - -









    -

    A rough classification of hardware models

    -
    - -

    - -

      -
    • Conventional single-processor computers are named SISD (single-instruction-single-data) machines.
    • -
    • SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.
    • -
    • Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data.
    • -
    -
    - -









    -

    Shared memory and distributed memory

    -
    - -

    -

    One way of categorizing modern parallel computers is to look at the memory configuration.

    -
      -
    • In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.
    • -
    • In distributed memory systems each CPU has its own memory.
    • -
    -

    The CPUs are connected by some network and may exchange messages.

    -
    - - -









    -

    Different parallel programming paradigms

    -
    - -

    - -

      -
    • Task parallelism: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.
    • -
    • Data parallelism: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives.
    • -
    -
    - -









    -

    Different parallel programming paradigms

    -
    - -

    - -

      -
    • Message passing: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.
    • -
    • This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult.
    • -
    -
    - - - -

    What is vectorization?

    -

    Vectorization is a special case of Single Instruction Multiple Data (SIMD) to denote a single instruction stream capable of operating on multiple data elements in parallel. We can think of vectorization as the unrolling of loops accompanied with SIMD instructions.

    - -

    Vectorization is the process of converting an algorithm that performs scalar operations (typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. Consider the following example

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized, the compiler will simply start with the first element and then perform subsequent additions operating on one address in memory at a time.

    - - -

    Number of elements that can be acted upon

    -

    A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of say a vector. -

    - -

    The number of elements which can be operated on in parallel ranges from four single-precision floating point data elements in so-called Streaming SIMD Extensions and two double-precision floating-point data elements in Streaming SIMD Extensions 2 to sixteen byte operations in a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length ranges from 2 to 16, depending on the instruction extensions used and on the data type.

    - -

    In summary, our instructions operate on 128 bit (16 byte) operands

    -
      -
    • 4 floats or ints
    • -
    • 2 doubles
    • -
    • Data paths 128 bits wide for vector unit
    • -
    - -

    Number of elements that can be acted upon, examples

    -

    We start with the simple scalar operations given by

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    If the code is not vectorized and we have a 128-bit register to store a 32 bits floating point number, -it means that we have \( 3\times 32 \) bits that are not used. -

    - -

    We have thus unused space in our SIMD registers. These registers could hold three additional integers.

    - - -

    Operation counts for scalar operation

    -

    The code

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i++){
    -    a[i] = b[i] + c[i];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    has for \( n \) repeats

    -
      -
    1. one load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    - -

    Number of elements that can be acted upon, examples

    -

    If we vectorize the code, we can perform, with a 128-bit register four simultaneous operations, that is -we have -

    - - -
    -
    -
    -
    -
    -
    for (i = 0; i < n; i+=4){
    -    a[i] = b[i] + c[i];
    -    a[i+1] = b[i+1] + c[i+1];
    -    a[i+2] = b[i+2] + c[i+2];
    -    a[i+3] = b[i+3] + c[i+3];
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Four additions are now done in a single step.

    - - -

    Number of operations when vectorized

    -

    For \( n/4 \) repeats assuming floats or integers

    -
      -
    1. one vector load for \( c[i] \) in address 1
    2. -
    3. one load for \( b[i] \) in address 2
    4. -
    5. add \( c[i] \) and \( b[i] \) to give \( a[i] \)
    6. -
    7. store \( a[i] \) in address 2
    8. -
    -









    -

    A simple test case with and without vectorization

    -

    We implement these operations in a simple c++ program that computes at the end the norm of a vector.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    #include <iostream>
    #include <cmath>
    #include <iomanip>
    #include <ctime>

    using namespace std; // note use of namespace

    // Usage: <program> n
    // Fills two vectors a and b of length n, adds them into c and prints the
    // sum of squares of c together with the CPU time used.
    // Returns EXIT_FAILURE when the length argument is missing or not positive.
    int main (int argc, char* argv[])
    {
      // The vector length is mandatory; argv[1] must not be read without it
      if (argc < 2) {
        cerr << "Usage: " << (argc > 0 ? argv[0] : "vecexample") << " n" << endl;
        return EXIT_FAILURE;
      }
      // read in the dimension of the vectors
      const int n = atoi(argv[1]);
      if (n <= 0) {
        cerr << "The vector length n must be a positive integer" << endl;
        return EXIT_FAILURE;
      }
      const double s = 1.0/sqrt( (double) n);
      // portable replacement for the non-standard M_PI macro
      const double pi = acos(-1.0);
      // Start timing; the timed region covers allocation, initialization,
      // the vector addition and the norm computation
      clock_t start, finish;
      start = clock();
      // Allocate space for the vectors to be used
      double *a = new double [n];
      double *b = new double [n];
      double *c = new double [n];
      // Set up values for vectors a and b, and zero c
      for (int i = 0; i < n; i++){
        double angle = 2.0*pi*i/ (( double ) n);
        a[i] = s*(sin(angle) + cos(angle));
        b[i] = s*sin(2.0*angle);
        c[i] = 0.0;
      }
      // Then perform the vector addition
      for (int i = 0; i < n; i++){
        c[i] += a[i]+b[i];
      }
      // Accumulate the sum of squares of c (printed as "Norm-2")
      double Norm2 = 0.0;
      for (int i = 0; i < n; i++){
        Norm2 += c[i]*c[i];
      }
      finish = clock();
      double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
      cout << setiosflags(ios::showpoint | ios::uppercase);
      cout << setprecision(10) << setw(20) << "Time used  for norm computation=" << timeused  << endl;
      cout << "  Norm-2  = " << Norm2 << endl;
      // Free up space
      delete[] a;
      delete[] b;
      delete[] c;
      return 0;
    }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Compiling with and without vectorization

    -

    We can compile and link without vectorization using the clang c++ compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization (and additional optimizations)

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The speedup depends on the size of the vectors. In the example here we have run with \( 10^7 \) elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. -

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 10000000
    -Time used  for norm computation=0.04720500000
    -Compphys:~ hjensen$ ./novec.x 10000000
    -Time used  for norm computation=0.03311700000
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This particular C++ compiler speeds up the above loop operations by a factor of 1.5. Performing the same operations for \( 10^9 \) elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster.

    - - -
    -
    -
    -
    -
    -
    Compphys:~ hjensen$ ./vec.x 1000000000
    -Time used  for norm computation=58.41391100
    -Compphys:~ hjensen$ ./novec.x 1000000000
    -Time used  for norm computation=46.51295300
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We will discuss these issues further in the next slides.

    - - -

    Compiling with and without vectorization using clang

    -

    We can compile and link without vectorization with the clang compiler

    - - -
    -
    -
    -
    -
    -
    clang++ -fno-vectorize -o novec.x vecexample.cpp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and with vectorization

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    We can also add vectorization analysis, see for example

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-analysis=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    or figure out if vectorization was missed

    - - -
    -
    -
    -
    -
    -
    clang++ -O3 -Rpass-missed=loop-vectorize -o  vec.x vecexample.cpp 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Automatic vectorization and vectorization inhibitors, criteria

    - -

    Not all loops can be vectorized, as discussed in Intel's guide to vectorization

    - -

    An important criterion is that the loop counter \( n \) is known at the entry of the loop.

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The variable \( n \) does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent.

    - -









    -

    Automatic vectorization and vectorization inhibitors, exit criteria

    - -

    An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop -

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    a[j] = cos(j*1.0);
    -    if (a[j] < 0 ) break;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Avoid loop termination conditions and opt for a single entry loop variable \( n \). The lower and upper bounds have to be kept fixed within the loop.

    - -









    -

    Automatic vectorization and vectorization inhibitors, straight-line code

    - -

    SIMD instructions perform the same type of operations multiple times. A switch statement thus leads to a non-vectorizable loop since different statements cannot branch. The following code can however be vectorized since the if statement is implemented as a masked assignment.

    - - -
    -
    -
    -
    -
    -
      for (int j = 0; j < n; j++) {
    -    double x  = cos(j*1.0);
    -    if (x > 0 ) {
    -       a[j] =  x*sin(j*2.0); 
    -    }
    -    else {
    -       a[j] = 0.0;
    -    }
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as switch, go to, or return statements or if constructs that cannot be treated as masked assignments.

    - -









    -

    Automatic vectorization and vectorization inhibitors, nested loops

    - -

    Only the innermost loop of the following example is vectorized

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations.

    - -









    -

    Automatic vectorization and vectorization inhibitors, function calls

    - -

    Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like -\( \sin{x} \), \( \cos{x} \), \( \exp{x} \) etc are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable -

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      a[i] = log10(i)*cos(i);
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Similarly, inline functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called.

    - -









    -

    Automatic vectorization and vectorization inhibitors, data dependencies

    - -

    One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i] = a[i-1] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value \( a[i-1] \) computed during the iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values \( a[0] \), \( a[1] \), \( a[2] \) and \( a[3] \) from the previous loop will be loaded into the right-hand side and produce wrong results. That is, we have

    - - -
    -
    -
    -
    -
    -
       a[1] = a[0] + b;
    -   a[2] = a[1] + b;
    -   a[3] = a[2] + b;
    -   a[4] = a[3] + b;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and if the first two iterations are executed at the same time by the SIMD instruction, the value of say \( a[1] \) could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results.

    - -









    -

    Automatic vectorization and vectorization inhibitors, more data dependencies

    - -

    On the other hand, a so-called -write-after-read statement can be vectorized. The following code -

    - - -
    -
    -
    -
    -
    -
      double b = 15.;
    -  for (int i = 1; i < n; i++) {
    -      a[i-1] = a[i] + b;
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is an example of an anti-dependency that can be vectorized since no iteration with a higher value of \( i \) can complete before an iteration with a lower value of \( i \). However, such code leads to problems with parallelization.

    - -









    -

    Automatic vectorization and vectorization inhibitors, memory stride

    - -

    For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code.

    - -

    When dealing with arrays, you should also avoid memory stride, since this slows down vectorization considerably. When you access array elements, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here

    - - -
    -
    -
    -
    -
    -
      for (int i = 0; i < n; i++) {
    -      for (int j = 0; j < n; j++) {
    -           a[i][j] += b[i][j];
    -      }  
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Memory management

    -

    The main memory contains the program data

    -
      -
    1. Cache memory contains a copy of the main memory data
    2. -
    3. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory
    4. -
    5. Registers contain working data only
    6. -
        -
      • Modern CPUs perform most or all operations only on data in registers
      • -
      -
    7. Multiple Cache memories contain a copy of the main memory data
    8. -
        -
      • Cache items accessed by their address in main memory
      • -
      • L1 cache is the fastest but has the least capacity
      • -
      • L2, L3 provide intermediate performance/size tradeoffs
      • -
      -
    -

    Loads and stores to memory can be as important as floating point operations when we measure performance.

    - -









    -

    Memory and communication

    - -
      -
    1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together
    2. -
    3. In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines
    4. -
        -
      • Lines are typically 64-128 bytes, or 8-16 double precision words
      • -
      • Even if you do not use the data, it is moved and occupies space in the cache
      • -
      -
    -

    Many of these performance features are not captured in most programming languages.

    - -









    -

    Measuring performance

    - -

    How do we measure performance? What is wrong with this code to time a loop?

    - - -
    -
    -
    -
    -
    -
      clock_t start, finish;
    -  start = clock();
    -  for (int j = 0; j < i; j++) {
    -    a[j] = b[j]+b[j]*c[j];
    -  }
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Problems with measuring time

    -
      -
    1. Timers are not infinitely accurate
    2. -
    3. All clocks have a granularity, the minimum time that they can measure
    4. -
    5. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)
    6. -
    7. Always know what your clock granularity is
    8. -
    9. Ensure that your measurement is for a long enough duration (say 100 times the tick)
    10. -
    -









    -

    Problems with cold start

    - -

    What happens when the code is executed? The assumption is that the code is ready to -execute. But -

    -
      -
    1. Code may still be on disk, and not even read into memory.
    2. -
    3. Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)
    4. -
    5. Multiple tests often necessary to ensure that cold start effects are not present
    6. -
    7. Special effort often required to ensure data in the intended part of the memory hierarchy.
    8. -
    -









    -

    Problems with smart compilers

    - -
      -
    1. If the result of the computation is not used, the compiler may eliminate the code
    2. -
    3. Performance will look impossibly fantastic
    4. -
    5. Even worse, eliminate some of the code so the performance looks plausible
    6. -
    7. Ensure that the results are (or may be) used.
    8. -
    -









    -

    Problems with interference

    -
      -
    1. Other activities are sharing your processor
    2. -
        -
      • Operating system, system daemons, other users
      • -
      • Some parts of the hardware do not always perform with exactly the same performance
      • -
      -
    3. Make multiple tests and report
    4. -
    5. Easy choices include
    6. -
        -
      • Average tests represent what users might observe over time
      • -
      -
    -









    -

    Problems with measuring performance

    -
      -
    1. Accurate, reproducible performance measurement is hard
    2. -
    3. Think carefully about your experiment:
    4. -
    5. What is it, precisely, that you want to measure?
    6. -
    7. How representative is your test to the situation that you are trying to measure?
    8. -
    -









    -

    Thomas algorithm for tridiagonal linear algebra equations

    -
    - -

    $$
    \left( \begin{array}{ccccc}
            b_0 & c_0 &         &         &         \\
            a_0 & b_1 & c_1     &         &         \\
                &     & \ddots  &         &         \\
                &     & a_{m-3} & b_{m-2} & c_{m-2} \\
                &     &         & a_{m-2} & b_{m-1}
       \end{array} \right)
    \left( \begin{array}{c}
            x_0 \\
            x_1 \\
            \vdots \\
            x_{m-2} \\
            x_{m-1}
       \end{array} \right)=\left( \begin{array}{c}
            f_0 \\
            f_1 \\
            \vdots \\
            f_{m-2} \\
            f_{m-1} \\
       \end{array} \right)
    $$

    - - -









    -

    Thomas algorithm, forward substitution

    -
    - -

    -

    The first step is to multiply the first row by \( a_0/b_0 \) and subtract it from the second row. This is known as the forward substitution step. We obtain then

    $$
    a_i = 0,
    $$
    $$
    b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1},
    $$

    and

    $$
    f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}.
    $$

    At this point the simplified equation, with only an upper triangular matrix takes the form

    $$
    \left( \begin{array}{ccccc}
            b_0 & c_0 &         &         &         \\
                & b_1 & c_1     &         &         \\
                &     & \ddots  &         &         \\
                &     &         & b_{m-2} & c_{m-2} \\
                &     &         &         & b_{m-1}
       \end{array} \right)\left( \begin{array}{c}
            x_0 \\
            x_1 \\
            \vdots \\
            x_{m-2} \\
            x_{m-1}
       \end{array} \right)=\left( \begin{array}{c}
            f_0 \\
            f_1 \\
            \vdots \\
            f_{m-2} \\
            f_{m-1} \\
       \end{array} \right)
    $$
    - - -









    -

    Thomas algorithm, backward substitution

    -
    - -

    -

    The next step is the backward substitution step. The last row is multiplied by \( c_{N-3}/b_{N-2} \) and subtracted from the second to last row, thus eliminating \( c_{N-3} \) from the second to last row. The general backward substitution procedure is

    $$
    c_i = 0,
    $$

    and

    $$
    f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i
    $$

    All that remains to be computed is the solution, which is the very straightforward process of

    $$
    x_i = \frac{f_i}{b_i}
    $$
    - - -









    -

    Thomas algorithm and counting of operations (floating point and memory)

    -
    - -

    - -

    We have in this specific case the following memory and floating point operations

    - -
      -
    • Memory Reads: \( 14(N-2) \);
    • -
    • Memory Writes: \( 4(N-2) \);
    • -
    • Subtractions: \( 3(N-2) \);
    • -
    • Multiplications: \( 3(N-2) \);
    • -
    • Divisions: \( 4(N-2) \).
    • -
    -
    - - -
    - -

    - - -

    -
    -
    -
    -
    -
    // Forward substitution    
    -// Note that we can simplify by precalculating a[i-1]/b[i-1]
    -  for (int i=1; i < n; i++) {
    -     b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];
    -     f[i] = g[i] - (a[i-1]*f[i-1])/b[i-1];
    -  }
    -  x[n-1] = f[n-1] / b[n-1];
    -  // Backwards substitution                                                           
    -  for (int i = n-2; i >= 0; i--) {
    -     f[i] = f[i] - c[i]*f[i+1]/b[i+1];
    -     x[i] = f[i]/b[i];
    -  }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Example: Transpose of a matrix

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    #include <iostream>
    #include <cmath>
    #include <iomanip>
    #include <ctime>

    using namespace std; // note use of namespace

    // Usage: <program> n
    // Builds an n x n matrix A, stores its transpose in B and prints the CPU
    // time spent in the transpose loops.
    // Returns EXIT_FAILURE when the dimension argument is missing or not positive.
    int main (int argc, char* argv[])
    {
      // The matrix dimension is mandatory; argv[1] must not be read without it
      if (argc < 2) {
        cerr << "Usage: " << (argc > 0 ? argv[0] : "transpose") << " n" << endl;
        return EXIT_FAILURE;
      }
      // read in dimension of square matrix
      const int n = atoi(argv[1]);
      if (n <= 0) {
        cerr << "The matrix dimension n must be a positive integer" << endl;
        return EXIT_FAILURE;
      }
      // Allocate space for the two matrices, one row at a time
      double **A = new double*[n];
      double **B = new double*[n];
      for (int i = 0; i < n; i++){
        A[i] = new double[n];
        B[i] = new double[n];
      }
      // Set up values for matrix A
      for (int i = 0; i < n; i++){
        for (int j = 0; j < n; j++) {
          A[i][j] =  cos(i*1.0)*sin(j*3.0);
        }
      }
      // Only the transpose itself is timed
      clock_t start, finish;
      start = clock();
      // Then compute the transpose
      for (int i = 0; i < n; i++){
        for (int j = 0; j < n; j++) {
          B[i][j]= A[j][i];
        }
      }

      finish = clock();
      double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
      cout << setiosflags(ios::showpoint | ios::uppercase);
      cout << setprecision(10) << setw(20) << "Time used  for setting up transpose of matrix=" << timeused  << endl;
      // Read one element of the result so that an optimizing compiler cannot
      // discard the timed loops as dead code
      cout << "  B[0][n-1] = " << B[0][n-1] << endl;

      // Free up space
      for (int i = 0; i < n; i++){
        delete[] A[i];
        delete[] B[i];
      }
      delete[] A;
      delete[] B;
      return 0;
    }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain C++ memory allocation. At the end it computes the Frobenius norm.

    - - - -
    -
    -
    -
    -
    -
    #include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include "time.h"
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double s = 1.0/sqrt( (double) n);
    -  double **A, **B, **C;
    -  // Start timing
    -  clock_t start, finish;
    -  start = clock();
    -  // Allocate space for the three matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (int i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Set up values for matrix A and B and zero matrix C
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double angle = 2.0*M_PI*i*j/ (( double ) n);
    -      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -      B[j][i] =  A[i][j];
    -    }
    -  }
    -  // Then perform the matrix-matrix multiplication
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      double sum = 0.0;
    -       for (int k = 0; k < n; k++) {
    -           sum += B[i][k]*A[k][j];
    -       }
    -       C[i][j] = sum;
    -    }
    -  }
    -  // Compute now the Frobenius norm
    -  double Fsum = 0.0;
    -  for (int i = 0; i < n; i++){
    -    for (int j = 0; j < n; j++) {
    -      Fsum += C[i][j]*C[i][j];
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -  finish = clock();
    -  double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << timeused  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    How do we define speedup? Simplest form

    -
    - -

    -

      -
    • Speedup measures the ratio of performance between two objects
    • -
    • Versions of same code, with different number of processors
    • -
    • Serial and vector versions
    • -
    • Try different programming languages, C++ and Fortran
    • -
    • Two algorithms computing the same result
    • -
    -
    - - -









    -

    How do we define speedup? Correct baseline

    -
    - -

    -

    The key is choosing the correct baseline for comparison

    -
      -
    • For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off
    • -
        -
      • For parallel applications, this is much harder:
      • -
          -
        • Choice of algorithm, decomposition, performance of baseline case etc.
        • -
        -
      -
    -
    - - -









    -

    Parallel speedup

    -
    - -

    -

    For parallel applications, speedup is typically defined as

    -
      -
    • Speedup \( =T_1/T_p \)
    • -
    -

    Here \( T_1 \) is the time on one processor and \( T_p \) is the time using \( p \) processors.

    -
      -
    • Can the speedup become larger than \( p \)? That means using \( p \) processors is more than \( p \) times faster than using one processor.
    • -
    -
    - - -









    -

    Speedup and memory

    -
    - -

    -

    The speedup on \( p \) processors can -be greater than \( p \) if memory usage is optimal! -Consider the case of a memory-bound computation with \( M \) words of memory -

    -
      -
    • If \( M/p \) fits into cache while \( M \) does not, the time to access memory will be different in the two cases:
    • -
    • \( T_1 \) uses the main memory bandwidth
    • -
    • \( T_p \) uses the appropriate cache bandwidth
    • -
    -
    - - -









    -

    Upper bounds on speedup

    -
    - -

    -

    Assume that almost all parts of a code are perfectly -parallelizable (fraction \( f \)). The remainder, -fraction \( (1-f) \) cannot be parallelized at all. -

    - -

    That is, there is work that takes time \( W \) on one process; a fraction \( f \) of that work will take -time \( Wf/p \) on \( p \) processors. -

    -
      -
    • What is the maximum possible speedup as a function of \( f \)?
    • -
    -
    - - -









    -

    Amdahl's law

    -
    - -

    -

    On one processor we have

    -$$ -T_1 = (1-f)W + fW = W -$$ - -

    On \( p \) processors we have

    -$$ -T_p = (1-f)W + \frac{fW}{p}, -$$ - -

    resulting in a speedup of

    -$$ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -$$ - -

    As \( p \) goes to infinity, \( fW/p \) goes to zero, and the maximum speedup is

    -$$ -\frac{1}{1-f}, -$$ - -

    meaning that -if \( f = 0.99 \) (all but \( 1\% \) parallelizable), the maximum speedup -is \( 1/(1-.99)=100 \)! -

    -
    - - -









    -

    How much is parallelizable

    -
    - -

    -

    If any non-parallel code slips into the -application, the parallel -performance is limited. -

    - -

    In many simulations, however, the fraction of non-parallelizable work -is \( 10^{-6} \) or less due to large arrays or objects that are perfectly parallelizable. -

    -
    - - -









    -

    Today's situation of parallel computing

    -
    - -

    - -

      -
    • Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.
    • -
    • Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.
    • -
    • Modern nodes have nowadays several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This leads often to codes which use both MPI and OpenMP.
    • -
    -

    Our lectures will focus on both MPI and OpenMP.

    -
    - - -









    -

    Overhead present in parallel computing

    -
    - -

    - -

      -
    • Uneven load balance: not all the processors can perform useful work at all times.
    • -
    • Overhead of synchronization
    • -
    • Overhead of communication
    • -
    • Extra computation due to parallelization
    • -
    -

    Due to the above overhead and that certain parts of a sequential -algorithm cannot be parallelized we may not achieve an optimal parallelization. -

    -
    - - -









    -

    Parallelizing a sequential algorithm

    -
    - -

    - -

      -
    • Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part,
    • -
    • Distribute the global work and data among \( P \) processors.
    • -
    -
    - - -









    -

    Strategies

    -
    - -

    -

      -
    • Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC.
    • -
    • When you are convinced that your codes run correctly, you can start your production runs on available supercomputers.
    • -
    -
    - - -









    -

    How do I run MPI on a PC/Laptop? MPI

    -
    - -

    -

    Installing MPI is rather easy on hardware running Unix/Linux operating systems; simply follow the instructions from the OpenMPI website. See also subsequent slides. -When you have made sure you have installed MPI on your PC/laptop, -

    -
      -
    • Compile with mpicxx/mpic++ or mpif90
    • -
    - - -
    -
    -
    -
    -
    -
      # Compile and link
    -  mpic++ -O3 -o nameofprog.x nameofprog.cpp
    -  #  run code with for example 8 processes using mpirun/mpiexec
    -  mpiexec -n 8 ./nameofprog.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Can I do it on my own PC/laptop? OpenMP installation

    -
    - -

    -

    If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: -

    - -
      -
    • For OpenMP, the compile option -fopenmp is included automatically in recent versions of the C++ compiler and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.
    • -
    • For OS X users however, install libomp
    • -
    - - -
    -
    -
    -
    -
    -
      brew install libomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and compile and link as

    - - -
    -
    -
    -
    -
    -
    c++ -o <name executable> <name program.cpp>  -lomp
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Installing MPI

    -
    - -

    -

    For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)

    - - -
    -
    -
    -
    -
    -
      sudo apt-get install libopenmpi-dev
    -  sudo apt-get install openmpi-bin
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew -

    - - -
    -
    -
    -
    -
    -
       brew install openmpi
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    When running an executable (code.x), run as

    - - -
    -
    -
    -
    -
    -
      mpirun -n 10 ./code.x
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    where we indicate that we want the number of processes to be 10.

    -
    - - -









    -

    Installing MPI and using Qt

    -
    - -

    -

    With openmpi installed, when using Qt, add to your .pro file the instructions here

    - -

    You may need to tell Qt where openmpi is stored.

    -
    - - -









    -

    What is Message Passing Interface (MPI)?

    -
    - -

    - -

    MPI is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. -

    - -

    MPI programs should be able to run -on all possible machines and run with all MPI implementations without change. -

    - -

    An MPI computation is a collection of processes communicating with messages.

    -
    - -









    -

    Going Parallel with MPI

    -
    - -

    -

    Task parallelism: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    MPI is a library

    -
    - -

    -

    MPI is a library specification for the message passing interface, -proposed as a standard. -

    - -
      -
    • independent of hardware;
    • -
    • not a language or compiler specification;
    • -
    • not a specific implementation or product.
    • -
    -

    A message passing standard for portability and ease-of-use. -Designed for high performance. -

    - -

    Insert communication and synchronization functions where necessary.

    -
    - - -









    -

    Bindings to MPI routines

    -
    - -

    - -

    MPI is a message-passing library where all the routines -have corresponding C/C++-binding -

    - - -
    -
    -
    -
    -
    -
       MPI_Command_name
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and Fortran-binding (routine names are in uppercase, but can also be in lower case)

    - - -
    -
    -
    -
    -
    -
       MPI_COMMAND_NAME
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The discussion in these slides focuses on the C++ binding.

    -
    - - -









    -

    Communicator

    -
    - -

    -

      -
    • A group of MPI processes with a name (context).
    • -
    • Any process is identified by its rank. The rank is only meaningful within a particular communicator.
    • -
    • By default the communicator contains all the MPI processes.
    • -
    - - -
    -
    -
    -
    -
    -
      MPI_COMM_WORLD 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Mechanism to identify subset of processes.
    • -
    • Promotes modular design of parallel libraries.
    • -
    -
    - - -









    -

    Some of the most important MPI functions

    -
    - -

    - -

      -
    • \( MPI\_Init \) - initiate an MPI computation
    • -
    • \( MPI\_Finalize \) - terminate the MPI computation and clean up
    • -
    • \( MPI\_Comm\_size \) - how many processes participate in a given MPI communicator?
    • -
    • \( MPI\_Comm\_rank \) - which one am I? (A number between 0 and size-1.)
    • -
    • \( MPI\_Send \) - send a message to a particular process within an MPI communicator
    • -
    • \( MPI\_Recv \) - receive a message from a particular process within an MPI communicator
    • -
    • \( MPI\_Reduce \) or \( MPI\_Allreduce \), send and receive messages
    • -
    -
    - - -









    -

    The first MPI C/C++ program

    -
    - -

    - -

    Let every process write "Hello world" (oh not this program again!!) on the standard output.

    - - -
    -
    -
    -
    -
    -
    using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -int main (int nargs, char* args[])
    -{
    -int numprocs, my_rank;
    -//   MPI initializations
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -     << numprocs << endl;
    -//  End MPI
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    The Fortran program

    -
    - -

    - - -

    -
    -
    -
    -
    -
    PROGRAM hello
    -INCLUDE "mpif.h"
    -INTEGER:: size, my_rank, ierr
    -
    -CALL  MPI_INIT(ierr)
    -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
    -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
    -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size
    -CALL MPI_FINALIZE(ierr)
    -
    -END PROGRAM hello
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 1

    -
    - -

    - -

      -
    • The output to screen is not ordered since all processes are trying to write to screen simultaneously.
    • -
    • It is the operating system which opts for an ordering.
    • -
    • If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
    • -
    -
    - - -









    -

    Ordered output with MPI_Barrier

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    int main (int nargs, char* args[])
    -{
    - int numprocs, my_rank, i;
    - MPI_Init (&nargs, &args);
    - MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    - for (i = 0; i < numprocs; i++) {
    -   MPI_Barrier (MPI_COMM_WORLD);
    -   if (i == my_rank) {
    -     cout << "Hello world, I have  rank " << my_rank << 
    -        " out of " << numprocs << endl;
    -   }
    - }
    - MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 2

    -
    - -

    -

      -
    • Here we have used the \( MPI\_Barrier \) function to ensure that every process has completed its set of instructions in a particular order.
    • -
    • A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here \( MPI\_COMM\_WORLD \)) have called \( MPI\_Barrier \).
    • -
    • The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like \( MPI\_ALLREDUCE \) to be discussed later, have the same property; that is, no process can exit the operation until all processes have started.
    • -
    -

    However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action. -

    -
    - - -









    -

    Ordered output

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    .....
    -int numprocs, my_rank, flag;
    -MPI_Status status;
    -MPI_Init (&nargs, &args);
    -MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -if (my_rank > 0)
    -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, 
    -           MPI_COMM_WORLD, &status);
    -cout << "Hello world, I have  rank " << my_rank << " out of " 
    -<< numprocs << endl;
    -if (my_rank < numprocs-1)
    -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, 
    -          100, MPI_COMM_WORLD);
    -MPI_Finalize ();
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Note 3

    -
    - -

    - -

    The basic sending of messages is given by the function \( MPI\_SEND \), which in C/C++ -is defined as -

    - - -
    -
    -
    -
    -
    -
    int MPI_Send(void *buf, int count, 
    -             MPI_Datatype datatype, 
    -             int dest, int tag, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable buf is the variable we wish to send while count -is the number of variables we are passing. If we are passing only a single value, this should be 1. -

    - -

    If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be \( 10\times 10=100 \) -since we are actually passing 100 values. -

    -
    - - -









    -

    Note 4

    -
    - -

    - -

    Once you have sent a message, you must receive it on another task. The function \( MPI\_RECV \) -is similar to the send call. -

    - - -
    -
    -
    -
    -
    -
    int MPI_Recv( void *buf, int count, MPI_Datatype datatype, 
    -            int source, 
    -            int tag, MPI_Comm comm, MPI_Status *status )
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The arguments that are different from those in MPI\_SEND are -buf which is the name of the variable where you will be storing the received data, -source which replaces the destination in the send command. This is the rank of the sender. -

    - -

    Finally, we have used an \( MPI\_Status \) variable, status, -where one can check if the receive was completed. -

    - -

    The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -

    -
    - - -









    -

    Numerical integration in parallel

    -
    -Integrating \( \pi \) -

    - -

      -
    • The code example computes \( \pi \) using the trapezoidal rule.
    • -
    • The trapezoidal rule
    • -
    -$$ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -$$ - -

    Click on this link for the full program.

    -
    - - -









    -

    Dissection of trapezoidal rule with \( MPI\_Reduce \)

    -
    - -

    - - - -

    -
    -
    -
    -
    -
    //    Trapezoidal rule and numerical integration using MPI
    -using namespace std;
    -#include <mpi.h>
    -#include <iostream>
    -
    -//     Here we define various functions called by the main program
    -
    -double int_function(double );
    -double trapezoidal_rule(double , double , int , double (*)(double));
    -
    -//   Main function begins here
    -int main (int nargs, char* args[])
    -{
    -  int n, local_n, numprocs, my_rank; 
    -  double a, b, h, local_a, local_b, total_sum, local_sum;   
    -  double  time_start, time_end, total_time;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      //  MPI initializations
    -  MPI_Init (&nargs, &args);
    -  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
    -  time_start = MPI_Wtime();
    -  //  Fixed values for a, b and n 
    -  a = 0.0 ; b = 1.0;  n = 1000;
    -  h = (b-a)/n;    // h is the same for all processes 
    -  local_n = n/numprocs;  
    -  // make sure n > numprocs, else integer division gives zero
    -  // Length of each process' interval of
    -  // integration = local_n*h.  
    -  local_a = a + my_rank*local_n*h;
    -  local_b = local_a + local_n*h;
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Integrating with MPI

    -
    - -

    - - - -

    -
    -
    -
    -
    -
      total_sum = 0.0;
    -  local_sum = trapezoidal_rule(local_a, local_b, local_n, 
    -                               &int_function); 
    -  MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, 
    -              MPI_SUM, 0, MPI_COMM_WORLD);
    -  time_end = MPI_Wtime();
    -  total_time = time_end-time_start;
    -  if ( my_rank == 0) {
    -    cout << "Trapezoidal rule = " <<  total_sum << endl;
    -    cout << "Time = " <<  total_time  
    -         << " on number of processors: "  << numprocs  << endl;
    -  }
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  // end of main program
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    How do I use \( MPI\_Reduce \)?

    -
    - -

    - -

    Here we have used

    - - -
    -
    -
    -
    -
    -
    MPI_Reduce( void *senddata, void* resultdata, int count, 
    -     MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The two variables \( senddata \) and \( resultdata \) are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable \( count \) represents the total dimensionality, 1 in case of just one variable, -while \( MPI\_Datatype \) -defines the type of variable which is sent and received. -

    - -

    The new feature is \( MPI\_Op \). It defines the type -of operation we want to do. -

    -
    - - -









    -

    More on \( MPI\_Reduce \)

    -
    - -

    -

    In our case, since we are summing -the rectangle contributions from every process we define \( MPI\_Op = MPI\_SUM \). -If we have an array or matrix we can search for the largest or smallest element by sending either \( MPI\_MAX \) or -\( MPI\_MIN \). If we want the location as well (which array element) we simply transfer -\( MPI\_MAXLOC \) or \( MPI\_MINLOC \). If we want the product we write \( MPI\_PROD \). -

    - -

    \( MPI\_Allreduce \) is defined as

    - - -
    -
    -
    -
    -
    -
    MPI_Allreduce( void *senddata, void* resultdata, int count, 
    -          MPI_Datatype datatype, MPI_Op, MPI_Comm comm)        
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - -

    We use \( MPI\_Reduce \) to collect data from each process. Note also the use of the function -\( MPI\_Wtime \). -

    - - -
    -
    -
    -
    -
    -
    //  this function defines the function to integrate
    -double int_function(double x)
    -{
    -  double value = 4./(1.+x*x);
    -  return value;
    -} // end of function to evaluate
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Dissection of trapezoidal rule

    -
    - -

    - - -

    -
    -
    -
    -
    -
    //  this function defines the trapezoidal rule
    -double trapezoidal_rule(double a, double b, int n, 
    -                         double (*func)(double))
    -{
    -  double trapez_sum;
    -  double fa, fb, x, step;
    -  int    j;
    -  step=(b-a)/((double) n);
    -  fa=(*func)(a)/2. ;
    -  fb=(*func)(b)/2. ;
    -  trapez_sum=0.;
    -  for (j=1; j <= n-1; j++){
    -    x=j*step+a;
    -    trapez_sum+=(*func)(x);
    -  }
    -  trapez_sum=(trapez_sum+fb+fa)*step;
    -  return trapez_sum;
    -}  // end trapezoidal_rule 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    The quantum dot program for two electrons

    -
    - -

    - - -

    -
    -
    -
    -
    -
    // Variational Monte Carlo for atoms with importance sampling, slater det
    -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG
    -#include "mpi.h"
    -#include <cmath>
    -#include <random>
    -#include <string>
    -#include <iostream>
    -#include <fstream>
    -#include <iomanip>
    -#include "vectormatrixclass.h"
    -
    -using namespace  std;
    -// output file as global variable
    -ofstream ofile;  
    -// the step length and its squared inverse for the second derivative 
    -//  Here we define global variables  used in various functions
    -//  These can be changed by using classes
    -int Dimension = 2; 
    -int NumberParticles  = 2;  //  we fix also the number of electrons to be 2
    -
    -// declaration of functions 
    -
    -// The Mc sampling for the variational Monte Carlo 
    -void  MonteCarloSampling(int, double &, double &, Vector &);
    -
    -// The variational wave function
    -double  WaveFunction(Matrix &, Vector &);
    -
    -// The local energy 
    -double  LocalEnergy(Matrix &, Vector &);
    -
    -// The quantum force
    -void  QuantumForce(Matrix &, Matrix &, Vector &);
    -
    -
    -// inline function for single-particle wave function
    -inline double SPwavefunction(double r, double alpha) { 
    -   return exp(-alpha*r*0.5);
    -}
    -
    -// inline function for derivative of single-particle wave function
    -inline double DerivativeSPwavefunction(double r, double alpha) { 
    -  return -r*alpha;
    -}
    -
    -// function for absolute value of relative distance
    -double RelativeDistance(Matrix &r, int i, int j) { 
    -      double r_ij = 0;  
    -      for (int k = 0; k < Dimension; k++) { 
    -	r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));
    -      }
    -      return sqrt(r_ij); 
    -}
    -
    -// inline function for derivative of Jastrow factor
    -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){
    -  return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));
    -}
    -
    -// function for square of position of single particle
    -double singleparticle_pos2(Matrix &r, int i) { 
    -    double r_single_particle = 0;
    -    for (int j = 0; j < Dimension; j++) { 
    -      r_single_particle  += r(i,j)*r(i,j);
    -    }
    -    return r_single_particle;
    -}
    -
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -		 double *f, double stpmax, int *check, double (*func)(Vector &p));
    -
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));
    -
    -static double sqrarg;
    -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
    -
    -
    -static double maxarg1,maxarg2;
    -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
    -        (maxarg1) : (maxarg2))
    -
    -
    -// Begin of main program   
    -
    -int main(int argc, char* argv[])
    -{
    -  // Driver of the parallel variational Monte Carlo run: scans a grid of the
    -  // two variational parameters (alpha, beta), samples the local energy on
    -  // every MPI process and lets rank 0 write one line of averages per grid point.
    -  // Usage: <program> <output file stem> <number of Monte Carlo cycles>
    -  // Returns 0 on success and 1 when the command line is unusable.
    -
    -  //  MPI initializations
    -  // NumberMCsamples starts at 0 so that an unusable command line is visible
    -  // to every rank after the broadcast below.
    -  int NumberProcesses, MyRank, NumberMCsamples = 0;
    -  MPI_Init (&argc, &argv);
    -  MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);
    -  MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);
    -  double StartTime = MPI_Wtime();
    -  // Rank 0 reads filename and number of Monte Carlo cycles from the command line
    -  if (MyRank == 0) {
    -    if (argc > 2) NumberMCsamples = atoi(argv[2]);
    -    if (NumberMCsamples > 0) {
    -      string filename = argv[1]; // first command line argument after name of program
    -      string fileout = filename;
    -      string argument = to_string(NumberMCsamples);
    -      // Final filename as filename+NumberMCsamples
    -      fileout.append(argument);
    -      ofile.open(fileout);
    -    } else {
    -      cout << "Bad Usage: " << argv[0] <<
    -        " Read also output file on same line and number of Monte Carlo cycles" << endl;
    -    }
    -  }
    -  // broadcast the number of  Monte Carlo samples
    -  MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);
    -  // Without a positive number of samples there is nothing to compute; all
    -  // ranks leave together so that nobody waits in a later collective call.
    -  if (NumberMCsamples <= 0) {
    -    MPI_Finalize ();
    -    return 1;
    -  }
    -  // Two variational parameters only
    -  Vector VariationalParameters(2);
    -  // kept as double: the product may exceed the range of int on large runs
    -  double TotalNumberMCsamples = ((double)NumberMCsamples)*NumberProcesses;
    -  // Loop over variational parameters. Integer counters are used because
    -  // repeatedly adding 0.1 (or 0.05) to a double accumulates rounding error
    -  // and may drop the last grid point.
    -  const int NumberAlphaValues = 11;  // alpha = 0.5, 0.6, ..., 1.5
    -  const int NumberBetaValues = 9;    // beta  = 0.10, 0.15, ..., 0.50
    -  for (int ia = 0; ia < NumberAlphaValues; ia++){
    -    double alpha = 0.5 + 0.1*ia;
    -    for (int ib = 0; ib < NumberBetaValues; ib++){
    -      double beta = 0.1 + 0.05*ib;
    -      VariationalParameters(0) = alpha;  // value of alpha
    -      VariationalParameters(1) = beta;  // value of beta
    -      //  Do the mc sampling  and accumulate data with MPI_Reduce
    -      double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2;
    -      LocalProcessEnergy = LocalProcessEnergy2 = 0.0;
    -      MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);
    -      //  Collect data in total averages
    -      MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    -      // Print out results  in case of Master node, set to MyRank = 0
    -      if ( MyRank == 0) {
    -        // every process contributed a per-process average, hence the division
    -        double Energy = TotalEnergy/( (double)NumberProcesses);
    -        double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;
    -        double StandardDeviation = sqrt(Variance/TotalNumberMCsamples); // over optimistic error
    -        ofile << setiosflags(ios::showpoint | ios::uppercase);
    -        ofile << setw(15) << setprecision(8) << VariationalParameters(0);
    -        ofile << setw(15) << setprecision(8) << VariationalParameters(1);
    -        ofile << setw(15) << setprecision(8) << Energy;
    -        ofile << setw(15) << setprecision(8) << Variance;
    -        ofile << setw(15) << setprecision(8) << StandardDeviation << endl;
    -      }
    -    }
    -  }
    -  double EndTime = MPI_Wtime();
    -  double TotalTime = EndTime-StartTime;
    -  if ( MyRank == 0 )  cout << "Time = " <<  TotalTime  << " on number of processors: "  << NumberProcesses  << endl;
    -  if (MyRank == 0)  ofile.close();  // close output file
    -  // End MPI
    -  MPI_Finalize ();  
    -  return 0;
    -}  //  end of main function
    -
    -
    -// Monte Carlo sampling with the Metropolis algorithm  
    -
    -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)
    -{
    -  // Metropolis-Hastings sampling with a drift term given by the quantum force.
    -  //   NumberMCsamples       number of Monte Carlo cycles; one cycle attempts a
    -  //                         move of every particle, one particle at a time
    -  //   cumulative_e          out: average of the local energy over all cycles
    -  //   cumulative_e2         out: average of the squared local energy
    -  //   VariationalParameters forwarded unchanged to WaveFunction, QuantumForce
    -  //                         and LocalEnergy
    -
    -  // Initialize the seed and call the Mersenne Twister algorithm
    -  std::random_device rd;
    -  std::mt19937_64 gen(rd());
    -  // Set up the uniform distribution for x \in [0, 1)
    -  std::uniform_real_distribution<double> UniformNumberGenerator(0.0,1.0);
    -  std::normal_distribution<double> Normaldistribution(0.0,1.0);
    -  // diffusion constant from Schroedinger equation
    -  double D = 0.5; 
    -  double timestep = 0.05;  //  we fix the time step  for the gaussian deviate
    -  // width of the gaussian displacement, identical for every move
    -  const double sqrtTimestep = sqrt(timestep);
    -  // allocate matrices which contain the position of the particles  
    -  Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);
    -  Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);
    -  double Energy = 0.0; double EnergySquared = 0.0;
    -  //  initial trial positions
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int j = 0; j < Dimension; j++) {
    -      OldPosition(i,j) = Normaldistribution(gen)*sqrtTimestep;
    -    }
    -  }
    -  double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);
    -  QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);
    -  // loop over monte carlo cycles 
    -  for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ 
    -    // new position 
    -    for (int i = 0; i < NumberParticles; i++) { 
    -      for (int j = 0; j < Dimension; j++) {
    -        // gaussian deviate to compute new positions using a given timestep
    -        NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrtTimestep+OldQuantumForce(i,j)*timestep*D;
    -      }  
    -      //  for the other particles we need to set the position to the old position since
    -      //  we move only one particle at the time
    -      for (int k = 0; k < NumberParticles; k++) {
    -        if ( k != i) {
    -          for (int j = 0; j < Dimension; j++) {
    -            NewPosition(k,j) = OldPosition(k,j);
    -          }
    -        } 
    -      }
    -      double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); 
    -      QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);
    -      //  we compute the log of the ratio of the greens functions to be used in the 
    -      //  Metropolis-Hastings algorithm
    -      double GreensFunction = 0.0;            
    -      for (int j = 0; j < Dimension; j++) {
    -        GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*
    -          (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));
    -      }
    -      GreensFunction = exp(GreensFunction);
    -      // The Metropolis test is performed by moving one particle at the time
    -      if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { 
    -        for (int  j = 0; j < Dimension; j++) {
    -          OldPosition(i,j) = NewPosition(i,j);
    -        }
    -        // QuantumForce sums the Jastrow derivative over all partners, so moving
    -        // particle i changes the force on every particle. Adopt the complete
    -        // new force matrix; copying only row i would leave stale forces for
    -        // the particles moved later.
    -        for (int k = 0; k < NumberParticles; k++) {
    -          for (int j = 0; j < Dimension; j++) {
    -            OldQuantumForce(k,j) = NewQuantumForce(k,j);
    -          }
    -        }
    -        OldWaveFunction = NewWaveFunction;
    -      }
    -    }  //  end of loop over particles
    -    // compute local energy  
    -    double DeltaE = LocalEnergy(OldPosition, VariationalParameters);
    -    // update energies
    -    Energy += DeltaE;
    -    EnergySquared += DeltaE*DeltaE;
    -  }   // end of loop over MC trials   
    -  // update the energy average and its squared 
    -  cumulative_e = Energy/NumberMCsamples;
    -  cumulative_e2 = EnergySquared/NumberMCsamples;
    -}   // end MonteCarloSampling function  
    -
    -
    -// Function to compute the trial wave function (not its square; callers square
    -// it themselves): product of two single-particle functions and the Jastrow factor
    -
    -double  WaveFunction(Matrix &r, Vector &VariationalParameters)
    -{
    -  // single-particle part, written out for particles 0 and 1 only;
    -  // replace with Slater det for more particles
    -  double wf = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));
    -  // contribution from Jastrow factor: exp(r_ij/(1+beta*r_ij)) for every pair i<j,
    -  // with beta = VariationalParameters(1)
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      // evaluate the pair distance once instead of twice per pair
    -      double rij = RelativeDistance(r, i, j);
    -      wf *= exp(rij/((1.0+VariationalParameters(1)*rij)));
    -    }
    -  }
    -  return wf;
    -}
    -
    -/* Local energy at configuration r, kinetic plus potential part, with all
    - * derivatives taken from closed-form expressions (no numerical
    - * differentiation of the kinetic energy).
    - *   r                     particle coordinates, indexed r(particle, dimension)
    - *   VariationalParameters element 0 is handed to the single-particle
    - *                         derivative, element 1 to the Jastrow terms
    - * Returns KineticEnergy + PotentialEnergy.
    - */
    -
    -double  LocalEnergy(Matrix &r, Vector &VariationalParameters)
    -{
    -
    -  // The kinetic and potential energy use the single-particle part as written;
    -  // for a many-electron system this has to be replaced by a Slater determinant.
    -  // length(i,j) holds the distance between particles i and j
    -  Matrix length( NumberParticles, NumberParticles);  // only off-diagonal entries are filled and read
    -  // Set up interparticle distance
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for(int j = i+1; j < NumberParticles; j++){
    -      length(i,j) = RelativeDistance(r, i, j);
    -      length(j,i) =  length(i,j);
    -    }
    -  }
    -  double KineticEnergy = 0.0;
    -  // Kinetic energy, first part: squared sum of the Jastrow and single-particle first derivatives
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    for (int k = 0; k < Dimension; k++) {
    -      double sum1 = 0.0;  // Jastrow derivative of particle i along k, summed over all partners
    -      for(int j = 0; j < NumberParticles; j++){
    -	if ( j != i) {
    -	  sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);
    -	}
    -      }
    -      KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));
    -    }
    -  }
    -  KineticEnergy += -2*VariationalParameters(0)*NumberParticles;  // NOTE(review): the constant 2 presumably stands for Dimension == 2 - confirm
    -  for (int i = 0; i < NumberParticles-1; i++) {
    -      for (int j = i+1; j < NumberParticles; j++) {
    -        KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) );  // NOTE(review): the 1.0/length term also looks specific to two dimensions - confirm
    -      }
    -  }
    -  KineticEnergy *= -0.5;  // the sum accumulated above enters with the factor -1/2
    -  // Set up potential energy, external potential + eventual electron-electron repulsion
    -  double PotentialEnergy = 0;
    -  for (int i = 0; i < NumberParticles; i++) { 
    -    double DistanceSquared = singleparticle_pos2(r, i);
    -    PotentialEnergy += 0.5*DistanceSquared;  // sp energy HO part, note it has the oscillator frequency set to 1!
    -  }
    -  // Add the electron-electron repulsion, 1/r_ij for every pair i<j
    -  for (int i = 0; i < NumberParticles-1; i++) { 
    -    for (int j = i+1; j < NumberParticles; j++) {
    -      PotentialEnergy += 1.0/length(i,j);          
    -    }
    -  }
    -  double LocalE = KineticEnergy+PotentialEnergy;
    -  return LocalE;
    -}
    -
    -// Analytical quantum force: qforce(i,k) = 2*(Jastrow derivative + single-particle derivative)
    -void  QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)
    -{
    -  for (int particle = 0; particle < NumberParticles; particle++) {
    -    for (int dim = 0; dim < Dimension; dim++) {
    -      // single-particle part, replace with Slater det for larger systems
    -      const double singlePart = DerivativeSPwavefunction(r(particle,dim),VariationalParameters(0));
    -      //  Jastrow factor contribution, summed over every partner of this particle
    -      double jastrowPart = 0.0;
    -      for (int other = 0; other < NumberParticles; other++) {
    -        if (other == particle) continue;
    -        jastrowPart += JastrowDerivative(r, VariationalParameters(1), particle, other, dim);
    -      }
    -      qforce(particle,dim) = 2.0*(jastrowPart+singlePart);
    -    }
    -  }
    -} // end of QuantumForce function
    -
    -
    -#define ITMAX 200
    -#define EPS 3.0e-8
    -#define TOLX (4*EPS)
    -#define STPMX 100.0
    -/* dfpmin: quasi-Newton minimisation of func, starting from and overwriting p.
    - *   p      in: starting point, out: last point accepted by the line search
    - *   n      number of components of p
    - *   gtol   tolerance of the scaled-gradient convergence test
    - *   iter   out: index of the last iteration that was started
    - *   fret   out: function value delivered by the last line search
    - *   func   function to minimise
    - *   dfunc  writes the gradient of func at its first argument into the second
    - * Limits: at most ITMAX iterations, TOLX bounds the relative step size,
    - * EPS guards the matrix update, STPMX scales the maximum line-search step.
    - * A message is printed when ITMAX iterations pass without convergence.
    - * NOTE(review): the matrix update looks like the BFGS formula of the
    - * Numerical Recipes routine of the same name - confirm against the book.
    - */
    -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,
    -	    double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))
    -{
    -
    -  int check,i,its,j;  // check is set by lnsrch but never inspected here
    -  double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;
    -  Vector dg(n), g(n), hdg(n), pnew(n), xi(n);
    -  Matrix hessian(n,n);  // multiplies the gradient to give the search direction (xi = -hessian*g)
    -
    -  fp=(*func)(p);  // function value and gradient at the starting point
    -  (*dfunc)(p,g);
    -  for (i = 0;i < n;i++) {
    -    for (j = 0; j< n;j++) hessian(i,j)=0.0;
    -    hessian(i,i)=1.0;  // start from the identity matrix
    -    xi(i) = -g(i);  // first search direction: negative gradient
    -    sum += p(i)*p(i);
    -  }
    -  stpmax=STPMX*FMAX(sqrt(sum),(double)n);  // longest step the line search may take
    -  for (its=1;its<=ITMAX;its++) {
    -    *iter=its;
    -    lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func);  // new point in pnew, its function value in *fret
    -    fp = *fret;
    -    for (i = 0; i< n;i++) {
    -      xi(i)=pnew(i)-p(i);  // step actually taken
    -      p(i)=pnew(i);
    -    }
    -    test=0.0;  // largest relative change of any component of p
    -    for (i = 0;i< n;i++) {
    -      temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);
    -      if (temp > test) test=temp;
    -    }
    -    if (test < TOLX) {  // converged: the step no longer changes p
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i);  // keep the old gradient
    -    (*dfunc)(p,g);
    -    test=0.0;  // largest scaled gradient component
    -    den=FMAX(*fret,1.0);
    -    for (i=0;i<n;i++) {
    -      temp=fabs(g(i))*FMAX(fabs(p(i)),1.0)/den;
    -      if (temp > test) test=temp;
    -    }
    -    if (test < gtol) {  // converged: gradient is small enough
    -      return;
    -    }
    -    for (i=0;i<n;i++) dg(i)=g(i)-dg(i);  // change of the gradient over the step
    -    for (i=0;i<n;i++) {
    -      hdg(i)=0.0;
    -      for (j=0;j<n;j++) hdg(i) += hessian(i,j)*dg(j);  // hdg = hessian*dg
    -    }
    -    fac=fae=sumdg=sumxi=0.0;
    -    for (i=0;i<n;i++) {
    -      fac += dg(i)*xi(i);
    -      fae += dg(i)*hdg(i);
    -      sumdg += SQR(dg(i));
    -      sumxi += SQR(xi(i));
    -    }
    -    if (fac*fac > EPS*sumdg*sumxi) {  // update the matrix only when dg.xi is not negligible
    -      fac=1.0/fac;
    -      fad=1.0/fae;
    -      for (i=0;i<n;i++) dg(i)=fac*xi(i)-fad*hdg(i);  // dg is reused as work vector
    -      for (i=0;i<n;i++) {
    -	for (j=0;j<n;j++) {
    -	  hessian(i,j) += fac*xi(i)*xi(j)
    -	    -fad*hdg(i)*hdg(j)+fae*dg(i)*dg(j);
    -	}
    -      }
    -    }
    -    for (i=0;i<n;i++) {
    -      xi(i)=0.0;
    -      for (j=0;j<n;j++) xi(i) -= hessian(i,j)*g(j);  // next search direction
    -    }
    -  }
    -  cout << "too many iterations in dfpmin" << endl;
    -}
    -#undef ITMAX
    -#undef EPS
    -#undef TOLX
    -#undef STPMX
    -
    -#define ALF 1.0e-4
    -#define TOLX 1.0e-7
    -
    -// Backtracking line search along direction p, starting at xold.
    -//   n       number of components
    -//   xold    starting point (not modified)
    -//   fold    function value at xold
    -//   g       gradient at xold, used for the initial slope g.p
    -//   p       search direction; rescaled in place when longer than stpmax
    -//   x       out: accepted point xold + alam*p, or xold when the search fails
    -//   f       out: function value at the last point that was evaluated
    -//   stpmax  upper bound on the length of the step
    -//   check   out: 1 when the step shrank below the convergence limit and x
    -//           was reset to xold, 0 otherwise
    -//   func    function to minimise
    -// A step is accepted once *f <= fold + ALF*alam*slope (sufficient decrease).
    -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,
    -	    double *f, double stpmax, int *check, double (*func)(Vector &p))
    -{
    -  int i;
    -  // alam2 and f2 describe the previous trial step; they are only read from
    -  // the second backtrack on, but are initialised to keep them well defined
    -  double a,alam,alam2=0.0,alamin,b,disc,f2=0.0,rhs1,rhs2,slope,sum,temp,
    -    test,tmplam;
    -
    -  *check=0;
    -  for (sum=0.0,i=0;i<n;i++) sum += p(i)*p(i);
    -  sum=sqrt(sum);
    -  // scale the direction down if the full step would be too long
    -  if (sum > stpmax)
    -    for (i=0;i<n;i++) p(i) *= stpmax/sum;
    -  for (slope=0.0,i=0;i<n;i++)
    -    slope += g(i)*p(i);
    -  test=0.0;
    -  for (i=0;i<n;i++) {
    -    temp=fabs(p(i))/FMAX(fabs(xold(i)),1.0);
    -    if (temp > test) test=temp;
    -  }
    -  // smallest step length that still changes x noticeably
    -  alamin=TOLX/test;
    -  alam=1.0;  // always try the full step first
    -  for (;;) {
    -    for (i=0;i<n;i++) x(i)=xold(i)+alam*p(i);
    -    *f=(*func)(x);
    -    if (alam < alamin) {
    -      // step too small to matter: give up and report it through check
    -      for (i=0;i<n;i++) x(i)=xold(i);
    -      *check=1;
    -      return;
    -    } else if (*f <= fold+ALF*alam*slope) return;
    -    else {
    -      if (alam == 1.0)
    -        // first backtrack: minimiser of the quadratic model
    -        tmplam = -slope/(2.0*(*f-fold-slope));
    -      else {
    -        // later backtracks: cubic model through the last two trial steps
    -        rhs1 = *f-fold-alam*slope;
    -        rhs2=f2-fold-alam2*slope;
    -        a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2);
    -        b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2);
    -        if (a == 0.0) tmplam = -slope/(2.0*b);
    -        else {
    -          disc=b*b-3.0*a*slope;
    -          if (disc<0.0) {
    -            cout << "Roundoff problem in lnsrch." << endl;
    -            // the cubic has no real minimiser; halve the step instead of
    -            // reusing the stale value of tmplam from the previous pass
    -            tmplam=0.5*alam;
    -          }
    -          else tmplam=(-b+sqrt(disc))/(3.0*a);
    -        }
    -        if (tmplam>0.5*alam)
    -          tmplam=0.5*alam;
    -      }
    -    }
    -    alam2=alam;
    -    f2 = *f;
    -    // never shrink the step by more than a factor of ten at once
    -    alam=FMAX(tmplam,0.1*alam);
    -  }
    -}
    -#undef ALF
    -#undef TOLX
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    What is OpenMP

    -
    - -

    -

      -
    • OpenMP provides high-level thread programming
    • -
    • Multiple cooperating threads are allowed to run simultaneously
    • -
    • Threads are created and destroyed dynamically in a fork-join pattern
    • -
        -
      • An OpenMP program consists of a number of parallel regions
      • -
      • Between two parallel regions there is only one master thread
      • -
      • In the beginning of a parallel region, a team of new threads is spawned
      • -
      -
    • The newly spawned threads work simultaneously with the master thread
    • -
    • At the end of a parallel region, the new threads are destroyed
    • -
    -

    Many good tutorials online and excellent textbook

    -
      -
    1. Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas
    2. -
    3. Many tutorials online like OpenMP official site
    4. -
    -
    - - -









    -

    Getting started, things to remember

    -
    - -

    -

      -
    • Remember the header file
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Insert compiler directives in C++ syntax as
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp...
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Compile with for example c++ -fopenmp code.cpp
    • -
    • Execute
    • -
        -
      • Remember to assign the environment variable OMP_NUM_THREADS
      • -
      • It specifies the total number of threads inside a parallel region, if not otherwise overwritten
      • -
      -
    -
    - - -









    -

    OpenMP syntax

    -
      -
    • Mostly directives
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp construct [ clause ...]
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Some functions and types
    • -
    - - -
    -
    -
    -
    -
    -
    #include <omp.h>
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Most apply to a block of code
    • -
    • Specifically, a structured block
    • -
    • Enter at top, exit at bottom only, exit(), abort() permitted
    • -
    -









    -

    Different OpenMP styles of parallelism

    -

    OpenMP supports several different ways to specify thread parallelism

    - -
      -
    • General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code
    • -
    • Parallel loops: Special case for loops, simplifies data parallel code
    • -
    • Task parallelism, new in OpenMP 3
    • -
    • Several ways to manage thread coordination, including Master regions and Locks
    • -
    • Memory model for shared data
    • -
    -









    -

    General code structure

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -/* Skeleton of an OpenMP program: serial parts alternate with parallel regions */
    -int main ()
    -{
    -int var1, var2, var3;
    -/* serial code */
    -/* ... */
    -/* start of a parallel region: var1 and var2 are private to each thread, var3 is shared */
    -#pragma omp parallel private(var1, var2) shared(var3)
    -{
    -/* ... */
    -}
    -/* more serial code */
    -/* ... */
    -/* another parallel region */
    -#pragma omp parallel
    -{
    -/* ... */
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel region

    -
    - -

    -

      -
    • A parallel region is a block of code that is executed by a team of threads
    • -
    • The following compiler directive creates a parallel region
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added at the end of the directive
    • -
    • Most often used clauses:
    • -
        -
      • default(shared) or default(none)
      • -
      • shared(list of variables)
      • -
      • private(list of variables)
      • -
      -
    -
    - - -









    -

    Hello world, not again, please!

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#include <cstdio>
    -int main (int argc, char *argv[])
    -{
    -int th_id, nthreads;  // th_id: private to each thread, nthreads: shared, written by thread 0 only
    -#pragma omp parallel private(th_id) shared(nthreads)
    -{
    -th_id = omp_get_thread_num();  // every thread stores its own number in its private copy
    -printf("Hello World from thread %d\n", th_id);
    -#pragma omp barrier
    -if ( th_id == 0 ) {  // after the barrier, thread 0 alone reports the size of the team
    -nthreads = omp_get_num_threads();
    -printf("There are %d threads\n",nthreads);
    -}
    -}  // end of parallel region
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Hello world, yet another variant

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <cstdio>
    -#include <iostream>
    -#include <omp.h>
    -// Every thread of a team of four prints its own id and the size of the team
    -int main(int argc, char *argv[]) 
    -{
    - omp_set_num_threads(4); 
    -#pragma omp parallel
    - {
    -   // declared inside the region, hence one copy per thread
    -   int id = omp_get_thread_num();
    -   int nproc = omp_get_num_threads(); 
    -   // a blank between the two numbers keeps them readable
    -   std::cout << "Hello world with id number and processes " <<  id << " " <<  nproc << std::endl;
    - } 
    -return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Variables declared outside of the parallel region are shared by all threads -If a variable like id is declared outside of the -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel, 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    it would have been shared by the various threads, possibly causing erroneous output

    -
      -
    • Why? What would go wrong? Why do we add possibly?
    • -
    -
    - - -









    -

    Important OpenMP library routines

    -
    - -

    - -

      -
    • int omp_get_num_threads(), returns the number of threads inside a parallel region
    • -
    • int omp_get_thread_num(), returns the thread number (id) of the calling thread inside a parallel region
    • -
    • void omp_set_num_threads(int), sets the number of threads to be used
    • -
    • void omp_set_nested(int), turns nested parallelism on/off
    • -
    -
    - - -









    -

    Private variables

    -
    - -

    -

    Private clause can be used to make thread- private versions of such variables:

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel private(id)
    -{
    - int id = omp_get_thread_num();
    - cout << "My thread num" << id << endl; 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • What is their value on entry? Exit?
    • -
    • OpenMP provides ways to control that
    • -
    • Can use default(none) to require the sharing of each variable to be described
    • -
    -
    - - -









    -

    Master region

    -
    - -

    -

    It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel 
    -{
    -  #pragma omp master
    -   {
    -      int id = omp_get_thread_num();
    -      cout << "My thread num" << id << endl; 
    -   } 
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel for loop

    -
    - -

    -

      -
    • Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    • Clauses can be added, such as
    • -
        -
      • schedule(static, chunk size)
      • -
      • schedule(dynamic, chunk size)
      • -
      • schedule(guided, chunk size) (non-deterministic allocation)
      • -
      • schedule(runtime)
      • -
      • private(list of variables)
      • -
      • reduction(operator:variable)
      • -
      • nowait
      • -
      -
    -
    - - -









    -

    Parallel computations and loops

    - -
    - -

    -

    OpenMP provides an easy way to parallelize a loop

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -  for (i=0; i<n; i++) c[i] = a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    OpenMP handles index variable (no need to declare in for loop or make private)

    - -

    Which thread does which values? Several options.

    -
    - - -









    -

    Scheduling of loop computations

    - -
    - -

    -

    We can let the OpenMP runtime decide. The decision is about how the loop iterations are scheduled -and OpenMP defines three choices of loop scheduling: -

    -
      -
    1. Static: Predefined at compile time. Lowest overhead, predictable
    2. -
    3. Dynamic: Selection made at runtime
    4. -
    5. Guided: Special case of dynamic; attempts to reduce overhead
    6. -
    -
    - - -









    -

    Example code for loop scheduling

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;  // serial initialisation: a[i] = b[i] = i
    -chunk = CHUNKSIZE;  // chunk size handed to the schedule clause below
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(dynamic,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];  // element-wise sum, iterations shared out with the dynamic schedule
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Example code for loop scheduling, guided instead of dynamic

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #include <omp.h>
    -#define CHUNKSIZE 100
    -#define N 1000
    -int main (int argc, char *argv[])
    -{
    -int i, chunk;
    -float a[N], b[N], c[N];
    -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;  // serial initialisation: a[i] = b[i] = i
    -chunk = CHUNKSIZE;  // chunk size handed to the schedule clause below
    -#pragma omp parallel shared(a,b,c,chunk) private(i)
    -{
    -#pragma omp for schedule(guided,chunk)
    -for (i=0; i < N; i++) c[i] = a[i] + b[i];  // element-wise sum, iterations shared out with the guided schedule
    -} /* end of parallel region */
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    More on Parallel for loop

    -
    - -

    -

      -
    • The number of loop iterations cannot be non-deterministic; break, return, exit, goto not allowed inside the for-loop
    • -
    • The loop index is private to each thread
    • -
    • A reduction variable is special
    • -
        -
      • During the for-loop there is a local private copy in each thread
      • -
      • At the end of the for-loop, all the local copies are combined together by the reduction operation
      • -
      -
    • Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler
    • -
    - - -
    -
    -
    -
    -
    -
    // #pragma omp parallel and #pragma omp for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    can be combined into

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    What can happen with this loop?

    - -
    - -

    -

    What happens with code like this

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    All threads can access the sum variable, but the addition is not atomic! It is important to avoid race between threads. So-called reductions in OpenMP are thus important for performance and for obtaining correct results. OpenMP lets us indicate that a variable is used for a reduction with a particular operator. The above code becomes

    - - -
    -
    -
    -
    -
    -
    sum = 0.0;
    -#pragma omp parallel for reduction(+:sum)
    -for (i=0; i<n; i++) sum += a[i]*a[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Inner product

    -
    - -

    -$$ -\sum_{i=0}^{n-1} a_ib_i -$$ - - - -

    -
    -
    -
    -
    -
    int i;
    -double sum = 0.;
    -/* allocating and initializing arrays */
    -/* ... */
    -/* every thread sums into a private copy of sum; the copies are added at the end of the loop */
    -#pragma omp parallel for default(shared) private(i) reduction(+:sum)
    - for (i=0; i<N; i++) sum += a[i]*b[i];
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Different threads do different tasks

    -
    - -

    - -

    Different threads do different tasks independently, each section is executed by one thread.

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -#pragma omp sections
    -{
    -#pragma omp section
    -funcA ();
    -#pragma omp section
    -funcB ();
    -#pragma omp section
    -funcC ();
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Single execution

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp single { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    The code is executed by one thread only, no guarantee which thread

    - -

    There is an implicit barrier at the end, unless the nowait clause is given

    - - -
    -
    -
    -
    -
    -
    #pragma omp master { ... }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Code executed by the master thread, guaranteed and no implicit barrier at the end.

    -
    - - -









    -

    Coordination and synchronization

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp barrier
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Synchronization, must be encountered by all threads in a team (or none)

    - - -
    -
    -
    -
    -
    -
    #pragma omp ordered { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is another form of synchronization (in sequential order). -The form -

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical { a block of codes }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    and

    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic { single assignment statement }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    is more efficient than

    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Data scope

    -
    - -

    -

      -
    • OpenMP data scope attribute clauses:
    • -
        -
      • shared
      • -
      • private
      • -
      • firstprivate
      • -
      • lastprivate
      • -
      • reduction
      • -
      -
    -

    What are the purposes of these attributes

    -
      -
    • define how and which variables are transferred to a parallel region (and back)
    • -
    • define which variables are visible to all threads in a parallel region, and which variables are privately allocated to each thread
    • -
    -
    - - -









    -

    Some remarks

    -
    - -

    - -

      -
    • When entering a parallel region, the private clause ensures each thread having its own new variable instances. The new variables are assumed to be uninitialized.
    • -
    • A shared variable exists in only one memory location and all threads can read and write to that address. It is the programmer's responsibility to ensure that multiple threads properly access a shared variable.
    • -
    • The firstprivate clause combines the behavior of the private clause with automatic initialization.
    • -
    • The lastprivate clause combines the behavior of the private clause with a copy back (from the last loop iteration or section) to the original variable outside the parallel region.
    • -
    -
    - - -









    -

    Parallelizing nested for-loops

    -
    - -

    - -

      -
    • Serial code
    • -
    - - -
    -
    -
    -
    -
    -
    for (i=0; i<100; i++) {
    -    for (j=0; j<100; j++) {
    -        a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Parallelization
    • -
    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for private(j)
    -for (i=0; i<100; i++) {
    -    for (j=0; j<100; j++) {
    -       a[i][j] = b[i][j] + c[i][j];
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -
      -
    • Why not parallelize the inner loop? to save overhead of repeated thread forks-joins
    • -
    • Why must j be private? To avoid race condition among the threads
    • -
    -
    - - -









    -

    Nested parallelism

    -
    - -

    -

    When a thread in a parallel region encounters another parallel construct, it -may create a new team of threads and become the master of the new -team. -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel num_threads(4)
    -{
    -/* .... */
    -#pragma omp parallel num_threads(2)
    -{
    -//  
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Parallel tasks

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp task 
    -#pragma omp parallel shared(p_vec) private(i)
    -{
    -#pragma omp single
    -{
    -for (i=0; i<N; i++) {
    -  double r = random_number();
    -  if (p_vec[i] > r) {
    -#pragma omp task
    -   do_work (p_vec[i]);
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Common mistakes

    -
    - -

    -

    Race condition

    - - -
    -
    -
    -
    -
    -
    int nthreads;
    -#pragma omp parallel shared(nthreads)
    -{
    -nthreads = omp_get_num_threads();
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Deadlock

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel
    -{
    -...
    -#pragma omp critical
    -{
    -...
    -#pragma omp barrier
    -}
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Not all computations are simple

    -
    - -

    -

    Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads -

    - -

    An example is finding the location and value of the largest element in an array

    - - -
    -
    -
    -
    -
    -
    for (i=0; i<n; i++) { 
    -   if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -   }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -

    Not all computations are simple, competing threads

    -
    - -

    -

    All threads are potentially accessing and changing the same values, maxloc and maxval.

    -
      -
    1. OpenMP provides several ways to coordinate access to shared values
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp atomic
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following statement (not block). We can use the critical option
    2. -
    - - -
    -
    -
    -
    -
    -
    #pragma omp critical
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -
      -
    1. Only one thread at a time can execute the following block
    2. -
    -

    Atomic may be faster than critical but depends on hardware

    -
    - - -









    -

    How to find the max value using OpenMP

    -
    - -

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -    if (x[i] > maxval) {
    -      maxval = x[i];
    -      maxloc = i; 
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Then deal with the race conditions

    -
    - -

    -

    Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The next step is to protect the updates of maxval and maxloc with a critical region, as in -

    - - -
    -
    -
    -
    -
    -
    #pragma omp parallel for
    - for (i=0; i<n; i++) {
    -#pragma omp critical
    -  {
    -     if (x[i] > maxval) {
    -       maxval = x[i];
    -       maxloc = i; 
    -     }
    -  }
    -} 
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Exercise: write a code which implements this and give an estimate of its performance. Perform several runs, -with a serial code only, with and without vectorization, and compare the serial code with the one that uses OpenMP. Run on different architectures if you can. -

    -
    - -









    -

    What can slow down OpenMP performance?

    -

    Give it a thought!

    - -









    -

    What can slow down OpenMP performance?

    -
    - -

    -

    Performance is poor because we insisted on keeping track of the maxval and location during the execution of the loop.

    -
      -
    • We do not care about the value during the execution of the loop, just the value at the end.
    • -
    -

    This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties

    - -Idea: Have each thread find the maxloc in its own data, using temporary arrays indexed by thread number to hold the values found by each thread, and then combine the per-thread results -
    - - -









    -

    Find the max location for each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    int maxloc[MAX_THREADS], mloc;
    -double maxval[MAX_THREADS], mval; 
    -#pragma omp parallel shared(maxval,maxloc)
    -{
    -  int id = omp_get_thread_num(); 
    -  maxval[id] = -1.0e30;
    -#pragma omp for
    -   for (int i=0; i<n; i++) {
    -       if (x[i] > maxval[id]) { 
    -           maxloc[id] = i;
    -           maxval[id] = x[i]; 
    -       }
    -    }
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Combine the values from each thread

    -
    - -

    - - -

    -
    -
    -
    -
    -
    #pragma omp flush (maxloc,maxval)
    -#pragma omp master
    -  {
    -    int nt = omp_get_num_threads(); 
    -    mloc = maxloc[0]; 
    -    mval = maxval[0]; 
    -    for (int i=1; i<nt; i++) {
    -        if (maxval[i] > mval) { 
    -           mval = maxval[i]; 
    -           mloc = maxloc[i];
    -        } 
    -     }
    -   }
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -

    Note that we let the master thread perform the last operation.

    -
    - -









    -

    Matrix-matrix multiplication

    -

    This code computes the norm of a vector using OpenMP

    - - -
    -
    -
    -
    -
    -
    //  OpenMP program to compute vector norm by adding two other vectors
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of vector
    -  int n = atoi(argv[1]);
    -  double *a, *b, *c;
    -  int i;
    -  int thread_num;
    -  double wtime, Norm2, s, angle;
    -  cout << "  Perform addition of two vectors and compute the norm-2." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the vectors to be used
    -  a = new double [n]; b = new double [n]; c = new double [n];
    -  // Define parallel region
    -# pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2)
    -  // Set up values for vectors  a and b
    -  for (i = 0; i < n; i++){
    -      angle = 2.0*M_PI*i/ (( double ) n);
    -      a[i] = s*(sin(angle) + cos(angle));
    -      b[i] =  s*sin(2.0*angle);
    -      c[i] = 0.0;
    -  }
    -  // Then perform the vector addition
    -  for (i = 0; i < n; i++){
    -     c[i] += a[i]+b[i];
    -  }
    -  // Compute now the norm-2
    -  Norm2 = 0.0;
    -  for (i = 0; i < n; i++){
    -     Norm2  += c[i]*c[i];
    -  }
    -// end parallel region
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for norm-2 computation=" << wtime  << endl;
    -  cout << " Norm-2  = " << Norm2 << endl;
    -  // Free up space
    -  delete[] a;
    -  delete[] b;
    -  delete[] c;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - -









    -

    Matrix-matrix multiplication

    -

    This is the matrix-matrix multiplication code with plain C++ memory allocation using OpenMP

    - - - -
    -
    -
    -
    -
    -
    //  Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP
    -#include <cstdlib>
    -#include <iostream>
    -#include <cmath>
    -#include <iomanip>
    -#include  <omp.h>
    -# include <ctime>
    -
    -using namespace std; // note use of namespace
    -int main (int argc, char* argv[])
    -{
    -  // read in dimension of square matrix
    -  int n = atoi(argv[1]);
    -  double **A, **B, **C;
    -  int i, j, k;
    -  int thread_num;
    -  double wtime, Fsum, s, angle;
    -  cout << "  Compute matrix product C = A * B and Frobenius norm." << endl;
    -  omp_set_num_threads(4);
    -  thread_num = omp_get_max_threads ();
    -  cout << "  The number of processors available = " << omp_get_num_procs () << endl ;
    -  cout << "  The number of threads available    = " << thread_num <<  endl;
    -  cout << "  The matrix order n                 = " << n << endl;
    -
    -  s = 1.0/sqrt( (double) n);
    -  wtime = omp_get_wtime ( );
    -  // Allocate space for the two matrices
    -  A = new double*[n]; B = new double*[n]; C = new double*[n];
    -  for (i = 0; i < n; i++){
    -    A[i] = new double[n];
    -    B[i] = new double[n];
    -    C[i] = new double[n];
    -  }
    -  // Define parallel region
    -# pragma omp parallel for default(shared) private (angle, i, j, k) reduction(+:Fsum)
    -  // Set up values for matrix A and B and zero matrix C
    -  for (i = 0; i < n; i++){
    -    for (j = 0; j < n; j++) {
    -      angle = 2.0*M_PI*i*j/ (( double ) n);
    -      A[i][j] = s * ( sin ( angle ) + cos ( angle ) );
    -      B[j][i] =  A[i][j];
    -    }
    -  }
    -  // Then perform the matrix-matrix multiplication
    -  for (i = 0; i < n; i++){
    -    for (j = 0; j < n; j++) {
    -       C[i][j] =  0.0;    
    -       for (k = 0; k < n; k++) {
    -            C[i][j] += A[i][k]*B[k][j];
    -       }
    -    }
    -  }
    -  // Compute now the Frobenius norm
    -  Fsum = 0.0;
    -  for (i = 0; i < n; i++){
    -    for (j = 0; j < n; j++) {
    -      Fsum += C[i][j]*C[i][j];
    -    }
    -  }
    -  Fsum = sqrt(Fsum);
    -// end parallel region and letting only one thread perform I/O
    -  wtime = omp_get_wtime ( ) - wtime;
    -  cout << setiosflags(ios::showpoint | ios::uppercase);
    -  cout << setprecision(10) << setw(20) << "Time used  for matrix-matrix multiplication=" << wtime  << endl;
    -  cout << "  Frobenius norm  = " << Fsum << endl;
    -  // Free up space
    -  for (int i = 0; i < n; i++){
    -    delete[] A[i];
    -    delete[] B[i];
    -    delete[] C[i];
    -  }
    -  delete[] A;
    -  delete[] B;
    -  delete[] C;
    -  return 0;
    -}
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - - - -
    - © 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license -
    - - - diff --git a/doc/src/week9/week9.ipynb b/doc/src/week9/week9.ipynb deleted file mode 100644 index f1a04377..00000000 --- a/doc/src/week9/week9.ipynb +++ /dev/null @@ -1,6042 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9343f8bf", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "66464e58", - "metadata": { - "editable": true - }, - "source": [ - "# Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking\n", - "**Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no**, Department of Physics and Center fo Computing in Science Education, University of Oslo, Oslo, Norway and Department of Physics and Astronomy and Facility for Rare Ion Beams, Michigan State University, East Lansing, Michigan, USA\n", - "\n", - "Date: **March 11-15**" - ] - }, - { - "cell_type": "markdown", - "id": "44214dd1", - "metadata": { - "editable": true - }, - "source": [ - "## Overview of week 11, March 11-15\n", - "**Topics.**\n", - "\n", - "1. Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week\n", - "\n", - "2. Resampling Techniques, emphasis on Blocking \n", - "\n", - "3. Discussion of onebody densities (whiteboard notes)\n", - "\n", - "4. Start discussion on optimization and parallelization for Python and C++\n", - "\n", - "\n", - "\n", - "Note, these notes contain additional material om optimization and parallelization. Parts of this material will be discussed this week." - ] - }, - { - "cell_type": "markdown", - "id": "fdb87fa7", - "metadata": { - "editable": true - }, - "source": [ - "## Why resampling methods ?\n", - "**Statistical analysis.**\n", - "\n", - "* Our simulations can be treated as *computer experiments*. 
This is particularly the case for Monte Carlo methods\n", - "\n", - "* The results can be analysed with the same statistical tools as we would use analysing experimental data.\n", - "\n", - "* As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors." - ] - }, - { - "cell_type": "markdown", - "id": "21549353", - "metadata": { - "editable": true - }, - "source": [ - "## Statistical analysis\n", - "* As in other experiments, many numerical experiments have two classes of errors:\n", - "\n", - "a. Statistical errors\n", - "\n", - "b. Systematical errors\n", - "\n", - "* Statistical errors can be estimated using standard tools from statistics\n", - "\n", - "* Systematical errors are method specific and must be treated differently from case to case." - ] - }, - { - "cell_type": "markdown", - "id": "df4d2efc", - "metadata": { - "editable": true - }, - "source": [ - "## And why do we use such methods?\n", - "\n", - "As you will see below, due to correlations between various\n", - "measurements, we need to evaluate the so-called covariance in order to\n", - "establish a proper evaluation of the total variance and the thereby\n", - "the standard deviation of a given expectation value.\n", - "\n", - "The covariance however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate.\n", - "Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem." 
- ] - }, - { - "cell_type": "markdown", - "id": "08b48c0e", - "metadata": { - "editable": true - }, - "source": [ - "## Central limit theorem\n", - "\n", - "Last week we derived the central limit theorem with the following assumptions:\n", - "\n", - "**Measurement $i$.**\n", - "\n", - "We assumed that each individual measurement $x_{ij}$ is represented by stochastic variables which are independent and identically distributed (iid).\n", - "This defined the sample mean of experiment $i$ with $n$ samples as" - ] - }, - { - "cell_type": "markdown", - "id": "cc7b9945", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\overline{x}_i=\\frac{1}{n}\\sum_{j} x_{ij}.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "a1933b45", - "metadata": { - "editable": true - }, - "source": [ - "and the sample variance" - ] - }, - { - "cell_type": "markdown", - "id": "d415d278", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\sigma^2_i=\\frac{1}{n}\\sum_{j} \\left(x_{ij}-\\overline{x}_i\\right)^2.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "e73e9f65", - "metadata": { - "editable": true - }, - "source": [ - "## Further remarks\n", - "\n", - "Note that we use $n$ instead of $n-1$ in the definition of\n", - "variance. The sample variance and the sample mean are not necessarily equal to\n", - "the exact values we would get if we knew the corresponding probability\n", - "distribution." 
- ] - }, - { - "cell_type": "markdown", - "id": "ede0d2ac", - "metadata": { - "editable": true - }, - "source": [ - "## Running many measurements\n", - "\n", - "**Adding $m$ measurements $i$.**\n", - "\n", - "With the assumption that the average measurements $i$ are also defined as iid stochastic variables and have the same probability function $p$,\n", - "we defined the total average over $m$ experiments as" - ] - }, - { - "cell_type": "markdown", - "id": "7b1560b6", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\overline{X}=\\frac{1}{m}\\sum_{i} \\overline{x}_{i}.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "eff82084", - "metadata": { - "editable": true - }, - "source": [ - "and the total variance" - ] - }, - { - "cell_type": "markdown", - "id": "62654667", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\sigma^2_{m}=\\frac{1}{m}\\sum_{i} \\left( \\overline{x}_{i}-\\overline{X}\\right)^2.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "68cb0f0f", - "metadata": { - "editable": true - }, - "source": [ - "These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit $m\\rightarrow \\infty$, the distribution for $\\overline{X}$ is given by a Gaussian distribution with variance $\\sigma^2_m$." 
- ] - }, - { - "cell_type": "markdown", - "id": "f42bf77a", - "metadata": { - "editable": true - }, - "source": [ - "## Adding more definitions\n", - "\n", - "The total sample variance over the $mn$ measurements is defined as" - ] - }, - { - "cell_type": "markdown", - "id": "5b148aba", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\sigma^2=\\frac{1}{mn}\\sum_{i=1}^{m} \\sum_{j=1}^{n}\\left(x_{ij}-\\overline{X}\\right)^2.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "ad425337", - "metadata": { - "editable": true - }, - "source": [ - "We have from the equation for $\\sigma_m^2$" - ] - }, - { - "cell_type": "markdown", - "id": "4b7c80ed", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\overline{x}_i-\\overline{X}=\\frac{1}{n}\\sum_{j=1}^{n}\\left(x_{ij}-\\overline{X}\\right),\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "b9a0bf94", - "metadata": { - "editable": true - }, - "source": [ - "and introducing the centered value $\\tilde{x}_{ij}=x_{ij}-\\overline{X}$, we can rewrite $\\sigma_m^2$ as" - ] - }, - { - "cell_type": "markdown", - "id": "4a903b42", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\sigma^2_{m}=\\frac{1}{m}\\sum_{i} \\left( \\overline{x}_{i}-\\overline{X}\\right)^2=\\frac{1}{m}\\sum_{i=1}^{m}\\left[ \\frac{1}{n}\\sum_{j=1}^{n}\\tilde{x}_{ij}\\right]^2.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "859bdf66", - "metadata": { - "editable": true - }, - "source": [ - "## Further rewriting\n", - "\n", - "We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements" - ] - }, - { - "cell_type": "markdown", - "id": "a85255c9", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\begin{align*}\n", - "\\sigma^2_{m}& =\\frac{1}{m}\\sum_{i=1}^{m}\\left[ \\frac{1}{n}\\sum_{j=1}^{n}\\tilde{x}_{ij}\\right]^2 \\\\\n", - " & = \\frac{1}{mn^2}\\sum_{i=1}^{m} 
\\sum_{j=1}^{n}\\tilde{x}_{ij}^2+\\frac{2}{mn^2}\\sum_{i=1}^{m} \\sum_{j1$ and $X_1,X_2,\\cdots, X_n$ is a stationary time series to begin with. \n", - "Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging $X_1,X_2,\\cdots,X_n$ in an $n$-tuple. Define:" - ] - }, - { - "cell_type": "markdown", - "id": "55e9d3e1", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\begin{align*}\n", - "\\hat{X} = (X_1,X_2,\\cdots,X_n).\n", - "\\end{align*}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "d3492b16", - "metadata": { - "editable": true - }, - "source": [ - "## Why blocking?\n", - "\n", - "The strength of the blocking method is when the number of\n", - "observations, $n$ is large. For large $n$, the complexity of dependent\n", - "bootstrapping scales poorly, but the blocking method does not,\n", - "moreover, it becomes more accurate the larger $n$ is." - ] - }, - { - "cell_type": "markdown", - "id": "90283800", - "metadata": { - "editable": true - }, - "source": [ - "## Blocking Transformations\n", - " We now define the blocking transformations. The idea is to take the mean of subsequent\n", - "pair of elements from $\\boldsymbol{X}$ and form a new vector\n", - "$\\boldsymbol{X}_1$. Continuing in the same way by taking the mean of\n", - "subsequent pairs of elements of $\\boldsymbol{X}_1$ we obtain $\\boldsymbol{X}_2$, and\n", - "so on. \n", - "Define $\\boldsymbol{X}_i$ recursively by:" - ] - }, - { - "cell_type": "markdown", - "id": "8791c09b", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "(\\boldsymbol{X}_0)_k \\equiv (\\boldsymbol{X})_k \\nonumber\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "881dde94", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation} \n", - "(\\boldsymbol{X}_{i+1})_k \\equiv \\frac{1}{2}\\Big( (\\boldsymbol{X}_i)_{2k-1} +\n", - "(\\boldsymbol{X}_i)_{2k} \\Big) \\qquad \\text{for all} \\qquad 1 \\leq i \\leq d-1\n", - "\\label{_auto1} \\tag{1}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "a5648050", - "metadata": { - "editable": true - }, - "source": [ - "## Blocking transformations\n", - "\n", - "The quantity $\\boldsymbol{X}_k$ is\n", - "subject to $k$ **blocking transformations**. We now have $d$ vectors\n", - "$\\boldsymbol{X}_0, \\boldsymbol{X}_1,\\cdots,\\vec X_{d-1}$ containing the subsequent\n", - "averages of observations. It turns out that if the components of\n", - "$\\boldsymbol{X}$ is a stationary time series, then the components of\n", - "$\\boldsymbol{X}_i$ is a stationary time series for all $0 \\leq i \\leq d-1$\n", - "\n", - "We can then compute the autocovariance, the variance, sample mean, and\n", - "number of observations for each $i$. \n", - "Let $\\gamma_i, \\sigma_i^2,\n", - "\\overline{X}_i$ denote the covariance, variance and average of the\n", - "elements of $\\boldsymbol{X}_i$ and let $n_i$ be the number of elements of\n", - "$\\boldsymbol{X}_i$. It follows by induction that $n_i = n/2^i$." 
- ] - }, - { - "cell_type": "markdown", - "id": "979611cc", - "metadata": { - "editable": true - }, - "source": [ - "## Blocking Transformations\n", - "\n", - "Using the\n", - "definition of the blocking transformation and the distributive\n", - "property of the covariance, it is clear that since $h =|i-j|$\n", - "we can define" - ] - }, - { - "cell_type": "markdown", - "id": "c17c5ab4", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\gamma_{k+1}(h) = cov\\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \\right) \\nonumber\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "6df608d1", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "= \\frac{1}{4}cov\\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \\right) \\nonumber\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "004092a6", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation} \n", - "= \\frac{1}{2}\\gamma_{k}(2h) + \\frac{1}{2}\\gamma_k(2h+1) \\hspace{0.1cm} \\mathrm{h = 0} \n", - "\\label{_auto2} \\tag{2}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "d0be2c66", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation} \n", - "=\\frac{1}{4}\\gamma_k(2h-1) + \\frac{1}{2}\\gamma_k(2h) + \\frac{1}{4}\\gamma_k(2h+1) \\quad \\mathrm{else}\n", - "\\label{_auto3} \\tag{3}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "69ca5a78", - "metadata": { - "editable": true - }, - "source": [ - "The quantity $\\hat{X}$ is asymptotically uncorrelated by assumption, $\\hat{X}_k$ is also asymptotic uncorrelated. Let's turn our attention to the variance of the sample\n", - "mean $\\mathrm{var}(\\overline{X})$." - ] - }, - { - "cell_type": "markdown", - "id": "f8328d81", - "metadata": { - "editable": true - }, - "source": [ - "## Blocking Transformations, getting there\n", - "We have" - ] - }, - { - "cell_type": "markdown", - "id": "4c798f11", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation}\n", - "\\mathrm{var}(\\overline{X}_k) = \\frac{\\sigma_k^2}{n_k} + \\underbrace{\\frac{2}{n_k} \\sum_{h=1}^{n_k-1}\\left( 1 - \\frac{h}{n_k} \\right)\\gamma_k(h)}_{\\equiv e_k} = \\frac{\\sigma^2_k}{n_k} + e_k \\quad \\text{if} \\quad \\gamma_k(0) = \\sigma_k^2. \n", - "\\label{_auto4} \\tag{4}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "a4ecc77b", - "metadata": { - "editable": true - }, - "source": [ - "The term $e_k$ is called the **truncation error**:" - ] - }, - { - "cell_type": "markdown", - "id": "f9f794e9", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation}\n", - "e_k = \\frac{2}{n_k} \\sum_{h=1}^{n_k-1}\\left( 1 - \\frac{h}{n_k} \\right)\\gamma_k(h). \n", - "\\label{_auto5} \\tag{5}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "3fface33", - "metadata": { - "editable": true - }, - "source": [ - "We can show that $\\mathrm{var}(\\overline{X}_i) = \\mathrm{var}(\\overline{X}_j)$ for all $0 \\leq i \\leq d-1$ and $0 \\leq j \\leq d-1$." - ] - }, - { - "cell_type": "markdown", - "id": "064ba2a5", - "metadata": { - "editable": true - }, - "source": [ - "## Blocking Transformations, final expressions\n", - "\n", - "We can then wrap up" - ] - }, - { - "cell_type": "markdown", - "id": "b112cab0", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "n_{j+1} \\overline{X}_{j+1} = \\sum_{i=1}^{n_{j+1}} (\\hat{X}_{j+1})_i = \\frac{1}{2}\\sum_{i=1}^{n_{j}/2} (\\hat{X}_{j})_{2i-1} + (\\hat{X}_{j})_{2i} \\nonumber\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "343ef06b", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation} \n", - "= \\frac{1}{2}\\left[ (\\hat{X}_j)_1 + (\\hat{X}_j)_2 + \\cdots + (\\hat{X}_j)_{n_j} \\right] = \\underbrace{\\frac{n_j}{2}}_{=n_{j+1}} \\overline{X}_j = n_{j+1}\\overline{X}_j. \n", - "\\label{_auto6} \\tag{6}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "b9fc08c8", - "metadata": { - "editable": true - }, - "source": [ - "By repeated use of this equation we get $\\mathrm{var}(\\overline{X}_i) = \\mathrm{var}(\\overline{X}_0) = \\mathrm{var}(\\overline{X})$ for all $0 \\leq i \\leq d-1$. This has the consequence that" - ] - }, - { - "cell_type": "markdown", - "id": "9e41a50c", - "metadata": { - "editable": true - }, - "source": [ - "\n", - "
    \n", - "\n", - "$$\n", - "\\begin{equation}\n", - "\\mathrm{var}(\\overline{X}) = \\frac{\\sigma_k^2}{n_k} + e_k \\qquad \\text{for all} \\qquad 0 \\leq k \\leq d-1. \\label{eq:convergence} \\tag{7}\n", - "\\end{equation}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "4dad2b57", - "metadata": { - "editable": true - }, - "source": [ - "## More on the blocking method\n", - "\n", - "Flyvbjerg and Petersen demonstrated that the sequence\n", - "$\\{e_k\\}_{k=0}^{d-1}$ is decreasing, and conjecture that the term\n", - "$e_k$ can be made as small as we would like by making $k$ (and hence\n", - "$d$) sufficiently large. The sequence is decreasing.\n", - "It means we can apply blocking transformations until\n", - "$e_k$ is sufficiently small, and then estimate $\\mathrm{var}(\\overline{X})$ by\n", - "$\\widehat{\\sigma}^2_k/n_k$. \n", - "\n", - "For an elegant solution and proof of the blocking method, see the recent article of [Marius Jonsson (former MSc student of the Computational Physics group)](https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304)." 
- ] - }, - { - "cell_type": "markdown", - "id": "804f4e29", - "metadata": { - "editable": true - }, - "source": [ - "## Example code form last week" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "e659813f", - "metadata": { - "collapsed": false, - "editable": true - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "# 2-electron VMC code for 2dim quantum dot with importance sampling\n", - "# Using gaussian rng for new positions and Metropolis- Hastings \n", - "# Added energy minimization\n", - "from math import exp, sqrt\n", - "from random import random, seed, normalvariate\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from mpl_toolkits.mplot3d import Axes3D\n", - "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", - "from scipy.optimize import minimize\n", - "import sys\n", - "import os\n", - "\n", - "# Where to save data files\n", - "PROJECT_ROOT_DIR = \"Results\"\n", - "DATA_ID = \"Results/EnergyMin\"\n", - "\n", - "if not os.path.exists(PROJECT_ROOT_DIR):\n", - " os.mkdir(PROJECT_ROOT_DIR)\n", - "\n", - "if not os.path.exists(DATA_ID):\n", - " os.makedirs(DATA_ID)\n", - "\n", - "def data_path(dat_id):\n", - " return os.path.join(DATA_ID, dat_id)\n", - "\n", - "outfile = open(data_path(\"Energies.dat\"),'w')\n", - "\n", - "\n", - "# Trial wave function for the 2-electron quantum dot in two dims\n", - "def WaveFunction(r,alpha,beta):\n", - " r1 = r[0,0]**2 + r[0,1]**2\n", - " r2 = r[1,0]**2 + r[1,1]**2\n", - " r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)\n", - " deno = r12/(1+beta*r12)\n", - " return exp(-0.5*alpha*(r1+r2)+deno)\n", - "\n", - "# Local energy for the 2-electron quantum dot in two dims, using analytical local energy\n", - "def LocalEnergy(r,alpha,beta):\n", - " \n", - " r1 = (r[0,0]**2 + r[0,1]**2)\n", - " r2 = (r[1,0]**2 + r[1,1]**2)\n", - " r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)\n", - " deno = 
1.0/(1+beta*r12)\n", - " deno2 = deno*deno\n", - " return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12)\n", - "\n", - "# Derivate of wave function ansatz as function of variational parameters\n", - "def DerivativeWFansatz(r,alpha,beta):\n", - " \n", - " WfDer = np.zeros((2), np.double)\n", - " r1 = (r[0,0]**2 + r[0,1]**2)\n", - " r2 = (r[1,0]**2 + r[1,1]**2)\n", - " r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)\n", - " deno = 1.0/(1+beta*r12)\n", - " deno2 = deno*deno\n", - " WfDer[0] = -0.5*(r1+r2)\n", - " WfDer[1] = -r12*r12*deno2\n", - " return WfDer\n", - "\n", - "# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector\n", - "def QuantumForce(r,alpha,beta):\n", - "\n", - " qforce = np.zeros((NumberParticles,Dimension), np.double)\n", - " r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2)\n", - " deno = 1.0/(1+beta*r12)\n", - " qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12\n", - " qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12\n", - " return qforce\n", - " \n", - "\n", - "# Computing the derivative of the energy and the energy \n", - "def EnergyDerivative(x0):\n", - "\n", - " \n", - " # Parameters in the Fokker-Planck simulation of the quantum force\n", - " D = 0.5\n", - " TimeStep = 0.05\n", - " # positions\n", - " PositionOld = np.zeros((NumberParticles,Dimension), np.double)\n", - " PositionNew = np.zeros((NumberParticles,Dimension), np.double)\n", - " # Quantum force\n", - " QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)\n", - " QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)\n", - "\n", - " energy = 0.0\n", - " DeltaE = 0.0\n", - " alpha = x0[0]\n", - " beta = x0[1]\n", - " EnergyDer = 0.0\n", - " DeltaPsi = 0.0\n", - " DerivativePsiE = 0.0 \n", - " #Initial position\n", - " for i in range(NumberParticles):\n", - " for j in range(Dimension):\n", - " PositionOld[i,j] = 
normalvariate(0.0,1.0)*sqrt(TimeStep)\n", - " wfold = WaveFunction(PositionOld,alpha,beta)\n", - " QuantumForceOld = QuantumForce(PositionOld,alpha, beta)\n", - "\n", - " #Loop over MC MCcycles\n", - " for MCcycle in range(NumberMCcycles):\n", - " #Trial position moving one particle at the time\n", - " for i in range(NumberParticles):\n", - " for j in range(Dimension):\n", - " PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\\\n", - " QuantumForceOld[i,j]*TimeStep*D\n", - " wfnew = WaveFunction(PositionNew,alpha,beta)\n", - " QuantumForceNew = QuantumForce(PositionNew,alpha, beta)\n", - " GreensFunction = 0.0\n", - " for j in range(Dimension):\n", - " GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\\\n", - "\t (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\\\n", - " PositionNew[i,j]+PositionOld[i,j])\n", - " \n", - " GreensFunction = exp(GreensFunction)\n", - " ProbabilityRatio = GreensFunction*wfnew**2/wfold**2\n", - " #Metropolis-Hastings test to see whether we accept the move\n", - " if random() <= ProbabilityRatio:\n", - " for j in range(Dimension):\n", - " PositionOld[i,j] = PositionNew[i,j]\n", - " QuantumForceOld[i,j] = QuantumForceNew[i,j]\n", - " wfold = wfnew\n", - " DeltaE = LocalEnergy(PositionOld,alpha,beta)\n", - " DerPsi = DerivativeWFansatz(PositionOld,alpha,beta)\n", - " DeltaPsi += DerPsi\n", - " energy += DeltaE\n", - " DerivativePsiE += DerPsi*DeltaE\n", - " \n", - " # We calculate mean values\n", - " energy /= NumberMCcycles\n", - " DerivativePsiE /= NumberMCcycles\n", - " DeltaPsi /= NumberMCcycles\n", - " EnergyDer = 2*(DerivativePsiE-DeltaPsi*energy)\n", - " return EnergyDer\n", - "\n", - "\n", - "# Computing the expectation value of the local energy \n", - "def Energy(x0):\n", - " # Parameters in the Fokker-Planck simulation of the quantum force\n", - " D = 0.5\n", - " TimeStep = 0.05\n", - " # positions\n", - " PositionOld = np.zeros((NumberParticles,Dimension), np.double)\n", - " 
PositionNew = np.zeros((NumberParticles,Dimension), np.double)\n", - " # Quantum force\n", - " QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double)\n", - " QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double)\n", - "\n", - " energy = 0.0\n", - " DeltaE = 0.0\n", - " alpha = x0[0]\n", - " beta = x0[1]\n", - " #Initial position\n", - " for i in range(NumberParticles):\n", - " for j in range(Dimension):\n", - " PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep)\n", - " wfold = WaveFunction(PositionOld,alpha,beta)\n", - " QuantumForceOld = QuantumForce(PositionOld,alpha, beta)\n", - "\n", - " #Loop over MC MCcycles\n", - " for MCcycle in range(NumberMCcycles):\n", - " #Trial position moving one particle at the time\n", - " for i in range(NumberParticles):\n", - " for j in range(Dimension):\n", - " PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\\\n", - " QuantumForceOld[i,j]*TimeStep*D\n", - " wfnew = WaveFunction(PositionNew,alpha,beta)\n", - " QuantumForceNew = QuantumForce(PositionNew,alpha, beta)\n", - " GreensFunction = 0.0\n", - " for j in range(Dimension):\n", - " GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\\\n", - "\t (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\\\n", - " PositionNew[i,j]+PositionOld[i,j])\n", - " \n", - " GreensFunction = exp(GreensFunction)\n", - " ProbabilityRatio = GreensFunction*wfnew**2/wfold**2\n", - " #Metropolis-Hastings test to see whether we accept the move\n", - " if random() <= ProbabilityRatio:\n", - " for j in range(Dimension):\n", - " PositionOld[i,j] = PositionNew[i,j]\n", - " QuantumForceOld[i,j] = QuantumForceNew[i,j]\n", - " wfold = wfnew\n", - " DeltaE = LocalEnergy(PositionOld,alpha,beta)\n", - " energy += DeltaE\n", - " if Printout: \n", - " outfile.write('%f\\n' %(energy/(MCcycle+1.0))) \n", - " # We calculate mean values\n", - " energy /= NumberMCcycles\n", - " return energy\n", - "\n", - "#Here starts the main program 
with variable declarations\n", - "NumberParticles = 2\n", - "Dimension = 2\n", - "# seed for rng generator \n", - "seed()\n", - "# Monte Carlo cycles for parameter optimization\n", - "Printout = False\n", - "NumberMCcycles= 10000\n", - "# guess for variational parameters\n", - "x0 = np.array([0.9,0.2])\n", - "# Using Broydens method to find optimal parameters\n", - "res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True})\n", - "x0 = res.x\n", - "# Compute the energy again with the optimal parameters and increased number of Monte Cycles\n", - "NumberMCcycles= 2**19\n", - "Printout = True\n", - "FinalEnergy = Energy(x0)\n", - "EResult = np.array([FinalEnergy,FinalEnergy])\n", - "outfile.close()\n", - "#nice printout with Pandas\n", - "import pandas as pd\n", - "from pandas import DataFrame\n", - "data ={'Optimal Parameters':x0, 'Final Energy':EResult}\n", - "frame = pd.DataFrame(data)\n", - "print(frame)" - ] - }, - { - "cell_type": "markdown", - "id": "265f2fe3", - "metadata": { - "editable": true - }, - "source": [ - "## Resampling analysis\n", - "\n", - "The next step is then to use the above data sets and perform a\n", - "resampling analysis using the blocking method\n", - "The blocking code, based on the article of [Marius Jonsson](https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304) is given here" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "3397cf44", - "metadata": { - "collapsed": false, - "editable": true - }, - "outputs": [], - "source": [ - "# Common imports\n", - "import os\n", - "\n", - "# Where to save the figures and data files\n", - "DATA_ID = \"Results/EnergyMin\"\n", - "\n", - "def data_path(dat_id):\n", - " return os.path.join(DATA_ID, dat_id)\n", - "\n", - "infile = open(data_path(\"Energies.dat\"),'r')\n", - "\n", - "from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt\n", - "from numpy.linalg import inv\n", - 
"\n", - "def block(x):\n", - " # preliminaries\n", - " n = len(x)\n", - " d = int(log2(n))\n", - " s, gamma = zeros(d), zeros(d)\n", - " mu = mean(x)\n", - "\n", - " # estimate the auto-covariance and variances \n", - " # for each blocking transformation\n", - " for i in arange(0,d):\n", - " n = len(x)\n", - " # estimate autocovariance of x\n", - " gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) )\n", - " # estimate variance of x\n", - " s[i] = var(x)\n", - " # perform blocking transformation\n", - " x = 0.5*(x[0::2] + x[1::2])\n", - " \n", - " # generate the test observator M_k from the theorem\n", - " M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] ) )[::-1]\n", - "\n", - " # we need a list of magic numbers\n", - " q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181])\n", - "\n", - " # use magic to determine when we should have stopped blocking\n", - " for k in arange(0,d):\n", - " if(M[k] < q[k]):\n", - " break\n", - " if (k >= d-1):\n", - " print(\"Warning: Use more data\")\n", - " return mu, s[k]/2**(d-k)\n", - "\n", - "\n", - "x = loadtxt(infile)\n", - "(mean, var) = block(x) \n", - "std = sqrt(var)\n", - "import pandas as pd\n", - "from pandas import DataFrame\n", - "data ={'Mean':[mean], 'STDev':[std]}\n", - "frame = pd.DataFrame(data,index=['Values'])\n", - "print(frame)" - ] - }, - { - "cell_type": "markdown", - "id": "1de1b08e", - "metadata": { - "editable": true - }, - "source": [ - "## Content\n", - "* Simple compiler options \n", - "\n", - "* Tools to benchmark your code\n", - "\n", - "* Machine architectures\n", - "\n", - "* What is vectorization?\n", - "\n", - "* How to measure code performance\n", - "\n", - "* Parallelization with OpenMP\n", - "\n", - "* 
Parallelization with MPI\n", - "\n", - "* Vectorization and parallelization, examples" - ] - }, - { - "cell_type": "markdown", - "id": "4013f9b4", - "metadata": { - "editable": true - }, - "source": [ - "## Optimization and profiling\n", - "\n", - "Till now we have not paid much attention to speed and possible optimization possibilities\n", - "inherent in the various compilers. We have compiled and linked as" - ] - }, - { - "cell_type": "markdown", - "id": "36c01fcc", - "metadata": { - "editable": true - }, - "source": [ - " c++ -c mycode.cpp\n", - " c++ -o mycode.exe mycode.o\n" - ] - }, - { - "cell_type": "markdown", - "id": "405ad9e9", - "metadata": { - "editable": true - }, - "source": [ - "For Fortran replace with for example **gfortran** or **ifort**.\n", - "This is what we call a flat compiler option and should be used when we develop the code.\n", - "It produces normally a very large and slow code when translated to machine instructions.\n", - "We use this option for debugging and for establishing the correct program output because\n", - "every operation is done precisely as the user specified it.\n", - "\n", - "It is instructive to look up the compiler manual for further instructions by writing" - ] - }, - { - "cell_type": "markdown", - "id": "5377c5da", - "metadata": { - "editable": true - }, - "source": [ - " man c++\n" - ] - }, - { - "cell_type": "markdown", - "id": "cb6cd8bb", - "metadata": { - "editable": true - }, - "source": [ - "## More on optimization\n", - "We have additional compiler options for optimization. These may include procedure inlining where \n", - "performance may be improved, moving constants inside loops outside the loop, \n", - "identify potential parallelism, include automatic vectorization or replace a division with a reciprocal\n", - "and a multiplication if this speeds up the code." 
- ] - }, - { - "cell_type": "markdown", - "id": "1e3fb35c", - "metadata": { - "editable": true - }, - "source": [ - " c++ -O3 -c mycode.cpp\n", - " c++ -O3 -o mycode.exe mycode.o\n" - ] - }, - { - "cell_type": "markdown", - "id": "84e6c1d2", - "metadata": { - "editable": true - }, - "source": [ - "This (other options are -O2 or -Ofast) is the recommended option." - ] - }, - { - "cell_type": "markdown", - "id": "72e6c3cd", - "metadata": { - "editable": true - }, - "source": [ - "## Optimization and profiling\n", - "It is also useful to profile your program under the development stage.\n", - "You would then compile with" - ] - }, - { - "cell_type": "markdown", - "id": "ea5d6f90", - "metadata": { - "editable": true - }, - "source": [ - " c++ -pg -O3 -c mycode.cpp\n", - " c++ -pg -O3 -o mycode.exe mycode.o\n" - ] - }, - { - "cell_type": "markdown", - "id": "3887b522", - "metadata": { - "editable": true - }, - "source": [ - "After you have run the code you can obtain the profiling information via" - ] - }, - { - "cell_type": "markdown", - "id": "3ca2b19a", - "metadata": { - "editable": true - }, - "source": [ - " gprof mycode.exe > ProfileOutput\n" - ] - }, - { - "cell_type": "markdown", - "id": "439672d1", - "metadata": { - "editable": true - }, - "source": [ - "When you have profiled properly your code, you must take out this option as it \n", - "slows down performance.\n", - "For memory tests use [valgrind](http://www.valgrind.org). An excellent environment for all these aspects, and much more, is Qt creator." 
- ] - }, - { - "cell_type": "markdown", - "id": "1eb68081", - "metadata": { - "editable": true - }, - "source": [ - "## Optimization and debugging\n", - "Adding debugging options is a very useful alternative under the development stage of a program.\n", - "You would then compile with" - ] - }, - { - "cell_type": "markdown", - "id": "de86ccd8", - "metadata": { - "editable": true - }, - "source": [ - " c++ -g -O0 -c mycode.cpp\n", - " c++ -g -O0 -o mycode.exe mycode.o\n" - ] - }, - { - "cell_type": "markdown", - "id": "4842593c", - "metadata": { - "editable": true - }, - "source": [ - "This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option **-O0**.\n", - "\n", - "**Other optimization flags.**\n", - "\n", - "Depending on the compiler, one can add flags which generate code that catches integer overflow errors. \n", - "The flag **-ftrapv** does this for the CLANG compiler on OS X operating systems." - ] - }, - { - "cell_type": "markdown", - "id": "6d207ed0", - "metadata": { - "editable": true - }, - "source": [ - "## Other hints\n", - "In general, irrespective of compiler options, it is useful to\n", - "* avoid if tests or call to functions inside loops, if possible. 
\n", - "\n", - "* avoid multiplication with constants inside loops if possible\n", - "\n", - "Here is an example of a part of a program where specific operations lead to a slower code" - ] - }, - { - "cell_type": "markdown", - "id": "a96a8b01", - "metadata": { - "editable": true - }, - "source": [ - " k = n-1;\n", - " for (i = 0; i < n; i++){\n", - " a[i] = b[i] +c*d;\n", - " e = g[k];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "f5e3e574", - "metadata": { - "editable": true - }, - "source": [ - "A better code is" - ] - }, - { - "cell_type": "markdown", - "id": "239e6239", - "metadata": { - "editable": true - }, - "source": [ - " temp = c*d;\n", - " for (i = 0; i < n; i++){\n", - " a[i] = b[i] + temp;\n", - " }\n", - " e = g[n-1];\n" - ] - }, - { - "cell_type": "markdown", - "id": "a7a833fa", - "metadata": { - "editable": true - }, - "source": [ - "Here we avoid a repeated multiplication inside a loop. \n", - "Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here." - ] - }, - { - "cell_type": "markdown", - "id": "29636e71", - "metadata": { - "editable": true - }, - "source": [ - "## Vectorization and the basic idea behind parallel computing\n", - "Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements.\n", - "* Pursuit of shorter computation time and larger simulation size gives rise to parallel computing.\n", - "\n", - "* Multiple processors are involved to solve a global problem.\n", - "\n", - "* The essence is to divide the entire computation evenly among collaborative processors. 
Divide and conquer.\n", - "\n", - "Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models." - ] - }, - { - "cell_type": "markdown", - "id": "bf382314", - "metadata": { - "editable": true - }, - "source": [ - "## A rough classification of hardware models\n", - "\n", - "* Conventional single-processor computers are named SISD (single-instruction-single-data) machines.\n", - "\n", - "* SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data.\n", - "\n", - "* Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data." - ] - }, - { - "cell_type": "markdown", - "id": "f500592c", - "metadata": { - "editable": true - }, - "source": [ - "## Shared memory and distributed memory\n", - "One way of categorizing modern parallel computers is to look at the memory configuration.\n", - "* In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory.\n", - "\n", - "* In distributed memory systems each CPU has its own memory.\n", - "\n", - "The CPUs are connected by some network and may exchange messages." - ] - }, - { - "cell_type": "markdown", - "id": "90e3f414", - "metadata": { - "editable": true - }, - "source": [ - "## Different parallel programming paradigms\n", - "\n", - "* **Task parallelism**: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use.\n", - "\n", - "* **Data parallelism**: use of multiple threads (e.g. one or more threads per processor) to dissect loops over arrays etc. 
Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives." - ] - }, - { - "cell_type": "markdown", - "id": "2ea4473a", - "metadata": { - "editable": true - }, - "source": [ - "## Different parallel programming paradigms\n", - "\n", - "* **Message passing**: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization.\n", - "\n", - "* This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult." - ] - }, - { - "cell_type": "markdown", - "id": "5fb521d1", - "metadata": { - "editable": true - }, - "source": [ - "## What is vectorization?\n", - "Vectorization is a special\n", - "case of **Single Instructions Multiple Data** (SIMD) to denote a single\n", - "instruction stream capable of operating on multiple data elements in\n", - "parallel. 
\n", - "We can think of vectorization as the unrolling of loops accompanied with SIMD instructions.\n", - "\n", - "Vectorization is the process of converting an algorithm that performs scalar operations\n", - "(typically one operation at the time) to vector operations where a single operation can refer to many simultaneous operations.\n", - "Consider the following example" - ] - }, - { - "cell_type": "markdown", - "id": "dfb47bdc", - "metadata": { - "editable": true - }, - "source": [ - " for (i = 0; i < n; i++){\n", - " a[i] = b[i] + c[i];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "1d7fe6e0", - "metadata": { - "editable": true - }, - "source": [ - "If the code is not vectorized, the compiler will simply start with the first element and \n", - "then perform subsequent additions operating on one address in memory at the time." - ] - }, - { - "cell_type": "markdown", - "id": "1d29258b", - "metadata": { - "editable": true - }, - "source": [ - "## Number of elements that can acted upon\n", - "A SIMD instruction can operate on multiple data elements in one single instruction.\n", - "It uses the so-called 128-bit SIMD floating-point register. \n", - "In this sense, vectorization adds some form of parallelism since one instruction is applied \n", - "to many parts of say a vector.\n", - "\n", - "The number of elements which can be operated on in parallel\n", - "range from four single-precision floating point data elements in so-called \n", - "Streaming SIMD Extensions and two double-precision floating-point data\n", - "elements in Streaming SIMD Extensions 2 to sixteen byte operations in\n", - "a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length\n", - "ranges from 2 to 16, depending on the instruction extensions used and\n", - "on the data type. 
\n", - "\n", - "IN summary, our instructions operate on 128 bit (16 byte) operands\n", - "* 4 floats or ints\n", - "\n", - "* 2 doubles\n", - "\n", - "* Data paths 128 bits vide for vector unit" - ] - }, - { - "cell_type": "markdown", - "id": "f0f561a9", - "metadata": { - "editable": true - }, - "source": [ - "## Number of elements that can acted upon, examples\n", - "We start with the simple scalar operations given by" - ] - }, - { - "cell_type": "markdown", - "id": "6212347a", - "metadata": { - "editable": true - }, - "source": [ - " for (i = 0; i < n; i++){\n", - " a[i] = b[i] + c[i];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "2ce79f3b", - "metadata": { - "editable": true - }, - "source": [ - "If the code is not vectorized and we have a 128-bit register to store a 32 bits floating point number,\n", - "it means that we have $3\\times 32$ bits that are not used. \n", - "\n", - "We have thus unused space in our SIMD registers. These registers could hold three additional integers." - ] - }, - { - "cell_type": "markdown", - "id": "1a5dab49", - "metadata": { - "editable": true - }, - "source": [ - "## Operation counts for scalar operation\n", - "The code" - ] - }, - { - "cell_type": "markdown", - "id": "db9adf75", - "metadata": { - "editable": true - }, - "source": [ - " for (i = 0; i < n; i++){\n", - " a[i] = b[i] + c[i];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "cb2d35a4", - "metadata": { - "editable": true - }, - "source": [ - "has for $n$ repeats\n", - "1. one load for $c[i]$ in address 1\n", - "\n", - "2. one load for $b[i]$ in address 2\n", - "\n", - "3. add $c[i]$ and $b[i]$ to give $a[i]$\n", - "\n", - "4. 
store $a[i]$ in address 2" - ] - }, - { - "cell_type": "markdown", - "id": "335c5f06", - "metadata": { - "editable": true - }, - "source": [ - "## Number of elements that can acted upon, examples\n", - "If we vectorize the code, we can perform, with a 128-bit register four simultaneous operations, that is\n", - "we have" - ] - }, - { - "cell_type": "markdown", - "id": "0817b562", - "metadata": { - "editable": true - }, - "source": [ - " for (i = 0; i < n; i+=4){\n", - " a[i] = b[i] + c[i];\n", - " a[i+1] = b[i+1] + c[i+1];\n", - " a[i+2] = b[i+2] + c[i+2];\n", - " a[i+3] = b[i+3] + c[i+3];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "21b11a74", - "metadata": { - "editable": true - }, - "source": [ - "Four additions are now done in a single step." - ] - }, - { - "cell_type": "markdown", - "id": "63cb8bda", - "metadata": { - "editable": true - }, - "source": [ - "## Number of operations when vectorized\n", - "For $n/4$ repeats assuming floats or integers\n", - "1. one vector load for $c[i]$ in address 1\n", - "\n", - "2. one load for $b[i]$ in address 2\n", - "\n", - "3. add $c[i]$ and $b[i]$ to give $a[i]$\n", - "\n", - "4. store $a[i]$ in address 2" - ] - }, - { - "cell_type": "markdown", - "id": "c7767939", - "metadata": { - "editable": true - }, - "source": [ - "## [A simple test case with and without vectorization](https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp)\n", - "We implement these operations in a simple c++ program that computes at the end the norm of a vector." 
- ] - }, - { - "cell_type": "markdown", - "id": "89e5e83d", - "metadata": { - "editable": true - }, - "source": [ - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \"time.h\"\n", - " \n", - " using namespace std; // note use of namespace\n", - " int main (int argc, char* argv[])\n", - " {\n", - " // read in dimension of square matrix\n", - " int n = atoi(argv[1]);\n", - " double s = 1.0/sqrt( (double) n);\n", - " double *a, *b, *c;\n", - " // Start timing\n", - " clock_t start, finish;\n", - " start = clock();\n", - " // Allocate space for the vectors to be used\n", - " a = new double [n]; b = new double [n]; c = new double [n];\n", - " // Define parallel region\n", - " // Set up values for vectors a and b\n", - " for (int i = 0; i < n; i++){\n", - " double angle = 2.0*M_PI*i/ (( double ) n);\n", - " a[i] = s*(sin(angle) + cos(angle));\n", - " b[i] = s*sin(2.0*angle);\n", - " c[i] = 0.0;\n", - " }\n", - " // Then perform the vector addition\n", - " for (int i = 0; i < n; i++){\n", - " c[i] += a[i]+b[i];\n", - " }\n", - " // Compute now the norm-2\n", - " double Norm2 = 0.0;\n", - " for (int i = 0; i < n; i++){\n", - " Norm2 += c[i]*c[i];\n", - " }\n", - " finish = clock();\n", - " double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );\n", - " cout << setiosflags(ios::showpoint | ios::uppercase);\n", - " cout << setprecision(10) << setw(20) << \"Time used for norm computation=\" << timeused << endl;\n", - " cout << \" Norm-2 = \" << Norm2 << endl;\n", - " // Free up space\n", - " delete[] a;\n", - " delete[] b;\n", - " delete[] c;\n", - " return 0;\n", - " }\n", - " \n", - " \n", - " \n", - " \n" - ] - }, - { - "cell_type": "markdown", - "id": "622ef350", - "metadata": { - "editable": true - }, - "source": [ - "## Compiling with and without vectorization\n", - "We can compile and link without vectorization using the clang c++ compiler" - ] - }, - { - "cell_type": "markdown", - "id": "c5e37166", - "metadata": { - "editable": 
true - }, - "source": [ - " clang -o novec.x vecexample.cpp\n" - ] - }, - { - "cell_type": "markdown", - "id": "6c6dec26", - "metadata": { - "editable": true - }, - "source": [ - "and with vectorization (and additional optimizations)" - ] - }, - { - "cell_type": "markdown", - "id": "cdf688b8", - "metadata": { - "editable": true - }, - "source": [ - " clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp \n" - ] - }, - { - "cell_type": "markdown", - "id": "a3acd8de", - "metadata": { - "editable": true - }, - "source": [ - "The speedup depends on the size of the vectors. In the example here we have run with $10^7$ elements.\n", - "The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU." - ] - }, - { - "cell_type": "markdown", - "id": "ecea71ed", - "metadata": { - "editable": true - }, - "source": [ - " Compphys:~ hjensen$ ./vec.x 10000000\n", - " Time used for norm computation=0.04720500000\n", - " Compphys:~ hjensen$ ./novec.x 10000000\n", - " Time used for norm computation=0.03311700000\n" - ] - }, - { - "cell_type": "markdown", - "id": "7f90f854", - "metadata": { - "editable": true - }, - "source": [ - "This particular C++ compiler speeds up the above loop operations with a factor of 1.5 \n", - "Performing the same operations for $10^9$ elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster." - ] - }, - { - "cell_type": "markdown", - "id": "b406ced8", - "metadata": { - "editable": true - }, - "source": [ - " Compphys:~ hjensen$ ./vec.x 1000000000\n", - " Time used for norm computation=58.41391100\n", - " Compphys:~ hjensen$ ./novec.x 1000000000\n", - " Time used for norm computation=46.51295300\n" - ] - }, - { - "cell_type": "markdown", - "id": "d689a529", - "metadata": { - "editable": true - }, - "source": [ - "We will discuss these issues further in the next slides." 
- ] - }, - { - "cell_type": "markdown", - "id": "f12ac466", - "metadata": { - "editable": true - }, - "source": [ - "## Compiling with and without vectorization using clang\n", - "We can compile and link without vectorization with clang compiler" - ] - }, - { - "cell_type": "markdown", - "id": "d7b74229", - "metadata": { - "editable": true - }, - "source": [ - " clang++ -o -fno-vectorize novec.x vecexample.cpp\n" - ] - }, - { - "cell_type": "markdown", - "id": "4d6354ad", - "metadata": { - "editable": true - }, - "source": [ - "and with vectorization" - ] - }, - { - "cell_type": "markdown", - "id": "eeffbcd7", - "metadata": { - "editable": true - }, - "source": [ - " clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp \n" - ] - }, - { - "cell_type": "markdown", - "id": "a2977107", - "metadata": { - "editable": true - }, - "source": [ - "We can also add vectorization analysis, see for example" - ] - }, - { - "cell_type": "markdown", - "id": "145e25ac", - "metadata": { - "editable": true - }, - "source": [ - " clang++ -O3 -Rpass-analysis=loop-vectorize -o vec.x vecexample.cpp \n" - ] - }, - { - "cell_type": "markdown", - "id": "676d7ed0", - "metadata": { - "editable": true - }, - "source": [ - "or figure out if vectorization was missed" - ] - }, - { - "cell_type": "markdown", - "id": "146bc27a", - "metadata": { - "editable": true - }, - "source": [ - " clang++ -O3 -Rpass-missed=loop-vectorize -o vec.x vecexample.cpp \n" - ] - }, - { - "cell_type": "markdown", - "id": "0dc2e1d7", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, criteria\n", - "\n", - "Not all loops can be vectorized, as discussed in [Intel's guide to vectorization](https://software.intel.com/en-us/articles/a-guide-to-auto-vectorization-with-intel-c-compilers)\n", - "\n", - "An important criteria is that the loop counter $n$ is known at the entry of the loop." 
- ] - }, - { - "cell_type": "markdown", - "id": "98e50fec", - "metadata": { - "editable": true - }, - "source": [ - " for (int j = 0; j < n; j++) {\n", - " a[j] = cos(j*1.0);\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "878bb19c", - "metadata": { - "editable": true - }, - "source": [ - "The variable $n$ does need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent." - ] - }, - { - "cell_type": "markdown", - "id": "0b80829f", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, exit criteria\n", - "\n", - "An exit statement should in general be avoided. \n", - "If the exit statement contains data-dependent conditions, the loop cannot be vectorized. \n", - "The following is an example of a non-vectorizable loop" - ] - }, - { - "cell_type": "markdown", - "id": "4b7841dd", - "metadata": { - "editable": true - }, - "source": [ - " for (int j = 0; j < n; j++) {\n", - " a[j] = cos(j*1.0);\n", - " if (a[j] < 0 ) break;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "5bc29126", - "metadata": { - "editable": true - }, - "source": [ - "Avoid loop termination conditions and opt for a single entry loop variable $n$. The lower and upper bounds have to be kept fixed within the loop." - ] - }, - { - "cell_type": "markdown", - "id": "f5bac65f", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, straight-line code\n", - "\n", - "SIMD instructions perform the same type of operations multiple times. \n", - "A **switch** statement leads thus to a non-vectorizable loop since different statemens cannot branch.\n", - "The following code can however be vectorized since the **if** statement is implemented as a masked assignment." 
- ] - }, - { - "cell_type": "markdown", - "id": "69a7c7c4", - "metadata": { - "editable": true - }, - "source": [ - " for (int j = 0; j < n; j++) {\n", - " double x = cos(j*1.0);\n", - " if (x > 0 ) {\n", - " a[j] = x*sin(j*2.0); \n", - " }\n", - " else {\n", - " a[j] = 0.0;\n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "ec082f22", - "metadata": { - "editable": true - }, - "source": [ - "These operations can be performed for all data elements but only those elements which the mask evaluates as true are stored. In general, one should avoid branches such as **switch**, **go to**, or **return** statements or **if** constructs that cannot be treated as masked assignments." - ] - }, - { - "cell_type": "markdown", - "id": "22ab5df8", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, nested loops\n", - "\n", - "Only the innermost loop of the following example is vectorized" - ] - }, - { - "cell_type": "markdown", - "id": "48e3d47a", - "metadata": { - "editable": true - }, - "source": [ - " for (int i = 0; i < n; i++) {\n", - " for (int j = 0; j < n; j++) {\n", - " a[i][j] += b[i][j];\n", - " } \n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "fae476ae", - "metadata": { - "editable": true - }, - "source": [ - "The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations." - ] - }, - { - "cell_type": "markdown", - "id": "314371af", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, function calls\n", - "\n", - "Calls to programmer defined functions ruin vectorization. However, calls to intrinsic functions like\n", - "$\\sin{x}$, $\\cos{x}$, $\\exp{x}$ etc are allowed since they are normally efficiently vectorized. 
\n", - "The following example is fully vectorizable" - ] - }, - { - "cell_type": "markdown", - "id": "42d8c853", - "metadata": { - "editable": true - }, - "source": [ - " for (int i = 0; i < n; i++) {\n", - " a[i] = log10(i)*cos(i);\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "970bc318", - "metadata": { - "editable": true - }, - "source": [ - "Similarly, **inline** functions defined by the programmer, allow for vectorization since the function statements are glued into the actual place where the function is called." - ] - }, - { - "cell_type": "markdown", - "id": "90869c50", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, data dependencies\n", - "\n", - "One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called\n", - "read-after-write statement with an explicit flow dependency cannot be vectorized. The following code" - ] - }, - { - "cell_type": "markdown", - "id": "890532a2", - "metadata": { - "editable": true - }, - "source": [ - " double b = 15.;\n", - " for (int i = 1; i < n; i++) {\n", - " a[i] = a[i-1] + b;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "992df998", - "metadata": { - "editable": true - }, - "source": [ - "is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value $a[i-1]$ computed during the iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the values $a[0]$, $a[1]$, $a[2]$ and $a[3]$ from the previous loop will be loaded into the right-hand side and produce wrong results. 
That is, we have" - ] - }, - { - "cell_type": "markdown", - "id": "fb008853", - "metadata": { - "editable": true - }, - "source": [ - " a[1] = a[0] + b;\n", - " a[2] = a[1] + b;\n", - " a[3] = a[2] + b;\n", - " a[4] = a[3] + b;\n" - ] - }, - { - "cell_type": "markdown", - "id": "a4470689", - "metadata": { - "editable": true - }, - "source": [ - "and if the first two iterations are executed at the same time by the SIMD instruction, the value of say $a[1]$ could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results." - ] - }, - { - "cell_type": "markdown", - "id": "b862d1e3", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, more data dependencies\n", - "\n", - "On the other hand, a so-called \n", - "write-after-read statement can be vectorized. The following code" - ] - }, - { - "cell_type": "markdown", - "id": "fe4a7658", - "metadata": { - "editable": true - }, - "source": [ - " double b = 15.;\n", - " for (int i = 1; i < n; i++) {\n", - " a[i-1] = a[i] + b;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "1ff661d2", - "metadata": { - "editable": true - }, - "source": [ - "is an example of an anti-dependency (write-after-read) that can be vectorized since no iteration with a higher value of $i$\n", - "can complete before an iteration with a lower value of $i$. However, such code leads to problems with parallelization." - ] - }, - { - "cell_type": "markdown", - "id": "2b3f0ad3", - "metadata": { - "editable": true - }, - "source": [ - "## Automatic vectorization and vectorization inhibitors, memory stride\n", - "\n", - "For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements. The compiler can often not tell if it is safe to vectorize the code. 
\n", - "\n", - "When dealing with arrays, you should also avoid memory stride, since this slows down considerably vectorization. When you access array element, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here" - ] - }, - { - "cell_type": "markdown", - "id": "4e0c1946", - "metadata": { - "editable": true - }, - "source": [ - " for (int i = 0; i < n; i++) {\n", - " for (int j = 0; j < n; j++) {\n", - " a[i][j] += b[i][j];\n", - " } \n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "d882193b", - "metadata": { - "editable": true - }, - "source": [ - "## Memory management\n", - "The main memory contains the program data\n", - "1. Cache memory contains a copy of the main memory data\n", - "\n", - "2. Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory\n", - "\n", - "3. Registers contain working data only\n", - "\n", - " * Modern CPUs perform most or all operations only on data in register\n", - "\n", - "4. Multiple Cache memories contain a copy of the main memory data\n", - "\n", - " * Cache items accessed by their address in main memory\n", - "\n", - " * L1 cache is the fastest but has the least capacity\n", - "\n", - " * L2, L3 provide intermediate performance/size tradeoffs\n", - "\n", - "Loads and stores to memory can be as important as floating point operations when we measure performance." - ] - }, - { - "cell_type": "markdown", - "id": "6fd5fa43", - "metadata": { - "editable": true - }, - "source": [ - "## Memory and communication\n", - "\n", - "1. Most communication in a computer is carried out in chunks, blocks of bytes of data that move together\n", - "\n", - "2. 
In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines\n", - "\n", - " * Lines are typically 64-128 bytes, or 8-16 double precision words\n", - "\n", - " * Even if you do not use the data, it is moved and occupies space in the cache\n", - "\n", - "Many of these performance features are not captured in most programming languages." - ] - }, - { - "cell_type": "markdown", - "id": "68de093f", - "metadata": { - "editable": true - }, - "source": [ - "## Measuring performance\n", - "\n", - "How do we measure performance? What is wrong with this code to time a loop?" - ] - }, - { - "cell_type": "markdown", - "id": "16bb3303", - "metadata": { - "editable": true - }, - "source": [ - " clock_t start, finish;\n", - " start = clock();\n", - " for (int j = 0; j < i; j++) {\n", - " a[j] = b[j]+b[j]*c[j];\n", - " }\n", - " finish = clock();\n", - " double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );\n" - ] - }, - { - "cell_type": "markdown", - "id": "ee8650ac", - "metadata": { - "editable": true - }, - "source": [ - "## Problems with measuring time\n", - "1. Timers are not infinitely accurate\n", - "\n", - "2. All clocks have a granularity, the minimum time that they can measure\n", - "\n", - "3. The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick)\n", - "\n", - "4. Always know what your clock granularity is\n", - "\n", - "5. Ensure that your measurement is for a long enough duration (say 100 times the **tick**)" - ] - }, - { - "cell_type": "markdown", - "id": "9edf7838", - "metadata": { - "editable": true - }, - "source": [ - "## Problems with cold start\n", - "\n", - "What happens when the code is executed? The assumption is that the code is ready to\n", - "execute. But\n", - "1. Code may still be on disk, and not even read into memory.\n", - "\n", - "2. 
Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring)\n", - "\n", - "3. Multiple tests often necessary to ensure that cold start effects are not present\n", - "\n", - "4. Special effort often required to ensure data in the intended part of the memory hierarchy." - ] - }, - { - "cell_type": "markdown", - "id": "b9cc9f8c", - "metadata": { - "editable": true - }, - "source": [ - "## Problems with smart compilers\n", - "\n", - "1. If the result of the computation is not used, the compiler may eliminate the code\n", - "\n", - "2. Performance will look impossibly fantastic\n", - "\n", - "3. Even worse, eliminate some of the code so the performance looks plausible\n", - "\n", - "4. Ensure that the results are (or may be) used." - ] - }, - { - "cell_type": "markdown", - "id": "5ecfabb8", - "metadata": { - "editable": true - }, - "source": [ - "## Problems with interference\n", - "1. Other activities are sharing your processor\n", - "\n", - " * Operating system, system demons, other users\n", - "\n", - " * Some parts of the hardware do not always perform with exactly the same performance\n", - "\n", - "2. Make multiple tests and report\n", - "\n", - "3. Easy choices include\n", - "\n", - " * Average tests represent what users might observe over time" - ] - }, - { - "cell_type": "markdown", - "id": "d413e36d", - "metadata": { - "editable": true - }, - "source": [ - "## Problems with measuring performance\n", - "1. Accurate, reproducible performance measurement is hard\n", - "\n", - "2. Think carefully about your experiment:\n", - "\n", - "3. What is it, precisely, that you want to measure?\n", - "\n", - "4. How representative is your test to the situation that you are trying to measure?" 
- ] - }, - { - "cell_type": "markdown", - "id": "a8affa8f", - "metadata": { - "editable": true - }, - "source": [ - "## Thomas algorithm for tridiagonal linear algebra equations" - ] - }, - { - "cell_type": "markdown", - "id": "29e4a4c8", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\left( \\begin{array}{ccccc}\n", - " b_0 & c_0 & & & \\\\\n", - "\ta_0 & b_1 & c_1 & & \\\\\n", - "\t & & \\ddots & & \\\\\n", - "\t &\t & a_{m-3} & b_{m-2} & c_{m-2} \\\\\n", - "\t & & & a_{m-2} & b_{m-1}\n", - " \\end{array} \\right)\n", - "\\left( \\begin{array}{c}\n", - " x_0 \\\\\n", - " x_1 \\\\\n", - " \\vdots \\\\\n", - " x_{m-2} \\\\\n", - " x_{m-1}\n", - " \\end{array} \\right)=\\left( \\begin{array}{c}\n", - " f_0 \\\\\n", - " f_1 \\\\\n", - " \\vdots \\\\\n", - " f_{m-2} \\\\\n", - " f_{m-1} \\\\\n", - " \\end{array} \\right)\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "5152bc9c", - "metadata": { - "editable": true - }, - "source": [ - "## Thomas algorithm, forward substitution\n", - "The first step is to multiply the first row by $a_0/b_0$ and subtract it from the second row. This is known as the forward substitution step. 
We obtain then" - ] - }, - { - "cell_type": "markdown", - "id": "391ad490", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "a_i = 0,\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "42196c57", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "b_i = b_i - \\frac{a_{i-1}}{b_{i-1}}c_{i-1},\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "997f90e8", - "metadata": { - "editable": true - }, - "source": [ - "and" - ] - }, - { - "cell_type": "markdown", - "id": "09c0fd0e", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "f_i = f_i - \\frac{a_{i-1}}{b_{i-1}}f_{i-1}.\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "f5b6ca8d", - "metadata": { - "editable": true - }, - "source": [ - "At this point the simplified equation, with only an upper triangular matrix takes the form" - ] - }, - { - "cell_type": "markdown", - "id": "c98b964a", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\left( \\begin{array}{ccccc}\n", - " b_0 & c_0 & & & \\\\\n", - " & b_1 & c_1 & & \\\\\n", - " & & \\ddots & & \\\\\n", - "\t & & & b_{m-2} & c_{m-2} \\\\\n", - "\t & & & & b_{m-1}\n", - " \\end{array} \\right)\\left( \\begin{array}{c}\n", - " x_0 \\\\\n", - " x_1 \\\\\n", - " \\vdots \\\\\n", - " x_{m-2} \\\\\n", - " x_{m-1}\n", - " \\end{array} \\right)=\\left( \\begin{array}{c}\n", - " f_0 \\\\\n", - " f_1 \\\\\n", - " \\vdots \\\\\n", - " f_{m-2} \\\\\n", - " f_{m-1} \\\\\n", - " \\end{array} \\right)\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "e758d05e", - "metadata": { - "editable": true - }, - "source": [ - "## Thomas algorithm, backward substitution\n", - "The next step is the backward substitution step. The last row is multiplied by $c_{N-3}/b_{N-2}$ and subtracted from the second to last row, thus eliminating $c_{N-3}$ from the last row. 
The general backward substitution procedure is" - ] - }, - { - "cell_type": "markdown", - "id": "727d974a", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "c_i = 0,\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "1d6271fe", - "metadata": { - "editable": true - }, - "source": [ - "and" - ] - }, - { - "cell_type": "markdown", - "id": "1a876a5a", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "f_{i-1} = f_{i-1} - \\frac{c_{i-1}}{b_i}f_i\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "8534c118", - "metadata": { - "editable": true - }, - "source": [ - "All that remains to be computed is the solution, which is the very straightforward process of" - ] - }, - { - "cell_type": "markdown", - "id": "332d31d5", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "x_i = \\frac{f_i}{b_i}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "b78e43f4", - "metadata": { - "editable": true - }, - "source": [ - "## Thomas algorithm and counting of operations (floating point and memory)\n", - "\n", - "In this specific case we have the following counts of memory and floating-point operations\n", - "\n", - "* Memory Reads: $14(N-2)$;\n", - "\n", - "* Memory Writes: $4(N-2)$; \n", - "\n", - "* Subtractions: $3(N-2)$; \n", - "\n", - "* Multiplications: $3(N-2)$;\n", - "\n", - "* Divisions: $4(N-2)$." 
- ] - }, - { - "cell_type": "markdown", - "id": "521f357b", - "metadata": { - "editable": true - }, - "source": [ - " // Forward substitution \n", - " // Note that we can simplify by precalculating a[i-1]/b[i-1]\n", - " for (int i=1; i < n; i++) {\n", - " b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1];\n", - " f[i] = f[i] - (a[i-1]*f[i-1])/b[i-1];\n", - " }\n", - " x[n-1] = f[n-1] / b[n-1];\n", - " // Backwards substitution \n", - " for (int i = n-2; i >= 0; i--) {\n", - " f[i] = f[i] - c[i]*f[i+1]/b[i+1];\n", - " x[i] = f[i]/b[i];\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "9f65dc0f", - "metadata": { - "editable": true - }, - "source": [ - "## [Example: Transpose of a matrix](https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp)" - ] - }, - { - "cell_type": "markdown", - "id": "9eda2cc0", - "metadata": { - "editable": true - }, - "source": [ - " #include <cstdlib>\n", - " #include <iostream>\n", - " #include <cmath>\n", - " #include <iomanip>\n", - " #include \"time.h\"\n", - " \n", - " using namespace std; // note use of namespace\n", - " int main (int argc, char* argv[])\n", - " {\n", - " // read in dimension of square matrix\n", - " int n = atoi(argv[1]);\n", - " double **A, **B;\n", - " // Allocate space for the two matrices\n", - " A = new double*[n]; B = new double*[n];\n", - " for (int i = 0; i < n; i++){\n", - " A[i] = new double[n];\n", - " B[i] = new double[n];\n", - " }\n", - " // Set up values for matrix A\n", - " for (int i = 0; i < n; i++){\n", - " for (int j = 0; j < n; j++) {\n", - " A[i][j] = cos(i*1.0)*sin(j*3.0);\n", - " }\n", - " }\n", - " clock_t start, finish;\n", - " start = clock();\n", - " // Then compute the transpose\n", - " for (int i = 0; i < n; i++){\n", - " for (int j = 0; j < n; j++) {\n", - " B[i][j]= A[j][i];\n", - " }\n", - " }\n", - " \n", - " finish = clock();\n", - " double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );\n", - " cout << setiosflags(ios::showpoint |
ios::uppercase);\n", - " cout << setprecision(10) << setw(20) << \"Time used for setting up transpose of matrix=\" << timeused << endl;\n", - " \n", - " // Free up space\n", - " for (int i = 0; i < n; i++){\n", - " delete[] A[i];\n", - " delete[] B[i];\n", - " }\n", - " delete[] A;\n", - " delete[] B;\n", - " return 0;\n", - " }\n", - " \n" - ] - }, - { - "cell_type": "markdown", - "id": "d24069ca", - "metadata": { - "editable": true - }, - "source": [ - "## [Matrix-matrix multiplication](https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp)\n", - "This the matrix-matrix multiplication code with plain c++ memory allocation. It computes at the end the Frobenius norm." - ] - }, - { - "cell_type": "markdown", - "id": "66e0fbdf", - "metadata": { - "editable": true - }, - "source": [ - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \"time.h\"\n", - " \n", - " using namespace std; // note use of namespace\n", - " int main (int argc, char* argv[])\n", - " {\n", - " // read in dimension of square matrix\n", - " int n = atoi(argv[1]);\n", - " double s = 1.0/sqrt( (double) n);\n", - " double **A, **B, **C;\n", - " // Start timing\n", - " clock_t start, finish;\n", - " start = clock();\n", - " // Allocate space for the two matrices\n", - " A = new double*[n]; B = new double*[n]; C = new double*[n];\n", - " for (int i = 0; i < n; i++){\n", - " A[i] = new double[n];\n", - " B[i] = new double[n];\n", - " C[i] = new double[n];\n", - " }\n", - " // Set up values for matrix A and B and zero matrix C\n", - " for (int i = 0; i < n; i++){\n", - " for (int j = 0; j < n; j++) {\n", - " double angle = 2.0*M_PI*i*j/ (( double ) n);\n", - " A[i][j] = s * ( sin ( angle ) + cos ( angle ) );\n", - " B[j][i] = A[i][j];\n", - " }\n", - " }\n", - " // Then perform the matrix-matrix multiplication\n", - " for (int i = 0; i < n; i++){\n", - " for (int j = 0; j < n; j++) {\n", - " double 
sum = 0.0;\n", - " for (int k = 0; k < n; k++) {\n", - " sum += B[i][k]*A[k][j];\n", - " }\n", - " C[i][j] = sum;\n", - " }\n", - " }\n", - " // Compute now the Frobenius norm\n", - " double Fsum = 0.0;\n", - " for (int i = 0; i < n; i++){\n", - " for (int j = 0; j < n; j++) {\n", - " Fsum += C[i][j]*C[i][j];\n", - " }\n", - " }\n", - " Fsum = sqrt(Fsum);\n", - " finish = clock();\n", - " double timeused = (double) (finish - start)/(CLOCKS_PER_SEC );\n", - " cout << setiosflags(ios::showpoint | ios::uppercase);\n", - " cout << setprecision(10) << setw(20) << \"Time used for matrix-matrix multiplication=\" << timeused << endl;\n", - " cout << \" Frobenius norm = \" << Fsum << endl;\n", - " // Free up space\n", - " for (int i = 0; i < n; i++){\n", - " delete[] A[i];\n", - " delete[] B[i];\n", - " delete[] C[i];\n", - " }\n", - " delete[] A;\n", - " delete[] B;\n", - " delete[] C;\n", - " return 0;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "25a8ada7", - "metadata": { - "editable": true - }, - "source": [ - "## How do we define speedup? Simplest form\n", - "* Speedup measures the ratio of performance between two objects\n", - "\n", - "* Versions of same code, with different number of processors\n", - "\n", - "* Serial and vector versions\n", - "\n", - "* Try different programing languages, c++ and Fortran\n", - "\n", - "* Two algorithms computing the **same** result" - ] - }, - { - "cell_type": "markdown", - "id": "96334923", - "metadata": { - "editable": true - }, - "source": [ - "## How do we define speedup? Correct baseline\n", - "The key is choosing the correct baseline for comparison\n", - "* For our serial vs. vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off\n", - "\n", - " * For parallel applications, this is much harder:\n", - "\n", - " * Choice of algorithm, decomposition, performance of baseline case etc." 
- ] - }, - { - "cell_type": "markdown", - "id": "1086e7f6", - "metadata": { - "editable": true - }, - "source": [ - "## Parallel speedup\n", - "For parallel applications, speedup is typically defined as\n", - "* Speedup $=T_1/T_p$\n", - "\n", - "Here $T_1$ is the time on one processor and $T_p$ is the time using $p$ processors.\n", - " * Can the speedup become larger than $p$? That means using $p$ processors is more than $p$ times faster than using one processor." - ] - }, - { - "cell_type": "markdown", - "id": "9a29659c", - "metadata": { - "editable": true - }, - "source": [ - "## Speedup and memory\n", - "The speedup on $p$ processors can\n", - "be greater than $p$ if memory usage is optimal!\n", - "Consider the case of a memorybound computation with $M$ words of memory\n", - " * If $M/p$ fits into cache while $M$ does not, the time to access memory will be different in the two cases:\n", - "\n", - " * $T_1$ uses the main memory bandwidth\n", - "\n", - " * $T_p$ uses the appropriate cache bandwidth" - ] - }, - { - "cell_type": "markdown", - "id": "1c92c7b7", - "metadata": { - "editable": true - }, - "source": [ - "## Upper bounds on speedup\n", - "Assume that almost all parts of a code are perfectly\n", - "parallelizable (fraction $f$). The remainder,\n", - "fraction $(1-f)$ cannot be parallelized at all.\n", - "\n", - "That is, there is work that takes time $W$ on one process; a fraction $f$ of that work will take\n", - "time $Wf/p$ on $p$ processors. \n", - "* What is the maximum possible speedup as a function of $f$?" 
- ] - }, - { - "cell_type": "markdown", - "id": "7d8a1f2c", - "metadata": { - "editable": true - }, - "source": [ - "## Amdahl's law\n", - "On one processor we have" - ] - }, - { - "cell_type": "markdown", - "id": "d69c7bc3", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "T_1 = (1-f)W + fW = W\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "4b683a55", - "metadata": { - "editable": true - }, - "source": [ - "On $p$ processors we have" - ] - }, - { - "cell_type": "markdown", - "id": "8accb3c3", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "T_p = (1-f)W + \\frac{fW}{p},\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "5efec843", - "metadata": { - "editable": true - }, - "source": [ - "resulting in a speedup of" - ] - }, - { - "cell_type": "markdown", - "id": "b5e15977", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\frac{T_1}{T_p} = \\frac{W}{(1-f)W+fW/p}\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "032ee329", - "metadata": { - "editable": true - }, - "source": [ - "As $p$ goes to infinity, $fW/p$ goes to zero, and the maximum speedup is" - ] - }, - { - "cell_type": "markdown", - "id": "6d0abbee", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "\\frac{1}{1-f},\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "b4b3b622", - "metadata": { - "editable": true - }, - "source": [ - "meaning that \n", - "if $f = 0.99$ (all but $1\\%$ parallelizable), the maximum speedup\n", - "is $1/(1-0.99)=100$!" - ] - }, - { - "cell_type": "markdown", - "id": "8d6b48de", - "metadata": { - "editable": true - }, - "source": [ - "## How much is parallelizable\n", - "If any non-parallel code slips into the\n", - "application, the parallel\n", - "performance is limited. \n", - "\n", - "In many simulations, however, the fraction of non-parallelizable work\n", - "is $10^{-6}$ or less due to large arrays or objects that are perfectly parallelizable."
- ] - }, - { - "cell_type": "markdown", - "id": "882c8bb8", - "metadata": { - "editable": true - }, - "source": [ - "## Today's situation of parallel computing\n", - "\n", - "* Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective.\n", - "\n", - "* Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems.\n", - "\n", - "* Modern nodes have nowadays several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This leads often to codes which use both MPI and OpenMP.\n", - "\n", - "Our lectures will focus on both MPI and OpenMP." - ] - }, - { - "cell_type": "markdown", - "id": "188afc27", - "metadata": { - "editable": true - }, - "source": [ - "## Overhead present in parallel computing\n", - "\n", - "* **Uneven load balance**: not all the processors can perform useful work at all time.\n", - "\n", - "* **Overhead of synchronization**\n", - "\n", - "* **Overhead of communication**\n", - "\n", - "* **Extra computation due to parallelization**\n", - "\n", - "Due to the above overhead and that certain parts of a sequential\n", - "algorithm cannot be parallelized we may not achieve an optimal parallelization." - ] - }, - { - "cell_type": "markdown", - "id": "be80bfdb", - "metadata": { - "editable": true - }, - "source": [ - "## Parallelizing a sequential algorithm\n", - "\n", - "* Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part,\n", - "\n", - "* Distribute the global work and data among $P$ processors." 
- ] - }, - { - "cell_type": "markdown", - "id": "3061ce2d", - "metadata": { - "editable": true - }, - "source": [ - "## Strategies\n", - "* Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC. \n", - "\n", - "* When you are convinced that your codes run correctly, you can start your production runs on available supercomputers." - ] - }, - { - "cell_type": "markdown", - "id": "18e5ec20", - "metadata": { - "editable": true - }, - "source": [ - "## How do I run MPI on a PC/Laptop? MPI\n", - "To install MPI is rather easy on hardware running unix/linux as operating systems, follow simply the instructions from the [OpenMPI website](https://www.open-mpi.org/). See also subsequent slides.\n", - "When you have made sure you have installed MPI on your PC/laptop, \n", - "* Compile with mpicxx/mpic++ or mpif90" - ] - }, - { - "cell_type": "markdown", - "id": "05ee10db", - "metadata": { - "editable": true - }, - "source": [ - " # Compile and link\n", - " mpic++ -O3 -o nameofprog.x nameofprog.cpp\n", - " # run code with for example 8 processes using mpirun/mpiexec\n", - " mpiexec -n 8 ./nameofprog.x\n" - ] - }, - { - "cell_type": "markdown", - "id": "97403a45", - "metadata": { - "editable": true - }, - "source": [ - "## Can I do it on my own PC/laptop? OpenMP installation\n", - "If you wish to install MPI and OpenMP \n", - "on your laptop/PC, we recommend the following:\n", - "\n", - "* For OpenMP, the compile option **-fopenmp** is included automatically in recent versions of the C++ compiler and Fortran compilers. 
For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below.\n", - "\n", - "* For OS X users however, install **libomp**" - ] - }, - { - "cell_type": "markdown", - "id": "3036ce8a", - "metadata": { - "editable": true - }, - "source": [ - " brew install libomp\n" - ] - }, - { - "cell_type": "markdown", - "id": "372c7f77", - "metadata": { - "editable": true - }, - "source": [ - "and compile and link as" - ] - }, - { - "cell_type": "markdown", - "id": "ad4ce4ba", - "metadata": { - "editable": true - }, - "source": [ - " c++ -o <name executable> <name program.cpp> -lomp\n" - ] - }, - { - "cell_type": "markdown", - "id": "7aa3ee46", - "metadata": { - "editable": true - }, - "source": [ - "## Installing MPI\n", - "For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager)" - ] - }, - { - "cell_type": "markdown", - "id": "e4145636", - "metadata": { - "editable": true - }, - "source": [ - " sudo apt-get install libopenmpi-dev\n", - " sudo apt-get install openmpi-bin\n" - ] - }, - { - "cell_type": "markdown", - "id": "c7dfa255", - "metadata": { - "editable": true - }, - "source": [ - "For OS X users, install brew (after having installed xcode and gcc, needed for the \n", - "gfortran compiler of openmpi) and then install with brew" - ] - }, - { - "cell_type": "markdown", - "id": "821936b4", - "metadata": { - "editable": true - }, - "source": [ - " brew install openmpi\n" - ] - }, - { - "cell_type": "markdown", - "id": "f50ba0b0", - "metadata": { - "editable": true - }, - "source": [ - "When running an executable (code.x), run as" - ] - }, - { - "cell_type": "markdown", - "id": "78c6d578", - "metadata": { - "editable": true - }, - "source": [ - " mpirun -n 10 ./code.x\n" - ] - }, - { - "cell_type": "markdown", - "id": "9dca4d6b", - "metadata": { - "editable": true - }, - "source": [ - "where we indicate that we want the number of processes to be 10." 
- ] - }, - { - "cell_type": "markdown", - "id": "fa2ae697", - "metadata": { - "editable": true - }, - "source": [ - "## Installing MPI and using Qt\n", - "With openmpi installed, when using Qt, add to your .pro file the instructions [here](http://dragly.org/2012/03/14/developing-mpi-applications-in-qt-creator/)\n", - "\n", - "You may need to tell Qt where openmpi is stored." - ] - }, - { - "cell_type": "markdown", - "id": "e3fc56ca", - "metadata": { - "editable": true - }, - "source": [ - "## What is Message Passing Interface (MPI)?\n", - "\n", - "**MPI** is a library, not a language. It specifies the names, calling sequences and results of functions\n", - "or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++\n", - "library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked\n", - "with the MPI library.\n", - "\n", - "MPI programs should be able to run\n", - "on all possible machines and run on all MPI implementations without change.\n", - "\n", - "An MPI computation is a collection of processes communicating with messages." - ] - }, - { - "cell_type": "markdown", - "id": "c3b38944", - "metadata": { - "editable": true - }, - "source": [ - "## Going Parallel with MPI\n", - "**Task parallelism**: the work of a global problem can be divided\n", - "into a number of independent tasks, which rarely need to synchronize. 
\n", - "Monte Carlo simulations or numerical integration are examples of this.\n", - "\n", - "MPI is a message-passing library where all the routines\n", - "have corresponding C/C++-binding" - ] - }, - { - "cell_type": "markdown", - "id": "45b95d3e", - "metadata": { - "editable": true - }, - "source": [ - " MPI_Command_name\n" - ] - }, - { - "cell_type": "markdown", - "id": "5e70388f", - "metadata": { - "editable": true - }, - "source": [ - "and Fortran-binding (routine names are in uppercase, but can also be in lower case)" - ] - }, - { - "cell_type": "markdown", - "id": "b0ed2ea2", - "metadata": { - "editable": true - }, - "source": [ - " MPI_COMMAND_NAME\n" - ] - }, - { - "cell_type": "markdown", - "id": "d446a947", - "metadata": { - "editable": true - }, - "source": [ - "## MPI is a library\n", - "MPI is a library specification for the message passing interface,\n", - "proposed as a standard.\n", - "\n", - "* independent of hardware;\n", - "\n", - "* not a language or compiler specification;\n", - "\n", - "* not a specific implementation or product.\n", - "\n", - "A message passing standard for portability and ease-of-use. \n", - "Designed for high performance.\n", - "\n", - "Insert communication and synchronization functions where necessary." 
- ] - }, - { - "cell_type": "markdown", - "id": "8b31c635", - "metadata": { - "editable": true - }, - "source": [ - "## Bindings to MPI routines\n", - "\n", - "MPI is a message-passing library where all the routines\n", - "have corresponding C/C++-binding" - ] - }, - { - "cell_type": "markdown", - "id": "7bfd2f1e", - "metadata": { - "editable": true - }, - "source": [ - " MPI_Command_name\n" - ] - }, - { - "cell_type": "markdown", - "id": "b076a236", - "metadata": { - "editable": true - }, - "source": [ - "and Fortran-binding (routine names are in uppercase, but can also be in lower case)" - ] - }, - { - "cell_type": "markdown", - "id": "281e69e0", - "metadata": { - "editable": true - }, - "source": [ - " MPI_COMMAND_NAME\n" - ] - }, - { - "cell_type": "markdown", - "id": "7369452c", - "metadata": { - "editable": true - }, - "source": [ - "The discussion in these slides focuses on the C++ binding." - ] - }, - { - "cell_type": "markdown", - "id": "44fb9048", - "metadata": { - "editable": true - }, - "source": [ - "## Communicator\n", - "* A group of MPI processes with a name (context).\n", - "\n", - "* Any process is identified by its rank. The rank is only meaningful within a particular communicator.\n", - "\n", - "* By default the communicator contains all the MPI processes." - ] - }, - { - "cell_type": "markdown", - "id": "e3519ab9", - "metadata": { - "editable": true - }, - "source": [ - " MPI_COMM_WORLD \n" - ] - }, - { - "cell_type": "markdown", - "id": "20e5dd91", - "metadata": { - "editable": true - }, - "source": [ - "* Mechanism to identify subset of processes.\n", - "\n", - "* Promotes modular design of parallel libraries." 
- ] - }, - { - "cell_type": "markdown", - "id": "9ac13a49", - "metadata": { - "editable": true - }, - "source": [ - "## Some of the most important MPI functions\n", - "\n", - "* $MPI\\_Init$ - initiate an MPI computation\n", - "\n", - "* $MPI\\_Finalize$ - terminate the MPI computation and clean up\n", - "\n", - "* $MPI\\_Comm\\_size$ - how many processes participate in a given MPI communicator?\n", - "\n", - "* $MPI\\_Comm\\_rank$ - which one am I? (A number between 0 and size-1.)\n", - "\n", - "* $MPI\\_Send$ - send a message to a particular process within an MPI communicator\n", - "\n", - "* $MPI\\_Recv$ - receive a message from a particular process within an MPI communicator\n", - "\n", - "* $MPI\\_reduce$ or $MPI\\_Allreduce$, send and receive messages" - ] - }, - { - "cell_type": "markdown", - "id": "3621cf41", - "metadata": { - "editable": true - }, - "source": [ - "## [The first MPI C/C++ program](https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp)\n", - "\n", - "Let every process write \"Hello world\" (oh not this program again!!) on the standard output." 
- ] - }, - { - "cell_type": "markdown", - "id": "72c6466d", - "metadata": { - "editable": true - }, - "source": [ - " using namespace std;\n", - " #include \n", - " #include \n", - " int main (int nargs, char* args[])\n", - " {\n", - " int numprocs, my_rank;\n", - " // MPI initializations\n", - " MPI_Init (&nargs, &args);\n", - " MPI_Comm_size (MPI_COMM_WORLD, &numprocs);\n", - " MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);\n", - " cout << \"Hello world, I have rank \" << my_rank << \" out of \" \n", - " << numprocs << endl;\n", - " // End MPI\n", - " MPI_Finalize ();\n" - ] - }, - { - "cell_type": "markdown", - "id": "50326108", - "metadata": { - "editable": true - }, - "source": [ - "## The Fortran program" - ] - }, - { - "cell_type": "markdown", - "id": "f407c5a4", - "metadata": { - "editable": true - }, - "source": [ - " PROGRAM hello\n", - " INCLUDE \"mpif.h\"\n", - " INTEGER:: size, my_rank, ierr\n", - " \n", - " CALL MPI_INIT(ierr)\n", - " CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)\n", - " CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)\n", - " WRITE(*,*)\"Hello world, I've rank \",my_rank,\" out of \",size\n", - " CALL MPI_FINALIZE(ierr)\n", - " \n", - " END PROGRAM hello\n" - ] - }, - { - "cell_type": "markdown", - "id": "737c6309", - "metadata": { - "editable": true - }, - "source": [ - "## Note 1\n", - "\n", - "* The output to screen is not ordered since all processes are trying to write to screen simultaneously.\n", - "\n", - "* It is the operating system which opts for an ordering. \n", - "\n", - "* If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example." 
- ] - }, - { - "cell_type": "markdown", - "id": "ea6e04d2", - "metadata": { - "editable": true - }, - "source": [ - "## [Ordered output with MPIBarrier](https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp)" - ] - }, - { - "cell_type": "markdown", - "id": "e179d073", - "metadata": { - "editable": true - }, - "source": [ - " int main (int nargs, char* args[])\n", - " {\n", - " int numprocs, my_rank, i;\n", - " MPI_Init (&nargs, &args);\n", - " MPI_Comm_size (MPI_COMM_WORLD, &numprocs);\n", - " MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);\n", - " for (i = 0; i < numprocs; i++) {}\n", - " MPI_Barrier (MPI_COMM_WORLD);\n", - " if (i == my_rank) {\n", - " cout << \"Hello world, I have rank \" << my_rank << \n", - " \" out of \" << numprocs << endl;}\n", - " MPI_Finalize ();\n" - ] - }, - { - "cell_type": "markdown", - "id": "88698fd3", - "metadata": { - "editable": true - }, - "source": [ - "## Note 2\n", - "* Here we have used the $MPI\\_Barrier$ function to ensure that that every process has completed its set of instructions in a particular order.\n", - "\n", - "* A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here $MPI\\_COMM\\_WORLD$) have called $MPI\\_Barrier$. \n", - "\n", - "* The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like $MPI\\_ALLREDUCE$ to be discussed later, have the same property; that is, no process can exit the operation until all processes have started. \n", - "\n", - "However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there\n", - "are processes. In the next Hello world example we use the send and receive functions in order to a have a synchronized\n", - "action." 
- ] - }, - { - "cell_type": "markdown", - "id": "499296fd", - "metadata": { - "editable": true - }, - "source": [ - "## [Ordered output](https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp)" - ] - }, - { - "cell_type": "markdown", - "id": "80f61521", - "metadata": { - "editable": true - }, - "source": [ - " .....\n", - " int numprocs, my_rank, flag;\n", - " MPI_Status status;\n", - " MPI_Init (&nargs, &args);\n", - " MPI_Comm_size (MPI_COMM_WORLD, &numprocs);\n", - " MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);\n", - " if (my_rank > 0)\n", - " MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, \n", - " MPI_COMM_WORLD, &status);\n", - " cout << \"Hello world, I have rank \" << my_rank << \" out of \" \n", - " << numprocs << endl;\n", - " if (my_rank < numprocs-1)\n", - " MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, \n", - " 100, MPI_COMM_WORLD);\n", - " MPI_Finalize ();\n" - ] - }, - { - "cell_type": "markdown", - "id": "ef7b7655", - "metadata": { - "editable": true - }, - "source": [ - "## Note 3\n", - "\n", - "The basic sending of messages is given by the function $MPI\\_SEND$, which in C/C++\n", - "is defined as" - ] - }, - { - "cell_type": "markdown", - "id": "0ee6b027", - "metadata": { - "editable": true - }, - "source": [ - " int MPI_Send(void *buf, int count, \n", - " MPI_Datatype datatype, \n", - " int dest, int tag, MPI_Comm comm)}\n" - ] - }, - { - "cell_type": "markdown", - "id": "b196f1c4", - "metadata": { - "editable": true - }, - "source": [ - "This single command allows the passing of any kind of variable, even a large array, to any group of tasks. \n", - "The variable **buf** is the variable we wish to send while **count**\n", - "is the number of variables we are passing. If we are passing only a single value, this should be 1. \n", - "\n", - "If we transfer an array, it is the overall size of the array. 
\n", - "For example, if we want to send a 10 by 10 array, count would be $10\\times 10=100$ \n", - "since we are actually passing 100 values." - ] - }, - { - "cell_type": "markdown", - "id": "b06e842e", - "metadata": { - "editable": true - }, - "source": [ - "## Note 4\n", - "\n", - "Once you have sent a message, you must receive it on another task. The function $MPI\\_RECV$\n", - "is similar to the send call." - ] - }, - { - "cell_type": "markdown", - "id": "710086f2", - "metadata": { - "editable": true - }, - "source": [ - " int MPI_Recv( void *buf, int count, MPI_Datatype datatype, \n", - " int source, \n", - " int tag, MPI_Comm comm, MPI_Status *status )\n" - ] - }, - { - "cell_type": "markdown", - "id": "4d3a1771", - "metadata": { - "editable": true - }, - "source": [ - "The arguments that are different from those in MPI\\_SEND are\n", - "**buf** which is the name of the variable where you will be storing the received data, \n", - "**source** which replaces the destination in the send command. This is the return ID of the sender.\n", - "\n", - "Finally, we have used $MPI\\_Status\\_status$, \n", - "where one can check if the receive was completed.\n", - "\n", - "The output of this code is the same as the previous example, but now\n", - "process 0 sends a message to process 1, which forwards it further\n", - "to process 2, and so forth." 
- ] - }, - { - "cell_type": "markdown", - "id": "07e57403", - "metadata": { - "editable": true - }, - "source": [ - "## [Numerical integration in parallel](https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp)\n", - "**Integrating $\\pi$.**\n", - "\n", - "* The code example computes $\\pi$ using the trapezoidal rules.\n", - "\n", - "* The trapezoidal rule" - ] - }, - { - "cell_type": "markdown", - "id": "6b931e78", - "metadata": { - "editable": true - }, - "source": [ - "$$\n", - "I=\\int_a^bf(x) dx\\approx h\\left(f(a)/2 + f(a+h) +f(a+2h)+\\dots +f(b-h)+ f(b)/2\\right).\n", - "$$" - ] - }, - { - "cell_type": "markdown", - "id": "84ebd8a9", - "metadata": { - "editable": true - }, - "source": [ - "Click [on this link](https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp) for the full program." - ] - }, - { - "cell_type": "markdown", - "id": "1d5c5663", - "metadata": { - "editable": true - }, - "source": [ - "## Dissection of trapezoidal rule with $MPI\\_reduce$" - ] - }, - { - "cell_type": "markdown", - "id": "8f7a6314", - "metadata": { - "editable": true - }, - "source": [ - " // Trapezoidal rule and numerical integration usign MPI\n", - " using namespace std;\n", - " #include \n", - " #include \n", - " \n", - " // Here we define various functions called by the main program\n", - " \n", - " double int_function(double );\n", - " double trapezoidal_rule(double , double , int , double (*)(double));\n", - " \n", - " // Main function begins here\n", - " int main (int nargs, char* args[])\n", - " {\n", - " int n, local_n, numprocs, my_rank; \n", - " double a, b, h, local_a, local_b, total_sum, local_sum; \n", - " double time_start, time_end, total_time;\n" - ] - }, - { - "cell_type": "markdown", - "id": "e9c01031", - "metadata": { - "editable": true - }, - "source": [ - "## Dissection of trapezoidal rule" - ] - }, - 
{ - "cell_type": "markdown", - "id": "f19a6651", - "metadata": { - "editable": true - }, - "source": [ - " // MPI initializations\n", - " MPI_Init (&nargs, &args);\n", - " MPI_Comm_size (MPI_COMM_WORLD, &numprocs);\n", - " MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);\n", - " time_start = MPI_Wtime();\n", - " // Fixed values for a, b and n \n", - " a = 0.0 ; b = 1.0; n = 1000;\n", - " h = (b-a)/n; // h is the same for all processes \n", - " local_n = n/numprocs; \n", - " // make sure n > numprocs, else integer division gives zero\n", - " // Length of each process' interval of\n", - " // integration = local_n*h. \n", - " local_a = a + my_rank*local_n*h;\n", - " local_b = local_a + local_n*h;\n" - ] - }, - { - "cell_type": "markdown", - "id": "d611825a", - "metadata": { - "editable": true - }, - "source": [ - "## Integrating with **MPI**" - ] - }, - { - "cell_type": "markdown", - "id": "a70a0737", - "metadata": { - "editable": true - }, - "source": [ - " total_sum = 0.0;\n", - " local_sum = trapezoidal_rule(local_a, local_b, local_n, \n", - " &int_function); \n", - " MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, \n", - " MPI_SUM, 0, MPI_COMM_WORLD);\n", - " time_end = MPI_Wtime();\n", - " total_time = time_end-time_start;\n", - " if ( my_rank == 0) {\n", - " cout << \"Trapezoidal rule = \" << total_sum << endl;\n", - " cout << \"Time = \" << total_time \n", - " << \" on number of processors: \" << numprocs << endl;\n", - " }\n", - " // End MPI\n", - " MPI_Finalize (); \n", - " return 0;\n", - " } // end of main program\n" - ] - }, - { - "cell_type": "markdown", - "id": "fafdaa43", - "metadata": { - "editable": true - }, - "source": [ - "## How do I use $MPI\\_reduce$?\n", - "\n", - "Here we have used" - ] - }, - { - "cell_type": "markdown", - "id": "fd41d43b", - "metadata": { - "editable": true - }, - "source": [ - " MPI_reduce( void *senddata, void* resultdata, int count, \n", - " MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm)\n" - ] - }, - { - "cell_type": 
"markdown", - "id": "6f6f7ccc", - "metadata": { - "editable": true - }, - "source": [ - "The two variables $senddata$ and $resultdata$ are obvious, besides the fact that one sends the address\n", - "of the variable or the first element of an array. If they are arrays they need to have the same size. \n", - "The variable $count$ represents the total dimensionality, 1 in case of just one variable, \n", - "while $MPI\\_Datatype$ \n", - "defines the type of variable which is sent and received. \n", - "\n", - "The new feature is $MPI\\_Op$. It defines the type\n", - "of operation we want to do." - ] - }, - { - "cell_type": "markdown", - "id": "24581a53", - "metadata": { - "editable": true - }, - "source": [ - "## More on $MPI\\_Reduce$\n", - "In our case, since we are summing\n", - "the rectangle contributions from every process we define $MPI\\_Op = MPI\\_SUM$.\n", - "If we have an array or matrix we can search for the largest og smallest element by sending either $MPI\\_MAX$ or \n", - "$MPI\\_MIN$. If we want the location as well (which array element) we simply transfer \n", - "$MPI\\_MAXLOC$ or $MPI\\_MINOC$. If we want the product we write $MPI\\_PROD$. \n", - "\n", - "$MPI\\_Allreduce$ is defined as" - ] - }, - { - "cell_type": "markdown", - "id": "0e6fdf1f", - "metadata": { - "editable": true - }, - "source": [ - " MPI_Allreduce( void *senddata, void* resultdata, int count, \n", - " MPI_Datatype datatype, MPI_Op, MPI_Comm comm) \n" - ] - }, - { - "cell_type": "markdown", - "id": "c02c5d53", - "metadata": { - "editable": true - }, - "source": [ - "## Dissection of trapezoidal rule\n", - "\n", - "We use $MPI\\_reduce$ to collect data from each process. Note also the use of the function \n", - "$MPI\\_Wtime$." 
- ] - }, - { - "cell_type": "markdown", - "id": "215cfb7c", - "metadata": { - "editable": true - }, - "source": [ - " // this function defines the function to integrate\n", - " double int_function(double x)\n", - " {\n", - " double value = 4./(1.+x*x);\n", - " return value;\n", - " } // end of function to evaluate\n", - " \n" - ] - }, - { - "cell_type": "markdown", - "id": "f4267db8", - "metadata": { - "editable": true - }, - "source": [ - "## Dissection of trapezoidal rule" - ] - }, - { - "cell_type": "markdown", - "id": "584fed2f", - "metadata": { - "editable": true - }, - "source": [ - " // this function defines the trapezoidal rule\n", - " double trapezoidal_rule(double a, double b, int n, \n", - " double (*func)(double))\n", - " {\n", - " double trapez_sum;\n", - " double fa, fb, x, step;\n", - " int j;\n", - " step=(b-a)/((double) n);\n", - " fa=(*func)(a)/2. ;\n", - " fb=(*func)(b)/2. ;\n", - " trapez_sum=0.;\n", - " for (j=1; j <= n-1; j++){\n", - " x=j*step+a;\n", - " trapez_sum+=(*func)(x);\n", - " }\n", - " trapez_sum=(trapez_sum+fb+fa)*step;\n", - " return trapez_sum;\n", - " } // end trapezoidal_rule \n" - ] - }, - { - "cell_type": "markdown", - "id": "fff0c1fa", - "metadata": { - "editable": true - }, - "source": [ - "## [The quantum dot program for two electrons](https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp)" - ] - }, - { - "cell_type": "markdown", - "id": "16ad2582", - "metadata": { - "editable": true - }, - "source": [ - " // Variational Monte Carlo for atoms with importance sampling, slater det\n", - " // Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG\n", - " #include \"mpi.h\"\n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \"vectormatrixclass.h\"\n", - " \n", - " using namespace std;\n", - " // output file as global variable\n", - " ofstream ofile; \n", - " // the step length 
and its squared inverse for the second derivative \n", - " // Here we define global variables used in various functions\n", - " // These can be changed by using classes\n", - " int Dimension = 2; \n", - " int NumberParticles = 2; // we fix also the number of electrons to be 2\n", - " \n", - " // declaration of functions \n", - " \n", - " // The Mc sampling for the variational Monte Carlo \n", - " void MonteCarloSampling(int, double &, double &, Vector &);\n", - " \n", - " // The variational wave function\n", - " double WaveFunction(Matrix &, Vector &);\n", - " \n", - " // The local energy \n", - " double LocalEnergy(Matrix &, Vector &);\n", - " \n", - " // The quantum force\n", - " void QuantumForce(Matrix &, Matrix &, Vector &);\n", - " \n", - " \n", - " // inline function for single-particle wave function\n", - " inline double SPwavefunction(double r, double alpha) { \n", - " return exp(-alpha*r*0.5);\n", - " }\n", - " \n", - " // inline function for derivative of single-particle wave function\n", - " inline double DerivativeSPwavefunction(double r, double alpha) { \n", - " return -r*alpha;\n", - " }\n", - " \n", - " // function for absolute value of relative distance\n", - " double RelativeDistance(Matrix &r, int i, int j) { \n", - " double r_ij = 0; \n", - " for (int k = 0; k < Dimension; k++) { \n", - " \tr_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k));\n", - " }\n", - " return sqrt(r_ij); \n", - " }\n", - " \n", - " // inline function for derivative of Jastrow factor\n", - " inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){\n", - " return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2));\n", - " }\n", - " \n", - " // function for square of position of single particle\n", - " double singleparticle_pos2(Matrix &r, int i) { \n", - " double r_single_particle = 0;\n", - " for (int j = 0; j < Dimension; j++) { \n", - " r_single_particle += r(i,j)*r(i,j);\n", - " }\n", - " return r_single_particle;\n", - " 
}\n", - " \n", - " void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x,\n", - " \t\t double *f, double stpmax, int *check, double (*func)(Vector &p));\n", - " \n", - " void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,\n", - " \t double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g));\n", - " \n", - " static double sqrarg;\n", - " #define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)\n", - " \n", - " \n", - " static double maxarg1,maxarg2;\n", - " #define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\\\n", - " (maxarg1) : (maxarg2))\n", - " \n", - " \n", - " // Begin of main program \n", - " \n", - " int main(int argc, char* argv[])\n", - " {\n", - " \n", - " // MPI initializations\n", - " int NumberProcesses, MyRank, NumberMCsamples;\n", - " MPI_Init (&argc, &argv);\n", - " MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses);\n", - " MPI_Comm_rank (MPI_COMM_WORLD, &MyRank);\n", - " double StartTime = MPI_Wtime();\n", - " if (MyRank == 0 && argc <= 1) {\n", - " cout << \"Bad Usage: \" << argv[0] << \n", - " \" Read also output file on same line and number of Monte Carlo cycles\" << endl;\n", - " }\n", - " // Read filename and number of Monte Carlo cycles from the command line\n", - " if (MyRank == 0 && argc > 2) {\n", - " string filename = argv[1]; // first command line argument after name of program\n", - " NumberMCsamples = atoi(argv[2]);\n", - " string fileout = filename;\n", - " string argument = to_string(NumberMCsamples);\n", - " // Final filename as filename+NumberMCsamples\n", - " fileout.append(argument);\n", - " ofile.open(fileout);\n", - " }\n", - " // broadcast the number of Monte Carlo samples\n", - " MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD);\n", - " // Two variational parameters only\n", - " Vector VariationalParameters(2);\n", - " int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; \n", - " // Loop over variational parameters\n", - " for (double alpha = 0.5; 
alpha <= 1.5; alpha +=0.1){\n", - " for (double beta = 0.1; beta <= 0.5; beta +=0.05){\n", - " VariationalParameters(0) = alpha; // value of alpha\n", - " VariationalParameters(1) = beta; // value of beta\n", - " // Do the mc sampling and accumulate data with MPI_Reduce\n", - " double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2;\n", - " LocalProcessEnergy = LocalProcessEnergy2 = 0.0;\n", - " MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters);\n", - " // Collect data in total averages\n", - " MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);\n", - " MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);\n", - " // Print out results in case of Master node, set to MyRank = 0\n", - " if ( MyRank == 0) {\n", - " \tdouble Energy = TotalEnergy/( (double)NumberProcesses);\n", - " \tdouble Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy;\n", - " \tdouble StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error\n", - " \tofile << setiosflags(ios::showpoint | ios::uppercase);\n", - " \tofile << setw(15) << setprecision(8) << VariationalParameters(0);\n", - " \tofile << setw(15) << setprecision(8) << VariationalParameters(1);\n", - " \tofile << setw(15) << setprecision(8) << Energy;\n", - " \tofile << setw(15) << setprecision(8) << Variance;\n", - " \tofile << setw(15) << setprecision(8) << StandardDeviation << endl;\n", - " }\n", - " }\n", - " }\n", - " double EndTime = MPI_Wtime();\n", - " double TotalTime = EndTime-StartTime;\n", - " if ( MyRank == 0 ) cout << \"Time = \" << TotalTime << \" on number of processors: \" << NumberProcesses << endl;\n", - " if (MyRank == 0) ofile.close(); // close output file\n", - " // End MPI\n", - " MPI_Finalize (); \n", - " return 0;\n", - " } // end of main function\n", - " \n", - " \n", - " // Monte Carlo sampling with the 
Metropolis algorithm \n", - " \n", - " void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters)\n", - " {\n", - " \n", - " // Initialize the seed and call the Mersienne algo\n", - " std::random_device rd;\n", - " std::mt19937_64 gen(rd());\n", - " // Set up the uniform distribution for x \\in [[0, 1]\n", - " std::uniform_real_distribution UniformNumberGenerator(0.0,1.0);\n", - " std::normal_distribution Normaldistribution(0.0,1.0);\n", - " // diffusion constant from Schroedinger equation\n", - " double D = 0.5; \n", - " double timestep = 0.05; // we fix the time step for the gaussian deviate\n", - " // allocate matrices which contain the position of the particles \n", - " Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension);\n", - " Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension);\n", - " double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 0.0;\n", - " // initial trial positions\n", - " for (int i = 0; i < NumberParticles; i++) { \n", - " for (int j = 0; j < Dimension; j++) {\n", - " OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep);\n", - " }\n", - " }\n", - " double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters);\n", - " QuantumForce(OldPosition, OldQuantumForce, VariationalParameters);\n", - " // loop over monte carlo cycles \n", - " for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ \n", - " // new position \n", - " for (int i = 0; i < NumberParticles; i++) { \n", - " for (int j = 0; j < Dimension; j++) {\n", - " \t// gaussian deviate to compute new positions using a given timestep\n", - " \tNewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;\n", - " \t//\tNewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D;\n", - " } \n", - " // for the other particles 
we need to set the position to the old position since\n", - " // we move only one particle at the time\n", - " for (int k = 0; k < NumberParticles; k++) {\n", - " \tif ( k != i) {\n", - " \t for (int j = 0; j < Dimension; j++) {\n", - " \t NewPosition(k,j) = OldPosition(k,j);\n", - " \t }\n", - " \t} \n", - " }\n", - " double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); \n", - " QuantumForce(NewPosition, NewQuantumForce, VariationalParameters);\n", - " // we compute the log of the ratio of the greens functions to be used in the \n", - " // Metropolis-Hastings algorithm\n", - " double GreensFunction = 0.0; \n", - " for (int j = 0; j < Dimension; j++) {\n", - " \tGreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))*\n", - " \t (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j));\n", - " }\n", - " GreensFunction = exp(GreensFunction);\n", - " // The Metropolis test is performed by moving one particle at the time\n", - " if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { \n", - " \tfor (int j = 0; j < Dimension; j++) {\n", - " \t OldPosition(i,j) = NewPosition(i,j);\n", - " \t OldQuantumForce(i,j) = NewQuantumForce(i,j);\n", - " \t}\n", - " \tOldWaveFunction = NewWaveFunction;\n", - " }\n", - " } // end of loop over particles\n", - " // compute local energy \n", - " double DeltaE = LocalEnergy(OldPosition, VariationalParameters);\n", - " // update energies\n", - " Energy += DeltaE;\n", - " EnergySquared += DeltaE*DeltaE;\n", - " } // end of loop over MC trials \n", - " // update the energy average and its squared \n", - " cumulative_e = Energy/NumberMCsamples;\n", - " cumulative_e2 = EnergySquared/NumberMCsamples;\n", - " } // end MonteCarloSampling function \n", - " \n", - " \n", - " // Function to compute the squared wave function and the quantum force\n", - " \n", - " double WaveFunction(Matrix &r, Vector 
&VariationalParameters)\n", - " {\n", - " double wf = 0.0;\n", - " // full Slater determinant for two particles, replace with Slater det for more particles \n", - " wf = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0));\n", - " // contribution from Jastrow factor\n", - " for (int i = 0; i < NumberParticles-1; i++) { \n", - " for (int j = i+1; j < NumberParticles; j++) {\n", - " wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j))));\n", - " }\n", - " }\n", - " return wf;\n", - " }\n", - " \n", - " // Function to calculate the local energy without numerical derivation of kinetic energy\n", - " \n", - " double LocalEnergy(Matrix &r, Vector &VariationalParameters)\n", - " {\n", - " \n", - " // compute the kinetic and potential energy from the single-particle part\n", - " // for a many-electron system this has to be replaced by a Slater determinant\n", - " // The absolute value of the interparticle length\n", - " Matrix length( NumberParticles, NumberParticles);\n", - " // Set up interparticle distance\n", - " for (int i = 0; i < NumberParticles-1; i++) { \n", - " for(int j = i+1; j < NumberParticles; j++){\n", - " length(i,j) = RelativeDistance(r, i, j);\n", - " length(j,i) = length(i,j);\n", - " }\n", - " }\n", - " double KineticEnergy = 0.0;\n", - " // Set up kinetic energy from Slater and Jastrow terms\n", - " for (int i = 0; i < NumberParticles; i++) { \n", - " for (int k = 0; k < Dimension; k++) {\n", - " double sum1 = 0.0; \n", - " for(int j = 0; j < NumberParticles; j++){\n", - " \tif ( j != i) {\n", - " \t sum1 += JastrowDerivative(r, VariationalParameters(1), i, j, k);\n", - " \t}\n", - " }\n", - " KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)));\n", - " }\n", - " }\n", - " KineticEnergy += 
-2*VariationalParameters(0)*NumberParticles;\n", - " for (int i = 0; i < NumberParticles-1; i++) {\n", - " for (int j = i+1; j < NumberParticles; j++) {\n", - " KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) );\n", - " }\n", - " }\n", - " KineticEnergy *= -0.5;\n", - " // Set up potential energy, external potential + eventual electron-electron repulsion\n", - " double PotentialEnergy = 0;\n", - " for (int i = 0; i < NumberParticles; i++) { \n", - " double DistanceSquared = singleparticle_pos2(r, i);\n", - " PotentialEnergy += 0.5*DistanceSquared; // sp energy HO part, note it has the oscillator frequency set to 1!\n", - " }\n", - " // Add the electron-electron repulsion\n", - " for (int i = 0; i < NumberParticles-1; i++) { \n", - " for (int j = i+1; j < NumberParticles; j++) {\n", - " PotentialEnergy += 1.0/length(i,j); \n", - " }\n", - " }\n", - " double LocalE = KineticEnergy+PotentialEnergy;\n", - " return LocalE;\n", - " }\n", - " \n", - " // Compute the analytical expression for the quantum force\n", - " void QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters)\n", - " {\n", - " // compute the first derivative \n", - " for (int i = 0; i < NumberParticles; i++) {\n", - " for (int k = 0; k < Dimension; k++) {\n", - " // single-particle part, replace with Slater det for larger systems\n", - " double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0));\n", - " // Jastrow factor contribution\n", - " double Jsum = 0.0;\n", - " for (int j = 0; j < NumberParticles; j++) {\n", - " \tif ( j != i) {\n", - " \t Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k);\n", - " \t}\n", - " }\n", - " qforce(i,k) = 2.0*(Jsum+sppart);\n", - " }\n", - " }\n", - " } // end of QuantumForce function\n", - " \n", - " \n", - " #define ITMAX 200\n", - " #define EPS 3.0e-8\n", - " #define TOLX (4*EPS)\n", - " #define STPMX 100.0\n", - " 
\n", - " void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret,\n", - " \t double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g))\n", - " {\n", - " \n", - " int check,i,its,j;\n", - " double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test;\n", - " Vector dg(n), g(n), hdg(n), pnew(n), xi(n);\n", - " Matrix hessian(n,n);\n", - " \n", - " fp=(*func)(p);\n", - " (*dfunc)(p,g);\n", - " for (i = 0;i < n;i++) {\n", - " for (j = 0; j< n;j++) hessian(i,j)=0.0;\n", - " hessian(i,i)=1.0;\n", - " xi(i) = -g(i);\n", - " sum += p(i)*p(i);\n", - " }\n", - " stpmax=STPMX*FMAX(sqrt(sum),(double)n);\n", - " for (its=1;its<=ITMAX;its++) {\n", - " *iter=its;\n", - " lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func);\n", - " fp = *fret;\n", - " for (i = 0; i< n;i++) {\n", - " xi(i)=pnew(i)-p(i);\n", - " p(i)=pnew(i);\n", - " }\n", - " test=0.0;\n", - " for (i = 0;i< n;i++) {\n", - " temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0);\n", - " if (temp > test) test=temp;\n", - " }\n", - " if (test < TOLX) {\n", - " return;\n", - " }\n", - " for (i=0;i test) test=temp;\n", - " }\n", - " if (test < gtol) {\n", - " return;\n", - " }\n", - " for (i=0;i EPS*sumdg*sumxi) {\n", - " fac=1.0/fac;\n", - " fad=1.0/fae;\n", - " for (i=0;i stpmax)\n", - " for (i=0;i test) test=temp;\n", - " }\n", - " alamin=TOLX/test;\n", - " alam=1.0;\n", - " for (;;) {\n", - " for (i=0;i0.5*alam)\n", - " \t tmplam=0.5*alam;\n", - " }\n", - " }\n", - " alam2=alam;\n", - " f2 = *f;\n", - " fold2=fold;\n", - " alam=FMAX(tmplam,0.1*alam);\n", - " }\n", - " }\n", - " #undef ALF\n", - " #undef TOLX\n", - " \n" - ] - }, - { - "cell_type": "markdown", - "id": "8001465e", - "metadata": { - "editable": true - }, - "source": [ - "## What is OpenMP\n", - "* OpenMP provides high-level thread programming\n", - "\n", - "* Multiple cooperating threads are allowed to run simultaneously\n", - "\n", - "* Threads are created and destroyed dynamically in a fork-join pattern\n", - "\n", - " * An OpenMP program consists of 
a number of parallel regions\n", - "\n", - " * Between two parallel regions there is only one master thread\n", - "\n", - " * In the beginning of a parallel region, a team of new threads is spawned\n", - "\n", - " * The newly spawned threads work simultaneously with the master thread\n", - "\n", - " * At the end of a parallel region, the new threads are destroyed\n", - "\n", - "Many good tutorials online and excellent textbook\n", - "1. [Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas](http://mitpress.mit.edu/books/using-openmp)\n", - "\n", - "2. Many tutorials online like [OpenMP official site](http://www.openmp.org)" - ] - }, - { - "cell_type": "markdown", - "id": "c58e3959", - "metadata": { - "editable": true - }, - "source": [ - "## Getting started, things to remember\n", - " * Remember the header file" - ] - }, - { - "cell_type": "markdown", - "id": "7e30d9dc", - "metadata": { - "editable": true - }, - "source": [ - " #include \n" - ] - }, - { - "cell_type": "markdown", - "id": "d523b4fa", - "metadata": { - "editable": true - }, - "source": [ - "* Insert compiler directives in C++ syntax as" - ] - }, - { - "cell_type": "markdown", - "id": "1cde3797", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp...\n" - ] - }, - { - "cell_type": "markdown", - "id": "477f580a", - "metadata": { - "editable": true - }, - "source": [ - "* Compile with for example *c++ -fopenmp code.cpp*\n", - "\n", - "* Execute\n", - "\n", - " * Remember to assign the environment variable **OMP NUM THREADS**\n", - "\n", - " * It specifies the total number of threads inside a parallel region, if not otherwise overwritten" - ] - }, - { - "cell_type": "markdown", - "id": "e9b3615c", - "metadata": { - "editable": true - }, - "source": [ - "## OpenMP syntax\n", - "* Mostly directives" - ] - }, - { - "cell_type": "markdown", - "id": "b14e5170", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp construct [ clause ...]\n" - ] - }, - { - "cell_type": 
"markdown", - "id": "004c2ab7", - "metadata": { - "editable": true - }, - "source": [ - "* Some functions and types" - ] - }, - { - "cell_type": "markdown", - "id": "ab806620", - "metadata": { - "editable": true - }, - "source": [ - " #include \n" - ] - }, - { - "cell_type": "markdown", - "id": "c0dd8be6", - "metadata": { - "editable": true - }, - "source": [ - "* Most apply to a block of code\n", - "\n", - " * Specifically, a **structured block**\n", - "\n", - " * Enter at top, exit at bottom only, exit(), abort() permitted" - ] - }, - { - "cell_type": "markdown", - "id": "a28c16f1", - "metadata": { - "editable": true - }, - "source": [ - "## Different OpenMP styles of parallelism\n", - "OpenMP supports several different ways to specify thread parallelism\n", - "\n", - "* General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code\n", - "\n", - "* Parallel loops: Special case for loops, simplifies data parallel code\n", - "\n", - "* Task parallelism, new in OpenMP 3\n", - "\n", - "* Several ways to manage thread coordination, including Master regions and Locks\n", - "\n", - "* Memory model for shared data" - ] - }, - { - "cell_type": "markdown", - "id": "02c67899", - "metadata": { - "editable": true - }, - "source": [ - "## General code structure" - ] - }, - { - "cell_type": "markdown", - "id": "9f22aa20", - "metadata": { - "editable": true - }, - "source": [ - " #include \n", - " main ()\n", - " {\n", - " int var1, var2, var3;\n", - " /* serial code */\n", - " /* ... */\n", - " /* start of a parallel region */\n", - " #pragma omp parallel private(var1, var2) shared(var3)\n", - " {\n", - " /* ... */\n", - " }\n", - " /* more serial code */\n", - " /* ... */\n", - " /* another parallel region */\n", - " #pragma omp parallel\n", - " {\n", - " /* ... 
*/\n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "adfc98a6", - "metadata": { - "editable": true - }, - "source": [ - "## Parallel region\n", - "* A parallel region is a block of code that is executed by a team of threads\n", - "\n", - "* The following compiler directive creates a parallel region" - ] - }, - { - "cell_type": "markdown", - "id": "d9d71aaa", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel { ... }\n" - ] - }, - { - "cell_type": "markdown", - "id": "eeace845", - "metadata": { - "editable": true - }, - "source": [ - "* Clauses can be added at the end of the directive\n", - "\n", - "* Most often used clauses:\n", - "\n", - " * **default(shared)** or **default(none)**\n", - "\n", - " * **shared(list of variables)**\n", - "\n", - " * **private(list of variables)**" - ] - }, - { - "cell_type": "markdown", - "id": "bd6b6109", - "metadata": { - "editable": true - }, - "source": [ - "## Hello world, not again, please!" - ] - }, - { - "cell_type": "markdown", - "id": "fe258a73", - "metadata": { - "editable": true - }, - "source": [ - " #include <omp.h>\n", - " #include <cstdio>\n", - " int main (int argc, char *argv[])\n", - " {\n", - " int th_id, nthreads;\n", - " #pragma omp parallel private(th_id) shared(nthreads)\n", - " {\n", - " th_id = omp_get_thread_num();\n", - " printf(\"Hello World from thread %d\\n\", th_id);\n", - " #pragma omp barrier\n", - " if ( th_id == 0 ) {\n", - " nthreads = omp_get_num_threads();\n", - " printf(\"There are %d threads\\n\",nthreads);\n", - " }\n", - " }\n", - " return 0;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "2d3800ea", - "metadata": { - "editable": true - }, - "source": [ - "## Hello world, yet another variant" - ] - }, - { - "cell_type": "markdown", - "id": "b74cb4ab", - "metadata": { - "editable": true - }, - "source": [ - " #include <iostream>\n", - " #include <omp.h>\n", - " int main(int argc, char *argv[]) \n", - " {\n", - " omp_set_num_threads(4); \n", - " #pragma omp
parallel\n", - " {\n", - " int id = omp_get_thread_num();\n", - " int nproc = omp_get_num_threads(); \n", - " cout << \"Hello world with id number and processes \" << id << nproc << endl;\n", - " } \n", - " return 0;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "0757e417", - "metadata": { - "editable": true - }, - "source": [ - "Variables declared outside of the parallel region are shared by all threads\n", - "If a variable like **id** is declared outside of the" - ] - }, - { - "cell_type": "markdown", - "id": "e03e3257", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel, \n" - ] - }, - { - "cell_type": "markdown", - "id": "9d6da977", - "metadata": { - "editable": true - }, - "source": [ - "it would have been shared by various the threads, possibly causing erroneous output\n", - " * Why? What would go wrong? Why do we add possibly?" - ] - }, - { - "cell_type": "markdown", - "id": "7b972a17", - "metadata": { - "editable": true - }, - "source": [ - "## Important OpenMP library routines\n", - "\n", - "* **int omp get num threads ()**, returns the number of threads inside a parallel region\n", - "\n", - "* **int omp get thread num ()**, returns the a thread for each thread inside a parallel region\n", - "\n", - "* **void omp set num threads (int)**, sets the number of threads to be used\n", - "\n", - "* **void omp set nested (int)**, turns nested parallelism on/off" - ] - }, - { - "cell_type": "markdown", - "id": "6faa8d57", - "metadata": { - "editable": true - }, - "source": [ - "## Private variables\n", - "Private clause can be used to make thread- private versions of such variables:" - ] - }, - { - "cell_type": "markdown", - "id": "a72c6377", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel private(id)\n", - " {\n", - " int id = omp_get_thread_num();\n", - " cout << \"My thread num\" << id << endl; \n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "1b3261d6", - "metadata": { - 
"editable": true - }, - "source": [ - "* What is their value on entry? Exit?\n", - "\n", - "* OpenMP provides ways to control that\n", - "\n", - "* Can use default(none) to require the sharing of each variable to be described" - ] - }, - { - "cell_type": "markdown", - "id": "8c038a25", - "metadata": { - "editable": true - }, - "source": [ - "## Master region\n", - "It is often useful to have only one thread execute some of the code in a parallel region. I/O statements are a common example" - ] - }, - { - "cell_type": "markdown", - "id": "357bace0", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel \n", - " {\n", - " #pragma omp master\n", - " {\n", - " int id = omp_get_thread_num();\n", - " cout << \"My thread num\" << id << endl; \n", - " } \n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "2d6839dc", - "metadata": { - "editable": true - }, - "source": [ - "## Parallel for loop\n", - " * Inside a parallel region, the following compiler directive can be used to parallelize a for-loop:" - ] - }, - { - "cell_type": "markdown", - "id": "c623ee25", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp for\n" - ] - }, - { - "cell_type": "markdown", - "id": "3b608171", - "metadata": { - "editable": true - }, - "source": [ - "* Clauses can be added, such as\n", - "\n", - " * **schedule(static, chunk size)**\n", - "\n", - " * **schedule(dynamic, chunk size)** \n", - "\n", - " * **schedule(guided, chunk size)** (non-deterministic allocation)\n", - "\n", - " * **schedule(runtime)**\n", - "\n", - " * **private(list of variables)**\n", - "\n", - " * **reduction(operator:variable)**\n", - "\n", - " * **nowait**" - ] - }, - { - "cell_type": "markdown", - "id": "6f92aec8", - "metadata": { - "editable": true - }, - "source": [ - "## Parallel computations and loops\n", - "\n", - "OpenMP provides an easy way to parallelize a loop" - ] - }, - { - "cell_type": "markdown", - "id": "993af8db", - "metadata": { - "editable": true - 
}, - "source": [ - " #pragma omp parallel for\n", - " for (i=0; i\n", - " #define CHUNKSIZE 100\n", - " #define N 1000\n", - " int main (int argc, char *argv[])\n", - " {\n", - " int i, chunk;\n", - " float a[N], b[N], c[N];\n", - " for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;\n", - " chunk = CHUNKSIZE;\n", - " #pragma omp parallel shared(a,b,c,chunk) private(i)\n", - " {\n", - " #pragma omp for schedule(dynamic,chunk)\n", - " for (i=0; i < N; i++) c[i] = a[i] + b[i];\n", - " } /* end of parallel region */\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "95ed13d4", - "metadata": { - "editable": true - }, - "source": [ - "## Example code for loop scheduling, guided instead of dynamic" - ] - }, - { - "cell_type": "markdown", - "id": "c80f95e7", - "metadata": { - "editable": true - }, - "source": [ - " #include \n", - " #define CHUNKSIZE 100\n", - " #define N 1000\n", - " int main (int argc, char *argv[])\n", - " {\n", - " int i, chunk;\n", - " float a[N], b[N], c[N];\n", - " for (i=0; i < N; i++) a[i] = b[i] = i * 1.0;\n", - " chunk = CHUNKSIZE;\n", - " #pragma omp parallel shared(a,b,c,chunk) private(i)\n", - " {\n", - " #pragma omp for schedule(guided,chunk)\n", - " for (i=0; i < N; i++) c[i] = a[i] + b[i];\n", - " } /* end of parallel region */\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "c5505d2e", - "metadata": { - "editable": true - }, - "source": [ - "## More on Parallel for loop\n", - "* The number of loop iterations cannot be non-deterministic; break, return, exit, goto not allowed inside the for-loop\n", - "\n", - "* The loop index is private to each thread\n", - "\n", - "* A reduction variable is special\n", - "\n", - " * During the for-loop there is a local private copy in each thread\n", - "\n", - " * At the end of the for-loop, all the local copies are combined together by the reduction operation\n", - "\n", - "* Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler" 
- ] - }, - { - "cell_type": "markdown", - "id": "e46cbeb1", - "metadata": { - "editable": true - }, - "source": [ - " // #pragma omp parallel and #pragma omp for\n" - ] - }, - { - "cell_type": "markdown", - "id": "d215692e", - "metadata": { - "editable": true - }, - "source": [ - "can be combined into" - ] - }, - { - "cell_type": "markdown", - "id": "7c2689f4", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel for\n" - ] - }, - { - "cell_type": "markdown", - "id": "aea1cada", - "metadata": { - "editable": true - }, - "source": [ - "## What can happen with this loop?\n", - "\n", - "What happens with code like this" - ] - }, - { - "cell_type": "markdown", - "id": "35d873d7", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel for\n", - " for (i=0; i r) {\n", - " #pragma omp task\n", - " do_work (p_vec[i]);\n" - ] - }, - { - "cell_type": "markdown", - "id": "1034c179", - "metadata": { - "editable": true - }, - "source": [ - "## Common mistakes\n", - "Race condition" - ] - }, - { - "cell_type": "markdown", - "id": "bdd76a05", - "metadata": { - "editable": true - }, - "source": [ - " int nthreads;\n", - " #pragma omp parallel shared(nthreads)\n", - " {\n", - " nthreads = omp_get_num_threads();\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "4653e65f", - "metadata": { - "editable": true - }, - "source": [ - "Deadlock" - ] - }, - { - "cell_type": "markdown", - "id": "7eff17bd", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel\n", - " {\n", - " ...\n", - " #pragma omp critical\n", - " {\n", - " ...\n", - " #pragma omp barrier\n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "301a7620", - "metadata": { - "editable": true - }, - "source": [ - "## Not all computations are simple\n", - "Not all computations are simple loops where the data can be evenly \n", - "divided among threads without any dependencies between threads\n", - "\n", - "An example is 
finding the location and value of the largest element in an array" - ] - }, - { - "cell_type": "markdown", - "id": "8fa3ad4d", - "metadata": { - "editable": true - }, - "source": [ - " for (i=0; i maxval) {\n", - " maxval = x[i];\n", - " maxloc = i; \n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "571816aa", - "metadata": { - "editable": true - }, - "source": [ - "## Not all computations are simple, competing threads\n", - "All threads are potentially accessing and changing the same values, **maxloc** and **maxval**.\n", - "1. OpenMP provides several ways to coordinate access to shared values" - ] - }, - { - "cell_type": "markdown", - "id": "a9f819a5", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp atomic\n" - ] - }, - { - "cell_type": "markdown", - "id": "986b2d1c", - "metadata": { - "editable": true - }, - "source": [ - "1. Only one thread at a time can execute the following statement (not block). We can use the critical option" - ] - }, - { - "cell_type": "markdown", - "id": "fbf8852e", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp critical\n" - ] - }, - { - "cell_type": "markdown", - "id": "d92cb234", - "metadata": { - "editable": true - }, - "source": [ - "1. Only one thread at a time can execute the following block\n", - "\n", - "Atomic may be faster than critical but depends on hardware" - ] - }, - { - "cell_type": "markdown", - "id": "3a9e198b", - "metadata": { - "editable": true - }, - "source": [ - "## How to find the max value using OpenMP\n", - "Write down the simplest algorithm and look carefully for race conditions. How would you handle them? 
\n", - "The first step would be to parallelize as" - ] - }, - { - "cell_type": "markdown", - "id": "ccc68b6c", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel for\n", - " for (i=0; i maxval) {\n", - " maxval = x[i];\n", - " maxloc = i; \n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "dae6f381", - "metadata": { - "editable": true - }, - "source": [ - "## Then deal with the race conditions\n", - "Write down the simplest algorithm and look carefully for race conditions. How would you handle them? \n", - "The first step would be to parallelize as" - ] - }, - { - "cell_type": "markdown", - "id": "497fa67f", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp parallel for\n", - " for (i=0; i maxval) {\n", - " maxval = x[i];\n", - " maxloc = i; \n", - " }\n", - " }\n", - " } \n" - ] - }, - { - "cell_type": "markdown", - "id": "cdc2e331", - "metadata": { - "editable": true - }, - "source": [ - "Exercise: write a code which implements this and give an estimate on performance. Perform several runs,\n", - "with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different archictectures if you can." - ] - }, - { - "cell_type": "markdown", - "id": "87f78818", - "metadata": { - "editable": true - }, - "source": [ - "## What can slow down OpenMP performance?\n", - "Give it a thought!" 
- ] - }, - { - "cell_type": "markdown", - "id": "6665433c", - "metadata": { - "editable": true - }, - "source": [ - "## What can slow down OpenMP performance?\n", - "Performance poor because we insisted on keeping track of the maxval and location during the execution of the loop.\n", - " * We do not care about the value during the execution of the loop, just the value at the end.\n", - "\n", - "This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties\n", - "\n", - "**Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread**" - ] - }, - { - "cell_type": "markdown", - "id": "9b325ce0", - "metadata": { - "editable": true - }, - "source": [ - "## Find the max location for each thread" - ] - }, - { - "cell_type": "markdown", - "id": "1cfa5def", - "metadata": { - "editable": true - }, - "source": [ - " int maxloc[MAX_THREADS], mloc;\n", - " double maxval[MAX_THREADS], mval; \n", - " #pragma omp parallel shared(maxval,maxloc)\n", - " {\n", - " int id = omp_get_thread_num(); \n", - " maxval[id] = -1.0e30;\n", - " #pragma omp for\n", - " for (int i=0; i maxval[id]) { \n", - " maxloc[id] = i;\n", - " maxval[id] = x[i]; \n", - " }\n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "36861233", - "metadata": { - "editable": true - }, - "source": [ - "## Combine the values from each thread" - ] - }, - { - "cell_type": "markdown", - "id": "12ae10ab", - "metadata": { - "editable": true - }, - "source": [ - " #pragma omp flush (maxloc,maxval)\n", - " #pragma omp master\n", - " {\n", - " int nt = omp_get_num_threads(); \n", - " mloc = maxloc[0]; \n", - " mval = maxval[0]; \n", - " for (int i=1; i mval) { \n", - " mval = maxval[i]; \n", - " mloc = maxloc[i];\n", - " } \n", - " }\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "3631d97f", - 
"metadata": { - "editable": true - }, - "source": [ - "Note that we let the master process perform the last operation." - ] - }, - { - "cell_type": "markdown", - "id": "4d4f96d5", - "metadata": { - "editable": true - }, - "source": [ - "## [Matrix-matrix multiplication](https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp)\n", - "This code computes the norm of a vector using OpenMp" - ] - }, - { - "cell_type": "markdown", - "id": "266ee021", - "metadata": { - "editable": true - }, - "source": [ - " // OpenMP program to compute vector norm by adding two other vectors\n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " # include \n", - " \n", - " using namespace std; // note use of namespace\n", - " int main (int argc, char* argv[])\n", - " {\n", - " // read in dimension of vector\n", - " int n = atoi(argv[1]);\n", - " double *a, *b, *c;\n", - " int i;\n", - " int thread_num;\n", - " double wtime, Norm2, s, angle;\n", - " cout << \" Perform addition of two vectors and compute the norm-2.\" << endl;\n", - " omp_set_num_threads(4);\n", - " thread_num = omp_get_max_threads ();\n", - " cout << \" The number of processors available = \" << omp_get_num_procs () << endl ;\n", - " cout << \" The number of threads available = \" << thread_num << endl;\n", - " cout << \" The matrix order n = \" << n << endl;\n", - " \n", - " s = 1.0/sqrt( (double) n);\n", - " wtime = omp_get_wtime ( );\n", - " // Allocate space for the vectors to be used\n", - " a = new double [n]; b = new double [n]; c = new double [n];\n", - " // Define parallel region\n", - " # pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2)\n", - " // Set up values for vectors a and b\n", - " for (i = 0; i < n; i++){\n", - " angle = 2.0*M_PI*i/ (( double ) n);\n", - " a[i] = s*(sin(angle) + cos(angle));\n", - " b[i] = s*sin(2.0*angle);\n", - " c[i] = 0.0;\n", - " }\n", - " // 
Then perform the vector addition\n", - " for (i = 0; i < n; i++){\n", - " c[i] += a[i]+b[i];\n", - " }\n", - " // Compute now the norm-2\n", - " Norm2 = 0.0;\n", - " for (i = 0; i < n; i++){\n", - " Norm2 += c[i]*c[i];\n", - " }\n", - " // end parallel region\n", - " wtime = omp_get_wtime ( ) - wtime;\n", - " cout << setiosflags(ios::showpoint | ios::uppercase);\n", - " cout << setprecision(10) << setw(20) << \"Time used for norm-2 computation=\" << wtime << endl;\n", - " cout << \" Norm-2 = \" << Norm2 << endl;\n", - " // Free up space\n", - " delete[] a;\n", - " delete[] b;\n", - " delete[] c;\n", - " return 0;\n", - " }\n" - ] - }, - { - "cell_type": "markdown", - "id": "f739a26a", - "metadata": { - "editable": true - }, - "source": [ - "## [Matrix-matrix multiplication](https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp)\n", - "This the matrix-matrix multiplication code with plain c++ memory allocation using OpenMP" - ] - }, - { - "cell_type": "markdown", - "id": "12ef260c", - "metadata": { - "editable": true - }, - "source": [ - " // Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP\n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " #include \n", - " # include \n", - " \n", - " using namespace std; // note use of namespace\n", - " int main (int argc, char* argv[])\n", - " {\n", - " // read in dimension of square matrix\n", - " int n = atoi(argv[1]);\n", - " double **A, **B, **C;\n", - " int i, j, k;\n", - " int thread_num;\n", - " double wtime, Fsum, s, angle;\n", - " cout << \" Compute matrix product C = A * B and Frobenius norm.\" << endl;\n", - " omp_set_num_threads(4);\n", - " thread_num = omp_get_max_threads ();\n", - " cout << \" The number of processors available = \" << omp_get_num_procs () << endl ;\n", - " cout << \" The number of threads available = \" << thread_num << endl;\n", - " cout << \" The matrix order n = \" << n 
<< endl;\n", - " \n", - " s = 1.0/sqrt( (double) n);\n", - " wtime = omp_get_wtime ( );\n", - " // Allocate space for the two matrices\n", - " A = new double*[n]; B = new double*[n]; C = new double*[n];\n", - " for (i = 0; i < n; i++){\n", - " A[i] = new double[n];\n", - " B[i] = new double[n];\n", - " C[i] = new double[n];\n", - " }\n", - " // Define parallel region\n", - " # pragma omp parallel for default(shared) private (angle, i, j, k) reduction(+:Fsum)\n", - " // Set up values for matrix A and B and zero matrix C\n", - " for (i = 0; i < n; i++){\n", - " for (j = 0; j < n; j++) {\n", - " angle = 2.0*M_PI*i*j/ (( double ) n);\n", - " A[i][j] = s * ( sin ( angle ) + cos ( angle ) );\n", - " B[j][i] = A[i][j];\n", - " }\n", - " }\n", - " // Then perform the matrix-matrix multiplication\n", - " for (i = 0; i < n; i++){\n", - " for (j = 0; j < n; j++) {\n", - " C[i][j] = 0.0; \n", - " for (k = 0; k < n; k++) {\n", - " C[i][j] += A[i][k]*B[k][j];\n", - " }\n", - " }\n", - " }\n", - " // Compute now the Frobenius norm\n", - " Fsum = 0.0;\n", - " for (i = 0; i < n; i++){\n", - " for (j = 0; j < n; j++) {\n", - " Fsum += C[i][j]*C[i][j];\n", - " }\n", - " }\n", - " Fsum = sqrt(Fsum);\n", - " // end parallel region and letting only one thread perform I/O\n", - " wtime = omp_get_wtime ( ) - wtime;\n", - " cout << setiosflags(ios::showpoint | ios::uppercase);\n", - " cout << setprecision(10) << setw(20) << \"Time used for matrix-matrix multiplication=\" << wtime << endl;\n", - " cout << \" Frobenius norm = \" << Fsum << endl;\n", - " // Free up space\n", - " for (int i = 0; i < n; i++){\n", - " delete[] A[i];\n", - " delete[] B[i];\n", - " delete[] C[i];\n", - " }\n", - " delete[] A;\n", - " delete[] B;\n", - " delete[] C;\n", - " return 0;\n", - " }\n", - " \n", - " \n" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/src/week9/week9.p.tex b/doc/src/week9/week9.p.tex deleted file mode 100644 index 88c69af6..00000000 --- 
a/doc/src/week9/week9.p.tex +++ /dev/null @@ -1,5966 +0,0 @@ -%% -%% Automatically generated file from DocOnce source -%% (https://github.com/doconce/doconce/) -%% doconce format latex week9.do.txt --minted_latex_style=trac --latex_admon=paragraph --no_mako -%% -% #ifdef PTEX2TEX_EXPLANATION -%% -%% The file follows the ptex2tex extended LaTeX format, see -%% ptex2tex: https://code.google.com/p/ptex2tex/ -%% -%% Run -%% ptex2tex myfile -%% or -%% doconce ptex2tex myfile -%% -%% to turn myfile.p.tex into an ordinary LaTeX file myfile.tex. -%% (The ptex2tex program: https://code.google.com/p/ptex2tex) -%% Many preprocess options can be added to ptex2tex or doconce ptex2tex -%% -%% ptex2tex -DMINTED myfile -%% doconce ptex2tex myfile envir=minted -%% -%% ptex2tex will typeset code environments according to a global or local -%% .ptex2tex.cfg configure file. doconce ptex2tex will typeset code -%% according to options on the command line (just type doconce ptex2tex to -%% see examples). If doconce ptex2tex has envir=minted, it enables the -%% minted style without needing -DMINTED. 
-% #endif - -% #define PREAMBLE - -% #ifdef PREAMBLE -%-------------------- begin preamble ---------------------- - -\documentclass[% -oneside, % oneside: electronic viewing, twoside: printing -final, % draft: marks overfull hboxes, figures with paths -10pt]{article} - -\listfiles % print all files needed to compile this document - -\usepackage{relsize,makeidx,color,setspace,amsmath,amsfonts,amssymb} -\usepackage[table]{xcolor} -\usepackage{bm,ltablex,microtype} - -\usepackage[pdftex]{graphicx} - -\usepackage{ptex2tex} -% #ifdef MINTED -\usepackage{minted} -\usemintedstyle{default} -% #endif - -\usepackage[T1]{fontenc} -%\usepackage[latin1]{inputenc} -\usepackage{ucs} -\usepackage[utf8x]{inputenc} - -\usepackage{lmodern} % Latin Modern fonts derived from Computer Modern - -% Hyperlinks in PDF: -\definecolor{linkcolor}{rgb}{0,0,0.4} -\usepackage{hyperref} -\hypersetup{ - breaklinks=true, - colorlinks=true, - linkcolor=linkcolor, - urlcolor=linkcolor, - citecolor=black, - filecolor=black, - %filecolor=blue, - pdfmenubar=true, - pdftoolbar=true, - bookmarksdepth=3 % Uncomment (and tweak) for PDF bookmarks with more levels than the TOC - } -%\hyperbaseurl{} % hyperlinks are relative to this root - -\setcounter{tocdepth}{2} % levels in table of contents - -% --- fancyhdr package for fancy headers --- -\usepackage{fancyhdr} -\fancyhf{} % sets both header and footer to nothing -\renewcommand{\headrulewidth}{0pt} -\fancyfoot[LE,RO]{\thepage} -% Ensure copyright on titlepage (article style) and chapter pages (book style) -\fancypagestyle{plain}{ - \fancyhf{} - \fancyfoot[C]{{\footnotesize \copyright\ 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. 
Released under CC Attribution-NonCommercial 4.0 license}} -% \renewcommand{\footrulewidth}{0mm} - \renewcommand{\headrulewidth}{0mm} -} -% Ensure copyright on titlepages with \thispagestyle{empty} -\fancypagestyle{empty}{ - \fancyhf{} - \fancyfoot[C]{{\footnotesize \copyright\ 1999-2024, Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no. Released under CC Attribution-NonCommercial 4.0 license}} - \renewcommand{\footrulewidth}{0mm} - \renewcommand{\headrulewidth}{0mm} -} - -\pagestyle{fancy} - - -\usepackage[framemethod=TikZ]{mdframed} - -% --- begin definitions of admonition environments --- - -% --- end of definitions of admonition environments --- - -% prevent orhpans and widows -\clubpenalty = 10000 -\widowpenalty = 10000 - -\newenvironment{doconceexercise}{}{} -\newcounter{doconceexercisecounter} - - -% ------ header in subexercises ------ -%\newcommand{\subex}[1]{\paragraph{#1}} -%\newcommand{\subex}[1]{\par\vspace{1.7mm}\noindent{\bf #1}\ \ } -\makeatletter -% 1.5ex is the spacing above the header, 0.5em the spacing after subex title -\newcommand\subex{\@startsection{paragraph}{4}{\z@}% - {1.5ex\@plus1ex \@minus.2ex}% - {-0.5em}% - {\normalfont\normalsize\bfseries}} -\makeatother - - -% --- end of standard preamble for documents --- - - -% insert custom LaTeX commands... 
- -\raggedbottom -\makeindex -\usepackage[totoc]{idxlayout} % for index in the toc -\usepackage[nottoc]{tocbibind} % for references/bibliography in the toc - -%-------------------- end preamble ---------------------- - -\begin{document} - -% matching end for #ifdef PREAMBLE -% #endif - -\newcommand{\exercisesection}[1]{\subsection*{#1}} - - -% ------------------- main content ---------------------- - - - -% ----------------- title ------------------------- - -\thispagestyle{empty} - -\begin{center} -{\LARGE\bf -\begin{spacing}{1.25} -Week 11, March 11-15: Resampling Techniques, Bootstrap and Blocking -\end{spacing} -} -\end{center} - -% ----------------- author(s) ------------------------- - -\begin{center} -{\bf Morten Hjorth-Jensen Email morten.hjorth-jensen@fys.uio.no${}^{1, 2}$} \\ [0mm] -\end{center} - -\begin{center} -% List of all institutions: -\centerline{{\small ${}^1$Department of Physics and Center for Computing in Science Education, University of Oslo, Oslo, Norway}} -\centerline{{\small ${}^2$Department of Physics and Astronomy and Facility for Rare Isotope Beams, Michigan State University, East Lansing, Michigan, USA}} -\end{center} - -% ----------------- end author(s) ------------------------- - -% --- begin date --- -\begin{center} -March 11-15 -\end{center} -% --- end date --- - -\vspace{1cm} - - -% !split -\subsection{Overview of week 11, March 11-15} - -% --- begin paragraph admon --- -\paragraph{Topics.} -\begin{enumerate} -\item Reminder from last week about statistical observables, the central limit theorem and bootstrapping, see notes from last week - -\item Resampling Techniques, emphasis on Blocking - -\item Discussion of onebody densities (whiteboard notes) - -\item Start discussion on optimization and parallelization for Python and C++ -% * \href{{https://youtu.be/}}{Video of lecture TBA} -% * \href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/HandWrittenNotes/2024/NotesMarch22.pdf}}{Handwritten notes}
-\end{enumerate} - -\noindent -% --- end paragraph admon --- - - - -Note, these notes contain additional material on optimization and parallelization. Parts of this material will be discussed this week. - -% !split -\subsection{Why resampling methods?} - -% --- begin paragraph admon --- -\paragraph{Statistical analysis.} -\begin{itemize} -\item Our simulations can be treated as \emph{computer experiments}. This is particularly the case for Monte Carlo methods - -\item The results can be analysed with the same statistical tools as we would use analysing experimental data. - -\item As in all experiments, we are looking for expectation values and an estimate of how accurate they are, i.e., possible sources for errors. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Statistical analysis} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item As in other experiments, many numerical experiments have two classes of errors: -\begin{enumerate} - -\item Statistical errors - -\item Systematic errors - -\end{enumerate} - -\noindent -\item Statistical errors can be estimated using standard tools from statistics - -\item Systematic errors are method specific and must be treated differently from case to case. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{And why do we use such methods?} - -As you will see below, due to correlations between various -measurements, we need to evaluate the so-called covariance in order to -establish a proper evaluation of the total variance and thereby -the standard deviation of a given expectation value. - -The covariance, however, leads to an evaluation of a double sum over the various stochastic variables. This becomes computationally too expensive to evaluate. -Methods like the Bootstrap, the Jackknife and/or Blocking allow us to circumvent this problem.
- -% !split -\subsection{Central limit theorem} - -Last week we derived the central limit theorem with the following assumptions: - - -% --- begin paragraph admon --- -\paragraph{Measurement $i$.} -We assumed that each individual measurement $x_{ij}$ is represented by stochastic variables which are independent and identically distributed (iid). -This defined the sample mean of experiment $i$ with $n$ samples as -\[ -\overline{x}_i=\frac{1}{n}\sum_{j} x_{ij}, -\] -and the sample variance -\[ -\sigma^2_i=\frac{1}{n}\sum_{j} \left(x_{ij}-\overline{x}_i\right)^2. -\] -% --- end paragraph admon --- - - - -% !split -\subsection{Further remarks} - -Note that we use $n$ instead of $n-1$ in the definition of -variance. The sample variance and the sample mean are not necessarily equal to -the exact values we would get if we knew the corresponding probability -distribution. - -% !split -\subsection{Running many measurements} - - -% --- begin paragraph admon --- -\paragraph{Adding $m$ measurements $i$.} -With the assumption that the average measurements $i$ are also defined as iid stochastic variables and have the same probability function $p$, -we defined the total average over $m$ experiments as -\[ -\overline{X}=\frac{1}{m}\sum_{i} \overline{x}_{i}, -\] -and the total variance -\[ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2. -\] -% --- end paragraph admon --- - - -These are the quantities we used in showing that if the individual mean values are iid stochastic variables, then in the limit $m\rightarrow \infty$, the distribution for $\overline{X}$ is given by a Gaussian distribution with variance $\sigma^2_m$. - -% !split -\subsection{Adding more definitions} - -The total sample variance over the $mn$ measurements is defined as -\[ -\sigma^2=\frac{1}{mn}\sum_{i=1}^{m} \sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right)^2.
-\] -We have from the equation for $\sigma_m^2$ -\[ -\overline{x}_i-\overline{X}=\frac{1}{n}\sum_{j=1}^{n}\left(x_{ij}-\overline{X}\right), -\] -and introducing the centered value $\tilde{x}_{ij}=x_{ij}-\overline{X}$, we can rewrite $\sigma_m^2$ as -\[ -\sigma^2_{m}=\frac{1}{m}\sum_{i} \left( \overline{x}_{i}-\overline{X}\right)^2=\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2. -\] - -% !split -\subsection{Further rewriting} - -We can rewrite the latter in terms of a sum over diagonal elements only and another sum which contains the non-diagonal elements -\begin{align*} -\sigma^2_{m}& =\frac{1}{m}\sum_{i=1}^{m}\left[ \frac{1}{n}\sum_{j=1}^{n}\tilde{x}_{ij}\right]^2 \\ - & = \frac{1}{mn^2}\sum_{i=1}^{m} \sum_{j=1}^{n}\tilde{x}_{ij}^2+\frac{2}{mn^2}\sum_{i=1}^{m} \sum_{j<k}^{n}\tilde{x}_{ij}\tilde{x}_{ik}. -\end{align*} - -We assume that $n=2^d$ for some integer $d>1$ and $X_1,X_2,\cdots, X_n$ is a stationary time series to begin with. -Moreover, assume that the series is asymptotically uncorrelated. We switch to vector notation by arranging $X_1,X_2,\cdots,X_n$ in an $n$-tuple. Define: -\begin{align*} -\hat{X} = (X_1,X_2,\cdots,X_n). -\end{align*} - -% !split -\subsection{Why blocking?} - -The strength of the blocking method is when the number of -observations, $n$, is large. For large $n$, the complexity of dependent -bootstrapping scales poorly, but the blocking method does not; -moreover, it becomes more accurate the larger $n$ is. - -% !split -\subsection{Blocking Transformations} - We now define the blocking transformations. The idea is to take the mean of subsequent -pairs of elements from $\bm{X}$ and form a new vector -$\bm{X}_1$. Continuing in the same way by taking the mean of -subsequent pairs of elements of $\bm{X}_1$ we obtain $\bm{X}_2$, and -so on.
-Define $\bm{X}_i$ recursively by: - -\begin{align} -(\bm{X}_0)_k &\equiv (\bm{X})_k \nonumber \\ -(\bm{X}_{i+1})_k &\equiv \frac{1}{2}\Big( (\bm{X}_i)_{2k-1} + -(\bm{X}_i)_{2k} \Big) \qquad \text{for all} \qquad 1 \leq i \leq d-1 -\end{align} - -% !split -\subsection{Blocking transformations} - -The quantity $\bm{X}_k$ is -subject to $k$ \textbf{blocking transformations}. We now have $d$ vectors -$\bm{X}_0, \bm{X}_1,\cdots,\bm{X}_{d-1}$ containing the subsequent -averages of observations. It turns out that if the components of -$\bm{X}$ form a stationary time series, then the components of -$\bm{X}_i$ form a stationary time series for all $0 \leq i \leq d-1$. - -We can then compute the autocovariance, the variance, sample mean, and -number of observations for each $i$. -Let $\gamma_i, \sigma_i^2, -\overline{X}_i$ denote the covariance, variance and average of the -elements of $\bm{X}_i$ and let $n_i$ be the number of elements of -$\bm{X}_i$. It follows by induction that $n_i = n/2^i$. - -% !split -\subsection{Blocking Transformations} - -Using the -definition of the blocking transformation and the distributive -property of the covariance, it is clear that since $h =|i-j|$ -we can define -\begin{align} -\gamma_{k+1}(h) &= \mathrm{cov}\left( ({X}_{k+1})_{i}, ({X}_{k+1})_{j} \right) \nonumber \\ -&= \frac{1}{4}\mathrm{cov}\left( ({X}_{k})_{2i-1} + ({X}_{k})_{2i}, ({X}_{k})_{2j-1} + ({X}_{k})_{2j} \right) \nonumber \\ -&= \frac{1}{2}\gamma_{k}(2h) + \frac{1}{2}\gamma_k(2h+1) \quad \text{if} \quad h = 0 \\ -&=\frac{1}{4}\gamma_k(2h-1) + \frac{1}{2}\gamma_k(2h) + \frac{1}{4}\gamma_k(2h+1) \quad \text{else} -\end{align} - -Since the quantity $\hat{X}$ is asymptotically uncorrelated by assumption, $\hat{X}_k$ is also asymptotically uncorrelated. Let's turn our attention to the variance of the sample -mean $\mathrm{var}(\overline{X})$.
- -% !split -\subsection{Blocking Transformations, getting there} -We have -\begin{align} -\mathrm{var}(\overline{X}_k) = \frac{\sigma_k^2}{n_k} + \underbrace{\frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h)}_{\equiv e_k} = \frac{\sigma^2_k}{n_k} + e_k \quad \text{if} \quad \gamma_k(0) = \sigma_k^2. -\end{align} -The term $e_k$ is called the \textbf{truncation error}: -\begin{equation} -e_k = \frac{2}{n_k} \sum_{h=1}^{n_k-1}\left( 1 - \frac{h}{n_k} \right)\gamma_k(h). -\end{equation} -We can show that $\mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_j)$ for all $0 \leq i \leq d-1$ and $0 \leq j \leq d-1$. - -% !split -\subsection{Blocking Transformations, final expressions} - -We can then wrap up -\begin{align} -n_{j+1} \overline{X}_{j+1} &= \sum_{i=1}^{n_{j+1}} (\hat{X}_{j+1})_i = \frac{1}{2}\sum_{i=1}^{n_{j}/2} (\hat{X}_{j})_{2i-1} + (\hat{X}_{j})_{2i} \nonumber \\ -&= \frac{1}{2}\left[ (\hat{X}_j)_1 + (\hat{X}_j)_2 + \cdots + (\hat{X}_j)_{n_j} \right] = \underbrace{\frac{n_j}{2}}_{=n_{j+1}} \overline{X}_j = n_{j+1}\overline{X}_j. -\end{align} -By repeated use of this equation we get $\mathrm{var}(\overline{X}_i) = \mathrm{var}(\overline{X}_0) = \mathrm{var}(\overline{X})$ for all $0 \leq i \leq d-1$. This has the consequence that -\begin{align} -\mathrm{var}(\overline{X}) = \frac{\sigma_k^2}{n_k} + e_k \qquad \text{for all} \qquad 0 \leq k \leq d-1. \label{eq:convergence} -\end{align} - -% !split -\subsection{More on the blocking method} - -Flyvbjerg and Petersen demonstrated that the sequence -$\{e_k\}_{k=0}^{d-1}$ is decreasing, and conjecture that the term -$e_k$ can be made as small as we would like by making $k$ (and hence -$d$) sufficiently large. The sequence is decreasing. -It means we can apply blocking transformations until -$e_k$ is sufficiently small, and then estimate $\mathrm{var}(\overline{X})$ by -$\widehat{\sigma}^2_k/n_k$. 
- -For an elegant solution and proof of the blocking method, see the recent article of \href{{https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304}}{Marius Jonsson (former MSc student of the Computational Physics group)}. - -% !split -\subsection{Example code form last week} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bpycod -# 2-electron VMC code for 2dim quantum dot with importance sampling -# Using gaussian rng for new positions and Metropolis- Hastings -# Added energy minimization -from math import exp, sqrt -from random import random, seed, normalvariate -import numpy as np -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D -from matplotlib import cm -from matplotlib.ticker import LinearLocator, FormatStrFormatter -from scipy.optimize import minimize -import sys -import os - -# Where to save data files -PROJECT_ROOT_DIR = "Results" -DATA_ID = "Results/EnergyMin" - -if not os.path.exists(PROJECT_ROOT_DIR): - os.mkdir(PROJECT_ROOT_DIR) - -if not os.path.exists(DATA_ID): - os.makedirs(DATA_ID) - -def data_path(dat_id): - return os.path.join(DATA_ID, dat_id) - -outfile = open(data_path("Energies.dat"),'w') - - -# Trial wave function for the 2-electron quantum dot in two dims -def WaveFunction(r,alpha,beta): - r1 = r[0,0]**2 + r[0,1]**2 - r2 = r[1,0]**2 + r[1,1]**2 - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = r12/(1+beta*r12) - return exp(-0.5*alpha*(r1+r2)+deno) - -# Local energy for the 2-electron quantum dot in two dims, using analytical local energy -def LocalEnergy(r,alpha,beta): - - r1 = (r[0,0]**2 + r[0,1]**2) - r2 = 
(r[1,0]**2 + r[1,1]**2) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - deno2 = deno*deno - return 0.5*(1-alpha*alpha)*(r1 + r2) +2.0*alpha + 1.0/r12+deno2*(alpha*r12-deno2+2*beta*deno-1.0/r12) - -# Derivate of wave function ansatz as function of variational parameters -def DerivativeWFansatz(r,alpha,beta): - - WfDer = np.zeros((2), np.double) - r1 = (r[0,0]**2 + r[0,1]**2) - r2 = (r[1,0]**2 + r[1,1]**2) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - deno2 = deno*deno - WfDer[0] = -0.5*(r1+r2) - WfDer[1] = -r12*r12*deno2 - return WfDer - -# Setting up the quantum force for the two-electron quantum dot, recall that it is a vector -def QuantumForce(r,alpha,beta): - - qforce = np.zeros((NumberParticles,Dimension), np.double) - r12 = sqrt((r[0,0]-r[1,0])**2 + (r[0,1]-r[1,1])**2) - deno = 1.0/(1+beta*r12) - qforce[0,:] = -2*r[0,:]*alpha*(r[0,:]-r[1,:])*deno*deno/r12 - qforce[1,:] = -2*r[1,:]*alpha*(r[1,:]-r[0,:])*deno*deno/r12 - return qforce - - -# Computing the derivative of the energy and the energy -def EnergyDerivative(x0): - - - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - EnergyDer = 0.0 - DeltaPsi = 0.0 - DerivativePsiE = 0.0 - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - - #Loop over MC MCcycles - for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - 
for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - DerPsi = DerivativeWFansatz(PositionOld,alpha,beta) - DeltaPsi += DerPsi - energy += DeltaE - DerivativePsiE += DerPsi*DeltaE - - # We calculate mean values - energy /= NumberMCcycles - DerivativePsiE /= NumberMCcycles - DeltaPsi /= NumberMCcycles - EnergyDer = 2*(DerivativePsiE-DeltaPsi*energy) - return EnergyDer - - -# Computing the expectation value of the local energy -def Energy(x0): - # Parameters in the Fokker-Planck simulation of the quantum force - D = 0.5 - TimeStep = 0.05 - # positions - PositionOld = np.zeros((NumberParticles,Dimension), np.double) - PositionNew = np.zeros((NumberParticles,Dimension), np.double) - # Quantum force - QuantumForceOld = np.zeros((NumberParticles,Dimension), np.double) - QuantumForceNew = np.zeros((NumberParticles,Dimension), np.double) - - energy = 0.0 - DeltaE = 0.0 - alpha = x0[0] - beta = x0[1] - #Initial position - for i in range(NumberParticles): - for j in range(Dimension): - PositionOld[i,j] = normalvariate(0.0,1.0)*sqrt(TimeStep) - wfold = WaveFunction(PositionOld,alpha,beta) - QuantumForceOld = QuantumForce(PositionOld,alpha, beta) - - #Loop over MC MCcycles - 
for MCcycle in range(NumberMCcycles): - #Trial position moving one particle at the time - for i in range(NumberParticles): - for j in range(Dimension): - PositionNew[i,j] = PositionOld[i,j]+normalvariate(0.0,1.0)*sqrt(TimeStep)+\ - QuantumForceOld[i,j]*TimeStep*D - wfnew = WaveFunction(PositionNew,alpha,beta) - QuantumForceNew = QuantumForce(PositionNew,alpha, beta) - GreensFunction = 0.0 - for j in range(Dimension): - GreensFunction += 0.5*(QuantumForceOld[i,j]+QuantumForceNew[i,j])*\ - (D*TimeStep*0.5*(QuantumForceOld[i,j]-QuantumForceNew[i,j])-\ - PositionNew[i,j]+PositionOld[i,j]) - - GreensFunction = exp(GreensFunction) - ProbabilityRatio = GreensFunction*wfnew**2/wfold**2 - #Metropolis-Hastings test to see whether we accept the move - if random() <= ProbabilityRatio: - for j in range(Dimension): - PositionOld[i,j] = PositionNew[i,j] - QuantumForceOld[i,j] = QuantumForceNew[i,j] - wfold = wfnew - DeltaE = LocalEnergy(PositionOld,alpha,beta) - energy += DeltaE - if Printout: - outfile.write('%f\n' %(energy/(MCcycle+1.0))) - # We calculate mean values - energy /= NumberMCcycles - return energy - -#Here starts the main program with variable declarations -NumberParticles = 2 -Dimension = 2 -# seed for rng generator -seed() -# Monte Carlo cycles for parameter optimization -Printout = False -NumberMCcycles= 10000 -# guess for variational parameters -x0 = np.array([0.9,0.2]) -# Using Broydens method to find optimal parameters -res = minimize(Energy, x0, method='BFGS', jac=EnergyDerivative, options={'gtol': 1e-4,'disp': True}) -x0 = res.x -# Compute the energy again with the optimal parameters and increased number of Monte Cycles -NumberMCcycles= 2**19 -Printout = True -FinalEnergy = Energy(x0) -EResult = np.array([FinalEnergy,FinalEnergy]) -outfile.close() -#nice printout with Pandas -import pandas as pd -from pandas import DataFrame -data ={'Optimal Parameters':x0, 'Final Energy':EResult} -frame = pd.DataFrame(data) -print(frame) - -\epycod - - -% !split 
-\subsection{Resampling analysis} - -The next step is then to use the above data sets and perform a -resampling analysis using the blocking method -The blocking code, based on the article of \href{{https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.043304}}{Marius Jonsson} is given here - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bpycod -# Common imports -import os - -# Where to save the figures and data files -DATA_ID = "Results/EnergyMin" - -def data_path(dat_id): - return os.path.join(DATA_ID, dat_id) - -infile = open(data_path("Energies.dat"),'r') - -from numpy import log2, zeros, mean, var, sum, loadtxt, arange, array, cumsum, dot, transpose, diagonal, sqrt -from numpy.linalg import inv - -def block(x): - # preliminaries - n = len(x) - d = int(log2(n)) - s, gamma = zeros(d), zeros(d) - mu = mean(x) - - # estimate the auto-covariance and variances - # for each blocking transformation - for i in arange(0,d): - n = len(x) - # estimate autocovariance of x - gamma[i] = (n)**(-1)*sum( (x[0:(n-1)]-mu)*(x[1:n]-mu) ) - # estimate variance of x - s[i] = var(x) - # perform blocking transformation - x = 0.5*(x[0::2] + x[1::2]) - - # generate the test observator M_k from the theorem - M = (cumsum( ((gamma/s)**2*2**arange(1,d+1)[::-1])[::-1] ) )[::-1] - - # we need a list of magic numbers - q =array([6.634897,9.210340, 11.344867, 13.276704, 15.086272, 16.811894, 18.475307, 20.090235, 21.665994, 23.209251, 24.724970, 26.216967, 27.688250, 29.141238, 30.577914, 31.999927, 33.408664, 34.805306, 36.190869, 37.566235, 38.932173, 40.289360, 41.638398, 42.979820, 44.314105, 45.641683, 46.962942, 48.278236, 49.587884, 50.892181]) - - # use magic to determine when we should have stopped blocking - for k in arange(0,d): - if(M[k] < q[k]): - break - if (k >= d-1): - print("Warning: Use more data") - return mu, s[k]/2**(d-k) - - -x = loadtxt(infile) -(mean, var) = block(x) -std = sqrt(var) -import pandas 
as pd -from pandas import DataFrame -data ={'Mean':[mean], 'STDev':[std]} -frame = pd.DataFrame(data,index=['Values']) -print(frame) - - -\epycod - - -% !split -\subsection{Content} -\begin{itemize} -\item Simple compiler options - -\item Tools to benchmark your code - -\item Machine architectures - -\item What is vectorization? - -\item How to measure code performance - -\item Parallelization with OpenMP - -\item Parallelization with MPI - -\item Vectorization and parallelization, examples -\end{itemize} - -\noindent -% !split -\subsection{Optimization and profiling} - -% --- begin paragraph admon --- -\paragraph{} - -Till now we have not paid much attention to speed and possible optimization possibilities -inherent in the various compilers. We have compiled and linked as - - - -\bcppcod -c++ -c mycode.cpp -c++ -o mycode.exe mycode.o - -\ecppcod - -For Fortran replace with for example \textbf{gfortran} or \textbf{ifort}. -This is what we call a flat compiler option and should be used when we develop the code. -It produces normally a very large and slow code when translated to machine instructions. -We use this option for debugging and for establishing the correct program output because -every operation is done precisely as the user specified it. - -It is instructive to look up the compiler manual for further instructions by writing - - -\bcppcod -man c++ - -\ecppcod -% --- end paragraph admon --- - - -% !split -\subsection{More on optimization} - -% --- begin paragraph admon --- -\paragraph{} -We have additional compiler options for optimization. These may include procedure inlining where -performance may be improved, moving constants inside loops outside the loop, -identify potential parallelism, include automatic vectorization or replace a division with a reciprocal -and a multiplication if this speeds up the code. 
- - - -\bcppcod -c++ -O3 -c mycode.cpp -c++ -O3 -o mycode.exe mycode.o - -\ecppcod - -This (other options are -O2 or -Ofast) is the recommended option. -% --- end paragraph admon --- - - -% !split -\subsection{Optimization and profiling} - -% --- begin paragraph admon --- -\paragraph{} -It is also useful to profile your program under the development stage. -You would then compile with - - - -\bcppcod -c++ -pg -O3 -c mycode.cpp -c++ -pg -O3 -o mycode.exe mycode.o - -\ecppcod - -After you have run the code you can obtain the profiling information via - - -\bcppcod -gprof mycode.exe > ProfileOutput - -\ecppcod - -When you have profiled properly your code, you must take out this option as it -slows down performance. -For memory tests use \href{{http://www.valgrind.org}}{valgrind}. An excellent environment for all these aspects, and much more, is Qt creator. -% --- end paragraph admon --- - - - -% !split -\subsection{Optimization and debugging} - -% --- begin paragraph admon --- -\paragraph{} -Adding debugging options is a very useful alternative under the development stage of a program. -You would then compile with - - - -\bcppcod -c++ -g -O0 -c mycode.cpp -c++ -g -O0 -o mycode.exe mycode.o - -\ecppcod - -This option generates debugging information allowing you to trace for example if an array is properly allocated. Some compilers work best with the no optimization option \textbf{-O0}. -% --- end paragraph admon --- - - - -% --- begin paragraph admon --- -\paragraph{Other optimization flags.} -Depending on the compiler, one can add flags which generate code that catches integer overflow errors. -The flag \textbf{-ftrapv} does this for the CLANG compiler on OS X operating systems. -% --- end paragraph admon --- - - - -% !split -\subsection{Other hints} - -% --- begin paragraph admon --- -\paragraph{} -In general, irrespective of compiler options, it is useful to -\begin{itemize} -\item avoid if tests or call to functions inside loops, if possible. 
- -\item avoid multiplication with constants inside loops if possible -\end{itemize} - -\noindent -Here is an example of a part of a program where specific operations lead to a slower code - - - - - - -\bcppcod -k = n-1; -for (i = 0; i < n; i++){ - a[i] = b[i] +c*d; - e = g[k]; -} - -\ecppcod - -A better code is - - - - - - -\bcppcod -temp = c*d; -for (i = 0; i < n; i++){ - a[i] = b[i] + temp; -} -e = g[n-1]; - -\ecppcod - -Here we avoid a repeated multiplication inside a loop. -Most compilers, depending on compiler flags, identify and optimize such bottlenecks on their own, without requiring any particular action by the programmer. However, it is always useful to single out and avoid code examples like the first one discussed here. -% --- end paragraph admon --- - - - -% !split -\subsection{Vectorization and the basic idea behind parallel computing} - -% --- begin paragraph admon --- -\paragraph{} -Present CPUs are highly parallel processors with varying levels of parallelism. The typical situation can be described via the following three statements. -\begin{itemize} -\item Pursuit of shorter computation time and larger simulation size gives rise to parallel computing. - -\item Multiple processors are involved to solve a global problem. - -\item The essence is to divide the entire computation evenly among collaborative processors. Divide and conquer. -\end{itemize} - -\noindent -Before we proceed with a more detailed discussion of topics like vectorization and parallelization, we need to remind ourselves about some basic features of different hardware models. -% --- end paragraph admon --- - - - -% !split -\subsection{A rough classification of hardware models} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Conventional single-processor computers are named SISD (single-instruction-single-data) machines. 
- -\item SIMD (single-instruction-multiple-data) machines incorporate the idea of parallel processing, using a large number of processing units to execute the same instruction on different data. - -\item Modern parallel computers are so-called MIMD (multiple-instruction-multiple-data) machines and can execute different instruction streams in parallel on different data. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - -% !split -\subsection{Shared memory and distributed memory} - -% --- begin paragraph admon --- -\paragraph{} -One way of categorizing modern parallel computers is to look at the memory configuration. -\begin{itemize} -\item In shared memory systems the CPUs share the same address space. Any CPU can access any data in the global memory. - -\item In distributed memory systems each CPU has its own memory. -\end{itemize} - -\noindent -The CPUs are connected by some network and may exchange messages. -% --- end paragraph admon --- - - - -% !split -\subsection{Different parallel programming paradigms} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Task parallelism}: the work of a global problem can be divided into a number of independent tasks, which rarely need to synchronize. Monte Carlo simulations represent a typical situation. Integration is another. However this paradigm is of limited use. - -\item \textbf{Data parallelism}: use of multiple threads (e.g.~one or more threads per processor) to dissect loops over arrays etc. Communication and synchronization between processors are often hidden, thus easy to program. However, the user surrenders much control to a specialized compiler. Examples of data parallelism are compiler-based parallelization and OpenMP directives. 
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - -% !split -\subsection{Different parallel programming paradigms} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Message passing}: all involved processors have an independent memory address space. The user is responsible for partitioning the data/work of a global problem and distributing the subproblems to the processors. Collaboration between processors is achieved by explicit message passing, which is used for data transfer plus synchronization. - -\item This paradigm is the most general one where the user has full control. Better parallel efficiency is usually achieved by explicit message passing. However, message-passing programming is more difficult. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{What is vectorization?} -Vectorization is a special -case of \textbf{Single Instruction Multiple Data} (SIMD), which denotes a single -instruction stream capable of operating on multiple data elements in -parallel. -We can think of vectorization as the unrolling of loops accompanied by SIMD instructions. - -Vectorization is the process of converting an algorithm that performs scalar operations -(typically one operation at a time) to vector operations where a single operation can refer to many simultaneous operations. -Consider the following example - - - - -\bcppcod -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\ecppcod - -If the code is not vectorized, the compiler will simply start with the first element and -then perform subsequent additions operating on one address in memory at a time. - -% !split -\subsection{Number of elements that can be acted upon} -A SIMD instruction can operate on multiple data elements in one single instruction. -It uses the so-called 128-bit SIMD floating-point register. -In this sense, vectorization adds some form of parallelism since one instruction is applied -to many parts of, say, a vector.
- -The number of elements which can be operated on in parallel -ranges from four single-precision floating point data elements in so-called -Streaming SIMD Extensions and two double-precision floating-point data -elements in Streaming SIMD Extensions 2 to sixteen byte operations in -a 128-bit register in Streaming SIMD Extensions 2. Thus, vector-length -ranges from 2 to 16, depending on the instruction extensions used and -on the data type. - -In summary, our instructions operate on 128 bit (16 byte) operands -\begin{itemize} -\item 4 floats or ints - -\item 2 doubles - -\item Data paths 128 bits wide for vector unit -\end{itemize} - -\noindent -% !split -\subsection{Number of elements that can be acted upon, examples} -We start with the simple scalar operations given by - - - - -\bcppcod -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\ecppcod - -If the code is not vectorized and we have a 128-bit register to store a 32-bit floating point number, -it means that we have $3\times 32$ bits that are not used. - -We have thus unused space in our SIMD registers. These registers could hold three additional integers. - -% !split -\subsection{Operation counts for scalar operation} -The code - - - - -\bcppcod -for (i = 0; i < n; i++){ - a[i] = b[i] + c[i]; -} - -\ecppcod - -has for $n$ repeats -\begin{enumerate} -\item one load for $c[i]$ in address 1 - -\item one load for $b[i]$ in address 2 - -\item add $c[i]$ and $b[i]$ to give $a[i]$ - -\item store $a[i]$ in address 2 -\end{enumerate} - -\noindent -% !split -\subsection{Number of elements that can be acted upon, examples} -If we vectorize the code, we can perform, with a 128-bit register, four simultaneous operations, that is -we have - - - - - - - -\bcppcod -for (i = 0; i < n; i+=4){ - a[i] = b[i] + c[i]; - a[i+1] = b[i+1] + c[i+1]; - a[i+2] = b[i+2] + c[i+2]; - a[i+3] = b[i+3] + c[i+3]; -} - -\ecppcod - - -Four additions are now done in a single step.
- -% !split -\subsection{Number of operations when vectorized} -For $n/4$ repeats assuming floats or integers -\begin{enumerate} -\item one vector load for $c[i]$ in address 1 - -\item one load for $b[i]$ in address 2 - -\item add $c[i]$ and $b[i]$ to give $a[i]$ - -\item store $a[i]$ in address 2 -\end{enumerate} - -\noindent -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program7.cpp}}{A simple test case with and without vectorization}} -We implement these operations in a simple c++ program that computes at the end the norm of a vector. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bcppcode -#include -#include -#include -#include -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double s = 1.0/sqrt( (double) n); - double *a, *b, *c; - // Start timing - clock_t start, finish; - start = clock(); -// Allocate space for the vectors to be used - a = new double [n]; b = new double [n]; c = new double [n]; - // Define parallel region - // Set up values for vectors a and b - for (int i = 0; i < n; i++){ - double angle = 2.0*M_PI*i/ (( double ) n); - a[i] = s*(sin(angle) + cos(angle)); - b[i] = s*sin(2.0*angle); - c[i] = 0.0; - } - // Then perform the vector addition - for (int i = 0; i < n; i++){ - c[i] += a[i]+b[i]; - } - // Compute now the norm-2 - double Norm2 = 0.0; - for (int i = 0; i < n; i++){ - Norm2 += c[i]*c[i]; - } - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for norm computation=" << timeused << endl; - cout << " Norm-2 = " << Norm2 << endl; - // Free up space - delete[] a; - delete[] b; - delete[] c; - return 0; -} - - - - - 
-\ecppcode - - -% !split -\subsection{Compiling with and without vectorization} -We can compile and link without vectorization using the clang c++ compiler - - -\bcppcod -clang -o novec.x vecexample.cpp - -\ecppcod - -and with vectorization (and additional optimizations) - - -\bcppcod -clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp - -\ecppcod - -The speedup depends on the size of the vectors. In the example here we have run with $10^7$ elements. -The example here was run on an IMac17.1 with OSX El Capitan (10.11.4) as operating system and an Intel i5 3.3 GHz CPU. - - - - - -\bcppcod -Compphys:~ hjensen$ ./vec.x 10000000 -Time used for norm computation=0.04720500000 -Compphys:~ hjensen$ ./novec.x 10000000 -Time used for norm computation=0.03311700000 - -\ecppcod - -This particular C++ compiler speeds up the above loop operations with a factor of 1.5 -Performing the same operations for $10^9$ elements results in a smaller speedup since reading from main memory is required. The non-vectorized code is seemingly faster. - - - - - -\bcppcod -Compphys:~ hjensen$ ./vec.x 1000000000 -Time used for norm computation=58.41391100 -Compphys:~ hjensen$ ./novec.x 1000000000 -Time used for norm computation=46.51295300 - -\ecppcod - -We will discuss these issues further in the next slides. 
- -% !split -\subsection{Compiling with and without vectorization using clang} -We can compile and link without vectorization with the clang compiler - - -\bcppcod -clang++ -fno-vectorize -o novec.x vecexample.cpp - -\ecppcod - -and with vectorization - - -\bcppcod -clang++ -O3 -Rpass=loop-vectorize -o vec.x vecexample.cpp - -\ecppcod - -We can also add vectorization analysis, see for example - - -\bcppcod -clang++ -O3 -Rpass-analysis=loop-vectorize -o vec.x vecexample.cpp - -\ecppcod - -or figure out if vectorization was missed - - -\bcppcod -clang++ -O3 -Rpass-missed=loop-vectorize -o vec.x vecexample.cpp - -\ecppcod - - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, criteria} - -Not all loops can be vectorized, as discussed in \href{{https://software.intel.com/en-us/articles/a-guide-to-auto-vectorization-with-intel-c-compilers}}{Intel's guide to vectorization}. - -An important criterion is that the loop counter $n$ is known at the entry of the loop. - - - - -\bcppcod - for (int j = 0; j < n; j++) { - a[j] = cos(j*1.0); - } - -\ecppcod - -The variable $n$ does not need to be known at compile time. However, this variable must stay the same for the entire duration of the loop. It implies that an exit statement inside the loop cannot be data dependent. - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, exit criteria} - -An exit statement should in general be avoided. -If the exit statement contains data-dependent conditions, the loop cannot be vectorized. -The following is an example of a non-vectorizable loop - - - - - -\bcppcod - for (int j = 0; j < n; j++) { - a[j] = cos(j*1.0); - if (a[j] < 0 ) break; - } - -\ecppcod - -Avoid loop termination conditions and opt for a single entry loop variable $n$. The lower and upper bounds have to be kept fixed within the loop.
- -% !split -\subsection{Automatic vectorization and vectorization inhibitors, straight-line code} - -SIMD instructions perform the same type of operations multiple times. -A \textbf{switch} statement leads thus to a non-vectorizable loop since different statements cannot branch. -The following code can however be vectorized since the \textbf{if} statement is implemented as a masked assignment. - - - - - - - - - - -\bcppcod - for (int j = 0; j < n; j++) { - double x = cos(j*1.0); - if (x > 0 ) { - a[j] = x*sin(j*2.0); - } - else { - a[j] = 0.0; - } - } - -\ecppcod - -These operations can be performed for all data elements but only those elements for which the mask evaluates as true are stored. In general, one should avoid branches such as \textbf{switch}, \textbf{goto}, or \textbf{return} statements or \textbf{if} constructs that cannot be treated as masked assignments. - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, nested loops} - -Only the innermost loop of the following example is vectorized - - - - - - -\bcppcod - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - a[i][j] += b[i][j]; - } - } - -\ecppcod - -The exception is if an original outer loop is transformed into an inner loop as the result of compiler optimizations. - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, function calls} - -Calls to programmer-defined functions ruin vectorization. However, calls to intrinsic functions like -$\sin{x}$, $\cos{x}$, $\exp{x}$ etc.\ are allowed since they are normally efficiently vectorized. -The following example is fully vectorizable - - - - -\bcppcod - for (int i = 0; i < n; i++) { - a[i] = log10(i)*cos(i); - } - -\ecppcod - -Similarly, \textbf{inline} functions defined by the programmer allow for vectorization since the function statements are glued into the actual place where the function is called.
- -% !split -\subsection{Automatic vectorization and vectorization inhibitors, data dependencies} - -One has to keep in mind that vectorization changes the order of operations inside a loop. A so-called -read-after-write statement with an explicit flow dependency cannot be vectorized. The following code - - - - - -\bcppcod - double b = 15.; - for (int i = 1; i < n; i++) { - a[i] = a[i-1] + b; - } - -\ecppcod - -is an example of flow dependency and results in wrong numerical results if vectorized. For a scalar operation, the value $a[i-1]$ computed during the previous iteration is loaded into the right-hand side and the results are fine. In vector mode however, with a vector length of four, the old values $a[0]$, $a[1]$, $a[2]$ and $a[3]$, as they were before the loop, will be loaded into the right-hand side and produce wrong results. That is, we have - - - - - -\bcppcod - a[1] = a[0] + b; - a[2] = a[1] + b; - a[3] = a[2] + b; - a[4] = a[3] + b; - -\ecppcod - -and if the two first iterations are executed at the same time by the SIMD instruction, the value of say $a[1]$ could be used by the second iteration before it has been calculated by the first iteration, leading thereby to wrong results. - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, more data dependencies} - -On the other hand, a so-called -write-after-read statement can be vectorized. The following code - - - - - -\bcppcod - double b = 15.; - for (int i = 1; i < n; i++) { - a[i-1] = a[i] + b; - } - -\ecppcod - -is an example of an anti-dependency (write-after-read) that can be vectorized since no iteration with a higher value of $i$ -can complete before an iteration with a lower value of $i$. However, such code leads to problems with parallelization. - -% !split -\subsection{Automatic vectorization and vectorization inhibitors, memory stride} - -For C++ programmers it is also worth keeping in mind that an array notation is preferred to the more compact use of pointers to access array elements.
The compiler can often not tell if it is safe to vectorize the code. - -When dealing with arrays, you should also avoid memory stride, since this slows down considerably vectorization. When you access array element, write for example the inner loop to vectorize using unit stride, that is, access successively the next array element in memory, as shown here - - - - - - -\bcppcod - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - a[i][j] += b[i][j]; - } - } - -\ecppcod - - -% !split -\subsection{Memory management} -The main memory contains the program data -\begin{enumerate} -\item Cache memory contains a copy of the main memory data - -\item Cache is faster but consumes more space and power. It is normally assumed to be much faster than main memory - -\item Registers contain working data only -\begin{itemize} - - \item Modern CPUs perform most or all operations only on data in register - -\end{itemize} - -\noindent -\item Multiple Cache memories contain a copy of the main memory data -\begin{itemize} - - \item Cache items accessed by their address in main memory - - \item L1 cache is the fastest but has the least capacity - - \item L2, L3 provide intermediate performance/size tradeoffs -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -Loads and stores to memory can be as important as floating point operations when we measure performance. 
- -% !split -\subsection{Memory and communication} - -\begin{enumerate} -\item Most communication in a computer is carried out in chunks, blocks of bytes of data that move together - -\item In the memory hierarchy, data moves between memory and cache, and between different levels of cache, in groups called lines -\begin{itemize} - - \item Lines are typically 64-128 bytes, or 8-16 double precision words - - \item Even if you do not use the data, it is moved and occupies space in the cache -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -Many of these performance features are not captured in most programming languages. - -% !split -\subsection{Measuring performance} - -How do we measure performance? What is wrong with this code to time a loop? - - - - - - - - -\bdat - clock_t start, finish; - start = clock(); - for (int j = 0; j < i; j++) { - a[j] = b[j]+b[j]*c[j]; - } - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - -\edat - - -% !split -\subsection{Problems with measuring time} -\begin{enumerate} -\item Timers are not infinitely accurate - -\item All clocks have a granularity, the minimum time that they can measure - -\item The error in a time measurement, even if everything is perfect, may be the size of this granularity (sometimes called a clock tick) - -\item Always know what your clock granularity is - -\item Ensure that your measurement is for a long enough duration (say 100 times the \textbf{tick}) -\end{enumerate} - -\noindent -% !split -\subsection{Problems with cold start} - -What happens when the code is executed? The assumption is that the code is ready to -execute. But -\begin{enumerate} -\item Code may still be on disk, and not even read into memory. 
- -\item Data may be in slow memory rather than fast (which may be wrong or right for what you are measuring) - -\item Multiple tests often necessary to ensure that cold start effects are not present - -\item Special effort often required to ensure data in the intended part of the memory hierarchy. -\end{enumerate} - -\noindent -% !split -\subsection{Problems with smart compilers} - -\begin{enumerate} -\item If the result of the computation is not used, the compiler may eliminate the code - -\item Performance will look impossibly fantastic - -\item Even worse, eliminate some of the code so the performance looks plausible - -\item Ensure that the results are (or may be) used. -\end{enumerate} - -\noindent -% !split -\subsection{Problems with interference} -\begin{enumerate} -\item Other activities are sharing your processor -\begin{itemize} - - \item Operating system, system demons, other users - - \item Some parts of the hardware do not always perform with exactly the same performance - -\end{itemize} - -\noindent -\item Make multiple tests and report - -\item Easy choices include -\begin{itemize} - - \item Average tests represent what users might observe over time -\end{itemize} - -\noindent -\end{enumerate} - -\noindent -% !split -\subsection{Problems with measuring performance} -\begin{enumerate} -\item Accurate, reproducible performance measurement is hard - -\item Think carefully about your experiment: - -\item What is it, precisely, that you want to measure? - -\item How representative is your test to the situation that you are trying to measure? 
-\end{enumerate} - -\noindent -% !split -\subsection{Thomas algorithm for tridiagonal linear algebra equations} - -% --- begin paragraph admon --- -\paragraph{} -\[ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - a_0 & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & a_{m-3} & b_{m-2} & c_{m-2} \\ - & & & a_{m-2} & b_{m-1} - \end{array} \right) -\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -\] -% --- end paragraph admon --- - - - -% !split -\subsection{Thomas algorithm, forward substitution} - -% --- begin paragraph admon --- -\paragraph{} -The first step is to multiply the first row by $a_0/b_0$ and subtract it from the second row. This is known as the forward substitution step. We obtain then -\[ - a_i = 0, -\] - -\[ - b_i = b_i - \frac{a_{i-1}}{b_{i-1}}c_{i-1}, -\] -and -\[ - f_i = f_i - \frac{a_{i-1}}{b_{i-1}}f_{i-1}. -\] -At this point the simplified equation, with only an upper triangular matrix, takes the form -\[ -\left( \begin{array}{ccccc} - b_0 & c_0 & & & \\ - & b_1 & c_1 & & \\ - & & \ddots & & \\ - & & & b_{m-2} & c_{m-2} \\ - & & & & b_{m-1} - \end{array} \right)\left( \begin{array}{c} - x_0 \\ - x_1 \\ - \vdots \\ - x_{m-2} \\ - x_{m-1} - \end{array} \right)=\left( \begin{array}{c} - f_0 \\ - f_1 \\ - \vdots \\ - f_{m-2} \\ - f_{m-1} \\ - \end{array} \right) -\] -% --- end paragraph admon --- - - - -% !split -\subsection{Thomas algorithm, backward substitution} - -% --- begin paragraph admon --- -\paragraph{} -The next step is the backward substitution step. The last row is multiplied by $c_{m-2}/b_{m-1}$ and subtracted from the second to last row, thus eliminating $c_{m-2}$ from the second to last row.
The general backward substitution procedure is -\[ - c_i = 0, -\] -and -\[ - f_{i-1} = f_{i-1} - \frac{c_{i-1}}{b_i}f_i -\] -All that remains to be computed is the solution, which is the very straightforward process of -\[ -x_i = \frac{f_i}{b_i} -\] -% --- end paragraph admon --- - - - -% !split -\subsection{Thomas algorithm and counting of operations (floating point and memory)} - -% --- begin paragraph admon --- -\paragraph{} - -In this specific case we have the following counts of memory and floating-point operations - -\begin{itemize} -\item Memory Reads: $14(N-2)$; - -\item Memory Writes: $4(N-2)$; - -\item Subtractions: $3(N-2)$; - -\item Multiplications: $3(N-2)$; - -\item Divisions: $4(N-2)$. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\bcppcod -// Forward substitution -// Note that we can simplify by precalculating a[i-1]/b[i-1] - for (int i=1; i < n; i++) { - b[i] = b[i] - (a[i-1]*c[i-1])/b[i-1]; - f[i] = f[i] - (a[i-1]*f[i-1])/b[i-1]; - } - x[n-1] = f[n-1] / b[n-1]; - // Backwards substitution - for (int i = n-2; i >= 0; i--) { - f[i] = f[i] - c[i]*f[i+1]/b[i+1]; - x[i] = f[i]/b[i]; - } - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program8.cpp}}{Example: Transpose of a matrix}} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bcppcode -#include <cstdlib> -#include <iostream> -#include <cmath> -#include <iomanip> -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double **A, **B; - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; - for (int i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - } - // Set up values for matrix A - for (int i = 
0; i < n; i++){ - for (int j = 0; j < n; j++) { - A[i][j] = cos(i*1.0)*sin(j*3.0); - } - } - clock_t start, finish; - start = clock(); - // Then compute the transpose - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - B[i][j]= A[j][i]; - } - } - - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for setting up transpose of matrix=" << timeused << endl; - - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - } - delete[] A; - delete[] B; - return 0; -} - - -\ecppcode - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/LecturePrograms/programs/Classes/cpp/program9.cpp}}{Matrix-matrix multiplication}} -This is the matrix-matrix multiplication code with plain C++ memory allocation. It computes at the end the Frobenius norm. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bdat -#include <cstdlib> -#include <iostream> -#include <cmath> -#include <iomanip> -#include "time.h" - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = atoi(argv[1]); - double s = 1.0/sqrt( (double) n); - double **A, **B, **C; - // Start timing - clock_t start, finish; - start = clock(); - // Allocate space for the three matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (int i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - } - // Set up values for matrix A and B and zero matrix C - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - double angle = 2.0*M_PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - } - } - // Then perform the matrix-matrix multiplication - for (int i = 0; i < n; i++){ - for 
(int j = 0; j < n; j++) { - double sum = 0.0; - for (int k = 0; k < n; k++) { - sum += B[i][k]*A[k][j]; - } - C[i][j] = sum; - } - } - // Compute now the Frobenius norm - double Fsum = 0.0; - for (int i = 0; i < n; i++){ - for (int j = 0; j < n; j++) { - Fsum += C[i][j]*C[i][j]; - } - } - Fsum = sqrt(Fsum); - finish = clock(); - double timeused = (double) (finish - start)/(CLOCKS_PER_SEC ); - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for matrix-matrix multiplication=" << timeused << endl; - cout << " Frobenius norm = " << Fsum << endl; - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - } - delete[] A; - delete[] B; - delete[] C; - return 0; -} - -\edat - - -% !split -\subsection{How do we define speedup? Simplest form} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Speedup measures the ratio of performance between two objects - -\item Versions of same code, with different number of processors - -\item Serial and vector versions - -\item Try different programing languages, c++ and Fortran - -\item Two algorithms computing the \textbf{same} result -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{How do we define speedup? Correct baseline} - -% --- begin paragraph admon --- -\paragraph{} -The key is choosing the correct baseline for comparison -\begin{itemize} -\item For our serial vs.~vectorization examples, using compiler-provided vectorization, the baseline is simple; the same code, with vectorization turned off -\begin{itemize} - - \item For parallel applications, this is much harder: -\begin{itemize} - - \item Choice of algorithm, decomposition, performance of baseline case etc. 
-\end{itemize} - -\noindent -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Parallel speedup} - -% --- begin paragraph admon --- -\paragraph{} -For parallel applications, speedup is typically defined as -\begin{itemize} -\item Speedup $=T_1/T_p$ -\end{itemize} - -\noindent -Here $T_1$ is the time on one processor and $T_p$ is the time using $p$ processors. -\begin{itemize} - \item Can the speedup become larger than $p$? That means using $p$ processors is more than $p$ times faster than using one processor. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Speedup and memory} - -% --- begin paragraph admon --- -\paragraph{} -The speedup on $p$ processors can -be greater than $p$ if memory usage is optimal! -Consider the case of a memorybound computation with $M$ words of memory -\begin{itemize} - \item If $M/p$ fits into cache while $M$ does not, the time to access memory will be different in the two cases: - - \item $T_1$ uses the main memory bandwidth - - \item $T_p$ uses the appropriate cache bandwidth -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Upper bounds on speedup} - -% --- begin paragraph admon --- -\paragraph{} -Assume that almost all parts of a code are perfectly -parallelizable (fraction $f$). The remainder, -fraction $(1-f)$ cannot be parallelized at all. - -That is, there is work that takes time $W$ on one process; a fraction $f$ of that work will take -time $Wf/p$ on $p$ processors. -\begin{itemize} -\item What is the maximum possible speedup as a function of $f$? 
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Amdahl's law} - -% --- begin paragraph admon --- -\paragraph{} -On one processor we have -\[ -T_1 = (1-f)W + fW = W -\] -On $p$ processors we have -\[ -T_p = (1-f)W + \frac{fW}{p}, -\] -resulting in a speedup of -\[ -\frac{T_1}{T_p} = \frac{W}{(1-f)W+fW/p} -\] - -As $p$ goes to infinity, $fW/p$ goes to zero, and the maximum speedup is -\[ -\frac{1}{1-f}, -\] -meaning that -if $f = 0.99$ (all but $1\%$ parallelizable), the maximum speedup -is $1/(1-0.99)=100$! -% --- end paragraph admon --- - - - -% !split -\subsection{How much is parallelizable} - -% --- begin paragraph admon --- -\paragraph{} -If any non-parallel code slips into the -application, the parallel -performance is limited. - -In many simulations, however, the fraction of non-parallelizable work -is $10^{-6}$ or less due to large arrays or objects that are perfectly parallelizable. -% --- end paragraph admon --- - - - -% !split -\subsection{Today's situation of parallel computing} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Distributed memory is the dominant hardware configuration. There is a large diversity in these machines, from MPP (massively parallel processing) systems to clusters of off-the-shelf PCs, which are very cost-effective. - -\item Message-passing is a mature programming paradigm and widely accepted. It often provides an efficient match to the hardware. It is primarily used for the distributed memory systems, but can also be used on shared memory systems. - -\item Modern nodes have nowadays several cores, which makes it interesting to use both shared memory (the given node) and distributed memory (several nodes with communication). This leads often to codes which use both MPI and OpenMP. -\end{itemize} - -\noindent -Our lectures will focus on both MPI and OpenMP.
-% --- end paragraph admon --- - - - -% !split -\subsection{Overhead present in parallel computing} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{Uneven load balance}: not all the processors can perform useful work at all time. - -\item \textbf{Overhead of synchronization} - -\item \textbf{Overhead of communication} - -\item \textbf{Extra computation due to parallelization} -\end{itemize} - -\noindent -Due to the above overhead and that certain parts of a sequential -algorithm cannot be parallelized we may not achieve an optimal parallelization. -% --- end paragraph admon --- - - - -% !split -\subsection{Parallelizing a sequential algorithm} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item Identify the part(s) of a sequential algorithm that can be executed in parallel. This is the difficult part, - -\item Distribute the global work and data among $P$ processors. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Strategies} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Develop codes locally, run with some few processes and test your codes. Do benchmarking, timing and so forth on local nodes, for example your laptop or PC. - -\item When you are convinced that your codes run correctly, you can start your production runs on available supercomputers. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{How do I run MPI on a PC/Laptop? MPI} - -% --- begin paragraph admon --- -\paragraph{} -To install MPI is rather easy on hardware running unix/linux as operating systems, follow simply the instructions from the \href{{https://www.open-mpi.org/}}{OpenMPI website}. See also subsequent slides. 
-When you have made sure you have installed MPI on your PC/laptop, -\begin{itemize} -\item Compile with mpicxx/mpic++ or mpif90 -\end{itemize} - -\noindent - - - - - -\bcppcod - # Compile and link - mpic++ -O3 -o nameofprog.x nameofprog.cpp - # run code with for example 8 processes using mpirun/mpiexec - mpiexec -n 8 ./nameofprog.x - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Can I do it on my own PC/laptop? OpenMP installation} - -% --- begin paragraph admon --- -\paragraph{} -If you wish to install MPI and OpenMP -on your laptop/PC, we recommend the following: - -\begin{itemize} -\item For OpenMP, the compile option \textbf{-fopenmp} is included automatically in recent versions of the C++ and Fortran compilers. For users of different Linux distributions, simply use the available C++ or Fortran compilers and add the above compiler instructions, see also code examples below. - -\item For OS X users however, install \textbf{libomp} -\end{itemize} - -\noindent - - -\bcppcod - brew install libomp - -\ecppcod - -and compile and link as - - -\bcppcod -c++ -o <name executable> <name program.cpp> -lomp - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Installing MPI} - -% --- begin paragraph admon --- -\paragraph{} -For linux/ubuntu users, you need to install two packages (alternatively use the synaptic package manager) - - - -\bcppcod - sudo apt-get install libopenmpi-dev - sudo apt-get install openmpi-bin - -\ecppcod - -For OS X users, install brew (after having installed xcode and gcc, needed for the -gfortran compiler of openmpi) and then install with brew - - -\bcppcod - brew install openmpi - -\ecppcod - -When running an executable (code.x), run as - - -\bcppcod - mpirun -n 10 ./code.x - -\ecppcod - -where we indicate that we want the number of processes to be 10.
-% --- end paragraph admon --- - - - -% !split -\subsection{Installing MPI and using Qt} - -% --- begin paragraph admon --- -\paragraph{} -With openmpi installed, when using Qt, add to your .pro file the instructions \href{{http://dragly.org/2012/03/14/developing-mpi-applications-in-qt-creator/}}{here} - -You may need to tell Qt where openmpi is stored. -% --- end paragraph admon --- - - - -% !split -\subsection{What is Message Passing Interface (MPI)?} - -% --- begin paragraph admon --- -\paragraph{} - -\textbf{MPI} is a library, not a language. It specifies the names, calling sequences and results of functions -or subroutines to be called from C/C++ or Fortran programs, and the classes and methods that make up the MPI C++ -library. The programs that users write in Fortran, C or C++ are compiled with ordinary compilers and linked -with the MPI library. - -MPI programs should be able to run -on all possible machines and with all MPI implementations without change. - -An MPI computation is a collection of processes communicating with messages. -% --- end paragraph admon --- - - -% !split -\subsection{Going Parallel with MPI} - -% --- begin paragraph admon --- -\paragraph{} -\textbf{Task parallelism}: the work of a global problem can be divided -into a number of independent tasks, which rarely need to synchronize. -Monte Carlo simulations or numerical integration are examples of this. - -MPI is a message-passing library where all the routines -have corresponding C/C++-binding - - -\bcppcod - MPI_Command_name - -\ecppcod - -and Fortran-binding (routine names are in uppercase, but can also be in lower case) - - -\bforcod - MPI_COMMAND_NAME - -\eforcod -% --- end paragraph admon --- - - - -% !split -\subsection{MPI is a library} - -% --- begin paragraph admon --- -\paragraph{} -MPI is a library specification for the message passing interface, -proposed as a standard.
- -\begin{itemize} -\item independent of hardware; - -\item not a language or compiler specification; - -\item not a specific implementation or product. -\end{itemize} - -\noindent -A message passing standard for portability and ease-of-use. -Designed for high performance. - -Insert communication and synchronization functions where necessary. -% --- end paragraph admon --- - - - -% !split -\subsection{Bindings to MPI routines} - -% --- begin paragraph admon --- -\paragraph{} - -MPI is a message-passing library where all the routines -have corresponding C/C++-binding - - -\bcppcod - MPI_Command_name - -\ecppcod - -and Fortran-binding (routine names are in uppercase, but can also be in lower case) - - -\bforcod - MPI_COMMAND_NAME - -\eforcod - -The discussion in these slides focuses on the C++ binding. -% --- end paragraph admon --- - - - -% !split -\subsection{Communicator} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item A group of MPI processes with a name (context). - -\item Any process is identified by its rank. The rank is only meaningful within a particular communicator. - -\item By default the communicator contains all the MPI processes. -\end{itemize} - -\noindent - - -\bcppcod - MPI_COMM_WORLD - -\ecppcod - -\begin{itemize} -\item Mechanism to identify subset of processes. - -\item Promotes modular design of parallel libraries. -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Some of the most important MPI functions} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item $MPI\_Init$ - initiate an MPI computation - -\item $MPI\_Finalize$ - terminate the MPI computation and clean up - -\item $MPI\_Comm\_size$ - how many processes participate in a given MPI communicator? - -\item $MPI\_Comm\_rank$ - which one am I? (A number between 0 and size-1.) 
- -\item $MPI\_Send$ - send a message to a particular process within an MPI communicator - -\item $MPI\_Recv$ - receive a message from a particular process within an MPI communicator - -\item $MPI\_Reduce$ or $MPI\_Allreduce$ - combine (reduce) data from all processes within an MPI communicator -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program2.cpp}}{The first MPI C/C++ program}} - -% --- begin paragraph admon --- -\paragraph{} - -Let every process write "Hello world" (oh not this program again!!) on the standard output. - - - - - - - - - - - - - - - -\bcppcod -using namespace std; -#include <mpi.h> -#include <iostream> -int main (int nargs, char* args[]) -{ -int numprocs, my_rank; -// MPI initializations -MPI_Init (&nargs, &args); -MPI_Comm_size (MPI_COMM_WORLD, &numprocs); -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); -cout << "Hello world, I have rank " << my_rank << " out of " - << numprocs << endl; -// End MPI -MPI_Finalize (); - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{The Fortran program} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - -\bforcod -PROGRAM hello -INCLUDE "mpif.h" -INTEGER:: size, my_rank, ierr - -CALL MPI_INIT(ierr) -CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) -CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr) -WRITE(*,*)"Hello world, I've rank ",my_rank," out of ",size -CALL MPI_FINALIZE(ierr) - -END PROGRAM hello - -\eforcod -% --- end paragraph admon --- - - - -% !split -\subsection{Note 1} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item The output to screen is not ordered since all processes are trying to write to screen simultaneously. - -\item It is the operating system which opts for an ordering. - -\item If we wish to have an organized output, starting from the first process, we may rewrite our program as in the next example.
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program3.cpp}}{Ordered output with MPI\_Barrier}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\bcppcod -int main (int nargs, char* args[]) -{ - int numprocs, my_rank, i; - MPI_Init (&nargs, &args); - MPI_Comm_size (MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); - for (i = 0; i < numprocs; i++) { - MPI_Barrier (MPI_COMM_WORLD); - if (i == my_rank) { - cout << "Hello world, I have rank " << my_rank << - " out of " << numprocs << endl;} - } - MPI_Finalize (); - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Note 2} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item Here we have used the $MPI\_Barrier$ function to ensure that every process has completed its set of instructions in a particular order. - -\item A barrier is a special collective operation that does not allow the processes to continue until all processes in the communicator (here $MPI\_COMM\_WORLD$) have called $MPI\_Barrier$. - -\item The barriers make sure that all processes have reached the same point in the code. Many of the collective operations like $MPI\_ALLREDUCE$ to be discussed later, have the same property; that is, no process can exit the operation until all processes have started. -\end{itemize} - -\noindent -However, this is slightly more time-consuming since the processes synchronize between themselves as many times as there -are processes. In the next Hello world example we use the send and receive functions in order to have a synchronized -action.
-% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program4.cpp}}{Ordered output}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - -\bcppcod -..... -int numprocs, my_rank, flag; -MPI_Status status; -MPI_Init (&nargs, &args); -MPI_Comm_size (MPI_COMM_WORLD, &numprocs); -MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); -if (my_rank > 0) -MPI_Recv (&flag, 1, MPI_INT, my_rank-1, 100, - MPI_COMM_WORLD, &status); -cout << "Hello world, I have rank " << my_rank << " out of " -<< numprocs << endl; -if (my_rank < numprocs-1) -MPI_Send (&my_rank, 1, MPI_INT, my_rank+1, - 100, MPI_COMM_WORLD); -MPI_Finalize (); - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Note 3} - -% --- begin paragraph admon --- -\paragraph{} - -The basic sending of messages is given by the function $MPI\_SEND$, which in C/C++ -is defined as - - - - -\bcppcod -int MPI_Send(void *buf, int count, - MPI_Datatype datatype, - int dest, int tag, MPI_Comm comm) - -\ecppcod - -This single command allows the passing of any kind of variable, even a large array, to any group of tasks. -The variable \textbf{buf} is the variable we wish to send while \textbf{count} -is the number of variables we are passing. If we are passing only a single value, this should be 1. - -If we transfer an array, it is the overall size of the array. -For example, if we want to send a 10 by 10 array, count would be $10\times 10=100$ -since we are actually passing 100 values. -% --- end paragraph admon --- - - - -% !split -\subsection{Note 4} - -% --- begin paragraph admon --- -\paragraph{} - -Once you have sent a message, you must receive it on another task. The function $MPI\_RECV$ -is similar to the send call.
- - - -\bcppcod -int MPI_Recv( void *buf, int count, MPI_Datatype datatype, - int source, - int tag, MPI_Comm comm, MPI_Status *status ) - -\ecppcod - - -The arguments that are different from those in MPI\_SEND are -\textbf{buf} which is the name of the variable where you will be storing the received data, -\textbf{source} which replaces the destination in the send command. This is the rank of the sender. - -Finally, we have used the argument \textbf{status} of type $MPI\_Status$, -where one can check if the receive was completed. - -The output of this code is the same as the previous example, but now -process 0 sends a message to process 1, which forwards it further -to process 2, and so forth. -% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp}}{Numerical integration in parallel}} - -% --- begin paragraph admon --- -\paragraph{Integrating $\pi$.} - -\begin{itemize} -\item The code example computes $\pi$ using the trapezoidal rule. - -\item The trapezoidal rule reads -\end{itemize} - -\noindent -\[ - I=\int_a^bf(x) dx\approx h\left(f(a)/2 + f(a+h) +f(a+2h)+\dots +f(b-h)+ f(b)/2\right). -\] -Click \href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/gh-pages/doc/Programs/LecturePrograms/programs/MPI/chapter07/program6.cpp}}{on this link} for the full program.
-% --- end paragraph admon --- - - - -% !split -\subsection{Dissection of trapezoidal rule with $MPI\_reduce$} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\bcppcod -// Trapezoidal rule and numerical integration usign MPI -using namespace std; -#include -#include - -// Here we define various functions called by the main program - -double int_function(double ); -double trapezoidal_rule(double , double , int , double (*)(double)); - -// Main function begins here -int main (int nargs, char* args[]) -{ - int n, local_n, numprocs, my_rank; - double a, b, h, local_a, local_b, total_sum, local_sum; - double time_start, time_end, total_time; - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - -\bcppcod - // MPI initializations - MPI_Init (&nargs, &args); - MPI_Comm_size (MPI_COMM_WORLD, &numprocs); - MPI_Comm_rank (MPI_COMM_WORLD, &my_rank); - time_start = MPI_Wtime(); - // Fixed values for a, b and n - a = 0.0 ; b = 1.0; n = 1000; - h = (b-a)/n; // h is the same for all processes - local_n = n/numprocs; - // make sure n > numprocs, else integer division gives zero - // Length of each process' interval of - // integration = local_n*h. 
- local_a = a + my_rank*local_n*h; - local_b = local_a + local_n*h; - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Integrating with \textbf{MPI}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\bcppcod - total_sum = 0.0; - local_sum = trapezoidal_rule(local_a, local_b, local_n, - &int_function); - MPI_Reduce(&local_sum, &total_sum, 1, MPI_DOUBLE, - MPI_SUM, 0, MPI_COMM_WORLD); - time_end = MPI_Wtime(); - total_time = time_end-time_start; - if ( my_rank == 0) { - cout << "Trapezoidal rule = " << total_sum << endl; - cout << "Time = " << total_time - << " on number of processors: " << numprocs << endl; - } - // End MPI - MPI_Finalize (); - return 0; -} // end of main program - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{How do I use $MPI\_reduce$?} - -% --- begin paragraph admon --- -\paragraph{} - -Here we have used - - - -\bcppcod -MPI_reduce( void *senddata, void* resultdata, int count, - MPI_Datatype datatype, MPI_Op, int root, MPI_Comm comm) - -\ecppcod - - -The two variables $senddata$ and $resultdata$ are obvious, besides the fact that one sends the address -of the variable or the first element of an array. If they are arrays they need to have the same size. -The variable $count$ represents the total dimensionality, 1 in case of just one variable, -while $MPI\_Datatype$ -defines the type of variable which is sent and received. - -The new feature is $MPI\_Op$. It defines the type -of operation we want to do. -% --- end paragraph admon --- - - - -% !split -\subsection{More on $MPI\_Reduce$} - -% --- begin paragraph admon --- -\paragraph{} -In our case, since we are summing -the rectangle contributions from every process we define $MPI\_Op = MPI\_SUM$. -If we have an array or matrix we can search for the largest og smallest element by sending either $MPI\_MAX$ or -$MPI\_MIN$. 
If we want the location as well (which array element) we simply transfer -$MPI\_MAXLOC$ or $MPI\_MINOC$. If we want the product we write $MPI\_PROD$. - -$MPI\_Allreduce$ is defined as - - - -\bcppcod -MPI_Allreduce( void *senddata, void* resultdata, int count, - MPI_Datatype datatype, MPI_Op, MPI_Comm comm) - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - -We use $MPI\_reduce$ to collect data from each process. Note also the use of the function -$MPI\_Wtime$. - - - - - - - - -\bcppcod -// this function defines the function to integrate -double int_function(double x) -{ - double value = 4./(1.+x*x); - return value; -} // end of function to evaluate - - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Dissection of trapezoidal rule} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - -\bcppcod -// this function defines the trapezoidal rule -double trapezoidal_rule(double a, double b, int n, - double (*func)(double)) -{ - double trapez_sum; - double fa, fb, x, step; - int j; - step=(b-a)/((double) n); - fa=(*func)(a)/2. ; - fb=(*func)(b)/2. 
; - trapez_sum=0.; - for (j=1; j <= n-1; j++){ - x=j*step+a; - trapez_sum+=(*func)(x); - } - trapez_sum=(trapez_sum+fb+fa)*step; - return trapez_sum; -} // end trapezoidal_rule - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysics2/blob/master/doc/Programs/ParallelizationMPI/MPIvmcqdot.cpp}}{The quantum dot program for two electrons}} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bcppcod -// Variational Monte Carlo for atoms with importance sampling, slater det -// Test case for 2-electron quantum dot, no classes using Mersenne-Twister RNG -#include "mpi.h" -#include -#include -#include -#include -#include -#include -#include "vectormatrixclass.h" - -using namespace std; -// output file as global variable -ofstream ofile; -// the step length and its squared inverse for the second derivative -// Here we define global variables used in various functions -// These can be changed by using classes -int Dimension = 2; -int NumberParticles = 2; // we fix also the number of electrons to be 2 - 
-// declaration of functions - -// The Mc sampling for the variational Monte Carlo -void MonteCarloSampling(int, double &, double &, Vector &); - -// The variational wave function -double WaveFunction(Matrix &, Vector &); - -// The local energy -double LocalEnergy(Matrix &, Vector &); - -// The quantum force -void QuantumForce(Matrix &, Matrix &, Vector &); - - -// inline function for single-particle wave function -inline double SPwavefunction(double r, double alpha) { - return exp(-alpha*r*0.5); -} - -// inline function for derivative of single-particle wave function -inline double DerivativeSPwavefunction(double r, double alpha) { - return -r*alpha; -} - -// function for absolute value of relative distance -double RelativeDistance(Matrix &r, int i, int j) { - double r_ij = 0; - for (int k = 0; k < Dimension; k++) { - r_ij += (r(i,k)-r(j,k))*(r(i,k)-r(j,k)); - } - return sqrt(r_ij); -} - -// inline function for derivative of Jastrow factor -inline double JastrowDerivative(Matrix &r, double beta, int i, int j, int k){ - return (r(i,k)-r(j,k))/(RelativeDistance(r, i, j)*pow(1.0+beta*RelativeDistance(r, i, j),2)); -} - -// function for square of position of single particle -double singleparticle_pos2(Matrix &r, int i) { - double r_single_particle = 0; - for (int j = 0; j < Dimension; j++) { - r_single_particle += r(i,j)*r(i,j); - } - return r_single_particle; -} - -void lnsrch(int n, Vector &xold, double fold, Vector &g, Vector &p, Vector &x, - double *f, double stpmax, int *check, double (*func)(Vector &p)); - -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret, - double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g)); - -static double sqrarg; -#define SQR(a) ((sqrarg=(a)) == 0.0 ? 
0.0 : sqrarg*sqrarg) - - -static double maxarg1,maxarg2; -#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\ - (maxarg1) : (maxarg2)) - - -// Begin of main program - -int main(int argc, char* argv[]) -{ - - // MPI initializations - int NumberProcesses, MyRank, NumberMCsamples; - MPI_Init (&argc, &argv); - MPI_Comm_size (MPI_COMM_WORLD, &NumberProcesses); - MPI_Comm_rank (MPI_COMM_WORLD, &MyRank); - double StartTime = MPI_Wtime(); - if (MyRank == 0 && argc <= 1) { - cout << "Bad Usage: " << argv[0] << - " Read also output file on same line and number of Monte Carlo cycles" << endl; - } - // Read filename and number of Monte Carlo cycles from the command line - if (MyRank == 0 && argc > 2) { - string filename = argv[1]; // first command line argument after name of program - NumberMCsamples = atoi(argv[2]); - string fileout = filename; - string argument = to_string(NumberMCsamples); - // Final filename as filename+NumberMCsamples - fileout.append(argument); - ofile.open(fileout); - } - // broadcast the number of Monte Carlo samples - MPI_Bcast (&NumberMCsamples, 1, MPI_INT, 0, MPI_COMM_WORLD); - // Two variational parameters only - Vector VariationalParameters(2); - int TotalNumberMCsamples = NumberMCsamples*NumberProcesses; - // Loop over variational parameters - for (double alpha = 0.5; alpha <= 1.5; alpha +=0.1){ - for (double beta = 0.1; beta <= 0.5; beta +=0.05){ - VariationalParameters(0) = alpha; // value of alpha - VariationalParameters(1) = beta; // value of beta - // Do the mc sampling and accumulate data with MPI_Reduce - double TotalEnergy, TotalEnergySquared, LocalProcessEnergy, LocalProcessEnergy2; - LocalProcessEnergy = LocalProcessEnergy2 = 0.0; - MonteCarloSampling(NumberMCsamples, LocalProcessEnergy, LocalProcessEnergy2, VariationalParameters); - // Collect data in total averages - MPI_Reduce(&LocalProcessEnergy, &TotalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&LocalProcessEnergy2, &TotalEnergySquared, 1, 
MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - // Print out results in case of Master node, set to MyRank = 0 - if ( MyRank == 0) { - double Energy = TotalEnergy/( (double)NumberProcesses); - double Variance = TotalEnergySquared/( (double)NumberProcesses)-Energy*Energy; - double StandardDeviation = sqrt(Variance/((double)TotalNumberMCsamples)); // over optimistic error - ofile << setiosflags(ios::showpoint | ios::uppercase); - ofile << setw(15) << setprecision(8) << VariationalParameters(0); - ofile << setw(15) << setprecision(8) << VariationalParameters(1); - ofile << setw(15) << setprecision(8) << Energy; - ofile << setw(15) << setprecision(8) << Variance; - ofile << setw(15) << setprecision(8) << StandardDeviation << endl; - } - } - } - double EndTime = MPI_Wtime(); - double TotalTime = EndTime-StartTime; - if ( MyRank == 0 ) cout << "Time = " << TotalTime << " on number of processors: " << NumberProcesses << endl; - if (MyRank == 0) ofile.close(); // close output file - // End MPI - MPI_Finalize (); - return 0; -} // end of main function - - -// Monte Carlo sampling with the Metropolis algorithm - -void MonteCarloSampling(int NumberMCsamples, double &cumulative_e, double &cumulative_e2, Vector &VariationalParameters) -{ - - // Initialize the seed and call the Mersienne algo - std::random_device rd; - std::mt19937_64 gen(rd()); - // Set up the uniform distribution for x \in [[0, 1] - std::uniform_real_distribution UniformNumberGenerator(0.0,1.0); - std::normal_distribution Normaldistribution(0.0,1.0); - // diffusion constant from Schroedinger equation - double D = 0.5; - double timestep = 0.05; // we fix the time step for the gaussian deviate - // allocate matrices which contain the position of the particles - Matrix OldPosition( NumberParticles, Dimension), NewPosition( NumberParticles, Dimension); - Matrix OldQuantumForce(NumberParticles, Dimension), NewQuantumForce(NumberParticles, Dimension); - double Energy = 0.0; double EnergySquared = 0.0; double DeltaE = 
0.0; - // initial trial positions - for (int i = 0; i < NumberParticles; i++) { - for (int j = 0; j < Dimension; j++) { - OldPosition(i,j) = Normaldistribution(gen)*sqrt(timestep); - } - } - double OldWaveFunction = WaveFunction(OldPosition, VariationalParameters); - QuantumForce(OldPosition, OldQuantumForce, VariationalParameters); - // loop over monte carlo cycles - for (int cycles = 1; cycles <= NumberMCsamples; cycles++){ - // new position - for (int i = 0; i < NumberParticles; i++) { - for (int j = 0; j < Dimension; j++) { - // gaussian deviate to compute new positions using a given timestep - NewPosition(i,j) = OldPosition(i,j) + Normaldistribution(gen)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D; - // NewPosition(i,j) = OldPosition(i,j) + gaussian_deviate(&idum)*sqrt(timestep)+OldQuantumForce(i,j)*timestep*D; - } - // for the other particles we need to set the position to the old position since - // we move only one particle at the time - for (int k = 0; k < NumberParticles; k++) { - if ( k != i) { - for (int j = 0; j < Dimension; j++) { - NewPosition(k,j) = OldPosition(k,j); - } - } - } - double NewWaveFunction = WaveFunction(NewPosition, VariationalParameters); - QuantumForce(NewPosition, NewQuantumForce, VariationalParameters); - // we compute the log of the ratio of the greens functions to be used in the - // Metropolis-Hastings algorithm - double GreensFunction = 0.0; - for (int j = 0; j < Dimension; j++) { - GreensFunction += 0.5*(OldQuantumForce(i,j)+NewQuantumForce(i,j))* - (D*timestep*0.5*(OldQuantumForce(i,j)-NewQuantumForce(i,j))-NewPosition(i,j)+OldPosition(i,j)); - } - GreensFunction = exp(GreensFunction); - // The Metropolis test is performed by moving one particle at the time - if(UniformNumberGenerator(gen) <= GreensFunction*NewWaveFunction*NewWaveFunction/OldWaveFunction/OldWaveFunction ) { - for (int j = 0; j < Dimension; j++) { - OldPosition(i,j) = NewPosition(i,j); - OldQuantumForce(i,j) = NewQuantumForce(i,j); - } - OldWaveFunction = 
NewWaveFunction; - } - } // end of loop over particles - // compute local energy - double DeltaE = LocalEnergy(OldPosition, VariationalParameters); - // update energies - Energy += DeltaE; - EnergySquared += DeltaE*DeltaE; - } // end of loop over MC trials - // update the energy average and its squared - cumulative_e = Energy/NumberMCsamples; - cumulative_e2 = EnergySquared/NumberMCsamples; -} // end MonteCarloSampling function - - -// Function to compute the squared wave function and the quantum force - -double WaveFunction(Matrix &r, Vector &VariationalParameters) -{ - double wf = 0.0; - // full Slater determinant for two particles, replace with Slater det for more particles - wf = SPwavefunction(singleparticle_pos2(r, 0), VariationalParameters(0))*SPwavefunction(singleparticle_pos2(r, 1),VariationalParameters(0)); - // contribution from Jastrow factor - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - wf *= exp(RelativeDistance(r, i, j)/((1.0+VariationalParameters(1)*RelativeDistance(r, i, j)))); - } - } - return wf; -} - -// Function to calculate the local energy without numerical derivation of kinetic energy - -double LocalEnergy(Matrix &r, Vector &VariationalParameters) -{ - - // compute the kinetic and potential energy from the single-particle part - // for a many-electron system this has to be replaced by a Slater determinant - // The absolute value of the interparticle length - Matrix length( NumberParticles, NumberParticles); - // Set up interparticle distance - for (int i = 0; i < NumberParticles-1; i++) { - for(int j = i+1; j < NumberParticles; j++){ - length(i,j) = RelativeDistance(r, i, j); - length(j,i) = length(i,j); - } - } - double KineticEnergy = 0.0; - // Set up kinetic energy from Slater and Jastrow terms - for (int i = 0; i < NumberParticles; i++) { - for (int k = 0; k < Dimension; k++) { - double sum1 = 0.0; - for(int j = 0; j < NumberParticles; j++){ - if ( j != i) { - sum1 += 
JastrowDerivative(r, VariationalParameters(1), i, j, k); - } - } - KineticEnergy += (sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0)))*(sum1+DerivativeSPwavefunction(r(i,k),VariationalParameters(0))); - } - } - KineticEnergy += -2*VariationalParameters(0)*NumberParticles; - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - KineticEnergy += 2.0/(pow(1.0 + VariationalParameters(1)*length(i,j),2))*(1.0/length(i,j)-2*VariationalParameters(1)/(1+VariationalParameters(1)*length(i,j)) ); - } - } - KineticEnergy *= -0.5; - // Set up potential energy, external potential + eventual electron-electron repulsion - double PotentialEnergy = 0; - for (int i = 0; i < NumberParticles; i++) { - double DistanceSquared = singleparticle_pos2(r, i); - PotentialEnergy += 0.5*DistanceSquared; // sp energy HO part, note it has the oscillator frequency set to 1! - } - // Add the electron-electron repulsion - for (int i = 0; i < NumberParticles-1; i++) { - for (int j = i+1; j < NumberParticles; j++) { - PotentialEnergy += 1.0/length(i,j); - } - } - double LocalE = KineticEnergy+PotentialEnergy; - return LocalE; -} - -// Compute the analytical expression for the quantum force -void QuantumForce(Matrix &r, Matrix &qforce, Vector &VariationalParameters) -{ - // compute the first derivative - for (int i = 0; i < NumberParticles; i++) { - for (int k = 0; k < Dimension; k++) { - // single-particle part, replace with Slater det for larger systems - double sppart = DerivativeSPwavefunction(r(i,k),VariationalParameters(0)); - // Jastrow factor contribution - double Jsum = 0.0; - for (int j = 0; j < NumberParticles; j++) { - if ( j != i) { - Jsum += JastrowDerivative(r, VariationalParameters(1), i, j, k); - } - } - qforce(i,k) = 2.0*(Jsum+sppart); - } - } -} // end of QuantumForce function - - -#define ITMAX 200 -#define EPS 3.0e-8 -#define TOLX (4*EPS) -#define STPMX 100.0 - -void dfpmin(Vector &p, int n, double gtol, int *iter, double *fret, 
- double(*func)(Vector &p), void (*dfunc)(Vector &p, Vector &g)) -{ - - int check,i,its,j; - double den,fac,fad,fae,fp,stpmax,sum=0.0,sumdg,sumxi,temp,test; - Vector dg(n), g(n), hdg(n), pnew(n), xi(n); - Matrix hessian(n,n); - - fp=(*func)(p); - (*dfunc)(p,g); - for (i = 0;i < n;i++) { - for (j = 0; j< n;j++) hessian(i,j)=0.0; - hessian(i,i)=1.0; - xi(i) = -g(i); - sum += p(i)*p(i); - } - stpmax=STPMX*FMAX(sqrt(sum),(double)n); - for (its=1;its<=ITMAX;its++) { - *iter=its; - lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check,func); - fp = *fret; - for (i = 0; i< n;i++) { - xi(i)=pnew(i)-p(i); - p(i)=pnew(i); - } - test=0.0; - for (i = 0;i< n;i++) { - temp=fabs(xi(i))/FMAX(fabs(p(i)),1.0); - if (temp > test) test=temp; - } - if (test < TOLX) { - return; - } - for (i=0;i test) test=temp; - } - if (test < gtol) { - return; - } - for (i=0;i EPS*sumdg*sumxi) { - fac=1.0/fac; - fad=1.0/fae; - for (i=0;i stpmax) - for (i=0;i test) test=temp; - } - alamin=TOLX/test; - alam=1.0; - for (;;) { - for (i=0;i0.5*alam) - tmplam=0.5*alam; - } - } - alam2=alam; - f2 = *f; - fold2=fold; - alam=FMAX(tmplam,0.1*alam); - } -} -#undef ALF -#undef TOLX - - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{What is OpenMP} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item OpenMP provides high-level thread programming - -\item Multiple cooperating threads are allowed to run simultaneously - -\item Threads are created and destroyed dynamically in a fork-join pattern -\begin{itemize} - - \item An OpenMP program consists of a number of parallel regions - - \item Between two parallel regions there is only one master thread - - \item In the beginning of a parallel region, a team of new threads is spawned - -\end{itemize} - -\noindent - \item The newly spawned threads work simultaneously with the master thread - - \item At the end of a parallel region, the new threads are destroyed -\end{itemize} - -\noindent -Many good tutorials online and excellent textbook 
-\begin{enumerate} -\item \href{{http://mitpress.mit.edu/books/using-openmp}}{Using OpenMP, by B. Chapman, G. Jost, and A. van der Pas} - -\item Many tutorials online like \href{{http://www.openmp.org}}{OpenMP official site} -\end{enumerate} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Getting started, things to remember} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} - \item Remember the header file -\end{itemize} - -\noindent - - -\bcppcod -#include - -\ecppcod - -\begin{itemize} - \item Insert compiler directives in C++ syntax as -\end{itemize} - -\noindent - - -\bcppcod -#pragma omp... - -\ecppcod - -\begin{itemize} -\item Compile with for example \emph{c++ -fopenmp code.cpp} - -\item Execute -\begin{itemize} - - \item Remember to assign the environment variable \textbf{OMP NUM THREADS} - - \item It specifies the total number of threads inside a parallel region, if not otherwise overwritten -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{OpenMP syntax} -\begin{itemize} -\item Mostly directives -\end{itemize} - -\noindent - - -\bcppcod -#pragma omp construct [ clause ...] 
- -\ecppcod - -\begin{itemize} - \item Some functions and types -\end{itemize} - -\noindent - - -\bcppcod -#include - -\ecppcod - -\begin{itemize} - \item Most apply to a block of code - - \item Specifically, a \textbf{structured block} - - \item Enter at top, exit at bottom only, exit(), abort() permitted -\end{itemize} - -\noindent -% !split -\subsection{Different OpenMP styles of parallelism} -OpenMP supports several different ways to specify thread parallelism - -\begin{itemize} -\item General parallel regions: All threads execute the code, roughly as if you made a routine of that region and created a thread to run that code - -\item Parallel loops: Special case for loops, simplifies data parallel code - -\item Task parallelism, new in OpenMP 3 - -\item Several ways to manage thread coordination, including Master regions and Locks - -\item Memory model for shared data -\end{itemize} - -\noindent -% !split -\subsection{General code structure} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - - - -\bcppcod -#include -main () -{ -int var1, var2, var3; -/* serial code */ -/* ... */ -/* start of a parallel region */ -#pragma omp parallel private(var1, var2) shared(var3) -{ -/* ... */ -} -/* more serial code */ -/* ... */ -/* another parallel region */ -#pragma omp parallel -{ -/* ... */ -} -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Parallel region} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item A parallel region is a block of code that is executed by a team of threads - -\item The following compiler directive creates a parallel region -\end{itemize} - -\noindent - - -\bcppcod -#pragma omp parallel { ... 
} - -\ecppcod - -\begin{itemize} -\item Clauses can be added at the end of the directive - -\item Most often used clauses: -\begin{itemize} - - \item \textbf{default(shared)} or \textbf{default(none)} - - \item \textbf{public(list of variables)} - - \item \textbf{private(list of variables)} -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Hello world, not again, please!} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - - - -\bcppcod -#include -#include -int main (int argc, char *argv[]) -{ -int th_id, nthreads; -#pragma omp parallel private(th_id) shared(nthreads) -{ -th_id = omp_get_thread_num(); -printf("Hello World from thread %d\n", th_id); -#pragma omp barrier -if ( th_id == 0 ) { -nthreads = omp_get_num_threads(); -printf("There are %d threads\n",nthreads); -} -} -return 0; -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Hello world, yet another variant} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\bcppcod -#include -#include -int main(int argc, char *argv[]) -{ - omp_set_num_threads(4); -#pragma omp parallel - { - int id = omp_get_thread_num(); - int nproc = omp_get_num_threads(); - cout << "Hello world with id number and processes " << id << nproc << endl; - } -return 0; -} - -\ecppcod - -Variables declared outside of the parallel region are shared by all threads -If a variable like \textbf{id} is declared outside of the - - -\bcppcod -#pragma omp parallel, - -\ecppcod - -it would have been shared by various the threads, possibly causing erroneous output -\begin{itemize} - \item Why? What would go wrong? Why do we add possibly? 
-\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Important OpenMP library routines} - -% --- begin paragraph admon --- -\paragraph{} - -\begin{itemize} -\item \textbf{int omp get num threads ()}, returns the number of threads inside a parallel region - -\item \textbf{int omp get thread num ()}, returns the a thread for each thread inside a parallel region - -\item \textbf{void omp set num threads (int)}, sets the number of threads to be used - -\item \textbf{void omp set nested (int)}, turns nested parallelism on/off -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Private variables} - -% --- begin paragraph admon --- -\paragraph{} -Private clause can be used to make thread- private versions of such variables: - - - - - - -\bcppcod -#pragma omp parallel private(id) -{ - int id = omp_get_thread_num(); - cout << "My thread num" << id << endl; -} - -\ecppcod - -\begin{itemize} -\item What is their value on entry? Exit? - -\item OpenMP provides ways to control that - -\item Can use default(none) to require the sharing of each variable to be described -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Master region} - -% --- begin paragraph admon --- -\paragraph{} -It is often useful to have only one thread execute some of the code in a parallel region. 
I/O statements are a common example - - - - - - - - - -\bcppcod -#pragma omp parallel -{ - #pragma omp master - { - int id = omp_get_thread_num(); - cout << "My thread num" << id << endl; - } -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Parallel for loop} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} - \item Inside a parallel region, the following compiler directive can be used to parallelize a for-loop: -\end{itemize} - -\noindent - - -\bcppcod -#pragma omp for - -\ecppcod - -\begin{itemize} -\item Clauses can be added, such as -\begin{itemize} - - \item \textbf{schedule(static, chunk size)} - - \item \textbf{schedule(dynamic, chunk size)} - - \item \textbf{schedule(guided, chunk size)} (non-deterministic allocation) - - \item \textbf{schedule(runtime)} - - \item \textbf{private(list of variables)} - - \item \textbf{reduction(operator:variable)} - - \item \textbf{nowait} -\end{itemize} - -\noindent -\end{itemize} - -\noindent -% --- end paragraph admon --- - - - -% !split -\subsection{Parallel computations and loops} - - -% --- begin paragraph admon --- -\paragraph{} -OpenMP provides an easy way to parallelize a loop - - - -\bcppcod -#pragma omp parallel for - for (i=0; i -#define CHUNKSIZE 100 -#define N 1000 -int main (int argc, char *argv[]) -{ -int i, chunk; -float a[N], b[N], c[N]; -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; -chunk = CHUNKSIZE; -#pragma omp parallel shared(a,b,c,chunk) private(i) -{ -#pragma omp for schedule(dynamic,chunk) -for (i=0; i < N; i++) c[i] = a[i] + b[i]; -} /* end of parallel region */ -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Example code for loop scheduling, guided instead of dynamic} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - - -\bcppcod -#include -#define CHUNKSIZE 100 -#define N 1000 -int main (int argc, char *argv[]) -{ -int i, chunk; -float a[N], b[N], c[N]; -for (i=0; i < N; i++) a[i] = b[i] = i * 1.0; 
-chunk = CHUNKSIZE; -#pragma omp parallel shared(a,b,c,chunk) private(i) -{ -#pragma omp for schedule(guided,chunk) -for (i=0; i < N; i++) c[i] = a[i] + b[i]; -} /* end of parallel region */ -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{More on Parallel for loop} - -% --- begin paragraph admon --- -\paragraph{} -\begin{itemize} -\item The number of loop iterations cannot be non-deterministic; break, return, exit, goto not allowed inside the for-loop - -\item The loop index is private to each thread - -\item A reduction variable is special -\begin{itemize} - - \item During the for-loop there is a local private copy in each thread - - \item At the end of the for-loop, all the local copies are combined together by the reduction operation - -\end{itemize} - -\noindent -\item Unless the nowait clause is used, an implicit barrier synchronization will be added at the end by the compiler -\end{itemize} - -\noindent - - -\bcppcod -// #pragma omp parallel and #pragma omp for - -\ecppcod - -can be combined into - - -\bcppcod -#pragma omp parallel for - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{What can happen with this loop?} - - -% --- begin paragraph admon --- -\paragraph{} -What happens with code like this - - - -\bcppcod -#pragma omp parallel for -for (i=0; i r) { -#pragma omp task - do_work (p_vec[i]); - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Common mistakes} - -% --- begin paragraph admon --- -\paragraph{} -Race condition - - - - - - -\bcppcod -int nthreads; -#pragma omp parallel shared(nthreads) -{ -nthreads = omp_get_num_threads(); -} - -\ecppcod - -Deadlock - - - - - - - - - - -\bcppcod -#pragma omp parallel -{ -... -#pragma omp critical -{ -... 
-#pragma omp barrier -} -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Not all computations are simple} - -% --- begin paragraph admon --- -\paragraph{} -Not all computations are simple loops where the data can be evenly -divided among threads without any dependencies between threads - -An example is finding the location and value of the largest element in an array - - - - - - - -\bcppcod -for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Not all computations are simple, competing threads} - -% --- begin paragraph admon --- -\paragraph{} -All threads are potentially accessing and changing the same values, \textbf{maxloc} and \textbf{maxval}. -\begin{enumerate} -\item OpenMP provides several ways to coordinate access to shared values -\end{enumerate} - -\noindent - - -\bcppcod -#pragma omp atomic - -\ecppcod - -\begin{enumerate} -\item Only one thread at a time can execute the following statement (not block). We can use the critical option -\end{enumerate} - -\noindent - - -\bcppcod -#pragma omp critical - -\ecppcod - -\begin{enumerate} -\item Only one thread at a time can execute the following block -\end{enumerate} - -\noindent -Atomic may be faster than critical but depends on hardware -% --- end paragraph admon --- - - - -% !split -\subsection{How to find the max value using OpenMP} - -% --- begin paragraph admon --- -\paragraph{} -Write down the simplest algorithm and look carefully for race conditions. How would you handle them? -The first step would be to parallelize as - - - - - - - - -\bcppcod -#pragma omp parallel for - for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Then deal with the race conditions} - -% --- begin paragraph admon --- -\paragraph{} -Write down the simplest algorithm and look carefully for race conditions. How would you handle them? 
-The first step would be to parallelize as - - - - - - - - - - - -\bcppcod -#pragma omp parallel for - for (i=0; i maxval) { - maxval = x[i]; - maxloc = i; - } - } -} - -\ecppcod - - -Exercise: write a code which implements this and give an estimate on performance. Perform several runs, -with a serial code only with and without vectorization and compare the serial code with the one that uses OpenMP. Run on different archictectures if you can. -% --- end paragraph admon --- - - -% !split -\subsection{What can slow down OpenMP performance?} -Give it a thought! - -% !split -\subsection{What can slow down OpenMP performance?} - -% --- begin paragraph admon --- -\paragraph{} -Performance poor because we insisted on keeping track of the maxval and location during the execution of the loop. -\begin{itemize} - \item We do not care about the value during the execution of the loop, just the value at the end. -\end{itemize} - -\noindent -This is a common source of performance issues, namely the description of the method used to compute a value imposes additional, unnecessary requirements or properties - -\textbf{Idea: Have each thread find the maxloc in its own data, then combine and use temporary arrays indexed by thread number to hold the values found by each thread} -% --- end paragraph admon --- - - - -% !split -\subsection{Find the max location for each thread} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - - -\bcppcod -int maxloc[MAX_THREADS], mloc; -double maxval[MAX_THREADS], mval; -#pragma omp parallel shared(maxval,maxloc) -{ - int id = omp_get_thread_num(); - maxval[id] = -1.0e30; -#pragma omp for - for (int i=0; i maxval[id]) { - maxloc[id] = i; - maxval[id] = x[i]; - } - } -} - -\ecppcod -% --- end paragraph admon --- - - - -% !split -\subsection{Combine the values from each thread} - -% --- begin paragraph admon --- -\paragraph{} - - - - - - - - - - - - - - -\bcppcod -#pragma omp flush (maxloc,maxval) -#pragma omp master - { - int 
nt = omp_get_num_threads(); - mloc = maxloc[0]; - mval = maxval[0]; - for (int i=1; i mval) { - mval = maxval[i]; - mloc = maxloc[i]; - } - } - } - -\ecppcod - -Note that we let the master process perform the last operation. -% --- end paragraph admon --- - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPvectornorm.cpp}}{Matrix-matrix multiplication}} -This code computes the norm of a vector using OpenMp - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bdat -// OpenMP program to compute vector norm by adding two other vectors -#include -#include -#include -#include -#include -# include - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of vector - int n = atoi(argv[1]); - double *a, *b, *c; - int i; - int thread_num; - double wtime, Norm2, s, angle; - cout << " Perform addition of two vectors and compute the norm-2." 
<< endl; - omp_set_num_threads(4); - thread_num = omp_get_max_threads (); - cout << " The number of processors available = " << omp_get_num_procs () << endl ; - cout << " The number of threads available = " << thread_num << endl; - cout << " The matrix order n = " << n << endl; - - s = 1.0/sqrt( (double) n); - wtime = omp_get_wtime ( ); - // Allocate space for the vectors to be used - a = new double [n]; b = new double [n]; c = new double [n]; - // Define parallel region -# pragma omp parallel for default(shared) private (angle, i) reduction(+:Norm2) - // Set up values for vectors a and b - for (i = 0; i < n; i++){ - angle = 2.0*M_PI*i/ (( double ) n); - a[i] = s*(sin(angle) + cos(angle)); - b[i] = s*sin(2.0*angle); - c[i] = 0.0; - } - // Then perform the vector addition - for (i = 0; i < n; i++){ - c[i] += a[i]+b[i]; - } - // Compute now the norm-2 - Norm2 = 0.0; - for (i = 0; i < n; i++){ - Norm2 += c[i]*c[i]; - } -// end parallel region - wtime = omp_get_wtime ( ) - wtime; - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for norm-2 computation=" << wtime << endl; - cout << " Norm-2 = " << Norm2 << endl; - // Free up space - delete[] a; - delete[] b; - delete[] c; - return 0; -} - -\edat - - -% !split -\subsection{\href{{https://github.com/CompPhysics/ComputationalPhysicsMSU/blob/master/doc/Programs/ParallelizationOpenMP/OpenMPmatrixmatrixmult.cpp}}{Matrix-matrix multiplication}} -This the matrix-matrix multiplication code with plain c++ memory allocation using OpenMP - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\bdat -// Matrix-matrix multiplication and Frobenius norm of a matrix with OpenMP -#include -#include -#include -#include -#include -# include - -using namespace std; // note use of namespace -int main (int argc, char* argv[]) -{ - // read in dimension of square matrix - int n = 
atoi(argv[1]); - double **A, **B, **C; - int i, j, k; - int thread_num; - double wtime, Fsum, s, angle; - cout << " Compute matrix product C = A * B and Frobenius norm." << endl; - omp_set_num_threads(4); - thread_num = omp_get_max_threads (); - cout << " The number of processors available = " << omp_get_num_procs () << endl ; - cout << " The number of threads available = " << thread_num << endl; - cout << " The matrix order n = " << n << endl; - - s = 1.0/sqrt( (double) n); - wtime = omp_get_wtime ( ); - // Allocate space for the two matrices - A = new double*[n]; B = new double*[n]; C = new double*[n]; - for (i = 0; i < n; i++){ - A[i] = new double[n]; - B[i] = new double[n]; - C[i] = new double[n]; - } - // Define parallel region -# pragma omp parallel for default(shared) private (angle, i, j, k) reduction(+:Fsum) - // Set up values for matrix A and B and zero matrix C - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - angle = 2.0*M_PI*i*j/ (( double ) n); - A[i][j] = s * ( sin ( angle ) + cos ( angle ) ); - B[j][i] = A[i][j]; - } - } - // Then perform the matrix-matrix multiplication - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - C[i][j] = 0.0; - for (k = 0; k < n; k++) { - C[i][j] += A[i][k]*B[k][j]; - } - } - } - // Compute now the Frobenius norm - Fsum = 0.0; - for (i = 0; i < n; i++){ - for (j = 0; j < n; j++) { - Fsum += C[i][j]*C[i][j]; - } - } - Fsum = sqrt(Fsum); -// end parallel region and letting only one thread perform I/O - wtime = omp_get_wtime ( ) - wtime; - cout << setiosflags(ios::showpoint | ios::uppercase); - cout << setprecision(10) << setw(20) << "Time used for matrix-matrix multiplication=" << wtime << endl; - cout << " Frobenius norm = " << Fsum << endl; - // Free up space - for (int i = 0; i < n; i++){ - delete[] A[i]; - delete[] B[i]; - delete[] C[i]; - } - delete[] A; - delete[] B; - delete[] C; - return 0; -} - - - -\edat - - - -% ------------------- end of main content --------------- - -% #ifdef PREAMBLE 
-\end{document} -% #endif - diff --git a/doc/src/week9/week9.pdf b/doc/src/week9/week9.pdf deleted file mode 100644 index e7f2de84a2f0a4aecde91e6950a8c89ae32a8658..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 501561 zcma(2Lv$`&)3t#{cWm2EcI@ofwr$(CZQHhO+qRwTSm!xsoZkC4zSUa&HCD}Q)~q6v z7Z&})NXH6IHorWw4$VQxKxk)Z3H|pkG`+Nmt(mhqAtM_z3*rCoK+}s_SUa0I64HxW z8#tQ?n;6*{n?UpOLOVG-ni$wXyKlCrO}TEdA?}>fv=__LCIh`$W7Hgx+Bjv7IV>-U zECK`vk&qMR-@~tbedrnT$u=Z&WY|F?_A z;H$J*$^1GzpPh;{qtm9Kg$7PgYj-}REdqMptyYcNUm`}vjksf$@?D!tKgg|Ho3G}4 z;9vKcVO1x}4OQqI&6YPe!p@c=C2|*@=x-P<@ZD0Feojz$Tdy!W2`s2$@?0jHo3}h9 zTFDCJbkEDGvJz%S8+W+?_qu2&)zropyGhy1zJa3U7A&xjBsy6fvu9a)NwATOLl3e2<~?w|le1P`LGOH1j?Ckt&{17-0A0EW}KWwT-)NX=Wt z@G%Lj;kU1Qa(~YuFOaKzP=R&jfji?^CRXYvMDS7GQE&p@GAM28F!@O;RoiB&X}mIz z%K_i@)hr3@dLfa*4-w>6AD%^WBkaw6OD_CNKnMw`g)U?-0^6%(Q>8^;e~rvz@OWyv zxy$YN@Wv=`0rX}u0Zk|W5x)pU!O|0`Lr1Q1>d(rRp8SPQp1zu@U+LY@A83hv)O=q* z;#Nk^lYTN~R7o17FWIKn;k;>3QNU?E7-qRP77ihS=7BXK?7sQHb^FJx$|bY(0jCVf zn+w@D4N-0jAP>RCshrhHCLyUILqLZv4}35WS949TjvSCfJTmMiVbam8t0iW`E^ISo z0KVU~1Q}}=mYJaH`^EJD*!A)nCM}tATq!KwDH6y%);k)AC%FU2;qS{DyPE6i&Vjxg zu=GmL*0_1zJKj`j*&B9nIAF7Q<9(HhHpUbw6Lcpyut=OdO7=|RO3#P6ch1~Xh+ zh8L8}9mJBb4)_7exwiKevTLRe?0<}QiTQifs0JP=IpZ#XCQ_gv`a}fl@j`2v=#rW9tB9i)2FV`Jn$mm zRbK98Fy_wYezh~rJkvXYD5 z_|J+Be^b+-4-D@Pj)UZ+Q5IVjk_`8555Kmwv+FDW$}8tH9|BPS2zl^?zK8x(D-^r>{I?vi4kR=V{;$1&4X1QNL!5FD2pXL$eO_|i zqukC55(iTVAqemG3FTfZ|Hy3wd;$5bDaAh@dbUxU_F!_*k7)KJfPpwkO=|F|vT~Ie zsX__nd-;YyX!`Dlar(s%meFTFuD~c!a5$}s)o|5P(F*39LUlprFearTk4cX^a?OXw_n6o>)6SiKRG%5!WI0F)exehRWBy2mj z7_pp4SU!5#JBwFQZbRLeC8Xc!MwN2mQ7Mk*6=V8Xi#h%!u(jLu5jQSQrmS9BxbZ2D zM$*U>tC$I9W9zIbjC+_<;$%Hq#+`I+W6y}0so?>pFKF$_yRb zkTJF;8wPm_x6S1MzztpQ=E*c!(U-@%41LkK8>!!M z*HiBwQ+6~=BF>JX^D@$C!q!Mebhz3jAeVD}dQ z&BeO*UR#6zvVCVr$hmOlhHZk%2oRPy&6I@~R~>Mi5kZ8Hq}ak|C6QuCYA({b7mu1P zwp-8LbJhT1ah`wj7}mmNIGsZ+;J(i%?*v>xDZ(jNK`Nq{#)66LZnw<17oQ}m5`^*Q zpAs1R8<@vD*q;#Bg-V1#YvP^(Wr0wGbpfE@#CnP{Cjx$+ccO39xTS>4FX?A!lN|(+ 
zgg!pE8G5VQyY^$~q3*{G3X`IO1K`mgatOCo#_#iaOh!Qv_Zy@yD1mFwv>TPZgna^jPF5hw)^)ibZr~z>FZq?$x>GU2rZ$6exhlM{Mn! zV4PS;knE}PK9pu?FnMz);2@99G>w0Fd}66Ne!3cn@JuT)4(E@uSA;|D%hVF|h;8QVp5W zQ&>6?qWg4((@D0fU`9g|OL{2JcMIxUqa=!y`;ucp)@0`SP$=I7dHMtDB5z!2Bx0b@i_F|FANp6vLw!t zGEiFmOloelp#GwYGmrpMG44H5>4I;B@#!y7PAwZt9lO6Q6eV}6$p%iX6W`)pBNu?k z2vzOduqP#>LumLI)&ewOvS2=Jg%*Ec~)aPs@45Xw;E)=3vE4Aw>Zj>(Ru z6QDAq)K$gdLc^kqmW`LrckioBs`H_*MCB-uNRU6=Jg(>@o))pSE>_&0)Oh-}v$V}G z>LRloG48Vs2tvj3R9DWKCf_q2`(ulR{I`qk_%kMc!|K8xXb_7m`RI;vO~&NV4$MiC zbV}Ak2+2H?9D2J(J~E?o3jMRIeG&YpwV!rA#OpnD((7WuGmltxDa~lChk`T#U7|h6 zPv{zm1sMu9m?v5GNAjA1LWMG+51*4tME1w?cztGEgYuSyqGY^bU|ZF;eKh>96o_Ou zl8!5y&O3-zv%&(}3I){(!Azw715~j>6fE%peZRDR3AFaK%%cppbvo>G-qMjFLkm)# zF~j!u(297^KyTEj1FkKav- zB@o^jcD@eCHf?imwRuF#E7Epk8ZkhBiQ|A;Pl2QX)O1=Um{%hdjU{HLy@PBAmblQ_ z5#vG&cE!OJ?Mr*kelHAWUpEJ}e_iUkw?p%qo3{_#7`G>erZeS7rBDGoGp)hiFPImAK2N)nwQkjLq+ z2~5J-yUNcj3xCxfusPiJ$NzE)3+MlXQ&`zJ|F4{KqbcKb#D?7Ssb+7dcmp}`D(31m zu4<#$(zfHGqL?xh&}y8865%LpLGCnRw+8`I42fNM++dcdd}53RhP@LLcDp9Cc-Wgi z(x%4p^F8*wg0UyA$sn4o*xlYsla4rSu4QfliBZ|Ei_SOw!;E3=T7Fg8n@1n4&?KWV z2gvie|Gn5SRbdO;v23!0c%AtgyYdsLrNni}w4zT%Hftbj<+>%LZ9tY|Uq86&Kx{f$ zw40|>wX9Efp@|_(Zgf65owij)DmXEp;Wt>I$=*q#kxIs?sVjXw&P17%=?o2K70gvP zLN#zcPPwc62GyQESPoNpQ(&7hcxta}b$QL-jw_q)$m+>*;?}yE{N%ut(RKgRST?}g z5bN?&@%<<<#(+7IEP_*E*L5S~ zp;S}VllN8Nf{Ei5C2=VHx1>wRXA6H^$qWjD@NMLV)pOci3~ADx{Q!;yh;VPN!vgJ} zTcL}{ebCsY5bmDc>j~Ev?f1XqZpQYQR8Ilx)+xN=K^w=yEw3bnSm3cuS7z0|zk?^f zX_t`k`2sM>Rerj9-`*>FxFi8_f}CRMP9ry`mth1J4a3ZFe8AG(c4oHq1R*pQ4Rk+V z&v%GP$Rb`6{E5s0X?I6(%R)9-4b5$5RrdBEZE9$lm{_42qDp4rtG=KD#VtyeAiayy z6`K)_@)}XYO_?Q~mgkc!2llh`q$LG4%!A<0m{m~#v*0I&D$mNkeMpCC0Y8~$6QvKD zLZm6#D?e1$`)7KMk?wo|$>KzSp^*ckY2-jLJu{(}y=259E+1_+@(rxpOSn6yu(#jX z)s_nUMmC5d$fd+~Eg?dixbSp)0A8lN%6_%Fa=e+fPJbArGmK0!<3vK~{SXVOw5+k) z{hWWkiSUFVm1Ok-y`X+F>4R7UGf?szFrm|gK-nVYfj^Jau;T`2_9Com20}h;>Y;hF z7m_Ns!XmQ4%9#ENd4M_;+wdh6C>U{HGXs!)Iz0N>p(eehdH*Zc9k@WM{F2oulX_~q z$mW&>nB2nOZ50&qzo{X5&X=-G#Wi#%0^C8UB^<92-81fM2vy*1ii#+WZN*&MN_>w` 
zz{iLlW_mX?c>&);bVcuHVWSkxzg3k(OQ!}`d3|;lC4PwFGTbM&q&<~sD_xcz_XYjG z2)asV95IFAhSOUGfwSRQ^R7PT5&`xJuHx8KA?K7K&&h-MR3Y{b7R9J`gP5q5M<1O8 zP<*Q^;M)f*PRP*eP2?(eNdcs}G^D-h^FJUb(K!tkkz~~*;0NSLKc?kG%DSp>;sd^F zJGjJ2!}~j~uc)_p2MVYs{s;5{UKEmnNb@W)aS~w&Z3~ywiqW6c4B1?yL0SDX#J?6{ zy?+oGDlOvC27#)#?--n$k%R6)OL+5yIhbvb3(lUx2s10B9wAs?ST>m2dtK9c4M*Zv z&0y#_7s9WBKtX(>C-go+{v^v4@NTu9kJgP7(A))(r}^|aqALuID?2xz2}+?hl|E;j z&+dCf6G}Xji{ng3R_vYyJ5}fkLG%IE;nOTHMFV;{xCQv8S0wW9*E_Vy$AjW1bljb)I-5A?O>AjIi@SB*m@1Rh@r!hh z(|3NokkTBqTDl0Nn_-N2e6q~O-SWL94}loOpAAPO`)pHD+*9gtgk;bsnl!#A%Vf z)>@F%ybF!7-0-602MdGL*9DZxDuT$HycZG>J~YUC=EzoO9KTZeuy{cqY8zoKQpk+wgquV@QTnK zqI$yF{uP3QdqZD-6rV3ax%A+cg8;CVZdv^|BPM}aO^BiA6Vq0_uanSyjK~Ag#*;_e z1DYC413c$lCbp1>x)WwzL|sv@LeK`<`8=+DIK0T2Vb$G)R|o!v6VR&Qa{yK z(p>9g5RtE73&d5nUulX~iQUSnv!q!30ri?Uvo19~v@?mF4%>J_A_G2d((3LEwx2WG z6B`B)oh>jj4e>djad5J&y$Q&S<0XWuBdXMq^1G&9Ge)gz)pIX5=K3y}I%%IiT_oUD z94Me!Zerw{)1qq=ECMvsjSFI>v5mhZ_;aTGt+y>7dLZ}eC8D1uE zvLf2vN4j=J^!>_%y}4t5mD_ZUt>boWLGAujbz-foJh$q?I)f=~d+P9R~&xTcP!}>#%w`@S9T_s(X2c zN=f)O(ANo-aBTH5ACyP?_0-jPpJTfl`ehH3)mHcZ3WqV0dF9HOY$9+LK`GuJ=1AJS z?`D50>n16&JCEL8^0jQ7e6l$02tqbEl=-bVPg;{2hGu?_DLIS%1`t1mBsY2i)0@pq zF5UMM;L4TU(DUA^%6y1+pPjWe1ao{ee<$bvUgIQ;u_vJ3CXloq@eyYPmB0GsO8Zy; zFIckvKO!3@Hb&dS6wfUlB=EFH}qumcl0r|Sp`0k=PjSs|!z&j*U@Oh2A$^HVs@<1w-jGl3*F^w5O``Tr*ej-*a zMxf5u&&Q!225#$E2l}9SsjvJR=RsN}8}rKSsY%ftswom|=Mnm}@k*lk|Rs8u2}w zkwmRJ91WDI)#pE=A;H}hJ7seSK^Zp)!qe|Y@Y&li4Y z+*t;P^zX_?`k1#YO|wC?8BPk>-Rzh?SJrFQ5q%nGe5V;I4^XFOa?c3iBgNKECO_I& zT;QY+MxIRrWe>nc>aj<4+#lQ{C^TG+)IHj*4((7P7nlpuv*f4iuE$)_Je;pw_8V>@ zE{T}Bk2!pQ*Ac2*!-sv|wqm<%m#&GMc1X9|ty-&0)!!L(Dp8vPz@x?6d(|vfcHQ6( zSi8Cv8|i-6le`74y%LM@a)E=ZGWhJfdT!dPgmwsSJGBb?VCw3+?w>JrUl*8U+;^I1 z+6fA~lS#}S+#qa@@Y}u_nyu*9--_`ynZRVwmWRFQM=&K%EBd$YTx9i*vI=r?4q7VOs+?o&^KYapqXg5O2 zb(b)b5ZrBd*1;tqj{{Ov4&{{qV>K@FXynPA)N3>MnYe+uXDF|BPYH)A9gQ2KF30Mu zXY101ky(<2U5eB@|CAS*C>kqDtTk7t=E)n{A)S2d069Xlcg+ZXelP+7;qwVWqCFI?;AKx+b z#`P?>>;?^fv>KuHi}K+pGO*R>x~T4&X?xw!e6ubHy_hds#zlR8uU+@PTh(^2M1RTi 
z4y0m$)Uh+b67X3(&t2UHqQv~=b$*hCe#zpnRV(1r@2S(Q%jju)%gL_Cu@tj^`%mKD z9Wcfe3?iJ{P5bofh>_sw6aP^- zt5J%u`10tv0rnWT$vd6A1U&#}sjaeY9xZ#jCr$xaZ)to z)*8=SdLsFcwg51aY$%$p9}V~Rf;?a7xo_@F%k+_5-{7tNxiIjs({O zp2TZ!_~J5|COB^Q5_nWa`1^wAB2m+d1tR>>;%Zv4MYn^Za7c43V&Z&*@GPU6C^+#x z0>iZfhrLwbWQy%E1k{x0;$=)u_ym`=8uHnjm;4d|0ui>Su!r>lN=E}-_$EfE8&v8dZF7jsm z`W0Wxnbro;drwva8{2(IDCP;kAvbS|LzOk-d2_nd(fDBL4Z5L8p?lJXaDU+)BCSS# zL<94{RAnWT^tnYXyWcd!f;7Q(0w$Wp>@1n@cn`zezFhUpxepm$q;qy)d^COgM2YE%QEi5^6Gx_9tW9Jfotn$+G;eRuA?i<~}7p zmtYGJ9v2s!9xv}2kU0#ZfG=h*FLsmxrUNZH34K7VbT|t-N(&`%JQc^nR^Gy}V1IB2 zHrkQdndJ>}9 z>X`?+0}!n2YPf%-hRyCD^H!${4zzG+pus8M@d8}Zv_s_yVgf1jWb@W)N0UAsa0RCI zQc+grIJxOo2XS5;WX%V#NBC$CA>H9)YMOd~4|HLeaV0R1MZAH(;enb0{dCANUtkhkx6-FqeE8bS0*K`CP`7+{yS1b`-c@IUg>OY>;cl z2W%SR$-U~5nn~{5wvw;GBce&vGAeq%gyYU^NkQ62Fonnc{J)*R;<0&81S@&mS%1m{bfCn>*PVN=i$Gbz_!B|GIJ@l7$n?B-aZH}EVJr&VZ&EM|_24AB|9pYOF^PU0 zF%sg)d}^1>Igf`lSG>SCv}NnNYs1dxvZF(C#T{g9MU?s3er2oWQRHyhdKyWR)L4;K zz#MN~$?luL7%EClJT34}Tr|TGFAtgA|06d$KfLs`_So}T40`nEgY-u@PkGBp^bZ=* zYU3@3MnT{H#smJ5EUC*FQuC~VF)^P(aY3Vo+)#y{=@=q3tGXlPoE&Kj2j3}m{`OHx zZ8eojh%vg}-6j!ljGOP^CqwAk&Od>4%-5^$WbJR2rHA6v{g#OrikswEhTLA}BBe#D zJ95eGU_Pf)zCV>9?uLI%+yucwcn1rZBziqo5F%iWxSem>llSrVUrNMu#P)4lt9((m z*ew%ZaF{+~_$poR+wdpPfLU2pjBsufmZJQz{>90pW&Ecv`m#6Mq}knDB)aQ~BO zcQzlxEzYN^ID2GU6D?e?J>6u>!wP2i-fScvx-y>p5i!?r+E9b!s}B!e$_bY4vd$!|{Y zKn##}|0Z*RzE!y8ittA>zX!Y>)V9vwkS4;`hRSLpYf9?(W!9cjBiUlCL#S^aBdc)y zz}}SODXZY^(02esPw)Qz_xAK~>NS%-SU-f((ZQ(=QIgT@DTFmv!UUbYYUFi;T}90n zf6c|o&!Z-Lf=%UjUQo%n>b7diYRKcoHU^JLOLuTi4EKF#hM>>8?q8vPND8`zlPW4Q zdMMzVOOv*GQls|Z20XRHquf8#apMni5+`C%UR-na zGdR1+{c%SiD7FUr?1(dQq=>%ZM7Yu>TyXi}sr0g9{c$D*`ek#~sG(ZSEI+1eMqFRj zVW>Tou9I$MaO-WQscxDQK2{hW-it={feh*Ha4fAZ<78_Q$e0ARmW|cZo;v+s9Q*{D z#x(GrOr$S-^5{>Lz_#d+nJ9hMA} zX1zs6zQxt*p@O^N(+mekp$RPKJB?Pemz))enfs>}VE-Al8cTYYo@A*0S;L3W*Vpmp z%qT9QIZJB28aT@;tzk>o`x`*7dXIb~9srX2H4le)GAU@)^R_jjH=!pY9a9V*aFn=5 zcFc&FIZ2WY3>OQSb=;T-#d`KsV^sWeR5Z89=>#$zepJY5@yf2#wxtZ^5yO_Cg_s>{ 
z##&LtrcuE#-RVB5<=jpT1HxW!rpl2bmzKCc?gR`D6UI6h^j?F!m`+P30JS}rQ7s~B z>%N)*x+tQ`p0;S>WxW4iP@-mfu`9FNUe@7lTX_PZo#d3w?G(WXKynzx;T-S_wfxA>bvh z@JifaR|oo@D2bEtn*(`qc=v}F17#Xs)E=h|9%^t`23GWG>{Fv~5EaH@FK(+0x;gyybU?@Ao+XAt#)-T@nG| zW+pjTUr36(yoY@FM;%QXA0HOq1-IiuaDJzBZ_5{;dE(fy6{@{?tMT~34Tx(&nI8U> zs6em+pQds~`(aq>sNqiKI{wSMb9V}nIr~0iQo!5~gU9~~mlLsM4h^hXn{|dkfPBnC8AHqs}A&C?KPY{ES-@^vL(TTpI57j3*R% ztE5tLlPA)s0w^~&daE*lb|Ee5PS6#2Oqq1dGxMJ-{(fr8G8clHOKi;Nnflai=MILs z#6T|NN;&&bQE~6dWj27_&Z>*z>b$ZHWoUjq7zR^NXnzNwhGob>cgc8$LlMr+aI;)> zr&u|PcspvKKyM{O0u=sLMjVcr3d$fTqH&CeJqPTutCaRyq(?6*<)8mfu|I1OY}B{(>NFl12`iNq#E zw8djmaU`JD5&aJ?BI00Hr1=+VfE-YdCgT5QjG~T$V%kPxuvYlxlG_N2Mv$0MF1{6H z8Y^am;KX>&A0@+<-g+kfc^p>EPw^4WP-JSa{44p~3)D?|!YE-7BGd5;P#9T*(N{oj z(lc7#J|&s^BV&((Qq+M1ZE)h54?BD02RQHGcGU~~%1Sr*l_OajFM%EPan{y)o(E*E}>&WS> z9roTr4g7o!MV%WyTeu)2k(}H)*Tl~|K%^UvN*gH&KN1=U)W`CXg#XN+U`r{ZZ3L$H zZz*dqgG)3pgf6l)Z>F=Cy#oB=oF0`=cT9^@1yR4^2(LsC&rK4GyE~;&BToL&0xnvU z65f(-Pc|*K|LnIbUwe7D{N9J; zpH(ZN+Fyh{pFce6ow?oOc=fzZyMpcr+m5K_HN--(k7Dx94tK1D04T^_dcpkR-CD;gLebYSSF<1(f+fDF%3ynMz?Rk{?5N@-J(a!&Ez22U31{ zF0A3{y=nzD#ff4SK!8gHsR^6Pe{6q3$DNwkon&9wQhNly5TH!j;ZZc!Yw_EvUe?^^ zeStY3Ych_FSWd7;r9u>@rZtsf#DnHY!4stQ?JWy4IzpK=<36A>O3Y3 zPN8~EY7s@~J=bHFbzAI?7 z;Ej*1iwN2oB1l3Aw2rCT`MM}G2fo>b5EZFiN=AulPZ*{G74SvA*S7aOb+!B=V{2

    lV2 zkQ`j1UNC!Os=m`ijPHJ2`F=KM_b9hIB1k%&Ts1+!8=HVQSRFg`yj9zR)hL1zowu{( zn~&${dt}^58nVll6LzZQ@_?bNH{30gntzG?(3H{BEawsK~piN4pC(^=iZI5K@=)9DU6WgtBnD`yCuy#=ErLh$6l8abTv4heGW zDA;7|gyI_)4(dPqMj6D5oA_VP!2bX842%pM|JTO#j`oHv&IFpzn%e!JKkL;MJ`(vD zpcz0x-CzpwKt)jeQh%aag(pc(#+tsoW^ZD2W(?hwipgDdX)9HHsxEC^ZY!joCc2>V zzD}am`1Z7OeO`M`QHazTL}lshCYAu)Upyn$nwGGBxA(w_dEC&_OodfP|EWj~alp|;-nCIx;m!SH%XPOm zy_U7|b+cspbD{lF?fKEJ$ty_ft7mimfU${!zPAzBJPn~>TcDnym||SgbVb(lezCo7 z-I_UvzV?3QIP9$ipI$e$S9kZcQR(Z^O2sEYwB}JEVWgRf%Ek~u(cL_$!wKfO4gWB0 zuZPfHZv55?I~!g99R3=iLHcrb3H%fcIaF;+s_ig~B6yy3IlG8nnBlSmoUo-n>eG}o z*Wx4I#&Bb;;7j*yoS?X>tUlEYn^ZBJ+ESH9Bh9*Q7`dNJ-$q*BS)tE$?zkT&*-^kM z$vorIQ>;c@;JaAMo|KwRW2^X~Fx{mqgnjx?bxgFMB*%^x+arVqT`#6b*H^M;iV#vq z3V*MOF3duR=T;9&#=|||mr5pLIJl&A^S_qJhw}a9YCa#?>2PJS9gV>_!x$BRPcq0Z z#VdKdDaztK`I__qt_Y#~(j;8+TTXoXw2B>CUH13;pcdO$j+oK~JTU-tJK zO5XOq9UoyE&GbhH=DR;Dn4H~?+>h@|+oiolT|>h0UzkSCa9PX&rUP++?u9_z3;oqf zLo6tUMU=?sUQ4?4CM(CnOp^_xd@5v@2V3SO6IGUWwj=XYwb~79pCEaHW8NoBwa&Yp zpvum@TN$*UPv*^3!q!@t&9Bd(5|D)GTLgDYUVGi99Q<@*;!y)pJHJ-a+-xGcEx6ua zkxZ7*hy25vm(mFXXQR=#>WxrwOHF+@BZOhP`(m{cIt5jr=;y1&B2xwm1rxFht(D@T z%QlY6<)U)sOE_I%YUle!5Zp(wNObTLoln#OK~197Tr27pQ}`t<5pGC)*NUFHmnXDaz$c;Pv8)UIWRX01@}LdCHFoKhrRG2 z1gQ?zebgzVvtJU*8{r?W-fq^DNcYFIeR^jR!bed2atI)YLxBXa89I`m?oi?Ur44bv8N`EZnvu^YZji|q z^8#WE$bzWwQ+N#7kLuTYd8}f_!mt13-mOgy#&Sprg@H5UE)-Cc+g0Hjnn_orOVV6>M@lsI zoo4Y>Pxaa6ssf^nnogD~y;-)b9|0P@hBxDD}6?%i`q z%{ih-RhDpk$M$VHLE=->slZjIL9guU9J6@i;nM<)T>CtApN?Ys+8Z2h&gI#X$~}{* z0o-8_YUGmcU8gbWu&PiI?Yk8LQ|7>Kj?>s4lxv@zUsGM)O%|bOZ29uywC0mAj*isk zPARWHPuo?A>q;9t)ub{omGl}nQa4c}T(zUvW7r(6+2*+Ax*PH*nkO$}dKTGd z{=>Yfb4JEE?0OC*i6Qvx3_WP3ei>jwl#|4%7oG$J$d#bv@u~)Wu}~W@Ljbd0@gFZ2 zwW-6{_f7YxV+)K>1n##u#DDaNiZ0S<4Y8N8ZH!6M`sAc;Tz;|*6{4$8Y$DaDvaS+n zl4mArg|I5`5wwn+>~x9|5$~cV;lI}4c>Nd}NDHTp`L|_}P&6;xoZjN{42Ei~ZN6#8 z0yQ@C!y=Y~jo8%@#adPOky068Gzf89T7d7p-r!3YGY*A1OxC4p@BuvzZ)iqlphr}8w)qh~TLj@U)8*%L@Zy}&)RK1@QOdy~ z0qne{K|Dx-f`g%K2o4HD%vL8rEKTG1;t{Bg$%=7Y_Es8^vxR0v%qG2kGN~oIeqp{THo<+!SZAKE 
zS<(*BW2Ic4TGJTQ7!OY=Fs}>QyiMYo(JTdu*ikNTf*P?%DhOzyZ&D^RRjg5?m4bMZ z_PByYvD<+wPUM9vm$5vGz*W?+b&8!~y~5CV>@Bx8E(}v;W68FKV>A(tKCGn&A=|B| zb3amMCBJoJ?@9(mGXJ-JZ`8UWnYg(8h)vIhJWFtSa`ya)0)Qj*HqHC@JCSR9>&1;f zvjH#O^cP7IZpd>Cc(jF%BYksFgSAi%&m}=uBC+3Q_Kx07XMr~aT4+Uk&Yxg&9l_|C zgNmX?07IgEu5GP}NkbPhc^9Yz)552(Y0_a9`y&UfBNNs&yXiw=wlZlp^A?ubi2G&w zZhnii(QJ;?>m?kr;ce?T4gaW=?7&%IOs0gs7stW;*hyOS47F^x>`1T=IV7_AZFgW- zoW_Vidjott(muMqxq)YAp7X2mRs7j5dOv< z144`vL&9Ivy4s3&<53CusyWjxZhnX3PJZm%cE1W1BF9q!70E^kh!gLn45HHs$pP`^ z9U88!Glfc&k^c^58a38$-A9-W_>IVaeo5?9d7+LsUG|(Uq#2rIS?~7kIW5mms+}&{ z<_M-V>i8ycJ=HF(Ad!ws)n^6#l7t^ppR8O^%SpOrA^Z@iUlm_UQ@@*)o7B}VgL~Hi zo@Sys5cLo>PEC=-GCDC)kK=NC*xAr`PA2*p3C%o7CqTt5wceik(dBa1Q)tI&)~qVK zH};~j+p>13sEjpbX*Q)sFSd74L%Voi>F1%M7u~`4vq!+&H_gto^Ddl|hHY6_aE8J) zwF;HL{w`Vu#fkFDK{c!qe>B4ofH2Y)`i(yIq z%7UjQDcQ2|>vM!8@j5c}xg9;LGHd5sOX7)$eyPT;?HRcXJOo!Bfo}T06pDRz6-W|H zSr?!o&J|6wG?PpvuCA<_qPpy=YtIgKlJ^r8t?ErTBM{nR(jA1&LPo>mo(O(IZcN0C zxuNPYZHDDS(s^;euqwQFz`0VJD7c*%&Gy&FdU~F&F<)!uJ7X==Q zB~6)?U&}WFr!@;m@4KE^IYa9IdJaye|II0wm^l8=!!kFTT1wmhZ91N*{g=5PaZW)> z^xCa>jFa2TDuzj>PTy`+l8Rty`3KoGum)`C_0>eh+%N`oAc}{iVQHx9Eq}XIX)&*J zu-89)!msZAX5U=@_a_O#0I@2eV$-= z95T@v2u)c6;-*OX)RwT0FfK*@zO#5W`I&Es`h%o(DnbsfgTnDY#_&&x^}^LLBf6*4 z_2$Z!F0=DBK{3({pt*;5RD<7<-$y#AuF$G6Gfwfd8?JUkb1qH2iHv6@%&!fa&h$=e z;4_w_9J#eDVr%e<1f0FFaxH<$hV_dE(1#btV8`-9ET`6?x2x()Lp6eG)pX(}z*sY| zaz)LGX+UYP71F6hHS6T$w}r3k7R`%AH)tBUt+j_Qy24>4u#jPD^0Pb1KmK$f`@*?BMIspLkK5GQp(S<7ZLwX6Z2v~ zmgwgD(Cv!nO;JS@SPuuqOjR6_OtpRxt3I0B9pRl+ge62lWEU~0QA0;VkSif*U|NL? 
zY0fNbq?i6@W40T+un;5@Zrk+IVuy=CigkZ8t1z%&o-}K0vWZ3q%Rt&e{=Ljsr%&FijQ^1; zfmfJ7Q5O!3lg&kE?SKrmXF}T)66L0;q&=1>C{}dJjXL=&(FZv17pXmaCi`!(;LMfZ zjUBKn7%TEwoh5E7Q|GLVQvyBi)k6xu)s4uQR|TQ^1m;?y5)8%&uLzNU2$m1;lDuDs zGR8V2i7CY|7k`I4{ogKHvobt-7N8P{DbG%LV>V1~J^Y|#oC#g{GDSNxKztrgwBo_{ zOz+bE8mG;q1P$@@<1Ty@jwB}xkI7IG0V%?)LlmM)`y{be9OvM+(IifeG4R@jWV%r_ ziPsetrej!T>Rf#n!iXc6(Ivs=dFkhVK4>YDI1f4J;|a;sJ`}7zp$qwBaUm=dv(P5- zAwjJCz+GQiP7MMjFB9;#4GosP%r3RzQ7&Ibeo34YRc?_wVksZl#2=#LKwet^V;dQq zA0Z_-T(UMyR=p$wCqL?Mf+JLFV9l@DAa3MmLBWN^LD!yZ{uYWRSExL{seDefPm}r| z|E_c_z{dIQG4l3f8{&tkFx9Pmu*V}?b_i${=ov$+R-}pDkvtckKBZT%tVbr$C(klG zG>%VlrcseguXMR#1nMhLDAc?#ZU-L`7NXpFs7z0RpV-)Ja5$;!8-mhCdGD#g^R>e_ zF|CXk7fXqLQ??d~TBtj*b|Slw8>LC27q-^N)$s-od@$>`eG7p!T!0>HYY{X1e++>b zbIJFaL;l(@xs)3%Qu$LJrZ7~v|I95&v4%qzBx2@3txvQ#Bvve>d89ve_=dyOf~!Fl zvvW!|HkP|nB49pfN5NekwYRMLxDrcKye=sHe<*vWCegwySg>r{wr$(CZQHhO+cr+w zb;`DFyXM@8iMSKd{m}EWANCJex$;|ykKNYca*Vw2$1XXcTHj@vcCOPaDcH#(xu-xP zJX4#90mR_H>7rRn%B=U`Yq)Ex)|VI0F20CkGSNny)idZ;Ii#LRq`%1kI1+HnOjV%>@5w$hZA*g{jdrldftr0C?l zUJ=%Uu#&-Tv`MIAVB_Gz4qcPZdnxkUq0y_+UxtmaR0#8+XsFdse_jTucC8(YDnI!x zGdqq8>!y=YR`;Ec{MhZ{_W3&BIk`D~y z<0mz+I}3MsPhWUWUT*sZFVZ{e_h$5Rcz$1WQ@8pPP@fl*>~4!)Z@HQ~vF$;4r)q5p zRI@KqDn!|bmx~tS+tkls6YNY`RaIZLWtwW2*G~lKlC#!}0`rDsoLT+;TQ|o;B3Lr# zwxf2|O>8+vbAQV`i(r>=5S5fr@5DliYQ>R`aws7@*|f1oPxS{!pFh;8&|f$;z=L3Wc6>WN zRB7(-wjHF~7fI1PhqvEZr*bMs@k-#K!3@`X2-a_8sz!FOrl$MZraH6D61%={bq6M^ z*BPtF9;Qb``c2ilV4{+1_8wz5U7x1+^+SdK{SlwjybiVWM7+sm!rM7I^V{z7MX)fu5=hERkh;7k^XhXug(}r|gWFAdZLlrd#v!oS^Qn z<7Qya+kn84YNob-MhH8u62IM%%SA9gnb)88p^5lQhX)dzol4>Ks)nAmnfGQTCC1k= zsFr6~Z}}p^ELlaUx}z9U8pJNe%U;8&$+{_3Sa#hphlX(1@w91D=(|`LI_Fs_z?P$@ zs5-1*X&R9=o0aw0*88qTD&tR{*}(5Pntp4w(Mam%C10A3J#eo0o<~_op8Q zd&7z$oZSqqx+{K@vO6yn@aEiK;O-&StN&b`7#Udpb9Z87=KP$!HT+g@bhHgHk2cQ;5dwtrbii{}-x=~(szj5(IX+KdIJR?3{OKP=*c@7~NlP6I7r zWNp0(-LcpoX2XH6wb~1PpRjWi=-(b9`x&rhc)ip!Q((TVH_8AO#0w^>%D<}GImC#n z^X>p30w7N?nI;nQ7|o_YDaOFqD8^@XNu&X$pJcgvGhhTvNdOb1GL(}VF6toSR!I=b zc@baB(L)T6eGE7lR1Du8!G{y2W|V! 
zUJgalpko=;te%k`2?rk+x&DSNC=t^S1bxnxzRt_6q)247%p$a&dLg%}{oZjb7CFoGz(z6pLZLtXM6lVIG{TuRqJup_ zf+9xwH|;8JV{?YU65&aOC(47XMHG<6F-CG^jdu-k>MaS%&=o*2;#b~>_Ot46%=i)( zN45{Br`wFVy7?uXUlF8^r0|-10w|R|EE55e86gP*b@G8gsu2NM-%O?a&Pgl4Ix@L+AV{ zSVKq2pD^HrX%0E;Vx^`fH;-+yN==n2#4%Q1fbbF09_a+=8U($UT8O-OH>LFsHF}pL z++q}SOZ-Y_ejzJOQ+$Zm*0y>NI$VRkIUDj4y}eggv+&d?CeWkTlqDr$*7-WxiNP1f z{q#Uf?m1J+*Y)BEQr?X$+G%d$l6puD_VwB znM5MvpgEYf?mLvgJn-q%FKr16BrZ=yR|Q*>i#!0qGq%3Ru^CUEf8BWf`^M+C+%%TX zcifxDx3Cp=r_c0CKTJ0LgsSlY#rvl2e(;ce4~h-kLGXqv*q6!%VW71 zziEuwCsF70%-o5enS8xEY1R7(NfkJiEXFogz3|EH+n$VF8r^L&oLH?4>rpdr%Yu|O z;J62*l9`VBo5wkIOjc3#%#v6mdv5lU!FwTzI3^W2P+ajT~gn|2sVo4}Es%0HH$Q5SXhat;_ry6P% zL&fO{ciE^v$wc|PeLqxl$aoJ+RUGo7uIqaT+Ob&&1SHbN%keo3=83~`u?fBGTDbMQ z7LxN_5n4S=Te7ki zodn$87-2NIB{~Lbwh!xH-+f<*SrMzIs~7uwm~gPLBV}T?d^Oi|f}BK2wHB%?2KQ60 zyaui;G3}y&^fC4hpev>~nxXWj+n-4`+QG@}a|2#ia{8$`6va8f-b-^VEX!9I1g&|3 zicttcCX~`r5iD&w>ZJF(QZdmNgMR^_Y;h<5+YtXRZV^UiR>uGJ==P5QJ8y8H{3E~~ zX8IPuakRVK-SpW4ZQQbHW|S*{9gJwHu#z8_>TRY*>tP1kA#GF87>2mGxl`_R zNko51M7t~~uRdoDFI(y_!c3;(EH4%ur~n1>18FnENP+(*gdv}|@%9H|cdhja#kbp? 
zUT$}H@9)xzuArVD2Tzx5jX#S@cU@!kTl^-<7*sb6q@3~Zlu*sTNX8j#j+wyqqLR?T z%%hJeR4FK%kjmF9oBq+tk=InHPVmSlNAMvwY>6-A7XJ;iP)s!t6w+6bjUOdZZ8SCD zQes#0D%zWsJ`11n#7}0dDvk&Gq}+^+$Qk=`Q#F`~GIoaHm-Bt17>2RmgL+Q?;b0n( z*u&Jm6|i@N=O#uORQYl-n+{V=*-Rw}Yhszg^g{1bA4N-^0TmpFU(jVetQs8d!Jprq z=N5Y^Y^dCV$i>=^PaVMbBUYB-i1KVvvRwy*`=<8$q~4@ljw7nhDydUH7L!tQHPK~N zVye@(M#s_bssTzsBALrCL<&)usZ4(pH1fCzAKnNg!_FGf)MR$eW0@;gG`5c?f)}bd`!*FFLkX$wZn!zxR#l=8%kF(&9JL&qI({V38@d&U>=tPmF%WtfiPYY{4Vzs}_(L8NxZzE;p|*^$W6Q_Tw9H;M zl{N_epu2!6E5la&E3h6jVc8+L{p!V-Ir2{1*)`WXb$|&C-N^UeRf-j7qvr~nucY2= zvbc9y?0gj3KyFef?*Y@B7e9W*sU4?)_(u;vmzW7chtNhr=_ZQn2x|MO5WoNK^JdE= zR4F(=Sr&#-tJ740hFUSY#EV8MEi#yC9=W&pgVcNFrb@hi$@qXp%9xQRl9iq2M8Oz` zo)@Ex5JHgOQ%lf3GN|^AN=UxEw5VHaMm$0%FE)QnZ%}05CF`vEqDYN7xW01?gyc-F zx~5!-zHZkEU~n>L1Bz;eh;>-+2(mPydPPpa;~D{lF0yZ#pM`*XlxyyES#u~CR<+GW z3k^`&jl-UA=y8NeO57Qfl-6O6BgE11{$P26IPXO7rD%Nk#MDPqgN{pZktym)_?WVc zc(vZO!b_Ab^PQLSvPM#5Ctt!805Yc-0Ws~V+fOdY_vQng=9uy6Lye2*HGNhG}YpEZ;Jsu7TTPU?6G7^jmu;9@~&>Ur8wvZVY;$(Iuc?N z-xo`NVVX5|r^soQt5)pQ+?w@dk0hH*9bHE=`N89^r(Ny(_oNEWdYa4I3c$Xrqe+?C z1^<1iESj1g29>^UDO9;m@D1`1lv+*wymR?j^W%G=&g(Fm(%Qq6X9)~)WtSgevJA;) zC0{~I&_|bnWP~CkkGf7bP3;4a_RjG8x6;AHzq z{u7QH(&LcWbEEw(u0yXajO;=bKHTy5w$S^=n=Rsft89D0`tw-%wFd<7$#3>UY;?&q zCbRi>*N9OI*e)z1oJIBktD_1_6?vQGfhbjzHfm=Hv(`#es8+eiF^4q`RrzYPXjol} z+ExvmY@ZBAN~R%nru*k=gUYU6Yhg&ZpUUKP}ue+cU;y zR{9~H%II3symSo`;Wt@87LM!y!CeE4sWk_9pCc6=r$whno%hNfc0G&EQ~@ge&Hm%4 zgHg&9^XX|6%Ku`T;OlOLIe4I^1bxz((kqhOm&&A*Wae372ETrw<~@Tc=GM!u*Q~S$ z#p}6CgVs&wH*HOmEnXf|S!rKZknw#InTV+4n|1j~O?o@}IVBfJpqlT&Q;rRL#zDNt z9n+@DMZU5PY4Ggq9sGhLzv6L31Opb0Mq+Vv$gFuBM&M83(Z9S5b4jJs5NYp&t$($R zLsl10G@@+nGfKO|HnV2>4cB6xW7cAdhfQ!r!3Rpoa{L3T;k(?Gd6P{8Kbx(5v~kvZ zts(O_GTdC^YG1_3IVR`N39!EtK`6Ll88x@mHxa!;;$Oux=r1QUjw_6m%rhLh`=?1E z4|z0?$6(PG_B}E7*-8a>spv?iJ*&w}I~0!CfA^G95G`}m=T{qDcPXBIRi`|{mti(8 zlyP+;36>La=Z;Yi^kyy}Fn;E0$~H1kMX1E;q=eMM^IpO#VRx!mSX*y2O6R@TLyT&L z-Lh#I7}0NoKMQnqdd>$i1Zq#wHiHl9xB7VFF@PalW*t%t{0S$2)HQ!7=|HL>jzO{e 
zDbVDtQEDAt48khU*I>za*bauvmz|;_d46l?^(3YsXVgjj=7=mV{^*_*7%7Gy{kpcXdc0n=tYEsLQ|#i3lXU{%IL)syxLKP zu%S!pnEZH)5Uq=j*C^9USZ2^V>OG-IW`$4P>9<%CzA}|k4EgGA;X#`)O)l{XD~91O zbx-e1RDiP{oD9Pn%S|JT1Jta5q2IkCOa!+93WBn9?cbcRV*VuvKf4?xv|vB}YPLzc9&%kx<8b7teV za&#bvWJ=%IbtrQ6al5z`BW>gv}SR;9Unc)2}uU8UhEO!VhhAq>G8wH|=u1sRNcS7_cBZ89Sm&9v{2F;~v|#TPXel`=4bl9G(Jn6kUYq-@K}WapAe@WjhII8jF{RwA;$hgtw&^8xbY%3Y50?|uSM4gNZzcESlFY?jA=+h zZ;5terb%8xsSl}MLTv0;yJFWspB^2IBW`?~dDx9dT0;p>Fk{JQ{je)Ai*F&m_djLC zdMu$YE(7i_W%=DqE;SP}FmmH4wrn;!h!dN&a;Jr$<$4r}THCYO!;Dr8*~qFE&z;BC zxHuX^fgQn<%W6tpmjHNnAlSejvvpNPWtjmMwu`S|2wb$do8j_g+~=Bk4(|!{$wIb&!mm0Y zS}%Yzi>!T=<@t9e@QWj0t~Jr11*m=eG^R99=Gceb;cj}bv@XV1^lS~X_h|H)htX;2 z-h)pmHAa={&3LB6gXg2?=UMc3m|>K^BgDm}3(d!audHMpC}IZR<78@HZOHMvQzp$) z$?En(nDl)5AN%(R2iwP|GBe!J?K33Ph~z+JrwD<)tJd_oYI}p>{4|?E@vA>DT5mee z^D2VInRW+v1qhUagJht7p>l0WyO34Njioi@0sO@LDm8PHE^1VzW-(a&SeqD~IO?Xy z;ZlaOt+!J!^fB(VL*gbG&>FoKHX9XB6OLu0e`F8>IP3soXSITrnNCn~k0KznAl|h> zE9$^uI??FGhDRXuu=mvrc&b!V3q!ZO&L1;0Kp1V*LwVzi3+gvv*#|zH^&8nezPPS& zBFakzYl}eVwZ770ibdLi&qKRBZ%52)*Nf*R*aUTjSz1(&{w{>8^m#U-)0w;@dFQ)+ zx*-F^4E7Wm`9K{eyQZKhoBe5z5V9}TPmTw@_9dyrY`)BoIgU`{D`@u+ax|Lb4o`18 z41fLRROt6QAaHJuvb)X7Ssb#nIZw5{8kZx-zSOAu0w_WFyj}oh&&Ts)RqwTWS%Np^ zsa#~{ zg)RnA=7$2`DX&cjd{W_ zDYp7l+mpO0o)O|2Gt;2QyY8bwe&aht>7usr51!V0Df%L%*dFcI=Hh|Pjo7HJT|8eF zSM`^NvZhSA!2v-y6W!ML;S!tx2+>;6G!tMFVkl!#jN=$D_;hQf=?pngH-jOfRkrZpxT6gtfGdcJIi!)Xvjj|M2;`~`b0jd=O#~oI zas{)4+!de$5(Kjh1$&qXA+ft|;9pa5Mx;CO>30#z9%qNNlLU+dG>vFuxCucG0Hr~k zq-dKl%)uK9b@4xIrK1TLAd++y#&wx6K7hw68P;UP5* zIPL_%1VNc&XrqKX_sTFci#mTZ1=!p<+1YtFg>(?6?;EmbPIPRO$9rXJC;+l1R7L~w zFA($7m}v_yQM>iw8e@CuGW|bnov0?rF65^o#Mg>fr z>z)#4-)6;u@&OSrJBjG6G!{d!H$L5T;pfK_rU@LV)=p=;C)68F>#Em%&3R}sdFL^| zyeQ4N@^y2y#Nl^F+^GC{8i*FYgFWr6eyep6RaA_spel}o->Q46EFX{Gk%8I?l2zPX z_{(G^uBrd(LeZVb-j&d?NnaKWlMl@}uIS+inYBtpY4do|BOV2@6ksK&GNwE*rZ_IN zQfL)Vk_+3EKozTWsgmfw19aGmKGRcji|Y0`t#&!nRj;`Wk=(_=T(ybvCU{h2+PDnb z(c0dKKE9RpG$tjONL9(kyZzDaes6wq>+rcTE8Q?$_AqM|s~P?=`&h|zQ~h&X-2h(H 
zKfQwT#nqYcDob((QpF1CBMtNxS?%wHXuJlxnXv+e*$Rfio%B3i5zwVFR)q0B4R78Q z&A+?aYxz59yeXEFNFPnBNO)nO6dj_@L!;o>A&c97};1D{^w^)4Qcx=HiSQ) z>SqX&1lFI!K30Qv%&jr(3x<#IX@Eo`GcQI!i)U~=Jw+YUGdu!5&lbCbH z*zr+4yy@@sbV0tpUOo;+rWo!@P&82}Z@o?>YekXD^>9dq&d0mydiB3uOxMn8fmYKRA14EZpWtfXB7!(G9Pxd_au5bs7h&Ju;Lh#Btl6J3^2+ zSJx*lltcR?2>eJ`e_3T#oVR%~sXx^TkpCN~APr%Bl-!z*6o^hG-qY|2@|W@1l|I zCcjw=11FL;YCvCcX95vXQHnmOOeN3)COEjnik13)O$o^CVSuskkMRc1p%Hf&FK1yc z|I{`sD_I1lKdaI~GC?k)h0*!??d;m=AbH&_Q2`1B@-^S`jaATm}#4*F3}q$+ZH42ha20)%iY0F z@{4uG`6=EKVE)}8SKZ0a;mj1nlM;j)&eqM82)6Z_Bl93mS%C{mB5859;dO?@USGHW z$b4390(&ip^Jh(!dSX*Ox-)*hAyp9NZ8J43G%;TNw9V=!s;)+4{*|pEuwB%hm8TPE39AoTh=)yGt5_huo<2!EIcfkpl5^>Z#E%-tHGFrG$saMZLVb?sRG!eqJ zNzc5R@1-6-4WmDC`!S6oSjT|$iyq*bG8z5GJY|8AB-w21uc2RoZ@|s?4RQskI1G+> z33~`J0S^4OPCAJQRJvv_L&jKC`4%3Po@)zqi76-rX!B(qIUr!(vehdkBH6WJ>Teco z(27uuFjN&G|*UzS@=SZ4!?HV+D1MDGgYlV;o~_w+9mE@Bb&J$x60aV!x! zbjrq-xS{6u^e1ndE_Hjj;JagxEswIFa`t`yqR<2P41Jd&z>R>?-IL!q_6nPE<@4i{ zLH*)5yiUC_mj2O-g*0u2WO6%WbOtFMMbiF^nj0;9FCGnGu31f7A-1}&VzK($!vUAO zVS;;yMuubuOyWG}4S;QwsTA4fk&=!$7strOFda}7jZD@1$3x(-v%FCo)n1a|1$P2K zMuRD#r>wCMl}1xGxW-7Zsb9ho81mUo-VkDIkic*w*`Yl+0ZN=jZFW0U2-H!RCwqv! 
zeCUgWsQ%LDjiQt|4Z-nZMCFaC`7LTc0d6Tzx#sDXyW;0GNotg+2Nj{|P&!!}Iw`px zNKy|?>^6FZiEI(**1))KXaxI&N&gCcB&bKWl^3IHJUNz?V4E zT$G78DJUK`6dp7CeL3-*l&_tCfyl5b2&N5y=t16RC{$AgmbQQj z4Y_vIQPF4-X&)Xo+WUN?7xj+`fENj02&Z3tk{>?xU*%icO@$K7lzcdmsFnr42_BA_ z2Rt?Rt#NdGrOKJ@@TTijE#b)*t90#{k!Tnj!c~zojo|=d-9;7Ka}C&Q`jtPy)QJ+o z<5?J;^x{gjWWN5s_u#zMZvPFqnExAaaWMYR@4DBTn)aJw2!5~id&J8k*mUVrz?RS* z;#(-9+hkxHL=(WWb)Wf;l%hqgw>deSN|cD9jmQ@gZ&KXkosW~edhR1tZ_L+GU0HusOSW4kD;w=D zR`Z}Hi?pV9a=zYc7DG;lWF3NufjKlJ$yFL7fHQOl24=ZAL_h837uabx zv9u?*TTX1qwq+%^>&igCUBkN6nfaYAWtnB@vRp)SWmB>7Zqs^r*wH%%Ql7m`scas; zZPV#uGXAzcZS$Ko!kZVd5wJnSt3e95@uN5rNxn>F;tzEp!4{(E1+^&#W-nK}0yb$A z7W!+tNH@j8v>*pN4g$m;dJn#qFsCydk(->upyPP}u~EepyYS=*;2pTd!ymIaM`52yMk@yn!4PHxb9^e zJm!3|vYyZTDZrz>yV`tG+j)!ljF*xF9zYeztezKP9JV#oJTr(+Fog;T8#uuiXprX z1jpSS-8tv}%J4=@bactx*R)gJ7Z4|E8OOfuwRkNbpahG_fR^}#3#;~!oY6FHpTgEJ zoHJDsY!}yN_mXUTGGz(+w7O?DZYJv~D!n!jON&vocc1sDzP8?i``q=kbs~DPoEpJeWIQFy{wVFnuNkX{$t zMm3~^6BL}cuCp~)ma4h@7qyjp(T(;RtgPRz<)_u}kjxWnRezc#AojF98DBuH3l0apeZ#eXhEV`^tZRzGLmN2++L71C%YQM4dYil7*b++q; z5ngqgd|cQ;AS{*?ekFE4bcO!zJN0%FE7{GJm;$1_1db^YAFyrsSL)Qsxl5DgC7Dy6 zM6zM%1SrCCBIFE#b=G+_&cFo?Fb| z(^gO(xji~_GjM$k-aEHkh_IEclJ=Qc8A!)AREak?hTF z9rr#NlEL5^pTsXe1J@(pbJu+A$KP_dgoDhgph%)3BKk17RCK~%ytl2FA=Of8++U+p zhzzs>d!Hr-z_i)Y+`yIrY@%FjybDLob{!zcvfBEK&C;yG)9f*A)P>_V`49RvKqXWw zgpb@uHS0c}0$6mh!e8|z`Rvh!OzJ#4ORw~SJ-8I)aHcti_VMbi^ykSrm#`?!0NXbh zpo5bfuX2~1QdW-0H|zQ%n*DCoUUp&%*k|8y%)-1faTyrk*TCs2F}fIJLSvUA-wS7` zD_!<6w)H!##?usDA1T2w`N>kc3ig>vY%9{vIEiS#`p^0VJF4d2vq z8(2cMMf939Pc~x0SP*RDCx}6v)WX&kX#rwcr%MKg5MZK6s+*ar-o#Xrg^^2+kbM2+ zD>sY)Z4q|>#^t%Qns|fFP3En##%(1X7^|JU+{CR4hh{=FpU7R5{CLR|b$Lf%d!~9E zNek$F942s2H20j|eJ`c$&E_A&KF~ic9@<%}pv=NSLjUT>D#RDn502GnoX*N7!Qx_C z-L;=seIx;u>cLsx4VqN9+eR!chrO4xlN(C%e;7V$8`>$mjdao0AsD^SMks0mj20hF z7T^Pkw)NWkO~=8v%h<3h#rn$hl)WcOkP62nAHf&bjCtVzCNv?C*C|9589YUvOS|Q= zM;8_ zZmv^1J1r$0e;74a)L-AvLEtOwhN4fM(wBITukq>@-^LEF|BcKSOpy6;6`5ILCZwP< zT~^OA$i|585`WtIz#mljMbP$+;qIBccDFb2){*pAxH0cb*L0xxL6INV&+dN;fsZPW 
z#{&J&b8)o0dDrEy*q(=xo13rmT;9PBBfJ)FwECG{4OH7vfjKPk#ob$D5Gkrefws^C zVUV{zg)PEmlc`s7@LvE;p;wasLJt4MO#PSJDGU4mN)G>ONGETyA^jtVcM!G2U?cD4}nfnZo?W2^8IV%-vA zyS#~C-xn_v>APlBRfk;?(PuAwQYxtFl34%hvdT5_w%g{Z?{ELIO))vPua~7VESlgT zW-%&g2`gW++uQv<9=AtM7DLI=pyX8?RMi#R0;RN+J(4ZrJ6s}Trfa%zM{}v6!$Kd-x;?Fg>;5!w+jjh`XSmZNy5BqZdNSN^#18`$i z>tITYZ88vYZ^z%*TRWYDHTZ+Rq`N#=XG(v1cI_dik4sPSd$&mWEReU3@bja^4T)bq z3kvvsJT2q_Bp>y~_kH4we5Cq5YKnhulcU|8k3mJeGYJrsD8d{X>?U1(_BA-!IX`M* z`@C9tz|gf%kkak)6iYelbs1$2f?<-5QKh9z=A0Uo<7NC|hl5I;l)iThc8wYJN=`cf zgeCC<=Y>v&l;+Rw8wn5srwVr`H8|~-EJNVlM)M_ur zKQG1n0=|iP8h9PO_owwv6D?g7_1F^^or!RNQ#7NnbqMalHWRuBw{Oal#hbQS=k^Po zp_&Si3ASR(Yo%P(1NDC8fuIMGHL&}X^|30-}vq+>_LfBC`a{l!DA z7V$Lb&Frtv;l`w8i57eUMyr@vO>Iyv){#hZ9Q$yRu=}N^c#qjP@VWQHYwP+LtsSFP zR8MVAdGPq)dUmz;X7Oi0M_?Yh7b%16#-~?=3`S5C5uGHFmpt;q>_B$3bR4R=W2sdU zkEgP*PHc;3SGPEDnxvYN7zr7wL^mz%u=4DPPK+pcBBn1fz<2&n!s zt|>J9VB8l;B2_fTsm&(*9EX_i7D-Az-$U|wVjGxPSJ!sCy~XzzRymv(HrR*XXBqH{ z(#7GhM?lv*j7EeDn5ww;xVN+Nt$*Tw_rO7k&^cX#F(ZAtPx-u{_P7MX3>7@N{~G8y zkPqlZiou)na>{BQVw&J|#|Z_g0_@%7?yK?(j$W>N@$pIjnNyoI z%5_=9H~_vtN>K8n>ybnD9B|33IA$DwprV`PQG%jTq?^gk&8K8SZZoC&@^krmkN{>A z81U3m0B4af4HLsLi43S|)Yj;^RwNuJAxD$%gm=5S0zPqzY7^nO{M$K%#yQ|9I*C_o zFgw$qoa^v;nzk8qUckjS;}jU`{f12?a@9he@E5JZT%&+u8-R_Q0f%yviADpK-wl2Y zrx+yIY&F)si(AiWHS-5C3KhQ$WAt%yK~qUY6V_0->~bvulw+K@DLm|SWs&_1U>6PT zTDW33(B}x7;az^^J}0L|K=4Fb7eh!W0JaNX*g|4@O1GS`%8HT8z!Spth>C|`bwqU# zcC1snLAc+k#OdBA5kZ%w1==P|-@HcS))Kr4D6VcDPao3*I}Dj5tyPI3=3L5x0fti@ zuUSp#C#A?-Oak6)VUGPytPT?gJLwS4>KY<*9GMoS@X*wx(e=eY{|t&Jw^3EY&{ZnV z0P_HzwNryKB{C3Ild3wXt)_j5vNJY;TBaRJkAc!upsq9GGLNt~g%D%M_+QuORT>cV zhhB9`!YE+2G`d;gj+p|Td@3oj_qCQ46psEPeQ3|)>ZKLPl%uL@d7pYguG3-9rnC5} zPJb<@P^VUo60 z<5SjQ04A|4^i^;H z#9vswmR_uI%BPv-q_WJ)<>XOtTF=_mrK^pBH~E|j{XZR^a)^(3s2vQUHhyGgLPlvM zWQtdFF*gDbV z4XjJ*MIf`Y4crt%yb;B%06K4V$VB+1!#ryfi(U|p1ev=s%M05wen&^b$@G|+7Cf0KxCBEV_)Cc!zhLBZsN4UUkw z1{^o{+S1P00W~>@0YLo5XnQUNxmf97`s6F~h5PQZCn-2OEkX+l& zvp30q!8bpP|NbWqv;KcjSjE%9lz?8|$V%D87K&bufRTaW|CGl%IlB=2BftN*T9*C) 
z$F)>*(+-;*;s3=oFN-#6qf>MCn4zH;ALi(okgJF* z=9HWjalu|RDerN1`s4g*{z?Q~=AR}kX=s1TdoEbA8 za0(GekOnG%$`PtE<`Jk7*CkXda0%Bz)fEvB@N}w#tE&|D45lcok*)y+(q-}{0MBQL z(SXy&kzTVLxiAYP&5SBYZO`!x>fGy_#tISz2d48^vq8|RfkIx)Kn-Tkn7y(1&Je*V0!`VFXYHr8P$kp?-tNga5&_$2a_T$vg zG_POGYVpnU?ke(XFHT=(OxLa1q94;WdL4${Qg0TG+3m?`GH8SI9G&2w{KACjXW|4?rhrRP|uJJ~pPu-p3 z`vPD)Tg7X0;%ZH(gUh|gjxz1=6o&2g0rAH#15s-afMqW9r_GPalDBe&zl*1J-n{Q~ z-P++WO1Yldmoj6CxyH=hhxvoCSNn@QjtaH5laj`*x1#uel=_PgNSOYNN1<7k$Y zvO2pVd$YTP*Lb&Z2c;BK_BKLB^1!D=eIf?i@Sb3et{^|a7=NUJ&Sr#a&pCiB?8y4b zp6q~N3`J2v63A>+Q0OJ|sVgoAwBaxQq*~ht*aJs;+uR<&S;oOQNNobjIY|U`lP{jb zILTeR7rmR zo+lZ=?fL2rpSc~MzR2HA9*;+BewLT>n%->bzVQ8Z0si@UVY{nO|L45x{l&$mCL)|Y<6g}OT*W`tcyH$&f8?Vb9<$gh-hD?|!*aZ=BI)&owfy0w5Ke4@rC z_dzhK*z`VcT6f@AS#kAfem=b#bfBw`lI%ae^Rr{8-%hdx{o7OR{`7jXKVB!+_ktP0 zHMMZzSq7PXSDA=Ok+s2RsH>@x`okb5qNOJXEs(w49I%F}S?~Ew-zT?W~jKis!sx%t`59E^gO8UFJdpjQ;Gk~8r8RGFE_{T z2sG|KgXBKq`YjbChnh9_XSHD&RHxr`oDk? z@W&MY36hMg|3&D_#K8JLUo!tMSv`|bjWs}m=W~*@z4Ay_ktJ2l+s0Yp6hIOa%7jOi z^kn*Z-i)32YWVU67i>Wy+@DZhcC7UyJ+yE@_d}~4_p6Jy*|LkWAggB1M7G` zag(EA-@-Q0r}eNfC`6NA2{W8X^1esLY&-AGk9ZpM0OLkdRZJ?i_K@NQ8+}I?CV*x>hORlVZXyLgN z4;<^#aaGw{xoAZ^5Y?#}HZ{}=tU7qJh!I^YyxEk%wLU|`?E-6WyWNondaSkTf26!o zkuf58Fku;%1C=&^Uo}iT(>8&>WOQFvFS}V{b1f^MJL~j?@ zYm+5qgQxcuoZ)oU1Q|dQ>h2-QzE#UCIR~_w=1D5uU?<7w*-%%I2hIQ~S1mck&%!QX z*!0wCKoWp zgX~;km;?Mc_ngPAT!4D=Fo(%GbC?dtJ1$mM9L6lnMt096!*VbQc#xA$N@y1%oF%t| zDZSl6E}N7J=H?5yl+~AG{@t*F_IS?y8K<$a5Az%&l|rSmKj8lK?qpCw z&_a;(#wNf(7tN0p5QSn;x>>ov*J%^b`+y)qn^sbg7EnmSsTjO+c^3H?aX-eJ6TiZ* zu$2-yTu_@!?V;GLxeIUnp4isNee&^YmSjKV-^y5(^tz+e$BI!~0Ld^v-Zj{a_ETNPB)oR(`K`Xo+t_{a#Bg$*9g58k6MT-QJeqC1yxM zWId>w;jZxB##<{s>VzJe0_y@=leFA+R)Sf+8!Nd#qbXo~#L2h*IwUd~MvqnJZGTtb1cTl$W|N_fal=Mu$^RuS|DU3{6ol~j(iI!|~L;S-2QQgFdgEXkT=%_MTc&b4ry zVjnaV91pV&I1G5{%8HqMtQtD|Mip;L1`7#R>f1dT!PIpBVfPQN_=9-rRn)=bI3oMjWplyBXAXZ%Af zY*|{CQc(*n>ZmdZG&yyvTeJ%Y};lRx@_CFx@_CFX4$rF+xF?TW1ol< zao!JW#uM`=jLbW8j4S&#_6W`{siq+zzD6p9-jF#vTS2*QJ_v7iAaFe42R?65zx`5< 
zBG{w$*i?kF`=HLmC3Iv6!3a`tI=)HR5R#ysvu(^#4pwt6+369q@t2)TU-{ugS&|U4 zj!Az}`$gcsRDH;;ss1t^ALkbIqK`Efb+--n^ERD9@J{W+@2Qc&KDX9LNTX}s<6H7A z!q3M`t|wy%0{u&;C%6QLbAkbsSCSU$vEIG=((?T}0R&xau&{xz#J%wjHKO2aTqcg` zxMW^_?odSw>{fiMtt0df)Sp6LEN6-l+3n~4W6pgUB~?^V>(fRzP&UdZr^iHe zs;nQ*`CxX%{Iof+KW}(oW$v7}VZX~6$|7^1`aZbxyZ^!)F-SaR-hQ_*9KRh^2Gogn zyuyFKI@rGR#jgDOt_ad*LSx1r@^d_Pq2L_G4j5hgoz`uc)fHni^{#&iOsJQHT#ZyT zTFdI%=3~SNW60tOrEGWgo1^{YADp*=-nQ!3mTf*_#zb!F zYu+NRl69?uExtcMR+44$SJHjo&jFAiz9=r9_#1{9^mI2+L3{o9y>D2hXYZt+o8Mg? zCI}ang8K%{Fm8`ZVOqg~$G|ChciDb@U|8?`?2oFhYeDLcd3xBY@91uRI?0nrIMNgJ1JB$_#!G^XsN^IJ|175?h!`7YnrVaYNTi1V1 z7FQiIawl-wrc{2b(01DSoKS6(3aAO$t_l@O$Lq`##fdSt+^;%&rV*Wf_~lMp1!&*6 z%8f$GYM2p4TuY(?lEh8c!aU3q#N_-~EDqa24kG7xcHj?qvu+2A9e=+dK9n|Ml z+T(VaN+QX`ujyNs=-khaax(!n^@ctKWc@M-3H>{CvNv~=Y4L2d+G2L^oi_ndJ@!I2 zprZ(kcTy#i(+_-@Jl>4gdoJBy0N!~HlkK$s&!=4m4n>R!Y<@^9$hAy*zQ~>k=g-!{ zoJj}QG$9faFV49#Vzdz_q&Tow^74-a-HDcnD(ZB{^ZY?7+%6X=D4&HM7FE`V^kiu| z#rk)ptlHDDA7!s6yzU=C46~%Jj_f_08vxz0_edm7hjY~9RcaqknW!fLo5$L}Ps<@4X>i5Ong&zQ z1Vy{DrlMBRq0Zt#@XEu+D1QbqOfYId)!sL2Qq06uA79SqHp2wQP0Xr2?M>qDLf1m0 zs-Xj|o3QI%6?th=MSQBR)mNS8&$H#w=FzPpb@9PxVnUSYHZJX`P+f>);CVw>AW4Mp@HQt`}Ww%bU84bOfB4+914#D^-vZ*SzV*Au#c}bulsOOq)PWd zgmQDm<{Z;^cMoxvr9iczT87flfWe+`a;B z8ayFpV(SFV%&j3u532-MovyDGTw*BzTJTQqbiP2RcY7A?(&i5q;3IrIU*tEt^9f5q(yQ_yT2~KHBOw! 
z5CdV19)DdFTI(8*+C3ja&5XRv^SAiGIxBHwsl2#!9(X zseQs4iVeI%z@vMPQUx5*1xk#vPY^sH#n|?-8j`-SsnKAZs}Qmrm?kD+5X-Ih#%=^TNk{+<>)pyzEJX&q4W^8SegrwNdQFwD|73QpBJjz;`LT&1lo-PQtDK$Y)^OJs=5IDki@ zf>d^;gL!X_VBjK{sUj&02dQO11?!N$kKCdpC8?B5>|0ae{mplfv>mI!V7%}kC&58C zlRAWD6keIWJ|L=5>-)s?N@+DSuIO*p)AG-Hvd)H3#0LEXJ6Vs%XyAi})roKGU7M|{ zFVw})^okqJhNw3YrxU$@fKN9aUTZ?Z8P4T9zN~)4uNTx5;Ffl#Texjl7S4?Q+-nSA zuYNzD0yr&}>Mia>u>5Hi3bbunHqqXvKQ-p5y3{Ysf(ef$=3hmC$N~#|T3c5h;2m~@ zc{Hn*A=9Pf;%fHn*svHKLzD$skJO5*XC3lc?^_9Uq>$gp2shgT@>}ld9dexq5~)Cf zZltf}V4TT7wQ3+yh*v_DgXOmF8!iLCNAf<5+|hz!h>`G7Pp1e!6(Gi(6tB>U-d^FD zj)$g9DI9y~$!-ESFMSd+b4S10<2qp-A2{1`7pFV3u&yUM75`xBv|*IEP)?4IFB!ks zk|cQSm|Rpk{78U0Wzha%m(}tLEiC3jSImF~Bd5Z;kDK>?)u2*2d6B`5xVN)C0;uGS zoZsw0QrtLKF-Z22Y`Ni%E5!c=4>8nz>Y_qkgLhqLTZ6#Yjs3G=y)4#t+gKYsQSnpz zQ3A0FoKRQN467CU(GS-fB<(y4$~%j>-Hg)tEV4BxYeaY0ae=D5i#{`0{+rhlZ9=*5 zb&=B7yion?GI~E-K_8iYkdGW~l5}rM>Phgt8Go)V?d^amP-(p&z|-3 z_}$gC%?eviH1>dNd$$Eti^PxBG19ri| z#|Ry}Wq_lwyyG^MS3WY>QbUox2@NHteMJ4TNwlHt_YpY>p?D2nQU0jLXGj?5jJR86 z?gg6JN{t!q)X`@8gea|3o3m;IKpLjWiGvXoal0gLfv~U_cf9x7uyd_KKo zhmYg$*2puCbY)13Y=Yn_cmA&s;ogcaf8IpE&YW60$U=NdMHsSToC6nTcIVqk z0;6Uzh5F};6L{VoxGCkmIz|C=vuec&-|}zWn#72O7|@L-(@z%Ed8vWO;mZDh*TC>% zp_w^#)r42gl#!iDp#}=_xQt0YtfiO}U2A880%@19}boS9&*4w|~3VbQ5u|GilTGtPd# zu)@M6>54W~&{O{XAZ7=XqryXN{wra?uHpkl9^Rzc&G_v=ckLdD(Iw`x@CNwsv9|wb z62Y_}qikP-nH@oIJa)S#_G&zoE4~!6IUm6H=u=))gKxe5?ku?|3qtSr^>v?``GKfL zcw<*XpILr|5AD*ax=HMjCERi^Ck(J867qeDxVax8sa@H%D^lyGhQ$K*2=o0N95~0A zVg`=g56A$~N|Rt);JbmK$*X)qpQll8k@+)YRJN{)pTPj3LkNE`-l{av$Iew92CUh= z!dLsFy?C=e`)7nOs__zGovpCxBb&?lG16QZylaSw;zWY@;vjC@3BItvScA`XMsDQ0 z8V-+ccN!oVwJEkQiXYMQw%zQum7!T;2Yybn2Qf;nUB5}Jt00_^pJ!P3XC#ORmh3^Q zGGp}p8pWBEhK=B~nAq6%0*gSFch0h<-Dhb}{iMPiXx;fUgz0Hlo=xx6`6>^zT{HU& zwo+|{=|4n`>p#{sOsp*b>ov{4nackSoc+V!Vw7Vkk&)L6GO#7Ef=7UzVbH}r_tvxm zH2DCYoURC8SNM))KHTD+=GmO9eCFlITzJ!wgj`YDNaDI#^|wD;pO~)w(3?x$OL95g7Ey>< zxLK!=oe4-d6}j+^8ZM+za9#XTWcMK_SPa>&lHJ*npxe22DqDATA%uV=(FRAX%k{go zznwh&fyWnjK>)?t3!ca_Y-v+m 
z!mYk?oUJmEB*L?roD-ER>Z^xV?VnVRFJB@*r`*GbnK>@L!JDI8z?sd%QvIF64e8Z_ zmeLV&`3ZqV0?X%>{G{Zibz(m8v58Hr&iCrZKGa(}s!BZ_;(9(9;y8d;Qt0)VUb}O2 z0>HC=3)!8%falF@Z@XT@{|6iP_S8Xe*Xa@AGd@UAfmw!#3QzsTXg_ff8?-mlHZEu1 zLGz-X^Svix^bsvSmWw20mzSsMnt*mM$)On%+N1<%p$r-&#nuQ?y@7d4k$!mG`PCO8 z6l=#GHi)jf%KCL45(;SeM%jfqd@>0-}i| z9MQ0YMo=%`BZ)GQDOHVu1>R*auq+9Eh@U(_T%2yy;g=6VZA)!c?7 z5EJJ*T^O?@Xjo*427q)b+g*vWt|0gu8{9Ys^l?j4w=C*-*3B!%hB2Jx%v#{jg3Q`( z4|HXf+))&!3bgFAGqhw}{Vo#jfoCENcwUc&y0z;vTA7%;hO13<_4+He7|s&Fwgb1w zbL2kgn_yxZ<6AvWp=?EmjP>rBY8zwJ2v~Q?lNiQ}#xXhXNote1cYa&bZ08(8FBQVx z+zli?7DF^g{}8jqqC88Qp7n$R=?zQh5L(>V0hvmvY#sFBAa;T*z5-cb-S?rzhs{4KVH+E%>V1& z^`Fc;ZTz2H@$H|AaHRdA5!NeNZW|@H6278tF0NS;I;MC@L@9Dtmam(wArLgFh4AY2 zoRTH6ey4hc%NG4zx~^U?7qrzMLWP)|E|q{0E}15b?Xfn@I~{v!gpUotdXY_i+LTGr7b0g8!+Hh%cko{JqW0hqSRlo-zHY8&z1*;>U$29Ncj| zwZ^uv&+~%(ZxT9-A?&CTU-@iA7Hia6kLSgC^HTwuj3I!A;@XF!Su4^}|)jIJZP2a_Z|b&@8T?CEN&DM^)Gj@naizuO-0*#&`5 z9xmvXKV=WJv;Gha>PYsis3RkDUxndsBra*OwQZW%d1T*~(DmeS>JOu31aZpsrrGZo zuLti?kw63YocEnA4KkTCA(uOEI~{e9BY~6)_Re>OoTw*g-N_fe_k}HvY4T0sxUb`3 zqcNF<1vk5&>=;XE+6suEFZqmbSLf4eb_WdEH8aGWGzwV@52Im+GxX=3bIrpSMMjrH(te=Wor$~g2tM#g=e}2)e7!qS2^5rP^F%RPbXASpnp4nMW4#2Bj2bsSF`3gls`FP7JGn;uUv`LP3Am9-1!F-m zji=1};1h~EGPAj;KdhXL41)X6ZEG#yE$@cOKQmbx3$cK|?9e&RLPwbkuDariyU^8u zgZ6>Bc7e^s69$6V;UAx_5C$$eUyJgZFn6J9$Zdf_{CZGK?4(rxYWiV>Iu7|DV}WVvV`K4OwPUB@AYknw7nX`eQw1 z>L0!6C21}jiF|A~*GUFtD4dH@gxiwCZ4=dLNbtdQtyZT`(Aj_2DF5%D4_scy%oR4A zzJr6WpIU;UcG!3G+bo9P_p90hfo-$e>6MWNJ~R5rG?1A~PB-B#KYLo7>;yf})xU)= zkYuno(lH0R=7BV<-h)aNp6?MJF;$+EpfwcrK5+)ST~KC$v!tpxSUFry0eG-Ad3OhZ zRk#^keol`?(QFI{z*df=8Kj5<;d_7%Q>hk_Qq*IbA4k}8J%FIr$u-{HPJ5KZF_xFL)abd zz5amO^eyfELcappfmr7vz|ZnJO^8tDY>J=6&#Wj*zL^g7R(h z|1Y`BjQ=$*;{3P6{y)F}afw?yn>Z3Nh+7*tn~0hi*%_O_@bkksIXjvd*uc22|4+#+ z`adPt-nfB)?i9Fy-Ahtwjh9|4`Sth-ILH(t6j`}<^`hvxr&J4t#s6+YzM7-kf3Zn( ztJ?O=3fWzT;ntT6YV%{HFsOwP>jfo2l=VIaLst?6CKrU^QJ7u|Pl*AADPPuI!s=#K zK(kzY>NpFgXMOTm<-J0FqhLOo+_ZdH{4lqFWYw)I#tGy7si0zox`lTZu(Qz6l*DVJ 
zyrD$iP@9|+kOpuNZmG`HN=;2!tf5vsJv@C0IFE|wOwb(!&!4wM0P&i~iv5gKjh_;% zO}+xqnBZPCy-ffX-(>A)Z8HJm@X}LSuQpvSmljtza#~B&6nSd>Upr0Y^vUe5rANd_ zqEHoXmz9=U$14>GcH-G9$|@gtgsQ;ZMp192E)^_ARf6rmV;p+cuHga+%3%6;e0Qxq z1E>jX_nrZ|965ecF9)fA}M8k}E ziPd5D6nA);3W^x7wiXL$ zGB!~u`m7Gi{gcE}!FgpKf+$?`;$%_wQq|l7lUPqJHqC$GNkPdIzzlInkm~&)=*Y$yd<7-uee#vr)4niaE8WakL_d#&ATKwWAbbnApfdKYQ0=i{(`P9cW1c=ldmf46 zj?g`PMgJ9^99Yh#Z2|QYFIg10U%-jmTN`8)XBs|tUqL>5V5brrE|@xB9CpLs!ITJu z+)n~OQh#Qsg76uhk0m2|-m6k(l9#;jXLJ`lDb6l&aECL>nEO=i`a3n(a%94)iiSQuXlra1;<$&T$ERFieUEv7^ zUi!P0B_Oo{w9cCC+ZagNm6jVUjEb6AtEh9r@ypKWwQu@&PHaiq=6p!Tw})scN64*6 zRSjYWEW)J_;~X0e_=a;;nsyQMvi>13>g}%j@0wV-g9qX1M95nQuXUjZ8h8qO|K9MC z2lWmyvp?rA#4(WlhLw$PO=pTLqV<&l+|cejiYZf5s*Cc%y->;W{R*a=S&)lW5ZxRu z%~k*US3dAg-GKCu>CiAqshbK&SkQiV&ZLcY@59FjcA(yLvN-ozlGw)M0Q z_hP4F;4IdbK_`l!;b00|Z~;qH>hK5c=_TYgq{Y20-_SA{U z#ZvWGXOdaQY3EpHixgGEZRK8i($n6i$b8tI><$nFSsHCe2?L345GP8IjCF0?fS#d& zaL}~Ep?HE_-}jMmg0Byl)`M(YvMD|k_tCFy!tDjw})AKt!Q_FruFS&Zr`U6@_5I*O8Z$LGKTay+a{PKcpN^F(H2M=HMH?%hq8b$i)i@7WYTzQ< z(!{t;h0oWPTwU=Gr@V<#k&DRruST3ZXlU6h*!cYriffKZ5Nzwa8am!UG633Z%pDh8 zRky_eIkl!i4vOv;7uD@ut20g)Ev*v`P1(}Uj(1;@`Xt(zNm`{0Xb}*e^nJ?L-PYBr z>hn*`4#S5KcIMVB+_-jE(tV<#0Qjne}aGhkoL^GX46BWXq(4Z5FKTaw;i#!{KEwVR+KR!l)EPEe0@%SMG6{+SV{FkA9^sDmRU{C?obW|EW1ig2x zJ+hf}u9+IT7%hgxC3S2#=6*S|!yBVPgz{s}g(a-69g4Gbd~jcltHTFzZ94>XNF;1f zDIHwrR3r#bN{(&(C8%!*KK(i>;Z+TId@rbB3~(+S@QpdvZ46|>u8G<~MkRe=`0LP} zfp;BY_8?r-QH#h)5D#P$ObxpQY``ycyjEUw`r9wp7-1QzAh~lKtnQe%%D5p%z)!iI zDP$7iOYR9rs`gnru#Uv|8P1^;37?idQD4A~!u?~$Dm9Yl>zy?Pn*N2nJ)Z1m!0okl zKEGXY$lryN+Qg-4jbN;-mbP2Ot<8&4Tids1>$_dEy-i}@sH-6v0N+nfz0V`o#zd;M z*B#sSjdHB#7$9|K?)jxiF}ST62*NUHpAM`D8DRoO=yVuaGxhA%xv-xAQsum)c#qHC zvTk7jd&NLt9gZoTV!?hN=9O}5zPI?mkK>HJPa*Ey`{l;YJi5W+f?mYk8)WJXi9H!= z6ONcJ71bj8bd7^ABEakbwWPz#Q4<-Y=x+}61|(ziZd?&8f3_0?9+qiD7 zdN5w9IGo3>xziggKdjkQy85I?S zQZl~(m?+EWRDpJ*VyR^sxrGJT{+#YnR&ev$`W2_+MRN&H$K2YX<*=J-kYcK$$>Hwm@qkyaM8}ghyuP_fu z00N5`Pi2J`#`9e1Erq|dzuSE`CE(M^7js5E*vj>l&-)`x&ZF=YY3g7^o>IcPnMf{% 
zWs&XAkT0f403ufWWo2qc-8Si_bUkO3m%PW?cMbD+z}>@3aj6%I!+Syd>x8m$!~NLD zdzI+Z(}jpnAm+me{ny1CDyA?qe0d+5XijtJP$LcD5d$I&{V6RTX#bA64Z5OhSI5 z1Z~&XdI5rk9%sKx^718liu@~8k61ZIw#i3(W7`9BvA{v5=qL`|otQ4(&gb=IfI&00 z*S#6N0`x-A$cHZTN_PJMC^eL{h|Z8VlxUQIUL*B~#=B>5#G#=1(___-CYQ=}e#}^m z2P7e4UnrbZUr8hR1`xeUCJEBf>gzzuGU$2}X{haOTroa)aUm5)LD%>!9v?2>EIzR>+P zUBB@@#_UoDc-Y0Am#E~H8_Mn-E@xFh_$+=*(c4!lMlUD&p!QP1qc3RFTu2$A(w-B@ zar`2iIcM5VvO4WPDV^6hRs6$W$pHuh47`z=_=D26R-W^HUS!&+sn$S91hw^8pU;lE z+7}0gkvoy@EkeRmcyVxNt&HdC?lrABoviisZ{pb}_4GX&XP@O+woe!xJrrjj6(P$g z*8>3`l}rZoB`@q9KlqD)g%3=e<(phJF-z^SRkfmRot~ct)l7<&IyVi| z6R%^A*XiZE>|BDk&Tsdx(@T#-9%1)?NPy|5VPu2TVyR;?hjVa)OKr7j+!tx~_Kzmb zP>V};F)eEo(hXHg?M6?xuZgt>Mn^{mg+Q=>xd4VJ7Bf0#hDk?5`>sTL9=JV#r3c47 z9BIqq74@h#D1(=$)iCUbyj@2DaeDx-jSyNr++j0cEa%#_p{GSC*|+q zqEXBt%%&vIScWuNyBKHO=y21`epKmqnXf?7FK>n!eMkd)(JT)s93F=p4-bTEYk%Jo zkGR2_bBissu6X=w)E84mXQ+}#5z=(9qy*8~8#Rz>6kBLqe%kej)91Rw8r{Af{D_3? z7EOsd4}&nrq_y)Js`Gc$of|KGh<$2Ss6_#05*a=L65Oa?C!4)fllWgPFiLJj2dBnO z;hWk`xpxo!K31avtlI6Iyy+mdhkuBFe50<>T)A6Ce4O1X*RN=kTuV1Rsh%&}ZWA?@ zAr*y5v@Gw~k-q1O#fm3EbKi8cLgn#_b4JI^)YiEx6Y$R|9A7??ZEY>CM1 zv0jmE;6n2uw~)z2BiTI_YHnZ(ohQ7jG&&2D<^E>xC&qNSs<;>K|Gqpz2E&Czbc}Iz zp8rx4v03as-eovPCWUPmu}{9dK|lJ+kkz3F*j11Hka#D#@goy9W11Kb{6#Yf2R!j5 zh4Zzhdqym2Ido&HE{)|lIWh0I>iLd^m7_3{?dT5_>tiyA6vlWpRL^hQiy{6Ubsb{T zp^J>KpnVoAhtv4(Sx4d}4o@X62L}pN5~z@tuNmr0z<;DuO+z+o<*v($03?}py(j9W z*OV3cUW6Jb_)I_Ki*kKo(-$(fJXtOLC}E<%gJ7*=L}qRaG8~I+z!_vSqgl2!O(7uE zi6aB(0SwV|D~`U=#U)6d=1fCTm~ufmul;o^0?j=pvQUl}Bp*^z+VK zW_Th3!PK0>#f+%BX<_w4>tItFto?IjI5DUdBJMh~q=BWjNaDx}ko^@|w{wnE+^72J z_~!v0m9CvO@zc_6SHudWz)#9nxTz>JcIk*s&B|dozuGY0n9k&F21k9jp$oB1f&Cqy z*l^PQwDI#+POF2tueuS6`#`mE0yMMO7-pAQkA4AVQjHn(D98D1^}*-<(bzTYI{q%> z*v4uOF}^wri|l_!jGR{jysSA96fgD)W{GZV{Co62nx%OYx#3!qTopRiB!`$dp^en)3sK;}Xn$z~Ix)tn&xf zk;>FH(D8pi2Poofj5`9!du5pIx*!6LdaHI!iUUYT9T~DAj*&$X^(R+R@q`qRPIQ00 zYVAer0`FtLwM|KB2Vk#Tnk@}0D6T$uW{Z}aXMNbZ% zC<&Cvr&osrtf*Zg)6k9XgIzFkC8{!so``O-SMiRn2^Wol_0Wy(bV?<#x|K~w;(BoP 
zQV&!Fa>0p2E3lbzQ!-8;zp>LHf&id|AyO!7%vkU;!lPkej+Fj5yYhdQ+dF*_ErslY z`U+1cQP=HMD&M!1ak1?g8f3;gQX;~9^M;r`E=Kr22lu6g)!0{&<50p?CRzk&(_R%Y zOLrCuh8IR+O@P}C|P>nuHP3IFANO2U4A|e{5oWG{3}1g0`~>l zoc`&q_u(AMx1Olu~UD(P{86}sk{~>ToTJdbl1?g7Jg~fXOV`^|J5G3%vovBXGv}CH; zZCCOllUOy;Crt4pE19ZlP!!iR2%j4_%J7#EZoalPcIj1{6fY>_B9J{Tzm7EzfOh=_ zv{VYP_t2j7crd8W8_Q`V-CdHPswP*7WskMz7mB$ylfJe7DVFB2PZ+o;l8tU8gEJBo zD!#g+w14wUND;OiHTYS|(;2RF(_K+azRYkWy-!Po$T z*#Q5VfXtNrzK?Z2Lg;k}1pZLa0FWLJPXZ(3>Wz&nZ%Mn`I2NBD*`2jO5oa84_xqwM z=Yk9B33RUg4L{D#1K$|^PPhW;@^T|fMh0hC+P9>O&Dbt6%rCsFnnoUoIX=8eszMG< zA#Dk7_|}?3mp_Xvf;@ZJhwPc}oKdfy1zPg3c++7e4vsTgk5P$ChYZBxJyB^`W~wFy z`J6qtBoKSg33Czi*ev){11;bbU?@t4%(rcX9?AI<93Y~4z4g@38 zDzJ&mkUXaU3?6O`g)RhV=z1%xn8=Gz3FSWx>=;0$T0gy2vt&7=|X(nL?Hqb^Fn`Y%!?dLu9$ z`FQoq9KH`QHE_Uk=hePk0f$n0aVTZSyX1V;1&G+ppQENI5EzCyMcp0E?Cu# z8)*gK)-E`;5?6sfg;^Jr&9{~)xi`@GwjCoP`xV#--2+&}OU#q44knQa1*B!nWd0TA zE9E6~Kl^rE+ziE%PV@OuFIPV1tpPkmU7w7O0<_BSTJ z5S=RbdD*-LRTSO4ZC~Y^JIl@B+Oh7ZF&#@exrT@H@(kR^6#l6$lyrJ|pvWz5;~0QV z{Z})QlLRZl4ktEsyP7Z>Ns^$vfAyGMfY(m?SSz(jgYHdG~}wlNHP+*KZ+GwJA^*d8qAc-I18F z84oBNLXYV)=*GgAtCJXqy3%}JtXLNaQPGHl|D>&{h0+rcYzwo+%O?#)~hZmO%Rc%1wDnM zcn_iv?TS=?Ubp8^(-({v8W^-ORC0D*cDJ6GWF2dq2^>b}@@}Ruy(%X`=F4w&Dw7b2 zoQsx08Ns4E;vq%HGt$PT5t|B>>hJpGEE5;+xu~e{Pri+Z1LdJs6&kiR5LQ zHFP=#nuEenM^!C%KmPJD|KqU-S|`)1fF;!5tQ_7W6vOec@z#soyDmWsCnU?&Ym(%t zsBRa1i54Fq2N4o z@Jq?Ckfl3%ysNRXj-~d&c#k?Ol%0Rguu*4NpujqvNN%e;14+0)GrV%EGQSp28;o*A zOnW$8wIa*8C5E~bhE40Wu4rtjy)bb`hihk5(Q}~f>)OmoQ#E0k9|7BZoP1xqzg94~ zaR4r9_;=eAD^P758c~s#^nM3axC~5wgLcEJaO6m;%{Si&ZA+waH#UP5RNx6sD2u#} zHZv{;;b2jxOJXp81oOS+f&;U>eW@z+j7YeaYT#Pwee_c+DL z|JX5s%;f-m(wR@UlbSu7Ppf*P-#j&|9qTRt&&`tn5gQWXXwdEG;q2~wJZw1dDKx`b z0T*M_3*Kn&717MfPXB4CEt~I!715oWFbV-!!TV`+nH*8@rv`(?h&rr>@GPpcS8;z>>kNQ6J_R-k{I_L0CT30aaNJMIl0 z!=iI4I3&a$3n>o!PK$LbFQSC?Cu-wz8HY)~r9~|&2M{nN1i<-t*}?bc?q(BjX~pm0 z7G8mP?~w>rL5QKju3*0HhZm{B{P7713g${Y0;A%Q>DiDlIRlwiKV6gl%D$Th!|z1Z z9sgdlw+@xkYh!~B5{1+zmh%(04P_gtao4f^VVBNB345g^Z{H(0I{B?%OQZD=hY605tv)z1?2m32oZh| 
zXP{o=-00)OwGSq$jD+1wye#?92*^5b-n4Xf%1h?nc8u06J&IB1*>n`X7_s7VH?G)m zsGK`8fV!4&#VfZ9wAH#kL;Av@j;`C<7VNStM97M!-x!#i-nLmc@}Q$^d1we6 z(D4CuXgO`6QkY3Hp(s-8n^AA}*(`4s`SeF&7Wc5w(0D~Jc0t;Y=&pB#taDJ9`8_Px z1BI?U{|i|jKYIuOy6$GNO2b6xUh?p{Eb+{>R#|~LT#|HSvf1DY43j{vVTlqMMG*zr zG40|Yy@J$5Bahenerh6kyG}6X^emM76^&}6Fg?6d)pq2rW{t<&>eQ%B1Ss^FOXJ!i zBg-zygWELAZ^To-9ovW9wdm!|o#ccCCPKqaua;IjBND3EGNQ>91yh$((_NqbAIkoL zS=4aZ!b7KR+cwvn`L6|ti)2A%j$8bAQyEafn-%(>h*DHuU;)U$1 zl*f-pL}C3r;Rs)`HcuF6DIP|%Tu8c|8M+?brs_xm;3_1@P;$XQ5s&+OX1J6)L<$7P z!a$ws46r&*3hPnpfLHY2c0PBH8VxNd5@7^$7H>u2ccV_WJ`zL{HDb|c4zZ=szWC5R zfaYh}Df*EYOccw64H?>I->Em1VWwqqRs~=jlGzQBJ0;GL+zm!E85gPjcTe%A)vc=7BP@SvEDC+Imt%^Ns5E>81uOc<$n}In!u(S477S+g2 zf_F0>W->C#mX=tLawj;}9$2Kl<%bq{N4bxpiW_DwM>y;YsVYe3s0?%cgOv<(GGB27 z8cRO8Md^YkH^u;BfJiUnfWaTpj0qeDXrqsO#075$G|c4(ddlpRfW8bME8v6>0g-%6 z38|DnHZ&Gn1#@F56#m#}YM#(7Z=8vVVK5=P1dwK80T)qKhSx|G(Lh9b!4?gQVho;+ zx`Sa5A^-tASwvw{Whmb;W^iW!gt1AU@-L-vhBM5sA6I~p3BwJ7G;InL2p+f-JS`|n ziB1#fG)MqJQl2i55K}%4226hp5eSyr_D*PSJTr&+%akCFhEk4bAcEY0vB9pm)q?+z zAwVTE&KYnU($!g^75Y{_x~{<{uhI~Jm&1VS80D-gh5ncVg#f5gP{1BTKU^3BZb)!~ zN}Vo46yTK*TJ4yBK=0n3Xuxw|s5ittqTdJr3@}jy4u&W(3MAVsaKO7D3N|s#p09pa zKmQj1BVnREL?8)@ohdbn-3@GTSZsIx-ky2DKNAwnj4shSG-&>o=Ncxe2&Eu`82O~Z z`k4r0RTLl{(UAyDPb@Ud(6Sxs00LL?QrJ3NObpmEv~z^G=tj57GQRIm@S6)KVV^!} zHUeeiv6IIiw}&I$y4fn8DG1p=Tq73o1^ViciIo_CEzoT%L|Rw0-`4mxQbTiO&avWj%#RnIS-v& zInmjS*^4DVzbQeT0_HGG+B#K~a)oRVhSTphAdq^@_$o1?}>gvVx7!J02k`R=1Ub>%RnmK^wC0P*fy8rt%`3 zEM*1M@D**9`fGVg4kr2&MaN6;1{U23<9|7P#54~OuU`HPahg7sKIemp3yjOc|+zkfz*6p*e~I zRy|cVH)DazKrAuQ&ukXReZ(NWm|2P!&1`8no`V^;rxt41r536vZPWl(E%g?n>97jB z+J&z$n?C5$ed?Ohc=mj5@Q8u@*kg|84cYKk%rB1Zw@4vNYKXBjj#4PJ0KGAO&j+5p zST=rK=Hjhf|lKJLE8XSmTN ztEcpQMbUNHr4n4nx3#DDlV`1`%X+sd)m!Z$b2{*0)4dSWwK6Ou(O&9>(PHb}&N-k` zaEGsV^W^phxoH{M%5~$^od=t4)3WJ|?2r=4k)=yrcIMJx6-G?z>kC>WWB90-_dPUM z9zW!JW8W=o`Ek5Sw+VaP^DwF)Z^D!0magv952DFLU#v*TCrMh5$LZ$$0&hbyah)^f z-!+WEF@H~Jl7|hj=QT$lHl#h~TA@pja!7HI*@uDks~VdX%Z4kLP4i`s^K-szlNM|C 
zRX%IBmAlMt-;NhDVRk#hRn>gRcC5qH&5HGNf#WXgdF3md3ydun8SYhDSE$iTS!8EF zLf;W|KiVGdYJT=!Tz<%Ous(OV66Ts#sFnty*{H4~6-#!c?5E-)VoujR?HOZFC3hDG ziI~>?#Z>kX`XlKw_9WBRG?B8AfX|*EQnyLEiChWSlNplQ{zX8f{8;lIitSe;%%*B}v>j{Jxmy z^8a1VnEn%Cf{Bs&zc=V=ZvP`pApP4=<%_+$Oo!zbY-sIubldm?+xXkSJ`s)w7L0}0 zwIpjkm{x05D_+I zdRVN3;9!p(Zd?V{DF=)Q8#lB@<}(O@m?^|oLQEVa@lgJSkun8ZLn7)cinT1MVlk7% zT98J{UML%FmSM(9IPS-ul3tuL7cFnZWIQ>kgD9H<_=P`PSZKui-dA9l7NEP6IJs7@ z5OckXBmeZ2kSJgR@H1eSOCYJ!6Ty(;!h3o1|1ia-U}4!r3tljKW~5656}aycepqKM z00f`qM}&aP8oM#K80+?hX*-JreDz7N6~AnMh5w^T%0C}39&Y*s3Wfvr5xxY?o(mk3 zijXq2C{SQ@xxWu-5zFz1R0Aw08F3>$#;NmQuak*iY4aPid7#?xZno&D>2dBuzLWlx z535By5bP6*%G#dbJ>Uy`N$nc|0?p4cG_I;M?u)DrlA*MS>Hvsh9om(%b%-VH*D1rJ zw1<>gs|7E_C9!4ryhyfsu^u;DmiNHvZld|taZx`6gX)#kJEnxlytX!Pp>^L;!WcdZ zGqOHPPy#R>1E6x7DlyX|$EY@Lf<0GfRf^*EwD$UqiC#2o1poHmr>Co7+1O#VQK4fH zivXb8E9K9`bIg)JL&~z>(v+LFkVAT3@U^wkT0(1kA3aj?7f^DOF&}EK%@1mhT^-dA zR@pDcs4$qc00rs{7dqZ$lHW0r%_iL>TJYv8`GmrU^>5{-h6yKuHp=^rkn?7qNxb~8 z;=V_UHEEV!VlTXr+=X}FC~${>1SDfa>q4eHXeuJv;;C8~s2@PJ1`%hVg$PC2DJG3x zUN^_!p;fB;e2NXX&WhvaPdhJy`b5FazdJhK?QtONoT`{x-OgJ!;*^ofk(db$)>^c= z`F{g`4h2F&))q1qe}+`I)Ccib^F=+ zCPx~C^Y@ngtpM&W0g8#rF~VtT3jFof@sRMv6yz*&_4$PfRZ>B@MAPNtalk=?<)`#G0O{@Q#R#x znIr3fSilFLr~ketzz}+pBEw%t zS4818d&EcT1AwNkA<-qssbow^F7iu{o5_&->=jsC zvqKalEYDWQHmHnr+uQs!6;LosY$=TwL9Wca`=bu-wL!wt=gm0RubvAXJOLqDT;=>VyhmfS8-5@tcfcpgaV@Ne%aYJlw7Tx@V;*;)oK03r7lf zg4bE$qjiz3CIFH#q=lwj25KK6cb2pL+pKsPYUX(BD#0xb#1=L`0~Lgap$PTyn+^Id&SSneHVUEU5<+bP(ipnEBk-YE<%{Gn4M! 
zUEuL7+V7@T@*fT;tINo}(M@48aLM;FF>hZi4RAq8mIk@e-3|26j?{YUhVg7M(odn)`M~YU+`I6pM9(5>w`=fNX(A>)QD|M$=$6ve&XaPyu%Oyi4 z1l{BRDE<9jOIaLQtLe%xs;nPL5+D>~q!U7z)h1(KlLR29aIb8E$%GOFE#2<2PI+kB zwWMed%wK!Nf;ycwCh*vt~9 zFtBqi&<+sp2E$r=P}9cEgBwknc+XWz%E;= zQslFWvqnIR;~Ru(;Tyvq57dAE;vff(OQv9_NXBW+2z)*rP^S5HXj_?O;Mcv*JKJw~ z{&QnIt5z5GdObK>DCKAO2RZ*OD37oTy8Y7P?l6+xo!vHuCyTy%?@b+)QJG(U3mozg zJjc0y_Q@B<3|)+VSLi^NNH!FSE>?9Pfcy!0=c!oe{lk`m5Ayh24`srLY6?H$)9>fE zA8$Y4^R#jJG3$8psq{Q@XXpGr-S&Rj-c8&S*@kQ9WLV6~Dx;J$({2^wnh)^}pO1#P zlA~uLlJ@F38-Y$*~1dQ@$=fKw)t1u(7aGrcWv3tw8!?ep%F-&8dCgL7C;YGE43om ziIkOu%~=N^Zf=OrT+9g+av^(0HpEtCFhw-NNF#(rOR}Pvn!k}`vI$j&tvH>2+H;cq zn%?v__P`%-inC7~(*o{8nA;bGEUyN2avz2@u>*yK?bf9YD+1Q)hBE9ScS3<4m)R#Y zXs55}9ey5_L|iRnvc&X-F7{*!M>t9ceJH02(oUR-_stIGRiyCYg(^^>5o!(+zMk(d!i{m! z9|8CRZY#~*U!)1KCQ5{e?|XQvmqKJm=``bId1aZx{sxP>9_C$;xV1)F%(1Y53aS_j zOE;T|j6l(6@dRh7e;aXUyYDlbM1-7I6J)=gs#}cirRatt`b$mv83Ik~Ms)h~iv^R* z6N@_voJx)&!2HWJ&R};`VO{mNG2EEF;M8LujIG{&I`a=1=cNkFA&neC*G++Cp{A8= zZJTWh@tjre=|EcLl~QemX4Je!D%u7DO@-#M?ExBl9@F5MKP8>}ag;+GpQ^@IfxuK7 zHwpc=WCcymfnhgyfmJA-=M$bXtvees;KfgqUj3n}gQV$V356IHlw%x9C>FUl>;jm2n?I(+O0fr53W;48^8ru7h;UHoLI z`$|ytg#bU6k>mO>xW?0MEBGrRs!xY81#rut`sl#UQu+g?`AY*d_o7q!T&Kc9GmaK@ zW^Sq3rZrq?11ZK$aBY)wqx6^dCUs*V3p|LI)c-)3m7xs#u4h)PBD_pZ8yGdM`6O zH!mv}>XL)e(P{S3Y05CC=7;n`ZX*5&W@YD^UA_^9Pi}b`xew>%mDwCG<$Jn=i4POZka`Er64j*Po@I)o>f4hhfs81(8-DTQQ;0TqYsY3 zk6NFfTEF}WmJix~t#i9Cc`$%*WY@d1gx5TcDq}^(K6sa^0$czTkhd?BgnA^3o?zw+ z*7Ll)0OM>nvOx2>xag=^3kkfk5;^squxQbD;Ng154^6Cm{ZCTE_Wv62{m%f8k>S5) z?j5UZ*dB=gUsAJ%&JcJHX_`=lB9HMe57&idKUyT-2;01F8?TjTO83|M^JyL7p^m;2 z%VJA;^>eJ3PtDLt}s>ZORh0F%FB3^9^dAqGV^|61$A)FG{%Ukvf5BRxu ze=0tvx`}XfaRxvEH*|MfK4&u5u=9@IG1CiDT+dB?Y9Ra43s1hebaT21(=?F3@#qo~ zJ)FYD`xaEG%cSffqnaN_FEs*RpY>)7G#CkoQUoRN*g4zwyi|Sa^_T0_*=@HpZSlXb z*D7;ys(bR}X_iepEC+uSHBNnUM8-Z+afD(ET(%EQHw}tC1rI%OKkQw{APx``#E7Ll zd0SF-VD&EHf)ZHpkdoN>RT^^HiVghBi9?nA&JCw7wHN(x)H zM(x09QhhVK>0R@YLfRTuZ0GnNT7QKE#-n6P#h0%W5s@&>Yji#SB$_W$=>Spm$ywhZ 
z%qvY=5{~an+5~WRabTPxKsXwJ&pAw=1ou5sF8d=uX&E&>9^aobpl61fJ>|wy8y}00 zCAU^ymhryNV%3}WF5XuzfJ%KKUIu~=$?q-wEF*2_9IUnIJ}skCVuMp`i_VQu?nag= zyUfFtG)r1p<)!u6A8;pOku9g%^D4xaKg*?Rd0dk7xW#8NI!bN&ke#wYavr8UF$|Kp}L!qtCl2oqTjt;iiO`w;PJrj#kjfY3I+~@8Hw8+D&@T`=z zZo*d5C}@1%o$ntPA5TZ=XXRoenHN>B!s_;5I}c?92=T1VvE8|Zv7zQaU(Mm=C}})D z(N96!sT8@nz2SpxBjQZ+;l~8!1Tj34^<_?lZ=}a{Y=b9Nev%;H)P(=ewlTEaddm2! zJeLy_j_?T*YaDh!KDQNLk(#o-78`EY9BizoRZ})h4b-(;Ciia6eCPKqSWo9*PK*u6 z4I2JPw;DXOb-aWBHVW+g2bKC~#`=HO5dWi6%>VL6{>#JaX0`tx%S3#cOt9z781vK&NUg(YWy&V~YLqQ(M{HJsAG|<@k;3 zoB7wxF=?H{3extZDG@>oXnXf0v%f_3(Ot)=zay;lPh5EKgAyRhBq0W`J^t;!>|=JU?R)M$kE zd#{+)@)EfMXA3XmQyKtz3(PY)=cGXy< z==LcCGoS70XXSU@!kOrc4plyHi9ih7^sme~Djif-O}Y#uJ>E?Qs}EK9ORKw~-~A^i zyN?Za9lnBXhoinJE`+bf<>4C3mbRKmQ%{PPicY~%TRM8PRd+L|mX1}C$0jzVkKBFy zM^yf$O;sVqE6!Yo&hLEJy*4N z5Knu^t3+Sgmv)Qh5R(31+eq1EmBygOuJkN`mGP|mX0z~JYg3*AW~y7_M)z2iITxWk zaLaX=ko`H@rSw*7wi~Y%EH(6wi;YJGv!Hak$AYv(cCIz=J}`8r@w zx%-~I|EDX?NP5NcD!4?QcWV2w5WTPFppfDIEb0Rv_vk{-(I@UB7Hxp(B zaw=Ye$W2u-;gP>v=pD)*n~mk*YQg(N1qnwrxW0~X_R<_X2HeAW$_mZBVho!II!g(v$(2}R+N85eBE@v%q} zATR=m%F-vNae6S1IN!WRS*P_AlL2eEq3Pf3X*i)Vqda4)&4nckQzT=dq&FzrYG%of zg=io4RssO!jZ2fg0+4o>B!i?fG1ZXHM*@NmNoJxzAAm-FNa>T}^PoHdVJsNqUG^2m zOp2f*4QX;tw869!$O-&^nf;NoI&?v3efn$i0VHz6^`!6QtOUc4`o&8!W-9tjP~Y56 z(Zprr#G^H_#$pysS?+Eut zT0w04jwEK_rDyx@57b98egHs=)aObYoWWaa*{ZIh;Yi4uxvG^zjZ}=pYT0h& zM>;1Se=@O>sY8;i-&#u^cOX+~>Da9G zQTO%Tuu4`>J^qQ3b8n%`;NiiUG}`pu_;PzqWxGw*A^rX*{mUy#S_Z*#uSxZvLeW%lHm z@xDi}QUlH2T>M8O`=H)3STN|2v`BLf0Qu>${;-TKm6z1cJ|=~)M;e(@ zte?b^NM-JsGmnK!sGZDHUW99PBjzRm4*_sHRGMpE%jcVa4AZK0Qdwo>5A0_)Q@61g z96!AKctuqgPS#o9+Xu(0!WtL~+6Lr9vO9|eZ&ZKP(*ui&vy*+Zx3jz_(l-43AO);+ zHI{>=o7AqMksV7*h7c&pF3}(|c+NVuVa;^Bne^F@8#UxUH5SC^2H)|ncscruuHDa2 zY{I@l&xgsb@L)_xm6bs5c*l^L#I9)%w8P_!8h0+v%Qf`z#>08U#B1%33v1pZ;&7Pq z=Y5kBq%0<2fd^=2QAG|or(~}+7rdc~txaTZqRk)#{Kaea)hwX1AHpvTWH|C;?FFFi zytxFkw1C+5kFfDuw65!s>L*i_i@-^ES2b5sXZ{b11Yo#{&bRi~s=^-@kzX_Qsko#T0 zl;|P`+l@QgX?Wq@V$h_JVPo-ygnYy)M`r;b8nk$3;&fy7B*O96_ke;yn)^Y71D3u7 
zp&C}&N>u-I3_(5<#MF*-Si*u{4SwM+!l6$=tAQpn3ZHcsC%=vXqr+t|32@d5kgq7G zS6hM!02{!yuoOjDVJe8ssrx(|yKrAHeJwz?D%=*|&c2-nWi}Jp@Q;3;i7s$ZwIduH z7C%pGd4af)fMFA*g8W9#A2ZI}ge3UPYI!*29VnYcdu{s>W-dB$Enp>OW?2cydKi=} zvMCz4SNL&?PP8t7<#%*4t~mQK%H_prh=}?KHBFBc@~$%@oLXy9f05^Vab?0>`n8=&-|q&chzJDqq|H zejO{n&s?N~aO1fYC?vbyr_Q)Fu-rL2pS>RUc7(i;cvVek?5yziT~HI@53jFjW` zr|ge^EF9DH(I$jldy^9a$!TkjJB#Fu$l`P!Es`HB^mM?w)@JV#$nOsrY&dAF2r`vw zC#IZ=Hh}1$CaTtpeu>)v5cz2V$;O{g~-%s`8tRip`41$g(zL;Rmfk6?n0*eD-VhCi-=sAwyFl9{*nMEaK z7*Ef>^^OJpat%fKNs~z;D6-p&Vu1puvi7qO>=hzl>=jr;^)-JjNQmyc5eP z;W00STQt+Rio`A->b7YTU5N=P(wZI}LMwGD9+xVy}V zbgp8O@{lh@1etw7MkMJORtcvZ4jp?e!2)km>U~KY+=`%(@#HTehk2)w0SWl!_?+ZwQaPW|V|nr3{*kP~wbfxDlQzag9+2I(-z}rSI?(!EXqt9OlAZ?sk}7`weYhHhV%B?gc82nfiTLQeUSfh& zlq>Dnc=yG$DU^FJ;}D0|0s(LiZQ(+jzH!F}S}>z!ymN3XmxU z)Gy}PQRWwgUs%{UOQ@P>WU1o}Z=}S2_1*GZDUBD(ygc;v)qzdg`=q{BzFE^dq>cL+g`w-j>Fym@Qz{pPeD_x42=IDI?Vq$0#d~c zLzP@=9Q`8^4&1-JPl(Wu zPY?3@)8cu5VNDgl9#cUh6|l`4RvZ=41@ll9hLjH4N%ls)WnIC7^G7Q^D0-vscaffW zDuM=Qnx%3ahDRop=X-~`aznM1r84x!Cr8?M?W_CK(fwz+~~z7EUu@#AAu&QaPGQZ1M7* zl$&C~&NehArmMF-zrT7+xO61m*5-X;6rw#n_^iML3*-|xD?Iz+K6pnX*?&~x`kqC$ zLrAI^rqti((j`RYQdKDcxC(BMt{oKLcC^sO{r<$Ylr;)3rbP5O#z|76DPiO)26>k^ zYEkvjq$6i7?<_QL1E>NDlCLhwSPmF+RuR0rTWv<8OjCdm9EIZJ ze2ImNr4J*^v##P2P2%9(Z6p|6L|Nz36Tte32gIa3%1{ETJCnhUvCVUV z2bM8!FMHPRSe!b-bO#||nf4G8Y~{GkN%JFrA)h25>+ z36F8#t_i=Ybzh6P0mC6@Tapq&!6qdO6o;j$rHD2P(+vhM&d4q=m|gyQ%)AR2H6pH- zG#Lj+!w#hL=^glP=bZD#1eJ6g>h%}U^?lJ^O`8`6okV4#uaLyL$prYLAM?yG>aoy4 zVArh>Imr)lxW>MdJO!8aG{w@}L8FZvPEYZqIsxgcke9=r*Lv+dL?2{|4XA(cd~g7K z7r2Ikh?2U&T$C=6$Tgm7(13HnDI6v$b#4%2JjxCz5tx}@ZgIzRv_4grYs*cYYn%w*bp&Lf*1&Zjdm^|2A-UEw5J%?(+Tx+@$4M# z7rqJ(Y~PL`FrQjHn;u!EIPgHb?Sw$G=}Ob#8K{r9ZdUu*W)J86{s}w~Wdns=`j75m zGNm24ZhAzNekgQ=Weqh~4?=$5I*=!{8ZKhnKaFK#t7y(faXI0c7Ff3^BK%m%E;ujy zO=oq!0E)Gr>-f6m=gLhG^$MZ$v<4gNnVmpZ1>wzY*uEDChx@+iFDO z!=@w7IceK%3$2o$N`|HsxP40Q#rNs+ART@MJGY*X{!9!SUm6`sK3__;u(EB-%YH7Y@y^+_=%09cSnuUm zE&qOloP@b_=4cws7F)(N%3WO*m5Bh{fc-Z{>Yi?REUh4!m^iM_{A&y#(-GD^)KRmC 
z{*-YOh!F0F=*>9{wcZ|8=$LKE~x=U*89SJv9Wzo7DZHgTt4$`_Ulb&kBq0c z+L_JE>DTJ@eI<6TnC$<_oI0t}_rc-HQI`Gy#Qi4l^05;K`!vHq=Lg5c;ZvrG5F&p- zl?u9@qGtb1{9!?^ubedHc{C>2u1a_39}_@hrnkr`HhE`I8t4{oaM;W*dHyWWn=kuI zB~w`3Q`+CrTs8b6OAVyWNHpis$aIQfch_oPE-|ijlS3k{UoiXF|xZ1T7IvrA)5`$0#kkTJYT8{>C>i_+65nnBHUK2$@t&CF3{u<5ADBN z+0SR<`SwMWLThmw**du`Jd7KXXd!3S-UwGBRx9}+Pc_nZDN%tHwz{<3xYPS$g*8eQq zAO{0|)%RK4X4${J90xz{Y<-MGXe65Kt#ywFMAn==lO*u-gV`cUyk@s-Eg~U}DrHdG zMX&XUvC(jTujWM;&t>i}CX2x31h-E^_jOtyg}X`PuxJ#s7cNOBQB7Z$VQYR5A+mB9 zdiM#_NK2@3D zdprO&nP7YcvQAeAJ1My4^LnE^zOG?%M0-C&KEudb+<`oZ!x0MN%C|R+fQBRB=aqfp72!6*D}A@RZq=lcKc zR5!ntQBG70oQn~}!Nb$X!32o{hg2z0lbZVc;k5#}5iz3zb^Q#X;Ccf_rt)H_8{1v+ zWsLv@0Sf(TDF6~C0pF>T8fuv1ofz2Mjj#yC)&2hwR|*-q^>6sI|>i zkpbn-sGTV*VPj@(K{G69fP(Oj%+_A7c_50|NP2wbfkFbBTxX{JCZ zpF!#^A%&rh{RRMBX(Pq_0FAMQ0kSQ~r7QPJs2v%E!8Ug=*FQjUVWgolG_+QTCBfg^ z1+eta{D>S?!O1?;Cq0~TmkeQF)gPz7v8wDF1@+krKd)+ER&Mg1qM-o_wF zc)y;DJBbIZ?of-%*4l4W%t#QOV|W zY-svApjonKLj{t@9*&XhfgMVj3H0*?F z_VQ1XpRNJ(N`2mAI)j|?Tk`P0khW5G3^SZm-Ko%v9FR2AoERCey?WEfk2Znw)e`$D zJH;xF3~3I28h$tTc>aF#vE`?fY|Ad2x+OnCmN>2s$1*QKGIATHFI;2X&#zx)CVjYO z4_65BjJFJklaQ^C5l4U3Kh963k17wMNwY;W`1db%L`KYB{BP3cls$K+gkA%PLs6&* zK?8?C#CUw}DY`H4hOnvSf2ecJ|H&P|#_?Zg2pp^b-)Mx}+BpSL!)ozvPs$AuwlyGu zL?{0y@d&>Zb1iiqZOIKW-+z4(@%jU&wM$}-NBj^W$59urPtvUw&aS)j$>ZnfW#i-I zoNLfs14Y8j^9G{89;yAskix+@fkO@SX`tt_Q+Gv0!#Yih08)GHt~TvY+t2$K6GZmZ zq#@>Jc@Dh*ZJDDmgeK7bkibPT;nPaqu|vFMD{CGRgpPbmTsYTOE_`Lb&kOJ~O!gn> zeHZ$A%ue{VRR_I7X8}ELsoR8>5kVYJGx9k7QU|N;vgjM}hxu49mFEqXx{!-Wt*h1L z!KOc0U}$x?g>1%G#pwiGAJrrC{+m23maC~8&2TG30@YY=f9eYOi_Li3hSvF}MmQ3> zHpNc96ocXu18ffwys4Sx{#l(-2of+wim@YJ#v`)fC&fkSt};sn-(KOloXOlnm9|J> zbDY|+xA|Pp84YK%bdxJxN;5swe3Ai!d%Qp3yv!_w9(-SZTkU?JB7RAH!rXx3mZXB5 z3fP~65JmxrlwMQy?z7u^6_6DM1;N5z(g1L&3vb1(q#rY;$QVN4*aye`L}=A(z% z-Kf{Wyu2GP>#2sV_{K)u^XbUEH6G_6+TK!%JNj5@?-;_!kwJU@GHBeiY~1a8hwkNx z#V91TUp%WjSNL*|_z0=?H}X-rP~%v7)s*GLPI#F?SUPMw4=Dkiu=TSCVhy~GC2^Pu zX*D2N#Y3(oc;_^(!w0~>VRV!~TKf)&^rRNwBj!UJKqIM%XnW4`C(igqkRWID6uscC 
z61oA6T|x6G-u-lN!fH|Iql0jwUpY*Z0yd_(4)SaBm$CVwmsaAu2-@k!i!tjVDoQN!f95 zRb3`@UlMGygLd?=cDx0Il@c$l4+Yt2!eTgzQxp`=xiv!g-Mf6|>9JDyKiXA0;03OM zhXTy zFV2_-aB+u@L{Q@i=Xx>LHH=sj4B$v2n%geZbgg2{r2;-Z=L6s7zn_=zsoP|fAc;fZ zE8KQ?FRGc}mVd@&wW1vK@u@R&wPoxE=?AgIZrW=MvwJ&eG?B1U9$Bp9dpn!lYU!E6BlBNP)feO*ul&d_Er6 z&9l&c6IL&uLCi-FkO4}Q*S|FJTMhgIO!ZaO80O`PDKIx7deJX`=qr$l9dBsHV zs{gRR5vqqtpJp9KU3x`d!)+DXUnpW_95`X2A-7b5gC55>nKyk|jb2PA5d z2l{p}HxD6F+ck`#%!w~%mpwSP9v=<9!CqK%Z41i{6~`>y3LVollyH$!x{Vm3ORfiVY~6*i_+{)L9=R@jO8TT*9JS6#_B zh@qaHg!FL zg9HiTcTXc4!)_6uO_QaIX zU4!s+#@UzyTRp-jaU8=;Od5pS(T(qgwT6S8R{%!sZMdf=fB)l3U-}dSXcDe!A1lk^Itc@c@)<5(t>5Ia;&^=}lpi z`Mjc%Oub#g(1nefO__z8d~~HE)3VWYHrq3rbrOw-7ZJ44%HhaJHf<78O47t}!)*Bk zMz>^AmVCkZfCk|cPy=o`w9cc{CQG$O4*JHYY3cfJ<15qs2+%Fpep?98VYRBixY zIVAZ-C5KC4`YhOG9*D@19*|-JwH$)A9+<2k7wsqOR^ICd-2XbJ+AFAa*VATHR|_Tj zRN8|!EUIL(udC^VmV+Qe&;biwuc%b(?QiU8Y;C%D1oJL~9BETIjEAFu2HbiO$RDc? zWy-Pyp%_2J1x3k+?PXCF1PzGqFlJ=4<07XDlm0U$ z;`pQXP2m3^&f>7Wx*2PDf#3uJUt@PaBC1&MAxcS?0tA5URp#G7G6fhX;nxMEhRa7BwF@h1dyHPig{~9AH-@va zN5X(%ALUS>*=sZuV?d-rQlkksYjJQ@X7@KrBJM65`dcAoLGIl6<^Rl@V5VcI9}=!j1MwQ^VuTf(0G4(sr!ie0Q1UEazDeSqKE$gBf(|z zy4~D+_sjXC4Hq$9^r6S9ouf36KatydnBydQ+znJsT#LS`jz+z>42)n+Lx3!{r&MZG z&uR*K3LhVEz7e7nu-ayb?|lcj768xqC6~H620ZJJDGr0i3o*0m#OK3bRQqo~*;jU$ z3gHM`wKm!~l!HzWA4_$;EEL;d_;DTo^SN`bflZcU+AOlZ>ifi-el+fKB;c*I`LvoF zuh~vil>}*&*G*%2Yr-|bJsAt}6v4VGi~Dq-736wLo4Y z(r9k7N#^5JT8Wm>i-(oBt8IIcU%u6zqmS#plR365 zM1#;>6H>xw*!BjUhG=N@Vrs=-HoM2LJhF=r`Sn)=b&L?~0VqUl!|wMnRE}?(8?iRP zI3mPSndLXeZ`;ypJnqQ`ebD6oE|x@Vw9I9&VUc`wsu9q==E=v&#q6^_&-GBo%5alP z+ZzrLs%?fpnM=GpMq;?e%FC6#Kl5%%Z~QwKXSd=2H!+g9a5WO$7l1Nc9P!ZLjfa6#E*7Ambk`&PQM;~-pV5JK^+Cc?CZz4aW+$51>k{n+XRtfJZP=pHJ)5s>FT;{*tO zl6IM!!w+{10-8jh(~0PwWxS(Xibm$ers>H6ynOe~_~&G+>Ks8Bt{&kaq$qMtFH%KQ z<&#cW$%o;nbohyC8%L_3LOO4i!Hxp6g5(^>0n~ry$M_c{oy>QgwynwXz2J;o6$LT@ z(=ci*N+%~_{v*qmoAMZ5x|Dhx@h8bWrMed+jbAMvm@b3j1QYhkRd9hxIb0J>a*F@M zxxDhFFFk-YBL@w0-~F95A%q&3JII5%`dDFC;%At-)|ht${*-iPnxlH+E!!!e0nplw 
zNjzrKh+s6OFKCcFbRvvLPF&FT4pVl`=s`zG&~^q*>B|sdArAr{du$)}j$jxg}cql{L*-(zR5xuCTCiF@Th(1&~ z)TPR(WsW16YERX3zu}$5aW$=(lHf?O_ui#aoiLrTF5UA5zH_~haiuw6qWzaGJs@rl zNk%l~mHmujAV_$I0=LYO$T%@>&k7Crh*O=kP#2gLA6sFP50oT95fX7YGAAX{LU*Ad z;V2OKm(W8!FfR1WqzUS4Y4%026(#wJwu#*6H*@0~$yDN7vONSn>=FWktWZc8;iHaN zl3*#=pej+UMEe*??RYA^z3>b*dAbF>T${ze2zSI6Uc_k41t2(8w!h0<{GzF7(l6BR z@AmGJe^o(KaMBgqz`xlJMh3X>lPkI~9YU^?%cBbAce@pt61%+##kLs7VRcfg08)h> z=IbYX^iB;8yT!Qjd6j(=`JytdPD0BN>xe58vaGrAjlppM{yWIj9=X+^Wf2!bxB%2HPRO7HQrf^OkR!| z_S|ffShS{Kt$mdSN+F%rTR-3g19{9G*#uuF5?nsTwlb55;B5z+c)VoJR_lrz^oiEd z3U9i5?7pq1T^m6&hGt(2$B{sew5^Y=8?-#Z7C(7*FX-%v3%;XAGLB7Lk2rTnOT8kR z1Yt{%WMXad%Pjr{T=sN-hDYT^#31y36;zjPcu>Gy{IQ?qCVMHNo>>xbhp)I~HO{Y- z;$X*O=;BVf3RRp{Sk@CBt~lqQf)!!A82gv^3(HlnsFiGl4lZD1lu@C&8)nsEdW#+M z!*^%jmazUU<%%pCS33$n)Kpq^*+%O00L$tg*3oH8f-31xbaQj`3WIq0Tz^f#m<>nB z<4OqaQ|{2WlH`@vMnFs<*=NotuXme-bZW4uK;21J;)SkkE#10C(Ja5B?bS>3sV~W_ zEZzo+bgLQSSYOEN)t^^*gOWx1OF1_BekcS7wBtqlwaC;NQL$~$4JM@uzK^*D_`eQ3^*7jZYUt-Ll}>sbze@zX-c@JN zxKtr&udce%DNHTXL<}QlzV`3t&nh8=F(^)k8~;RQ=%puJ;V;3!XH1IKhxtEuCoOQx zv=?)8bAS8NEXkdJjOWS_44dn;^Lwa{8ux+jpjVcN$L6mcLMkd4z zjOH&19kgoIBksaVVvp0#`Svs8Ea~~XKH;?@kBg<5h&F-k=Ui-t=W(L9+v|n4deXcg zvC|&|!T<-LCV!Dn*P{eeaoitOW}+`~@4DT7`?h`eVaJqy-@A-o$Pa(o>4ytml>y*9 ztkNFSs!nD{heR?Kwe~+SX3(O{k7WgvRt8`WgknnTV0k-cG^B)YTY}@})Frej`Zec; z`uR8h#A0$hUK+4kKlGH@&F`^1UyRLKuZwLT!{S^;aW`+Apmvuea zWJ2cnBd_oV)IbOrLrA*~WMn|Hd=^oCK`8INhtytYLb<%RK}5RRC1O9Xt1cd*%5>_Y zY#^M~?ZoUk0PJ+|mfqaK?3a$=!{fQ=zZ;msH5CG%oOLn%6RQN>W4oR+xOMBbA?3~)Hp-g1>ws9h=WYTc*6YdK_9HV&>`ZE2R+Y&${AJny>&t|3D^Ca761+)fF2wh#3 zj^&wee-h34UUVL=s_E<*PmpCmTP!9}G7}2qA%a1|1u2z2noczqx&IxVX8x>JT&z@z zfTLB2(24kvc>WT^V_^(QdP1)&nz(S9N|C`q4xRZby(If8&4d#+-&h=8RmXOQVSR?Y z8Xw1r(;_-0hlh~!x`wG*0(t7+Z+CYdqPU1@neKq1SKxPL9ad&Z3#r*2CvC+IZ zK}SCQGAD}@k+sBh3LB+Ml0;r|MFa~@R44TiU{iBl7P8?56KbBHxVu}k|7E|!``u*0 zDaQ|&=n({U{=5+)QBLx-rt%#=ztRs&nsOIQZ&%~vq;lv3dhPy9sq&VC$x@1Dh=q7QV4BaO_5HR9IOD6uUh+Ok22S+8k}VTX4acLcVfEh|jsTZyCY&6j7A1ig 
zy9z=WQUo@X@XEtexNwbG*yp%ld5z8oyNVEaEy9esU7*|}yECAZTVFG41GjFpni!*u1H=${`DLXrl6q zXEeG%O$K+d?3T|EUY)%9{vVL1PNJLbXK`r-q{}~4xMf`2-TGU9=#x2NRhU(NhiJV0 z9;$0{Pp4}~79L5f&US)b^#?lRpgSZ#L>ErEdAHSsgFHOS#Qx`6=R#Vs(R!MrMJ~pq)bN-JO%wVJ)^lc z#^yuDC9zvY@nDm^t`{ZIwfhQrQgcg)$DUGHP82hk+GiU z*%&^(f!XC8-PjRR&N5ZO1m9t?6Y!>F+YVR*sJ@Jb)+Z7^kjL^38wKap>1=xOL(xc= zx$tYVi_b%JtiOi?-Hb0b=#d3v>+}*4wOS4+W}AC5Q9|}Oa{P(gUYMaT_{MDzwz801 zFg6leVUX0UL|rzjIlrbvHkUFGD}_ID%(w%ajZwCz2agt&-WD4vmST`6{34u+dpG>w ztpzhX=l_tc{C~Omad7>=Zhjmb|Ml>_1lX|qar5(C(YS5&WlTSG@SutT>Rhx90At(- zv#@>@O2ceZ1SprfCV%?Q)%eVtsx91uj}LoI%&!rfd;Bya_~xc5u;QL$Q&qHl<+M`i zDY4l0^*O_+q7+{R<3)4g$iI$?l<+D~id2Xt^Hp0A?CVJ&djRCdcL>y;XA-GRwyvhw zzW}DTLHiku4S~lYiCnLFGJ0zU1uWRC8GoRdE1?|J*{2wx1vV(*D;zLD$tcl{b7F+C z1x$U;Q2QpC?VI@00}ZFgkBBf}IioB|ka24yP2`ONJ;Y=So|$3d;9&S4M0OD}Q}8NTS+af>WSPh*At&Ychb2yC4#$ z)x23`<3*c?>1pzG{$5Th9OjFXF_fokRe`gl_xo5Vv_WbX6n^k|7x3V~RFkmTSGxs3FcNQO5 z9qaX`@M6GogEb!>y!FCho7$xjTZZ$mK<>tr*1#_@12PfY2$;bkb`>bM$&mA3 zw^wifPMl&d0qgKY2X|GUkJk_Qiz&yF`>m{YGJL#$u8K&J-tg+=<#FfBk2k&XjINGN z_$0A16fdD)wJ?9!(semE7;$dHjO3p;_n!W64qE2UR&D>WkUh4cddfEkM5Csu7+gt=x%nBR17HaTgwQp+Sj#M*qdjw1!x6l&0uJNM&%2#`Q) zGTqH!BUZx*%x&jBQiXC;Z-!UM_HHZ5PPMhqWLsslHP3au+3qHt7`I7~jdSi!l#R5Hr9#btzMYYLjw?B;^?-aHG(OXqEr?3s?=AN$5nvD`LMBW8PrK z)QIS*rM%>4q{WQxPI6}&f$^-RAz|QZm*39rOiyq}_eS*O8Qd7(0DtDUMhMMP5-`O1 zd0L*XCRTJ0a^g><4zf7bP*QCt+H1_xhH37_JRt5*>lRY1D92MRM&zb_kmM^>n8q{S zWyNZExXAA9g|4sE>pz)j#lO&(i|}RUYE*-#Y?h-t`}}vWubn`yZ1}DdY)H?a8y_C>ip#W*1MlW*;S%1= zwLf!u`5<@_5zBhjd%qa9hmOB%V4G(KUn615;G{(dA2X1uh>|Kd%!Jk$wtR2gVn+?v zHK=yp%@$|QpWQp^_1=@T&u6n0=B_d)uzIzdCPlUrZkm#*^8le4^%5I9%ZC_CCVw?G zP}j^>x6ll;k;BWY4tFkzWus0M+ANqd$4zboFP{wATeveLH>Mz20y-$f=Brr`xpNeJ z{%v#{4Iv9t580R3M<1U4U4^zPlEbLoQ%W>viqrz#TLIw)%zkQ5*I*8r_e7X66h6-$ zoJXG9ZPUdi*b(EIy+<;(*+)|czS(xA$vPD&t}Xf_BDa{{Zree$*?KYbSJzv6jdRjB zn>PNfSvYy*wFETJIaIf%!^;C9RSVgFy6bVW{EzDTf3L2Y|9=ebTiDh=qR@M9Kk#`( zaMwomO~_8NI(I^Pc_%7*sXZ)r!mJ^Kl2kEK68NO1Iq>WLo#|Qn(bUQgw`{apc#C`% 
zp8DM1%#rlPWL{5X*jlgV)K-p?Tg@5@{~Z4vuF0f|<}{PYiMjMeb)?9ed#03TL1&7e zOr8FI)$L}j1;+2lpB*12y~w*g#!v2JebwU~ZQ#HwfC-`Au5vG?=vz3XH6(3SY$-VkVeaMk_s zY9zC~x7BS+|JPyPh}iszb@^b{-FsS-TazGwQ#c`*pEMdhfFt4Y$^fn3+7p zq_bodKVVxL*{_mJw2O>0JH78k2A9&nliIx;b$R7kk4z7t|`+d!0)&Ew$W`lFS!+K?Q_NTYZOjj>hnzI4vX6>6o6gk3dKxW24ulfGL z{piTB$4*4HOwW#>Bp1}i`5!i0FXSaDTjh3;8=$$*jL*bJQt!=w_zcWJu!24COUY#U8ICg;71~_a+N`6&=Flg zem%0*`K$4o8tmnta5Lp~n~Ca6-Gh^WafLo~Yq!K(pBtxGpV)+gtwi?oou7)4DaNp}zCbNT$E zbxy)5cYl2(OR8+~0u0xb;DE*g62O9^By9tr&1Wh3PnCRjC5vK?by%Ru5C;wLR8Sg% zExwKKAXv=n|9u0=zx7SPlTEd^5z08Ob zuf?uzGLYY{Avs`=-^;>jdSfyRPyY1PXH4;)B~iY1&zi5`14g``J1zNbM&(fHEHh$c zttQOaBsuY__@XVbv67);fMppiN+whgLcas{t`4;k4bv6;kU5zdUW;qj!;Big1Q<;Ap%;>4E$s7?o zv(ZFjVLPOJ*4TL>nlxCO&<4}`F8N(;*fktDN4*UvWcf1bXA%xFc#EW9X8($Yu?A}A z{1FEV^Xug_7(EBE%qkg9@-<$7*6OY7Y6Z2hSR((GvMdyXK{`mq+Y%t6!VPEiK;{gr zqcs37Eh6ZW$nq2IC~3q9gz3Ox$Oc5=Jk^7KwrXY3KDra%%yl~7(jrKaTnNX^01C+q zB~-+Oo#k>IQKgyyTYJ4D9bC5E5mS>ht!w~(k6YyDFIBQIVt%urP7(4SgLy>+1*xv()7T{X_;Cj)7=B}Oh{y&fpvT*bIB10^RVOO4gM^y(YdSXgLi)d7jKNS zcRA}UwmyHI^h{4$jy8Yg`I-vu40ijHAXKu@@qT%YRsb+-=o#S-fH%<`8LUG z-xTJb9>mDQi0Pj<)u_5X`U`{aa5CfWPDXCD7D`7LGekG!x5EdJsH= zs_4d)Sj^PSwtKM63^xc(`geTIaXP@&X`7@>wA}2OR?wUKr6e&0tBL5EA#oZ1TvYwA zXjT*2P5~DyH$=^4FqSNVDB;Pz!5hBPhLinkl=B?1UPnPrTE0Fm>+)vjPSYD8DK~zpJi0 zd-g}u^_xwd*yjy&jxtjNP(H1vxw_f$p!bSFVdZjtbY=f1>Cct( zqj#!9-?ig|?9QQf^G1G?%KcOPrGpfo`y0rY16vZoTD^{m7h|xxCP3#H1-6S0!gn0c zc3>YVdvd!TH%m*M5QR$4`Wd@0Lc_MXRIlHT8}5^}C|pvAIrI$EUg71G!g?@Jhn!0# zVG8_^pceeapweJJVB+=&ZHn480dNav9F;>`-Gp^9;B*1emx81In=N$;d4&>KpJ+?D z?J|v8n^w?Xb$Zvlz6?LA0+{7caje&q!IH3RD4Eo?3}-t*1*?=;;m|y8ao$tYz(Ep| z%a3JsO8$2n=WHNZ%{#}@P+I(u2Irr_@QNyWTWPajRqqDjQ}zWX+oG2;Lf1QTAvDcn zM73w`wZjkZDeGoznRrG`Q!Kyj5>zsuEh@(c$3^!wTl2-)`>vHT31IYm<}}mm4sp%g zashtFq`{Jd0SmVg`WexakbcHkI=_jVhWtL?O2@m5J;ul}O!NnY$J5X;*L&H#Qsh){ zm9IgjtT!5eoY@{C%Wgk#?5^VvKXNdUCUc}E(gRgxp6aG*28-(0%Q$2(-qj*2h#>QV zDepIZA(OdRANM~ezJckpC0GCV;Lh@+xcy(e2{R+>f6be0;b_}$w7vKC3l9XabjD)q 
ziArlN^IiU}8+Kwx1BS@4V?@ht44rHuo{-x3dV9Dd0VJW9r{OVdMT;sQy7wpkKtl*_ zdIH1GUtdej4;s_X!e}tmIn}t3#f$+8tHcO*Acv^uP}iiB9nP&T+npM)TsRAi4jS1; zy;L?I;=Lm6nlTZ$CuJKS!50;8y3wVw_~)HfR>E{7kkXDFvRncXL6g~p4lqagGa$ab5v@r zAsa|0MNX_BF21_~leBr_&;^g3xaFkco5Hir6%iViY!y2fl+j6|6d2i!6ajPRWrs0R z0h$Jb{lFeAA?dSAlMfm>D;T#((1V7#dEdXxMr5r7(V3Wz+W44WgTD526%;fM;CaFAiXnjD0!HQfU00 z4~RzbP873_)%VjHWvL~`nqiNeCPWH!GMa6n;AN5pdqJHae9d4M718e(k*h$S*i#FE*k>0wVOH%+R4=?oK#ph0Cm; za9MIL9?N+uAH-x4P8_0CEBQs}V3<2>PL6yqqeWyv$@lc3y14-@i zZ0uu z0zVL<2=J8w^xgk32$~qi`h1423T0QOWh>BGv??3vI@|uT&P(Et*$_9fKTVWp@s*rE!a>c}4=9iSbxTShdTvf+T-^E#V z_7BHIRr_nJ2zCgjcSM*hUqR$ML~~}|C7Rkiql$AzR8iA_0DWEQd~2R$vuGfmhbX@zISv zj#mg=eaqYus8-hgSVZY+y%Jp3LavwPa>!ZmjXENHGt>R^lhEtnGHZGR$K;9<)%m~i zU7@V;$wa!Qo{Uy&$%q|oY%t<`P4rRvn&(1sP_`9M%NQ5S9gx2$Pw*}E`z~2FW+JaX zt8jq|z}_SF=@(dZeb$9u8S!bzIVp!8shF8DX|`1Ypdia1c?nCsvH@NGX_>B7JRf-z z*_^Fc8CET)W*Q&N}HH3QptoFTPO0lMB+WpOm4Rj|BErP4{I9w|dUB30R z12(Ol@&qfy>uHEe)SJgD5p2oIB(-2lM}PHVV1gty!Pf(Qu1|!>*Q2*X;-C-V@gU&+ zUw=O-+boQgY{nAEaRB?2ev)-@HH#9EbPr<(Vafh${S1gEh{Z-JFz{x+vsaOyp>nCB zA;d8;7Bwe*@eJ2U6v`_-oO-@pFUyBWIKaUz^kTeFieZN9Cf?bROAZlf^aUI>Qy*uK z{5o;WM8c#_B_N(zDmp`gy+)PJRp!81XVBPZO*rE!)iw#myME6uj!&=mzl zmxgcI_s`}bs%0dNcK1)xadv>d1_J?>E*|?8u>QpzA$*Lc5!Ct7)SG~Z-j;lOtquuC z^3HgJYWn4vD82vNC*#J}Zp3dq+{n)?+IH%^?sa1NgH;rG@u8wu1EZO5p9V?=1of-w zSM`;b3s)PT(C2#I4d$J){5{EJ)IVW$A`@=h3Uct{q6=0#S}>SMpfxB_svBQvkB)x` z4A~Y?QSfdGw@@X%zK3RWNBbmkNv|pt>B$rxf-;D*CN>T43Mhx?nGLr2LS% z6poOkLz)aehz2+uIr7jq;Y&_j69)=T5nyZuJM;{{{6C+cR|pkoVwuwsk|QjnL`_#1 z)JWPmIim#I3LlsN9$YN}YWx30=BrG0;_vtq8_wj_;C#^iDG{aYi%m$UnpItWOQx<^ zx)W`7YA(d=njl8;U17r&Xk^sRhU^!(NjHgCjhS+KWKrRc2%uq3VV znqS#xtCM#P29Bqxixlq>e%rx;K-D@|9kV`s>+|@(=U*yD!ByACA%$4lwOZ2o?jlD z{25hYF?kMd#Om>GJ8@H3$B;LuXNVIyaBNxo;@}x3xEJ1!0R(_tV?Q+(G#-{$n}0)2 zI&cL4?|y=n@qZoy{-0F^Gy8v?n0NJ~F*q1Q_uJ7Yek?42v39#2wnRb8_Gou)tONZW zjV1{boMw?tCx%0Yz5;!}<9#d~B{DS`)fTcv!a%~7$K_nN^JYfN+nKpLp|5ZF{rSGX zbm5Mt041aa#ogJFhcuRubSDZEN{N~rJ$e{^T9Uzx!geN?$_j8~$z>uh`l#t-vv+iP 
zznXZ|$Mp9PO*Be=l5l0gj>)NS_xp--V2VskOUm}G6jvQd#zu`$>bF&4m+}MDr}4=A zhN{u>U+o-(n$G=XL{$~l=i1vE>a__%MrZ>*=v!T*e9gcPr#i{)$?>1uz5~gk?v(MT zZw5crs=a@dplw*-q{1_#a2< za}G85WdDr$+VKH5_u0iB;~9sjletV0LUiyXCG5*J@5ISX{g@Lk6 zl6l$6ojWw@WWHU+*%>9b1AT8+wQyDQ0z=0RO&TaL)aC?eSs8r|H(d?{TO#Go?OuNq z4eO5qQR=eh5ZB+Z>Ljd2O>gT37#uodINCBtlO0c&>hbW$y!jkXf%kfA9F#?7zzm&lVF(qf4aX5Ds927TGl} z?Fhg)%(827fveTD>hF&o{w07U^?}CKSm<4|Zok0ZUKe3IcC!^Lju4rx_Qnkg%?xjy zUbo&}&lOZHqqAZKp|@&EW1VU|MT_zq{qO%1j6{`+3xRxeM`LlzAl zkU_{cLi%bgK;kFOmT^yN)>DQ1LO!!#s)gm8rD&x5YOc0s_sJM57E;DSZ{gCk{ZyEn z+LmU?%^GVxT-5{H%UazJ0LcQaJxAF^u-p}#zCKc9k(@bU4S;8*olzeN)Y(tk+7IvY~ zF3s5LHb8gvPXh`iRqDI*$U$o2@C=tO3XAaE=X_W?3R$wAcMY?&iL(4WbHjTs0g+ z&JD1tjQ12?Zp$1x*n>2^fkm8RoC7vobo5eU##Fz>Pw`^f&8`mu;Z=7*hKvOlizX&m zMj>Sn8~qfEg-(ds(c>Dh%Wb9Nzhgpc(G_yx6u>7$rEYwYmAdjqCkud0fe1cy;GscT zh{S+*Qzlolynta&h$oFyZ>9lY!GS=cMF24K)oHJc)`wkpo@b@TROy!*oc%^Nb|{&B z(mutD{z>VBPpXhjy*h<(!+^w+Xk3<750U=7wm0 zxP!4`-&7kIl=L8^G&cENSLfR;oOU`$2~W$9$sTJOPU-XQD%QCz4(EC_Bw)N_hU@Ye zKF3z!92^SUrX$^dw`wH*=8yZfqprqa1}31`@K;H7X%;=)2hyu;QgG~PQUTbU2iCkTBNvGj8tZOI@-!e zEso_l0Pe8WHBtgac##;A`@X5g=_Y{RnY9TA@WQ|%`%hn->qIUPCYa9bnOsJwV0%X` zzMyQIK-*^dZbf56)EItb(4Hcyzfe5O2Fe3`eLegyO^}G`ba!#lHhc=1n zKapB&9RIyJx)#9s2S)g@`PeOCP{kht6THRx<#~$So-0*$Fwcr;`5RO!K@L%@g4afY z`Rhg(senv5iZm%rx151j&>-F(vwMnM_u{WpZ8Pagjz1P*ZqJhrtT7ssc+K7BP)gAN zB3(XsgrZrj`Le`EIp^GHON1%6lf{4y*UAaWflS9x+bh`tw!))@I)%~W%4>Ua`(iT* z+S`amiY}TM+$snUtoJ~u(Ny^lli<+3H#XT}S9v%ws2~(+-IY|b5 z`#2MWsiVu}%|GNuf%z_edj{p9dnPp{t6o!1GB-bd$GhXl02>Ua;3U$~f#T%~UE<^n zUj__Vkz6_^x}RK z>cTu1fgsq#;}QTh9}DnKz-mu zKUNA|dd4%=agAFL)-mfr(i-#f2gV>uxuC|*OlxH_GJ$6|aiuHonirlm;gVVx1+}V0 zM4ulUe_I>dj6%h0;8u0UB^3v1BL*Tz0&{@prgUpOBcSU1*lxljEKbG%#k#I7hYo5k9!fe{Dbgv zg)>Qv3b}&x&TrI@_a6#24?-yaWOEKfCQu& zLD#2-=pxI$-6=mxjQR_H-HKI^Uv!6o$4naw%3uzvYkfx*g((1ITWwSg#UCNs!kKL$E*MX*T+c z&;1Ut@4Y{;Z?5@AaQ_JjW-GFPPFv8%<~lQvB7W`X@%t|2{vHv|PA9*K{`JyDVO_4< zWGEuyl;2F-0lp!W|8-($+r>q<(B7?=BJHKtjLqP{heVlyqzN~_jb6uvwh85f7if1h 
zB-9o}{tq)m%!b|8<6POiV4qJ?jn^W#<%Q`tr03d9-nvxN*KS(b2L!qmmDtIh3dt?}aDBTptqnfn*%9Lhp({fH`NPXg@1L0@b%%>$i#V+vq! zp%ResFv=XcFqMAHz85-vPVYGn$WH@ZDT@C=kE*HW#AM|67hli3P2(ofQjwY%i%P}& zEr%X@?6%Xn#_j$f0qFm#avp4O>Qg!X`&Y9y^!q8Sp2O7m(1(V^53_usBnSR%{RVhb z;&(jB$EXo@aW!VI7y^O*p~eC@L)|v-uM~Y#-rR}Nha@fr=G!NZxk)x7Xl2-c(+PP3 z5{1T{yOupZ>i_QcA@=*GiTq6NbkSGZjrHqsU?3@rRB1|7Mg|z_USJgaweCa5(|x>F z=eh@K-rI69o4vIQkkQnDqN%FnAwTuXpl!L`a+gg3v@*%7CyquaJRF^G%!$ces+~%$ zQ!8C)OZ1iwYT&gUY6%q@$Y9%e_ z`D}7bsceCnUQdBZ(6}maDXApul%ocwC#(<(S9%h!K#)? zc#z>~t8Z_}j6t|v*x?y6Kd4m3waqoLm%17J;H+XjN1&o?)?mBP9U&1Z*Yr%q3!+M2k*9Bqbg|{-MqhC%`^l zqOa+=>_EE?HD2FHhjKf?$0y<)g|30&fevMoob8TW4xN3o2}kaP--MAEWhaNT~z#VYbP70=ct1AV(NC^)1$6v@Gm}0x&x4(t~ z1Be^wQDH8)U_kC6okf~RrTM`G8B>c{PE`%sQKGaemzey(f&KWNE3!RQF+Xk~%(2hL6v!TRn*#Jh$Xg(`P*qbZ^fxlrwi>5n|D8mfE^nfJZi-3Kbl zmkx!cro@RiMT?u{RVa4e+De_L;9>l+=2r%g5$70q3#b)G%(_P8F7AjuuvKVLBqSKa zF!CvW(3B!Ym-4S54Fe1^L3W z@q5tWN9s)+t;l<_tqNi@z_jb%FV#rA1SSK!K-Miy&anaZ?A!?x;Z zS9RF-ju{^7<&Rh}AN;8mv^>N}=Ga(c9i2*DUm7tZl=6G62a0D0#Zr?Jann1RR@y&6 z6ZA!spI2y#xw+^^MOT4LmetZctL|<9*EHemm!V+Se+ej9y7MGB78r=CZVd}XNYzY} zlrBk2&T1uuwVc4rtadF1CP}iPrj5#KCGVu&iP9A#;uDr{v~IrL`en`tCOHFNB$MW%@kCy(W|^Z#LKyQ%zv8PC2hyIyFW+ zYC-Mxk3C0AkIVp@y?1rLDX*UWFt1(H_J8)?q@bq}64P7wXkZ@3cgbWD13Nwy^Wj?6 zFjDGIW7f3>YSyvnw{b1?B73QIw?BK9o~YI(-6f1Pc7&{!`*m4)R742#pQA)`Cz|B@x^!>ZpAvZ~0yuyM2wS91BB3M0hYjZka1&scx{#V#5m z-2qoO!@Qv?06zsOpSaf(GcUd~vO~9vz6lp8iqa?Jgj$`tb)0*-Q~+7I-Y9bXA=kmp zWYBFCOI?4YN9RFo@f~}*iVF4*?p+J@oTH#jD*WeOCd9j0NJ{1q^)8T!;WOgA9tYe zVz}G`r@q_&-DE|nn4zO(N!l+kJ9Q6sntiRj{)&YA?DDhFN#)ZmSDH=YXAfE1$3HqiNz^I) zgwMYbfG#sb2P;91eX`zwVIVq)SyiUlv~QHyuf)sjY89TVHdBmgc}+#A@;FH}V@8NB z$ha%41Q0jhUbeK^w9rxXO&{73ata4F%SZs|kZEV2?h@*n{c>rLoK%Jq%m_72%8q_n zGyV!SAHpe02~}nK@R^+kE|ejj&ZG#x0jcWR$nwx?GSy{kx|Nre5exA8%jYX|IsUY@ z3ZC}?ujI8W(VFBHcU79AOvt?>_dA`D7akN+962TmG?m`=r3~`ZKjd3JY@RP_z+->V zqQ4^tBUeMq)`Fa2yGRTTUoA@injL>}$2hM*)b^XdcMtjaiBkqnRxnuF%wNXE9WCZJ zX5t5Yni3JlW8X&a>tmn0(`F4jr%Sp@%mfvMXuyu@D3UY!f&?^o@oLqotF3jDZAHxd 
ztP#?fuRGj@>Z#CLEhFGlfeFX-=f`*L;IY|TDHL-}UDF9$x}xicwsV=8lI5;Fo_=2#o$3lMJXP%0&i z_6AH9rAvex2>NUwB4Y{R)bU#x$;rqv;`5<7{zfAZKgsS1Vl|PnEsh_*eQp*WHEJy-5Q8L^OYcawD>g}8|?E0FNF9A zsV8PQ8ZR!qShF&LA`m?)pSG0GP;z^(WN64pp>c=ayCB4WtwTK~HF2mwEJ1$3Xs4S} z8|}1%{BJu1r{(EEv53>ztvSwq|v3m1tS5z=8^AuQ7j&3AHKuE7Njf`x)E}=)KooR}#Z=MVkdV>N6 z0@??2hHS!)ye$rcBY7A$+5HjFguLj&EEsuvQitrEg=^~Fpea^BAm}PP%ggawLx5_R z(?L*H8oQbX=Lo2dno`)0w_xN8IizUMK-%KZ8rd4ZR{6{Vvo&_?ES4qeA6w`%TnIh= zF>_ls(4nxGU0 z9yI9oAxuqqDcahgNiav0?_NcbT`a4r>;_pXuK=Zw2_4nrcqjVs%}mOotBiV$4VWTZ zb_++a?6El`z2PTPhiL7z!@2tSRIKqz7aPeWS(Flds-oU_o=^Qqv<7u0A8S^-LR)MY z^9_X&*YG~e4a=@v=clI?Lp7PVJdeCRXew=cm*Z4!hMP)tF4iRv5p;_#AG^oo!?fa@W}!Apr-nzf}+$9yfss?hdN0Ir_Y~=Ncb(4bBJ9^JMFn zVh094X$ZrLarb0ZrN`D*sG#-)H5a9Ji9{&oT_=?2qW_9I&7Cdeq&TOM*RyFHQtktD z#z}+Qp}3#(N-Z6bePNfz5-dfGu6wfF zc0z36Z&4YD&6h=wq|_#3+S1hQ1U6K%;-G4xyWYAF3t@(|jjFG#B1zRgrdxvWi*2%J zkW|#3Sm+iGP^BB8Nn76(spkB9hu=X}P(nW&X0*Wge^7Q#L6$Ylny&7$ZQC}xxXZR} z+qP}nw(Tx=*|zQJe`Zd^i8xrR_T7qok(u$X%;!01sZoxeBAPI}ouIHepScrZ zF(S;c*jp}T0j!xa-hdO};0$7pLsm7H3+oOZ@Rtv)Hn-&`0e6{Cc9f_7^xEpDdU7ZX zN%*WF&BV=~d7_s=15I4P?UcX!qP-i_#ZXZTwQYWcD~Nu&6W?R?u1FFKk|7T`QV%vTsOmmk~T^ zLtGPzm~L%!U#Pww6JU+iw~98fM9DNqIa{J=0o?jU39|TXbZ}_`)xuX6>UmD~YeLvk z1Yu7(R9_WKOkOv<$;(JQO-bFu$Y`coJye?PWu;h;MHq|IPNi^X!@Y*)c$3{J`yTsZ zzVjge(=7212umi0zX0cd36#0i+H}oiL-D=V?SGn0Bx5|B#8`8Jx6G*FCf8mvx)fSV zEcCl5`*8pzmht%Do6C>HhEdC?XP|`_P+ukXEUDV-=`0XWmO#|)QxqoaUL@HFoh}xf z;@a5Lo6GCrQG_Klo?yMeQq)*ql+jUTR$>Gv+E4;t%&f|_{rYvVu$oI9T{TuBjg1qV#ITcKA&qXz@pEg$lGv8a8hfF@VyvPBl9mk6!K6Acf4$|u<57K9 zTl0--B7cUZ-Y`)KAzwVOZEuHDtr!9h&T5p#R`J1fPOV|m_>ob=)MKJ9iCt0IpB4AL z_Nm4tDXF1XF#6*i#^4(XX^YJyS1|w=BP1phX; zECH$eX3tXWDdmwXr!Lg?i!64nP+WSk&Jm2VtibLfoPRtzy(}S60`iMb@W;(;Zyi-b zd13#esvVyn5krJ{-4f`{cb|{Gc9B9HSP0p9(wMutukHB2v9_Fn#A!s}K+4e45%3Uu zk39|t;(!R{)}%)}`=Zop!)4ba-af&em7~N^Do7HE zry`79$!xjWkiTxggd#k3X`f}hZ`PchW?ljTCgQau;#NEeZ=cTB()S?j0WKg5AieH5o8OJpf41WB;Sv3 zo}ZmYo&6yz^Lr^Gll>r`X%9|%hI@}= z!ye-NwF&DT{ezkW8;_@|GD1Q3{;yzY+c-tc0R&g)8wGx%X3SgS#g@mpU~{EIgP(y$ 
zGXypKMwh#1JQXM@Zcvf_@H zzD~totkDsf^Q5NN0sK4K2P>NTD}ifar3jZvSJ0L39#aKJdSL>-mDnHOgGR%CLzWo) zKg&{cfqy+Cp(P8+L%vsj^irh)+E6!sK8~ZH#{My(rOG4nP}3~Xj5a^bw3f70q@02- zZ*)-_hK3&spm!Xh!;0N!aIA-e8-M^IS`c7x9Vwkg?Vl~a4HF9)pl75Ahi^T~daz@j zIZzRw9v_@%5$-%LD@1cj#z755%(C&HNCNz|4kp%7KrEvp3%AN0@bse9Y5Zji7AKf) zG6$tNXZmr#Ansg|_U{vDcs?cBN*J#yVy)_dhoL0FpQP;Y=O}e_3r;H^ffv%CrZ6%O z6K3?w$1`Vs!iWXo$l_G7!12c-TlA_#1ApCZAj|k>BU}1}kf>GTwBReeLka?-G~AO~ zZ(I!1EAV(?u;!n)88;^kzafyxdF59*Hk$=^?k}l#1B@J6VKX-ecaReY^hInb=sob2 zvlXhO^42Y@Dn_g%TS7*-SW^UYg-%3vlxr&{=1NS?8p6=)*c}ZyztSW1CdeS(JxJ85FGPu9N%L)-5aM z$PwxS>w=_QGQ@Z3TuI3|kakc#Kxzvb@M~O&> z;IJ1!D%v81og&^rx?)LDq)ge$viVl4a-p2DFWq|-fGK&4p%v?*r1D2^Aod#yRnXRa z6LwiM0}@B-QV->+2Bg$uR#kTPG=rm3$P8%ATIro`z@jjr{iXizNz$8Wj=+1<;iT!R zwE6bRyaMw>c@PV7uOy}zGbTG!iMN^Zv-@j!u(#zqf|0t-*>%a}k1PZ?de45i>q3c1= zFNP0Zm^1c?0@xYSlF6nf`mubu8sr@3gNKyf+nc&=4(C-g5{G|CQd!?2)c{QME9%z> zLgAAe8CXQrTa=Er?dykF+7S=4MnIp@#d}A`qCbm{ZPK}(Q!0&AAJ$O|%=-jb5>%~^ zATvpw)6%x$4bq7!J$`FdyTM7h?aA?!H5LIWJY-7mwHtqy)w(Qc-Iph&Vnb|sc@c}j zbaC6OQmh}Wiz}&|yaDyw!bhEsE%P!=13JDa3%1X5sopjZTobDow|vT@&E>(Bx?hYO zb^Yc&@A;JF2S}kFjwOVbU zF-Z%_c~4a}jr{^PeWtJYhtYxMA4oime*x(KAESd~CI`ZQMu!FIwGogy#;2lZhpADi z>-kC6_Ri2PY0-M)P!M7&0`sGi?|1Q(7lagK;=X=Z_^9~3%S1U&vt~YDI&erE}We&z)93D*2203_93Ntl6*VmH+de#T37O=XViOeC_N!HYlbp$9Z!G zA_9KgZNz1Z4jHeu7Loe7h1~pUI!7J@vV|2V~&;73J>aq?0}g7%H)w;}_Dap{hj1dZ0}tO}Dv$O9+( z9xzr+qK`ToGG&t(7w0VQpg2jpkCG;Sb&t)n;yC*y46ZTLMgcB94zshdPU|XQPZQ4eP4Y772rHji&PB z#q?lhr~pkBlb+fNtR~RGHy!DSJD#$lrz3uLT53pRmS*;FflGnh(lfIWKHi|8TMpp$ zY2~4)27eZK$%gUI3#-7=^F$Ct8BX^>+%Bl4n~vd8-<0{*5&DfI_Nd{PQtTgSy{$?Y zoItuEa9Y>ueF`1zyRtxK!buQ%YO#R6f3AQk{k)m}0S1jBfMZh&adpcXm#&W^C14ON zx!_36A!2_eGRQTp?sBfSNHU|bcC5e1KM>+^%L;}e7fud{zkEQ-f}%0=60j^fqcr8MsB3U#KOKeq>{!y{pOV|yHBDsgy-MI z{Q}y8bqOk`P$+{TWz6!GOqud-0TLr3Br)5<0Z~tlDzehI0 zIf};y^qE$Eo&|+gdYPVn_JK3Glv&MNqFGoic8@h1?Hm!SRH-yKSvcJLhR!tk=hqZh zRlH*|Mj2eNk9gbb_~@N_GfAxKB||mw$bW#G!*)Sx<51phRbo>pe1Q*VUEc144%v8Y 
zCS`5-Um%VbY*(KPAsOMVM%TX^x!J@DjRv&tzz0r}nPKGj;eOclP<72*jpA$b9g4K@g|G5xLLoV+9e-YYF(v z(sh0Rz&oyJWppr|;-=l964v2bbnT1Ghp;>S9bW-sV86w4GE;aS(q=27wyQJNkuvFB zv4rr>I_`eenogH+7{#H+U8|T=#tR;Q>LhF;YUJ{tM+zyEDv!j&t;mk?Ords@|>S(=U^4>*lLePZ<>{<7M$SwjP1R1iRjbgznc= zN}XAqrcx*>Oj5Nxp*tyli6RJYaSCn(mq%4qU+2eVHihz<01tCb0UK_7S<=m~on90B z_lvFU3?tvX=x`B@!Ncpym6WSAlIpsVH!~K45Kat;Rvg`)H~hhf@T`C8i~n$q_E-7) zuL*U3<9Wv{w*SQQ1PkdghS)HB4GcDT^G7P|=31Vm3+YV@gjFupL7=~fWylu3b^M3u z>W0)w^wcx|N?^l?PqVJYj)@4Np#{)%coXh*%?qH;CEF0FNYRHoLV}@CTaW`el9lPI6toqlJ~T)rup#CnyT)Y+wOU6qy#Y zZbm;x!GCd46wA2ZA~Vxd_-`j3f@9>Y>qp_B12zgbN{+!rMU}4%q^Nb@=YHu)@GnTb zg&tj&_qQrJtF^g*B%yOqpkkh{WJGEpETm787-T*)BH-FHpZ<>7o9*?66LK;fJpn8E z3tv)SeJaHzk{WroLbt5y4q}8U{5jI1z*j8uoYonXw*4{oaa7tg3d5XiCZ3_N^zn^R ze$u6MfRx}Xgg1nI0}US%Nk16k1l1a_XU)E?O4zCrHhHi44mOQEBW|BvF6tpLnjROc zDbS@A`tnf=nLmgn_#A;*+Nls)U?Jm{Jh6gbY%v=FK~-HR(87&iZ;sT8J7EAl;p(B^ zwS`dkG-fDNINH);{xoiAS>RkS&lSt@(fySL&h|?_*~wW!BLR#@C{23`;hYbSrECyH zETvcpQ%KM|H0Ujg{?Cx{bvh^{72`Fnh@-;sBgmKae%~PAe)A>){d-eHy^Y2Q-6QfP z;w2D2k}wOP*PfNdGydiXY3Ps$a=?CLcrrZZGTzIu z|IYY%%&dY*@2gbqE_}YRGhp=C0iDh9z{}W6I7|qG#6D0l)_-@zCx)q(ef5iD#(ja; z=9$!PZV?c~2*MW9cU2$SVJc8{6d~ku$o58BphQ!9A={pHWQ{x6v2*_rOi`O1HNV z4QB+E^^%81(jl}=z|C0nfUKuL_G3(-#&Xo{$Mjcj=na>Dj!-$Eof7l?K-GOC11yps zjJA#9a&>CPq!G%Mi^X}gYS)Y@T78hwa-~EOlxqsBVlbCrq-Zyd#j8ww(9^WtHKl3w zdNTw#HlgPh7p7lerpNgdyyZbr9BUg_6Kbr8qlZ#_gNm zOF@!xZmO0QR0tkXLBMNZ)b-)WSXunjQjE|;yQKC<{ThuYiY0h<_cTMl4-FWXy^Z1X zpMBfIPSk=dR@2`-Pe^i zqzT%Dwmtbam$tE7fNlW>UEzDjBqfxB3GA9L{#gIw!dJabRejChh~)T#Qp4IbdBoWZ z-cu1j!Ax_)H-{Pf7aj|lw}$-P_{&_r_s2=x+>8agkCWNQ z$?T^4OpDBST$yLT%=Wu;>aCyUW&KD%PU9g-I57Jdk1O}V>OFF@b<=F&JMSmwEA8Pz zn5>BQq{Wup_+eb_P;o-!PjY|4^b`@SFMY7%vBDzX>>lqY^mfpOjV}L&Qe~pgy^n;A zvIH2B#Pk+nG;2yQ80YnAX5!ori7pY(!RH zkAoo&N#TkuG+7jrzJbX12qT|%f*@2iMqczquFqu2C1meMZns!chBe&j1glzLXg->5 zDsw86Aj?$F&rn0OiK{ii4YHXoXv6gU2ctPr9ok&fyX9DH>a@TZt6q6!38eezex&Io z;#;#5qiZ@+_@D5c^9s2C(B4@82|>>E*V^!}6XXAq`|T*kuN{Q2Q*yw?{S==g0Kh>9 
z`!~TyxS1(unutIl${qB&rmDJ)`L#86f$)X>MEi5`+s@?a-6S;uR5vbooBrSfTRr3E zzTrN2Z7lV!`zQcrf~ma>;9+YVwDG=vw|B@b(h7o2E2W5@l9QK)m7uK1;EY{<@KsFg z;*y1na3)8+>V((!>+9w<2>d9zhFAeoJsGfG`mN8yx5IgrM8^F+3zds|?b)SaOC zlEr%rR=GurV;MhoC}!#~t|vi4Kz+(YOjcGBkC)Uf#e91}>Y4jH4VM)WUsEhi2z2od z5}0@oth4nG6Q;Eef;yTL4(+v&l&hkQ1}x7*&>f2oqisaeLZNF`L@GxlFf1tIYRX7B zU%v!f>p9&e+B+4zEyA+Y5574X0m~B(8ILm+sD3;FW93oq1N(1w9+s|q#;I;?_OzGA zgnw-gk3WAg?vj!IUdxfC&IpGQpbSM3fUxGH{DPvQ}Y> zZlW)eVOg`ER|2{|>q&O!Res;p-GZ=AaxhNL0BUwtHvQ+5IpDNv@B_?Rw~C^;i=v5j zdGzAh(1dM|N8IttqjP%yJW6TWah`%yYB9W>()CTz{kVPPl(W0sz~r#Z?&nQR6=%FP zZ0ZPaU5zl>BDSU#--~#uAf>tJYq3mQr=@_Ik}8b;GLKtq_kdgK&8pl9SB|^N_Lm51 zeM6fwqe@k>o(WLQLlW5q$iv~SM~|^}l5t$e_oKeh*E>bUL^7{*4>u0F_Pz zg~g@Vr_1vzbn|j~i?VwcoI_t?U6+gCQ3mwhH)CIpcxiTPN?CC`=A!i+Xsh?OPY3Yo z-<3gaJP~E@+L$Q-0YnayO-h|j9D?>=bNqDBh4aj__a`Rs^BO}in$w8mn}Q;|DSy|< zMnm=I)JzspVoCUKU&#DtHHzb$V}$DW-9@RVBx&T; z3^K8^pQ|m|Z(8iszuY^!Uhqpwm1X~_V*aB%n}y|HS4z%3nl zB2#%f8>>dLhGMY|ew}~NZ?KsiO2y`HKDd}PbZ+Kn9B`RWP;PX$tDI-mLI~FYq7SE% zq}+yL$o0uL8MCSlmFq=Reb4dh>T09sFAiQ$*Xpwny~!B!vjG-_xQeleeTWkMaWUqg za&j+s+U{4ww@Ks!D?+<&gHu20hV^Z>+}`IaA5|FA-*i;a{P5a8vRfJN1iG ze>ic>fQmO8V{-;1F*oUL^`hwEsH?)J$kAv}q_Y?VV7?R}a4o4cc-F&cqGGC8ki+je zM~`W{*jj*-*}ACy;4pPVH_>IAwx1P}P!uG~e(Eyg!*llh>MEv0MZ6|}(Io-KtnFor zA0Ca9SUwq}Z^i6W46F7D;%G!frS!qzkHGg+_+ZFFX+4as)+AdOOZ`Jbj&Su!0=Y@? 
ztNI1#Nt$6f$O`k63%RraI9p;+==Z+#8aXjNT^Xi0nzF;{mS&BWt9$auZRW@NeDoJ( zPjGp!%K}3T(w(c4mq)(^CZHB*bkCp1e@AqS2)*ri4@r^Ml^)j0Iv7LEvDW|72HD+5 zPVV^lbZYT;3CHesWxDROYl@c@*Ok3jRMqmo|ruS5&eydht` zSV%{B+{34LP?xf}%oQLf)L>Y42&SL(gd!lnf5B)J>aRW`K}G}A1GnH9zi zLQ(ulC9(}Sy``$$WlGHJaG=ozwxRciJj;%${gTMGcHku9YC!u4JxO;^EGt|m?e{QH zIL871-W#9%Eok^Skkl;kG~>cSz_y4QRxL27U?4>JD@X6BCw zWBQuCwD3`cV$EBn+G~_no7F#&>m1;!9I93wHku4k88!s3BQxL4GT}M1*-%^T(7-Nl zwUbnWosYff{ck_0R7KpR)pxID+{IIAhp!gDa@JBFOzeh>x9^`4qW`$$FLb8tuF||> zWNLrro=( zaN8O|9BnIAwUwI=acgaB_zcZR%{GK81kgMERM$K1i2^^N$VV|R4J9EF-O`Fl8~F08 zlhS9?Gdy-nDQ^5{SK%n+&M?VNK)99y5l=VmDz^u`c8ms=apew?FIV#vp?GuBcH;ZT z=rGsv0{#e=R66Tn)!;A1QyA4y&0VtLhh<20M#XLzqDKl{C?F>Kj1cG7OYYXh_VMi_ zl{!nT^^gK@mg-xvXn|QQRv1EjE%|h)i#8`V0i+@gsO8}^lmcOT`xIcYv?AMLvNm){ z_?+0DS2x4i7#=7e%L&^iFBh@>E%V*5Ku}V5$L%4!`|P!K?2eXRsYl6CZ7QUetK_R_ zm@2>v3ADX@z;YDD1P8j&k)?EW?p{P@avDg3Wz9o_hQm~AS@r~o%uD5eBoB3P!Ts4m zs^&C0s)O_B_$={{`4V2UoPI4X<<7t?Y)Ez<6lH;jx5Mk+ik5y^0U_Rz;K-Q_Zk|U- zi-C|5V<91(Jc4N3Qki4+5Cux1hRL(6**1_+tzV0T`neyF*5qm~Mt#W(r)F)c71_yq zvT-}X_3qLyu8u)HITnhyu`m08q7sgmp1^VFLOgkCg5y|#+l=-k9zV`OInZkl%(;s> zrsnFQF^?f0%q^V7d&g#{D$u6KA_-mJN`KG{XuT!7+w<$HG--}8h4xkV&6dBNKzUu+ zG96kzdD3mJ<$Uo^NWr9-p!anNUCdUG+W-Y(#@4yLknLJ<>K|7N{hj69K}I}^LUJ(H zVvUaT=bT>A;}`Ec{9xDXKYfAuM;j2+zxR(0tN!I4{qLj+p$^?WT21^9>M9h;phA<` zY{5<4&q+Lt%$9bsB*M9No{Qe=xX>ul^+^%+dUSCfGWg!>4O|aN`iCw2;45i|Gi&_I z`Sp0O%`4tayl&pugg?|`>Biyg+HUfg@f^<$EaZ&R-KQJxgWsNa#}E6FD4`@U0%>R9 zUg_PykQa;W$K9Di^bF8liZnO#h*eJ%q$ClBFXI$FFbi^v=Mjr3%74`nq4GR&eg+(m zHsDi@a6dK~M}2T`e*OTi1LF3m$L=B^bXS9cd3U(_^}W3z3qXj4QWzp-ni&nLM^7RN zpZEU+m|+51Oi*H5zuhu}fGuhE5Y<0%x&*LFJsIz{=~kgrhQ}?l8_gS4*(Z8??>epwx4(UXzY z$^6Q`PTMiRSJ8 z<*O^i^}x~n)>u><#Y6`(bu``FjWPUg1KLc$%*gY_7$=xJ)QpLzkk>!+QN-RK9W+V7 zcasenVIz$5TCM?UQd#a6oAxLODs8ddS>O`gS-i0{)7K&dH01}(P^H2O;YxWQm9>KFe^w;(XB`| zuM4O|pDpc|-#fvV8gv#TM}MRjCI6;j)6Yo8Fa-s&nUdzmWlUAJ2tTA&m8}RY(;cBU zD<_9zWBtu7vEvqi!-jIBjJ=)bI{ayA*dhHsYC;1LXA*916!r!Apj18%PRD4X|qAfJ~sZ@E5Ku>}u36H{V 
zIx)cy?)+)94qVTmG9mbCLmQ*dYrT08F=b$KFk^m~>HWIgzzX@Y&F*tysqz)s!zXU5 z1h@F}ivD(bAJwc{D}@v#2G%62%lzx#2<| zVpMz{E3gVk1F8t7MNP;Rag7$t$F(n|XykA6Kk-ej`2(h>Cg#7(%a@HFuk?m4lWVe! z7(~EXNJJP%D)5a)H4n+cJZPy*2iwhFr66%MteY}+XG>Nx+1Zw^>T&A0HEBZH_vY&S zWH1K1a`;QsJjsUj7HSs?eSs(ueH;E^a%caitb_UA>(;tew{7>?V0^B1_4VlCYUajM z4)6yVc3RGz*{ja;r*hUj?W{>zu{1+2o2Gn!DI_6tt2Sx3ZVFk2r%>D>zsB!wP3&)- zBI0k+K=DkIgn}RIK+l&S>^+>M4$r;I`bTvI70Z)!56?p<`5F78m!|~_x1ce$;#Q)^#pY)2UIrFhrGqLq#WK0P0;ve@B18Ejmr&ml0#mjIJ~?R2dZA^q|) z&q#;z(pH6)F!(iE-Y=T3zQqE*gxiJ^1f-JC2Naa=5vWKW%eDRRrq_*vysth6KeLnX zaRmcs;-BT|k4}yPxyk!0KFl*;Wes%naJO#?edj6mV;J{9~2SDY43@b7j< zZnd}x;R;aPDTJ7oVai~MifQnI+>*As8J6jTMFX2=ZL{L?nX>trp@KXYYV+w_dG~SP zA(cL&s!_*}^R>_h$5s6c)~Su+W_at>XFFXdY8+xJWvV7Z8sZdwrK#coSN=KYm~lUK ze^(-dL)Uc=umC z>JfdA)WHN6uZ#h+xXh08b?L2zhY9iJRWSWwM!C)9>p|z#5(m}xjX!=r(*O%qbYYhs zL&lNuSl`DtNzOf6D2fVuOC6hoRJmewBZp*3ZdQWuoi0TF&0C8+5{lpCRRpcSeDReS zyKthUh3v^_kJS;oZI(t%9AaL!+MP7!=Qq^apg$Fq`c>jEGVKp`Z?=D;YJAj*}t!sogesZGEcL`VM=`&6P02p!?T+fI94-&PN+^eLCy= zGJ4oiA*G`6B=6MLaF@00UqFE&hT2Lty$RZZYpW%I;LK{>5}pLd72uRe4N@pXJ@FB3 z^EIW7ME^X~qCdSjceXhHIAY`2;gxe=Rd(8IdP1Ns@eB|l7N^A6w9YWG8#VSQ4S&<1 zViODFzAw93Mi@} z3zdm}Lg>S`m=rSY#c*AdSaWh7A!jgzxuzoVnaD@&c zF<`FUZUW+*>7qN4xSY-^=|CV_)VrdI?G|>P-Dz2KKTP1 z$O31yO)eW$yUYmjo@1qO*l*~F0)xU1~DcLm80K- z$d7c3?{2;=H>5}?35+3K9m6&a(HDJcSu{^PN{sL9AIv~U8SLqUFfk6SDCU{2I6A^o ziKBQ@PV8}5D4S43r=c^&n2l78s2cyJ*bhKn>JaCDf^$h~N--z5>D1ie(b)1bI}Q<~IggB4;rxzYH zS)GYC;ct}@2kOu!rA=oOtuVi{&?&W}X<#M%O;3on9kvQ`AxWXxgPlEhXP2-qu`9@kjT#y&+!>VTROCr z$1ygQXTQ1viMZ`sDTZe^iUQeP7!%}g}``+jQFRqD9wjB|Q?1yYi}1GzmD6 zzpF$cl7$8+Pmv=>^h)`lk~2FQQCwO_nBV@!`5^| zI-eO!$tb|+LScXv3k$t1&U&&xYLF_8j}x^1XwME-FAK}7F$w#KqHcl;F2#_gK+d@s zoqP!m0rAfq4ljlDW)xq^nVm`jRpOj%q?tkVez=n$J-Nw}2%warf!R|q90rme*#$9X z4POse%$F3|L{QhK8~HH-m&8r!*T~bY*5wnToa~i6fGkVXx>~U24WbOku?h{y0}4y+ z=~P^m)3ME!S34-2{R3QZhY&QBvEGE*!gZVJ)|A6z|A3Om6R@GgzoP(paDy#8Rjn5< zigd)Uo>yv(U{|2ReW26%+ zO}hR@cIf#dvU;KS6~j(l3WjI2UoS8-mA}6HwB{c0w)J>_U`ZBi2yEp1VO^7+`9b-I 
z#rI3()+^9qEZa1w)!66R>=@4DDJYd`NPVNc7pItUKlJf(bSf8=kWfy$U-~_!;n$Tf zn8%Z>dY0F9?WX+Il;UEl368#FO>rEhVrubgo!&QJ`wLVn1c2rrDiFs%RUkIbe+~az zRR6CEMCe4J2hTXE+MTxv1*4`527?V{_&JN>Z$;U{pbkZ{Dj@&Rdp$;ME$))NQp0=@ zDV0;Uf4l4G%9G*29P43AU-nMQcTW+If|ulhD%P0*Z%glkt=r``Sacmsa_4;-ZOc;v z0%Mp{`*R@pZY2EK{AR%#bq4L$uBZ(wDN@sFV?;UY*xbfbxg{d=A%be{cWomG7R>ga z`!)&ZT(qe!iTxtBYrfcTHSEf2Ia=y6b|&1cuS&f2#B@$MaLAxm_q1Ra68yEiw8hR) z%=P#_wd=(Cx6~N_P$>gcAerUHruGq5UG&M|5PtekV^7{6HQ+nc8Q8o9-4rJHnXg$x z7+h{obW`_8`d?})?g3Mo6+t`J* zX>r4JSx;-5+ZSPCC0P@uxXuJ^JZ_1!UzTV+O;}gYp*_q|Y49Tgol{+#(+uI2RFai= z7)s|Vmz3NIxDc-DIh#0B1AG%w137n((~n^;40Wc?-TU6xe(ERXa6RD$p}vuX)<*2b zxDZ-VUP$$@0c($BCAQ$~-$grYIi<=GSE!vM_eHoXXA8s`dL%_>^28e4usu$~dPSl^ zw%mRnCb!(IHA?1iEe0;zen|!A}D994qpg99?d}8km8p5r#%FVicfWJIFG` zIOd7@5Px=TC|3B%ha@k2=RTYIWwerD<)FDM^XdI`)U{3sr{>xXtP#iqI+fy47rfGo z$bqJcbJ&Ld^PlI*zRRhifdtiyr~E z2bz~=xoQI5;Hf@+P*7XRzuqX6hNM0V3r2mwJ=Y%3R$C2>=Fa~7GF^|_YoRKYeT(u= zo7O&&nKX8kfzavZBCPpUB+Ss*bj^Ro=1SnI&OEAOLJD*sockulD^GH@R*_ZWoV~Q- z{e+Qx4B6%f21df+Nb~vwEN8PDwB{LymJfMfp^LN4k z#7|h&n@RhPUxNsdRpK?g(hz)N^X7a;9d)uz{4fLrCP5tTN9c*#q)LC|L!Y^_r-1tRFp|Ynf7fypzWW8)z8pzB z?Qc{)J=&2J2k2}N0Ix9!x~dQnUiH1DW72Gvz7SDrZmXjWiwDrNQuqTOK2B$%(uL1{ zL0?T1&S9-@Sq7wGCy#6C01}*g&`0FCO3;is!I;CREk&@{xSe zESo|?&$F|(&W53>C_I@oU(T4rcX2;C&-vf^Sm-3E->AxxOSpPLBqWOC2o3<3 zHzq@GIzCD+VXp58f(J0`4|W8p17MI9o5e&!ULA+X_g9D18VzG6Pjmgvz#E_~%4ij! zPfonYaA8Csz|m+Vfi%s9%M7i4nZ0g1A&*kF8j4@n? 
zwPtMS#8hXmkA*L{MW=Q3k)un+r^!l1H?Q*LVBF=R#Jh@DVl9^)LfYCQFb4a;s53bM zp0@K)+Hoa+=9%8cL(G{9u2AsudxeLtuf<}jcK0_=g-Gjt2XJ<%HLn%<%}V-o_E}vH z%YuDv&hLRWs}pVSrRyFmzH-CB9{{KD)oi`=D~yJlTk4I--MW6I(Z)Ecjw+hJQsi}$9!rS8Zph*9gmM4;0x zvj=7hHHOjyw5jo_-;i{*_6L?Ftx?^>t!x3UuS;YM0f5)vVIGIf#P=s!9Rk8?xN+Ke zldWjoCUdPeJZ8f)aV8g!zzIe!v~fi%ZLyQaI*#p9ogG>^=MbO)NA z7UPluVo)&^JNqqMYufrbxT(mA&Tf>H2CS*Wc$*|gCnsk6QBuY!)YG}U1GAfC&ISY^ zx;8L8SdYkBn*vxeCyEU{AfCKCRyKiL^}=MXASKFLldqd`SKE5>h;==qwn?ZWsdGoD zNG!D>B^4@+Xl)ni5Z0oAd>l4)6LQ*#lUc8Gvp)~-;X5;2jIZx{dvfcSsdQ%!_`#a) zh2G?fCXN-cTV|m3PjK6Ka09}XjA(Z~;uY+rs7YVM71E`c*LoZ)&r5ZLI#*}n)++$r z6;^?EI_PF%;{b?@&~eSiOEYv`SK?Yc_H|B3rnM7I{S(l}kh6^-BKw_;aWb|KmQ4iG z%4d7J=M@sWk;`Go1^bRkW*{#Rd)GAV}Ad(+nrF6 zTs0e=@r$!6`%U%sS#{!SFs7bB)>|@5hpPP$bUW<+zlS~wK3!+t=dQG`DY)$2w%-m$ z+_@tjhggF(rHJhxEGhM^8|mx;t()oW{_^>juXMU#fG~TUiN5K+R_N!@?o*tF{q97K z3Z+)A&;3^yjnb+2f$CHI}iL1kK5o|YR zDuCw`>woT9Ex#3HqC9lt`_}r4M>RV^Z|MOb7*Z6Jn3)5dn{i;QEi8yKu`F%3b$(@n z%A>r}^Pzt>cCtDfC+F<|LYZ#LjO`^^^PnY_z6wS{; z9uRQCVm~x1xw=-y2*NL-f0%Jv;8Q>&pHM_9pwoVLi@{X@q}zIyL@E4YhvrSe{`>AgctUC9{Uem7Wv7PHsT5ryVNMnqO8 zC-e0+cPDN7rr`x%EF<`Yobv^0t^+p@JKmg5`Ws@u<7@);`0QmzpL#=YYLME#`>sL$ zcbbmObw}inrhUXGV&?wOh8^&jC~<@drD*AkAj8NJZZo6@1V6qLPuDfnCd;3(76x}4 zzbFUMt6PS1jCNn>B@v~-8GyA~>TCfd=Ux{DO?kj3?7rD9I$DRgkI-ujegAS3A(x*i zykLC&Vs=gkW0Jm*@Of+p>bJC(U3e~xRwiKLcD5e_+|*Wo>Cl!KTD4xX8QK zyh{0e0mn1#>9Nbrx@!Co*C{?LdI-HX#8NATsihysMz#qt)NFv1z36NiEn= zC$k?RS7Nfm2%Ib2fto$Jc=zTY;j!`js1JYk0lO2 zulqayj4x$yE^%xX9e;g}?|f0^aYiQ)L$oY}u1@MO_#HmA7!=9knwdM&EnpV{*4LC?wg+xR{4kP}Qy0Ka^Ho^~RwF5Si6 zKm45@N6PIg^7LVqe(G`ERUyXTmfWt6Pg1kA?KTXdPtCi(7vxEyf+l2c#k7l`;s?!y zV+V!ar8A3K%OfdxEDyAxBv`TNFZ!O$7M72F93~5sE`&Ef$`Ax&5vqA{O!Y!}>O&)a( zoZ>hSpt#408*~1W`7+h)?o$>o6m?|ME_8tQW#Wx2kYM1!`)5P;vW;JE1Ml8rE$^J( zdAvA`U@T2C8fInpKW>P2Vaj#ihDNK5(8U0^`ZCq}!H(|m14)B_(}knC1g7vQ1@bf3 z`w%nw_7~@9@WE0jO<$QEO;W1t?-0e(iU81A73!#X3&2hkI7Mu*ZjfU}nUGw#huVZ?*j`T$X5cFm#9_H5nOI^81C9BRfehdo zM%@y&8)uku2Uu1e`VZeN0HR1#@8$saReJy-^Q7eX`a}kOB@)IVQeiVfJJ~Rl3*fDN 
zBD1ITEQ16!Y?k0jAw21zdTzI}ZV2o0mDOH~qEKu$;nIPuQoNraPLZ zVN&)jTlb8vQ^niP8XX8eL`V3%K-~50nhaU3D>J3lfUwj|Tq+edB2pJ_^$n$H2(|>h zf754T0kxxZuN2byOv@h!9+%1~*!^(n#zNypvRvbhTHQXP3k)v{GX#zqi!7o0;6AcMY}L zwZkEv!>n~+EZ6q711brAv53gSy(J;iZS8YHFX3_s>!|f zz0N^r@buis@dhWG1Lpwd-DS%o|xTyf)>cEt)pa3 zA?%Qa^3h)aGJbb1COCFa)6kR{jBdF=^-D?qc;r>QPDAeN#nSI>A9Q$P zN_ht`c76Inj#d!-{mbRGLme#$i(wguA&32?4z?jF1zvhTQhN{#g+zk;G};j}A(-JZ z+O?Y;_kAtupaZQWl(YH1epOX<(3j4#I@79wYZ-?vo4iV-%InZFUs%NrIf>0qM}q+f zeka815OK|fe55wwH``Py{MHbZn@qJ1veoL)21^637jl)*#>eyc&wz!%ja2(HZ^&B{ zwreLjjm+yCu&QE~=pVU+k>MZpJ=>2Q=YLCm`hTIJ0t3B6VnyMIrO6G$)0UQtP*;%) z!@dsRe3B7TN~us{wyV(3|6~)Iir9-%iFpG7fQW<5ZJt9Ow-b<=Ey!#!u>hYBfjtmj zC3tfdC$pu8zw`6(KR-cOFsbahnS*rBS-X(>;b<^BgVbO8ABRJG-S!g~CJ&y^l2$h) zj?J(guek^BsxYL!cOXSRSSIuZu5XNb{(~J}mB{t59j7^9uYo`>A`v)E6qsKEqc}wO z_DtDoxX>9tM2CF?%Jb9mr3w(At*%9{vhQzAK{L-|qUK45X^@B?khtPQ~MG zVoCjdhjwbPd30~WH@b9cYwCBQX(i-$RXVq#D3S7SXP3-;%k&nbL*p^tlUxOmZWpy_ z@y!Xj`H#JJOHioVbN=20kuDV~*>xrak-4yg$ZziESfjrSNLvA~So62l`6qjy-5nMY zUk*75ui?JfoF8XVo#87K8HdAixlY#wbTZ^1n{<6#qftmI2kKYfXrXL}v{z7D;%1*2 ztHVBCBqTOno_~xehGx;BpSTm>C}ObbB7G(lv)_tshlZk%=86Vhnk0m`VrVPH7Aq~2 zS-K*$>PFr{7|=6@4XQP$A0-m=(E;Gwsfqi|6@tY3-b*@}II4Z}i)%=Gt> zR^-qmC&PboqxANZB99p|e6FG37T73C;(5(dFrqv%Zv_?C{4Ly^wFCQ!ii#V%=}TYt z#RL_|lE4@Z2BW8;ukD9N;!y4qa!}fX%oqfi!Pa%AeOOq-zM5=}nOfwox#c)k zQ~g>vZ+pa!x(93rGc9c=_2AO(c5>%?O6cn61sk{qzWj}Y)GE21cY!El@$zA!5p@z~ z6>*@-b8fsXE2Cq+l>xk)SRCN+0Oq8)4pPe?gFim04!@~L_SleFn6+6z$W`6N=MA&i z23!g4#w5|tba>F9o(kxKP+``>Lt%CpG#M`Xt%?=BmXkBV?mT~!kW6!Y(D?{|q8o;y zjrjJ#6-$!akEOd7!kB}{DptKWQi@dBz*koS&c*Iy0?50rrKNdqXF6E?;umv>l+n#7 zII2u;*J1u6LnIP}cl7~9J-u8TRa$$fx5cTur#sfTHJ+f5X&%PdW($FC>!q+a&fkP( zU}G!Y{6V~DuGToD?oaT!IM@u{imRy$0WtvtoReb!!CP#3z*?xMn|;Zl8uxAiwl0wl z#9lxl=V4-VUBKKkqWso*-|#*$oRT18$Oo4+crgSovytY2F8JnCFw1H%!fj6dYFWYr zZNN!u+}fwwI>)w1ibvqS_L9B@GpAG|UgoaAnGEc|{|wbPF;}^+7gQkzg>`?3<&t5; ztTa#;Ns*rG%Uj$Edj5n~4j$3X^-Jp$4SMXC;!zg$Y;BW2H$?6q2F|Tt?s%c&_zekR zrSYr5aKfs)Lk}M%GbrIRk@(iPg~K;np**=Kq|rz?X7z=Uw1|i!2m?$TXa@DWMQZ5* 
z&_R4pzn&IVki*O_Y^y*kRZ$keG#eqeOB(^SIy@;J>_~A-hJ3aR5Woa}#gN}8vAc+a z-KA%0ir5JXp!di>gx!|Yw3HjP>8CQNf%wQEV$+}yMMPXNM=xH1z_KNIuDKUq@ zo0hE6ASH;Awy0vXL|Nt^O;l@_5w~7w$WWW*gj+69&d8$HGjZZ9?njBGqwZOD3i4I5evo2P&O<>}z?FYG8H0Xb@*Xw*eGeQ2^YAq7gIv~U!4N3hh)sJ`@PbPmZ103z%`vFL`A zeIMKi@)R2QJD5aa3Q3Z=00UWi#Rvc!av7xA7lQ%Hy#tgs2#81Rw_wglq{NnUUC^N5 z(=(zzpg;m`$>6zDNEaXlihlMXC0==Zw;8ja=;Suf_Q@I zy??=!x{KS1JRCIIyEU_l)+hF+AQm<>?b!O)wn+?!YQV2X@8HP$uykfXQSYjPrgwxO zMS*GdRI&gjXzUL7WQY8bknlesA@&_0<#|O<%9zT;>Q;{w**qMOk428bJrda3bx-s< zN-7uJe&glk#s%JXsr0S!0oDLQ6p#r6aR!g=Ayfe#HH_WZQu#E^>uR^RKLu&&1SFYW znXQ@}foHU@Y`Q!kLTlGKn=qm$)E7;B&&4!Id5D4j@*nS_rvAM2VmG$Ex^3F4jXwQg z%a`6Fjzv)oV%Btv_1M=4JJ|DPcj(045NkColXghz=@3}O&fH(Tx`^n`wJ2Mj5 zNz~kX?uULn12@`^FQ9jv_Y66E#?GmR-)90=i7gaZ(eKg=Z{jz1KP3@~Gz0KJoctoc z>KAA=m0lsu5M*0+H!asx89igkH$HL8HC}PCYgHG0r#nA7SH1=f3?>{bR5dIA&nw1HO ztN#9hJDeeJ+Ho^|YPPmIoeNEGp#zaqlvZh#pMz~;pbu;$@T~j_BzO44fxyxUSUgEk zs2c)B-ZgOkWe%z~W>h$i7~a+g7@gkTc%c_wf6i)DPfX{}CwVon zf9_x>OGp@a$P6iL-0(1^|9YddduB*QkobsER5p$z5J7&i5hj6wRI~ULHqu}-`DafS zjefeAC9>;h`$Sr5JthPs#ohdisO~AD`Vpvd|H>7k`27X<3|p&JJS4;}h*g2q?Dofw zBTEh8B8QqWco}u@gU9R6X$QkHAO=ys26yrj2!&t)oeZL#)w~FwRk7Zj{z^$?3vUv* z7I#DDx@JcA?SnvUAi@h!V-#-!@z5H0`XjKkjel7snF5ppMy7u$kG*{R{F2V#HEdq< zw}8x%YfSNTn3iH)qPipHVdBS@!=16y-6=geWr8*6N zG^$KDsrJ&aDfv#aA9mNqmCEl*51o!rK);p=uOgo(*N4?rB|ILq#?wAbyTsoq#5Sv_ zR67F`b8EeM)_Qj>*xgOXO_3w(d{rq3gE3IkVI0-^&a zAi8rmaT}x!=E`86vBUwQl_|-MThZ?$81Qh$H!{pVj>r4)$*2B}gM*_*g_~Kvh(Ctk zY-dw&{g;=zo9P}3B0k-pN9nTb4Q^k`CTYsdJjt>1>gMhGq9%TSY-k4o7NZktYu~WJ zFT|-pK^Ed$W-Pvxhm|+1{EOZ6W$4YZq^P1<+2-`K>z^>An}zG6M@Y;bX+Z?cX#5$V zzk$p6Nz6Z9q=L=)(G+v0Xo*d0yV1MXagBDKlQ_X}aP!1|CV#^aWmni%qfXIj$*a&g z2h)|M%`ioh-NyX|`IAbH03hs#z4bApdv!tO9_yJ$)8bTJUd`5 zN+m&Td>jkFc%$h8v4WE2BE$-yElQ%%&2RGw&|^~5MVs5JpR#XkDhBc}i9&B?3+zHYeg$+aAFXeH86LGK9@ z$nM?odOM3|JsKtN4`0YUyuUg9?k_{ekR>H3NTiO@Adj-mRfr`m2il~MCl9H8wyiIz z|7e!hY^Oi#uz7KPJIx-gLuXbM6PFlL*6Rmb<`{S3XG0y{hR**rM@IX@LtKW*>rV}t z0$LzR7Wu2#m^s7FzhBQvD+r=md>?jpr=?bxV0h2UN>fRtWkAD9t+v9iPolTw%UM2k 
z*Ph%g*?gun%vQ^dMTwimXUn%}O8ZE7zUV4y;*yg~eEUjs^DRQQMq_I4H*T<1jmA50 zVs^3#2V>9zqe6Qi5o)70R^4CJ${CaMcR28vGQCyy3Nz;4tL#0Q5*e8*8BX}BthQC1 zJ4?`;2^yrr2{WsALOdI#_FAaL$3UhI31=PKBeV_+Yyys+#J{OY(!oP`^GtSte0k*D zepl(-NFVX8k-SmIGx8@M-vS{ewSpiKX?q$Boa@Qo+rl=|-Te*XsU%PM+;s6-nqL;T zIzQ)8%-lH8H)50dz~xhuk+(4L>{D-+#eoNaAq4%z#GsRoaV6y7Vof;+c4SuJ7aS46 zee2)raOoZTx(F?CxcQ~}d%BV(E5@cbP7g3aYfd;Q zHHXVB<${NH_ePHEOhba8hh!qEUUaxfnSR5awzJaB{XJghM6bFuA_`jyKPdVz~uJc3~@#G7F;(vv+UA@ zh!6h|l@GzSCxWNG*Fp;>I}r6XYrMT{Q(zXfe&|X&_OzyYf=vNF!67Hg-*#> z^148FL9&ouy6}7k6a#b<>4d^)s32Bki+lU`FT___@;YCNMjZ;2W;3mzgPEevH96{!!I=_G5QR7IO?Y3y582|6>iI7WG$Y)BikTHv@mzN>GBy- zno1t7`k|9X)}eaqly{Z2m-PHm{&FHiY0R02N@!yU&x(l%bCMdC8mR#jGf=AAQSdUT z)MRlR7p@>=YW^U551aI*M*_=VEDZm`S3wr#o5lAAp0ER0{*SoN@(+ZMgYo~gpZ`U* zHm$a8w?U8KcT!j1oHj=@67k2o`~vrRUec?)6v_g=T95!e93RqAu&Uy~ZFS|(AHD<+Yk2{Y!yx*H3WFXnmL;wO7XIt*D)i^W@64hQgno7 z7Mk}eYsqMY6J_6WSj{2#@g{5+e050x5KeoOCe6suBOl9|a1u~uNs#ub;ik_SsM43# z9V*g#7UC$+4h6b@umWlLsbV4-35qwaNqPFV$D~-HMdE{A!9L-_#X^<3ZA}Yy*8;Q$ z(ZoVGTt?Y@ZUyQL0ASgkYSsqrBcNoD)BsQQD-?8zfso)T6jZGaiKIky0&h8g&!Et> zF(Wm(=jfb7y3CUPENdZzT+00^1DBX!NQ2qs7TC2uc!y{W^bsAMI30o5f4SrE8VsTZ z|CyKmkds6RFkT?RB47*-rT1oqFgvJ{h5c@eLG6WTLFI1#pZc}BM<4xD;-aQ5NNlh6B0#F()!gxMOTn9=Si zSGXl3nTOZMBEj_!uk{ZICG9Ycq8g%Qp2?h6NaOcoZI%EWmWUNJK>a1G9$i@G%8GMP z7o(-?o2?BWUp|Ysv@t&wsbx1pBTu>Glqu);j{^?U3{1Td$M5Pa(<}+1Z8-^U%fVPP zl?XCUcNgi~$#@+Xuo_edbF@Zdn9-SFXbqyozDcyLrtqxe&AXy-Fja~ z*Ibb|vl3%vNst~fP?mm_>8!)io?_Xs;Ih+65|wCwUKx-lYYEoK-Z|2~Ers*J;k8S# z5Pd#g6GtE$EDHcC6ZsQL21Dl|lnz%z7}`GJnjTdA6n1n&hAL z9U&UZLLNq57xxAhs;KU~6ME!myOt_b**UmfjAcH{>o(4;&x!&;F+!=?aBYO%I}K(B z3l3>F^SAT(J8r7$O=4O0&xmOod;TiTtx=`#Z8_Yme*q6y0ABycS!eynS^uf={(JE` zb&a^68qarEk076p5s3jsLPva&5Hu47bOR+IfS1@~Pg1)M0VkCBW}fi7TWDsk1-op` zJxc`fP~@0{DfR|hRm5iw6J5apJQu6 zXoF1+n(Hi6i&IZ;Ii=!~i(fZ!gG#KJn$9p;v(=}y(T$iME4o%RTYGiUq}PE?k)R&r zytPd!gT?Whaxvw%MxWK~TDwYAH_Jc|o`V}LT9wv%Ip@GZC`*cz@^NfsW0hE`j;hW(`6QZlqhLGQ1dY?(UsP|xCaSH*75ZGGqIuEkYYk_j15O;7v6^dKf2?+gIN 
zIi$#IqNb;-9*qVQFV9=f`g4wq-dEkOM&9jMc$e>PF`hQ$+u;mBX@oaRynIX2BE$5b zZ4Pum`-ayYKz;lImW0B)3mOvcpJNcAWfWZ7%nALDw(m(t|%0WZH#LgO- z4#E`0`HHX?B|PpGohscyq+qkr9VCsNo^Y>!7${_Y1)WO_=W;TmuC?L9BkH-I?}L!) z7y{G!H z`+fu^^E|40{Q8@KBN`YQWQ$P3VH^q&5-ZJfoeUxj?yu^}82l+IU;i6f_KrHf8w6O5 zNvW?r27^L$VGX>Xj`wG6nfFZBXG|T+0fEC_GX13UDPN5SKo84z#_oz=&POZl!F*>W zJ0q2-kb+~<$wfN>+dqrVC9#QF3P^*T6fl)jsaH_i2J{pLj$Zo`eTiv5v$Uq8L@0dG zK;KrnJ)=dhmb5k48BK(uGmXH_S*(#h+o~5)ul7wLmq<9m;x2eo% z`y6l z1D()!cGzohJQcJA{@bIo6|W=&U7IsUa~=jm&g=qo`o8yLPd3|O^(Xryq~^tiNaSq0 zkHIesogCkpN!Lue^;pdE>ifeQywxFeJ)b)y3}~@C5!o z2DSMKWMZEuU(1eqYWSvzxvd)vPoLQHhq38niLH}RnyC2bX1@R<$U-~*5&7Bvg)*?S z{nt#CD-DgfB~b+56E*gUD6vcQ&Q6~Y1Ywj|VL^eclyDGplb;Px@Hn2ken;|e9zB=e zZdV5EuIdO0_l(!JtrxZDb)McQOLsXRtKJ`f-%dATye8ZN`=kct*~Q3Y@k`LeB9KEG z$c#_1QD=}}%u6nY6-NrRPOA^*HP-9gm+F||H>g{Sx+9RVsxVhf1MtCZ+m!yag z5utabkn6#<$mI7(fyojh(#X4Xf;Rt%0BM@YHG7}^nied+)|h584HZzUWlOpC%r#lI zSWFZS=xm{K=?|*ZpcXk`rE||TtKr3{JZF)U%S;7Vb>Em@!Z$j4`6X2X^a2;SPWQkKT($-MCRM4qdp$vr9 zu43WZBzW64V~jKZJtMVhs$5Wc8|fK~-miPp1+n01TYK@|wb*}to7nde<_DN0vzIF# z;~97f#$%tv7rq??!pt>_?{AtJj@#$a;^^XAzewWW zSDsmqOkv6$tLR#b%PG;&r{EImd8`EcT1RI9+Kz?1K$-*CrSN` zZueAWRd}zIcUv)3>eSY(qql_8_Pe%H0^JKa!WZ>MzS;8OZgcJtt|To*!LszC-*Y~r z%&fa$=vmm!!Y+_{z)6&oe~1z{LaZbah}H!D(KXffu}PVqI9-eWR*t@arhHF@A2 z#x_gB7Lfu&QSgn)H$@pL<7$Cz;x#Fag328q1)8*I?m6eB!f<>G--(C8^YCK*| z;)y5sr}$v3b2ot6JHbQX94)%~HUA@$+sY*x5dd@sxM|JUM8=zf)44=qoZbOICc%Rt zK|_GE(U%)ue7|&G9#B({*_f2*d_-BZKOJIG7GG=R5h*g& zp?(!Ok?qrc$J|1Dw-XuDMwn?XYK1}+?xuo3vuLJaJIPNJbdTnh=2s!xZiZ=UsW3Qh zQ3#|p?y?p2Q~)OE7`^dGEjg33Ob`X^%~Oi8%?xvO&b-=}3|;%(4=+YqZFi<*pncJJ zGB%W5_+acXio}hL@uDDKD`K0lwv=M114GdA!zIyO^xdcSsA8(ZzHE@{Y&>*_CoK4X zbmO3+J&a7^z(X{U8A4J2DzwQuJ!BKB_th8TuRQpUr*L-=AYjwA{v~Fh5 z69#}SXs#7Ao7-)eHf!ivdNCCph6M!i143H$6ut^ zxUbY{=Ad$yM#XfXuU@$V1Ij2&S_tHq-bN+Pfxu!O*qf?FRnO`t#v4`h@;S7k!W*6H zqlSV1ayC+2F}4GM<+TjpvsM5$oVRkC1|n(*fh=yWQMh&)eQ2bOmwui+Iga^UI~V_x zxdMJ{264V(Yx&x4S-CwdQ$F`#WMu$1A52K1NUA6fp&XTDNSR$ 
z>zTPjwT_U~$<-DFUZgbgjv4kG3ljv5_RvbicHX(zaD!5C)mcW>nK_3d!q3^Kpc}v8 zRoZx6_Xs9dY=kv|Gny3U_QlQXY483rL+8iMwbr_-6|^wj{iLheUrOF%9(t0}!?;}H z|0gKo+?=vb+(o{l{aYmp_41Dz%|q3&Z3t6Ceiq2ek$<zckji&k=W&1%RAEnUjbw=ge(y zI>lDMKy4Ps`2UfmIsUaF$H@NQizocw{;Zhpp7i*fSKkg-lNl6>7#66veIzi|ILjYS zEzB+IN4!PIkzA_Lt?3zetxFw6X2$zrS|Aq2rUqjJ4=vOaX?NwB??@AC%KM_01t#73 z4L*8kV+W@C)5X={Y24YH2EX8hW5c>6BS1`hc7c19EcME=Nj>YrnniQ!iyIm&?Z)+p zYp9v}RSM#3(>|t?LreUQOyk6!X(>*9Gb`ZthWDjr?bn_4B{_bP8&~FxMi)Ta@wmLg z#LFTZ=MWo^4fR-(H(+veID2xM@#T9(bi?@B2&Rxw!ULNPdE7RWHBJ^Fq_sso9qiW) zHU(o^M;9c6Vh<|rm(vsEC7TRVN9}NvgLNpeJwU`&Lf^3Ucg>h+O9h=qZuCgY&G{1; zcxQsy1+z_}WKW%>I;CiR-_qIG5>by!(Q^u9%sl<9GOhe_IopFwDpZ{oB?@cW+Aqz! z$(G`a=va^sk(QJol?`Pqjzd%t({XSNp^t2v?Y~CkLFU}GEJZ3g$+M~$KgLV8bfi6b zh#$J;=UqpIHrp7*>v*Rwp2v+{zCE6FggBGki65d(7~Eb>vgcV{|^s@&P`AUq7? zDk?$s-it7MZ-MA|MQUuGoCbytzIUT-v-mc(;eYbEOzcK&9Ixku@~+l!Stqxb|1lhGzkZI+aNq4HtBbiG}6#V+_HP{1td+?*-16;4!@bmv=hu~pssei@wxV_Zvr^-`%`Isx>eS-nUb1HkwG=MG5uD{89)ax;C$W*jlnDjSt@!Q*U+^$Vs7jrB!w1m3*sLvv zH}?liQ@5q5)8m7|4qp-5SS-^aMA>_RANP<64Cp?ROaY!vbQw6`VUVS;5_+T`+WENf ziW9u^PoYMs_&kZoAKEla1esX&+T}4)`27SPIy=8O$TP~GBU!N&J$xf-Y_-?oH%^(@ zv&cxW6q&O7H@Ly;s>(>plXGCv%}XVUu#N*F4jRcg$yF*g6$vISs~{Y>vlcAcspP5){kEW{&z@a zrT?E4eQ6V0GiP&r21bVeI;3{1N&ILKqV$}oL1aW3iAAc?X@H5BPUnL%GMW20#rUV1 zXVnN-XdY0Me%>72i$}C%w}~hm;%9naysWgXj5yZBnbCUV9h<4e>7r{WnX%%At$5pb z{ONP}Y+#8cjl~mQ;coa9q$9)VZ6M~$+?JHbO_j^m=6l`s_dv9k*Mkjh@-0p}f<{=1 zK~3%clUL>eeYsB!l|1whC*4uO&cHNu>_u1g9&6ezDmN>ttmhK`pkk!kef;4xCFl&O z&W?YzpCERg_*R33VaFGaE;3XxD5XN?f$|sSUFe~5{RR`+BjI3~Tc2W~OsF%&`=L1u za+j!#=0m-*mhwSxm`DTx9j2eG|0(Hk+C7`a8M+ek>J+ssQ^!|wS_YkNdrFunC7Is{ z3kU?Yw}3@Z70-vWIS;8eDD1rO@DYOW^u#oQD7AJt!q$#p@Y0p}(DWCnv8R&R`Rr8m z{x#OU_b3i8kKdbtc&%oNO{g2u&VUF!5*~~gbR+YSnh@eIL6+B=WW+;F-5!;d?8{3wF@p1rtHSeD}6zCl3M@fAe^ z`u8zL$oviP>O89@U(EF`Ze_oo^b^Rmd_h#~Oiy4R^R@iT2B=ReT_RZvYuDc1J2s9x z3CU2zzd-=rl$7d)V--T;mYwTD3x^mg@(ny~t?a@~$t0UjuDok?ekI9-ik|=$qcDT# z`h=^^Z?{u``XhIp$A*ydz>*+5LpnoJxQ!$gUJ~DHSt^Her=n*C3Cw&6*&}BkVAACq 
zE9A*189%W~Y@rP!t;$HaXy8aVBz#y~8T!eZr+9lLj^^<;f>Juu%sQDW96o?fJe$ri z!%%afb1g#rDkoD;JFLoym<_>^E);M@bO&b5lSoKJukbBbYpNQ>HwI;aJnAPcL`iy)8xdEtLZZ;GG|OGWwqwCVYj1y)rRO!=k?k5 z2Ja-tA2*$ko+iwZBBXl3KLVV3)@J$@Im)PKAemWNLwd??l@CII6MzKa`>st+^n!GX zQ{b>c9KYJ?ph8BW(S0*M!Xvq2a*kAV`3WhQA+wnzY#N?;;3Bz>uKsP`(p^~qo(XoF zTUgN@!36Cs0oRWbUI^pn>H!7){nTh#cy|>8nmSCPz}`173Xq>NLuumnKk%n=OfAN(tXs!w@B(W`HR7f|JZ2R*uI)pWxJvKp}E}y zPQs#Ed7F`XBKsQ;C@Kx`fX5%758iII%gJ|R>c9&EYXv*P3^Gz3Yrx3BGT=hTjPX$= zg5fJKzA@J2vkx}M{Chcfiv%<{5(F?4%v8MAnDdglL=Jmm!e!`*pI8g=X5$elw;6kLV#)S$ zYiFLSBDIl``p6`cRCChXy1i)}_n!ui=g$NNJ*<|S%+xe+Snsk$)S zqg;OX!L9JLU@b=)Cl6X<;PNfK0l_bKnXQohRN@q*$$mjx2*Z1%zbNITlw2m=^R(^s zJ*Q!GPr!n&!CNptnp6zJ2hv|^X;U9orz!;YC+zxV?I1x6u$qjtRKd+_6OC#|qCy<~ zdm(J#6_I?9=ETVr3Zwe}IG8=kbMYSCyXwd4q}9o#psba$6yr%`g11Atg)<$yp*knK zxy)cMnAaUElQysxt*K6fY)#pZo#YKO=msdC&Vy77yTwGnw4zJExBa3PWrgi9gJJ}M z?;7mBpqaamgqN00&caQz`cM^|9Ymn`MQxxKJs~_X@5>jrx;k*YhP_U-+%D4G1uusj zE1&hZIP=!fvvMOp=CKnjbn4ZqEjC`<0th19h`fa*APb^sqO@$!gZu(!?NVTzJ12+n zi$FVwnl@e54luxA_$FA`jpWJ^(4UpToA@4oqsH7V1(zwYMiGo@^-OQZyoHX`WQ4nB z_NIUwAOpY~`~YSFY|9f7K^Tl&8A8j~F94+Vcj!M+mqPB7IYPBJ=;B*0IpJfBDooae6@s5fS z8H}3G3#wU2pV#sR-b8Md-N@dnsWEu`Ep+LnV#d#kUx{ zm+?kyQF8uk})9tv3=J(?%e;BT4{G z#QCYiK~PR3W!ktz_;pTyEP4+syf9i4aHN2B_nxyNv;tB6o6dpk6>YfkUY1nKxSs7Z zDXRrl*dr#XO#XzCF-;y9ZzD*J$G|YX6mcUw>DO?c3H(Z3f9AoRf=yC}npo=#_m97d$yLL2P5qDu~|4iIr*Ao5wlej!S@HRmn*3s%wLel0UGhzY2` z6XT%u%LqI|dJ~UZL2yhQ4sEis@7%viF{xBJKPfX+kxX?HEE(Az8!hA99x2@vB&ENz zjy8o42gd9=JjCvj%lE4(z^e0j?LJ0;o)I{V>b17;&d~Q6ID!qk;C~lQEc8tOgQ@lZ zrEA2-_J4JaSXlmROO|Pk|DkKNf6pq^NDp+V*b0cbRF9lrvk$JJK`dxegI@z(rnk#P zMbcxQ;%IVok54nsgAHdkGX>`+1UFG0uVFGkLV3YdE3uAZKRwZ2J%)Z{*tbk+S8|hV z)_$Wxzd$Ck9y=yH95F9P=^13D%5ad&Ldo%1qY_baqYM^WJ=%!7n(vs(OJNlVLyt)j zGbTuMfg_rq7=y?d)Hq`R0#p;Hj)Q|G%yW{0iqD}R4hkFqQ!^%jpENdZZj76d#3Slo zP(F89~k_Uop828WoFU=Z$9QMXKsvPkWM-#E=)8ot~lcbK8}7@Xz9-%7yzl| zn4NFwpAAugf=@Pa4p=H9D@so_=_;tYGv+$hx6z1$)YOEB$b2gc1774omI@smnjS0u z7O!siof0hFYk3xo*?v|py5PwJrDFDP>m 
zEQ-XDFFqOtaqc`6t{M`HF<(Gn)t?Wzg^p+&#?J^8FhCwz9jG1{EJ$n`Kp+_06AT+n z97zHpZFm9_pmCYqb*m^E9RBlg6uqWFZWf~FxCzy7jFJmXb@!n0GhG@=S%{!+HO8o~ zc#scVh9m%R87N<*@Oqn!gbfVJhvJ$`Lj=OEw-be_M@ZB6`Gv(~%wX0&97)t{^al~1 z9_9EjUQ|r1==>>6q^2_IuG3SQ$L`pM$4+exod8NsY}umQvNkQrb9q>m?Xl9A3y06j zcB#D|fXLa<6^$rd-LMwv0Q!17N%k+a3vNd|ugWr>-&Y8P1LuIle0-?p!^QfgA=`?Z zdaSBedMAvuC>eSQ1kB3?jA&Yb(F&5Oh`yYFmKszl6@)ul^#g#@wVLEZDqvF5D2rpVh{Es)`2xfiGg+b4) zId5T`_1q9bUAu{ut7KEB536mLIUYS6sw-EYWU6>=8q#EqUkP%EZLY=0B(6)GJ^eqq zbKd*8PM4rXb(aZJ0&`YF3;fpWdfV>QFm!SccN1l>ySq0Wav&xvODb0vACA%tH4kZW6*M zfrwgE@SvVw$}DjFstj;cmFm_76U}Yy_o+Y;X2%MSvsrSuCIVDwvmwSSGK9;cCpX^y zHm(2Z1=|NBuI$sD;2Cap(z^(D$UT{^d)WJ=FJFhz5>|U z1a$BRKJYhQfZj=we(wuFNGCuIbof1`M@bKj3mbyiR<~%;8DCoebNfNRgrF=i6~&e8 zY%ZO5nF`RNJn6Fp>rbkKXq4B%mpXTR*+v_EBnCH&_q!zqfyD=f{&OvYlODR*L6d|@ zbAAYp2Ox*bxAp7CxE@aQ?U2!s1rXLtk;m!mgW(Kf?o%bWsf~2zq2c9x9=LvDztraa z@qg!ltH}Ek|K1Ld4fNvFgk}v>iJ=DeJbo$|lQa5EjuzPe_PQtjN;7*1PSf-XR_V!R zv#Y0fF5wh{qemmV<%iYYL^wU_OR{f{W}pS z&aBCk^|LkW@uM)em$PCe%`e}xDVuGQibsRmcWCBDo)X^OE8$~DoS6InVeFfNGzr!= z+xE0=+qOAv+qP}nwx?}-+P3YlZSVXSXJaGIM(pNQWmaTfWL~^g`Dodh7OWVoK5e2O zKgk$D|1@>DN9m!UoQ>Tqx{vE>ayM2l{-iFPtt0h#i-Sj{i0Ct|JM$r!V^9xd-kX84;OAtHD ze*>0Gg#X-tSpHLRmJUvy2x-Kf`o>^NRkmL$%{zm7eGIof)kctqwR$A zl$AdLl!kjq>VM?14x+@Y3Kjc(IUs>(;Q~fV`VW){kqE&+q`tvLjZ1(`BY03CETDm2 z<|*4@smf6{#$kh-8(c@o-!GwV1wDc5OG-F*?cD_jkmH1ei6{kO53FF_hs{ESae+Wt zQK8+R-qoS-Ew$CvDX4^m5D^i9BZ{Meu#HJ3Cm}rub!~xt+hK7J5yFA{vLKp?;%@uA zei0sn!PzF#`)k0>L8&7`K_D2QAxKi<9tS-+iVW;w0%hYf{$2%h$=RzA=+E*8dJPb; zhXjHR_(6Cnf2k9cT(UQZLXC2B2`cDLpu_rul&$tdjhhU+>oiR!j(K zuP;FYa);Q<`>p-YY7MR@XRj}IPv6Q0qjyK%f&wVe7VJ7&w7-Es$sQwu2iIYweZaTp zXYtlP5)}@@K(d0y0KQGANb)B#PIU;`{~u$+AR#XR3u;o35$OHt)3>od2`)^W%i|05 zhtE_9b8$5%~(*;~`iK*pEmN2?!k|8u(x>_@)o^TsXlW{hpxrf)Wfn z;k8%~Aoiu&^pVfH_vS3+5$Hdw?j;00c(xY^pZ5O2 zC?WN|7kPb3W(q|lkx`I0qgoAxp|=LaS_(d=vG7wVlvw!3; zor=!n6)OSS&v+^v4Rd8lt1#y$B1stEA`@?C6!};BV$CufV|qs;ika<-`A|9aA5QJx z#g4)dk!|#}j#dl}JC#*;D>4#C8~b&^XMZ>5{6p-Te8|*CmVcc$&4HpcAdfGasG7b@ 
z<~jH6dq!AprF9M}yMZ;bhbg-0RRS%(ZMX=$(2Y+Op2axwiz}G;2?S=oGZu&1hmxPzxAXeeWfDYK3cpgeVY}+cq-J`k9!R}yg z!SZiCr)F8U@gr2IrSaMKBJl#YTBRQLLiJdgY(nu~&t7Zr+#>Gp=cN(zAY6Qv;F6>d z6V7GUM@=oSqUMn5ZMkv+QdwKlsl#-T&X$kCl$eJn*pU4av#&%I1xPx_MT&!yRJgt; zva5iQ4CF2i`#~(Z2A-relv6bhr`y(|LU3+i7_!KJzrF>%ge07YG{ES%55iHmWOI$G z@yL;-usdS|ll0c+X5n#nr&S8p<&e#!vSzBNv(H1?z4l(q@MFqU*CEn!HdKJ$oW(X; zBU*XC^sv56NN(H%PPGVUGjz|3X@26IW9=r?WsT z;oSM*mLW<|+;CrJ*3)}2V(8@pDnW;mPF`f-ry1Mq@Hv#C9bc$ov#jhauisHQ{5cjJ zaaR3GB04OWg}U67e#9h*3>?`E6&+(7I}&{=t@aAc{b;P6O3e6`MY+Fh>5S=j!#l^X z`A&p2eje}fw~bJ&X~Pr;2}p9hHy B>2)A8Jguo0 zA{n926K&B0Id{ut)sEfpnzlmOaNV{?Q1PT4Hs3wGj_)BQ!z8N0LW_0MN}U9=n=;3SXqS*dhQlW=^ii%bStiD4L1qEr4IhESv$KPO^>vjU3TWeE zfq({ zP4Qdem5-U$A|zBQdR`)M;;(IL@nL9`49(5y_n9T_la82ws*7P8-_RaxT*!lht73i2 zgiBY*ZG*0k@@1g4+pE@#T<{9%0ZVbLm=63FCug1L7!ykQk{Rr_V!8{f!C z3IEagIqS4^Hv$GT{=B)tJC)6kcbf%Q%~WRLE~V45TlB#$mfBSP;fEPb3BKicAr7Rw z4_BI`+m=$GcaGOnT7wTLpAeG@h4aNg!WuAahm}Ljsd(ZM0vam>&){x6ex(;Ve4b+V zTmlM=fc5g51OCivzr-f%+`C6mmo1-sb$R-Ew7x%MtV5iz2)gL)PKk!&w=l>QGAZp? 
zg?fI;pIO)sso(uzH|g5gYD^dV35}_LVDnB$5r}G1LpT=kUTmQ|!WTBMRy~xMKZ8an zC5J|_pjk5VsPgnFIXq+$f_*k<-_29cF;0eeC&MsxD0w(B=jl);KF_iX?0%`%nOiYN z@YXLid_HCvi8z76*X!FUwc_!}`@4q(1S_em9@^her4Wu`7jK`RwrKWFv;m2}3U3i5 z(V-%C<@aoz-re-&JwNZ%P$EgNaAl;ezfJ*IJ*cGZ8fPYLB4&&S<*UwOB7I|uYj*Qf z9vdt3uSV6eer+rt12;$N@6nfeWiXA-A`oV9ahPFH^R|!F>bHdw_lbG?;$@h$v{;uw zKThh>8J|&yFTKi+q?^1jYU8cN5@(<8&*a;1P+LH(t+;X*FT}B*@LD zq+C0mtWPuDHJxoslfYIoEj^BaJim-@n)TQf?q+2l*o8k0+5x7_v#!KK-@SoQP4O0iT!O0y zwaQya*?Y3=T$&_?^O5f5Kgb8=4Nh%9LDaUX~aT&HUGqN z#7sdOqN7rT>=Y=oQ!ezq+t&IrFZNk0Nw%@JJ^~}T!2~q;nBz&hspTKk7(c2u8Wu1q z>Fq~{ryF}{9iST)7hH8AR!jbjID7aEwy`V!bZ=_nj_%_}o36Lz!rJ>Wa7Dz&kh-&m zrzo{PoeI2PYZxp4O%HLN-#r;>&Vr>>ZJi(Oz8M+y9)M9qlNqwkYFx7cE1Pe0%3$kJ zE{)r=bGqlR=a^wnw0RJ}Zx^J8kcawVT} zs%N=w`=s_H0^Pv1JxM24{3ev*F)B9yDUni2AyK?UuqK&SGy821dS{)Mx(`YraW8~1 zN0axRa-EFGy^?1kU4aAX_~iyhD+s!Tfk3(ixhARKIY@>OLJ8OKF4LHQ$I3)$>1qY0 zvDexynI>09(3aaLQT{*fyw@`l*2T3|N!#>BPwFsN_Tyjj z+U?1rB3p;sNCZudAm`sHkY$Pk55tu&N2Fx-P<(S>Xn(YgyiPmcY!KmPtJH0ZN{sR2 zby=p2G19-148%tvOy}dQ`Kq%!AiKF9_b7BJieo!>@Ts)vn@l*HnwA!fNNh4)H%&NG zHXrzClxW**7LJ#zTMr^{${qw*!haLl86|f^KMibw@8(X|t?2zabM0u*m>F$XKzVie zW9uPx4k_n0^;HKwo*9+waq*iyh1&-1RpsQw5%X4#qccCSg9v!fhXU*yaH^uZfD?}S zgQS_&zT*D-+79sft_m|4pkJTD#Yid~$z6ZJl4L+>V+NPvv6SkE$rwlqd(a;Ni5_Dz zL0Sdl25uoU3DxPFlT2Ff%NRysyi6^lP4>ER5@BtZRRJ_GRI9Jaeo5<+c~1APXce@4 zeVCb(^Q}Zr&m$tuP(oI#85Go`+0HZ?6LYMT8WA=6YttXRg*l`ou&zN!{jaoo`ItwdrDJ9g#H5^-3$b z6`D$RyDvOCp>fUN@giBB={>kokOyIIX*E#R^flLK(1n`DkQzCXeV2@TJZ*@Mgf?V#dM#fn(B|B z^#%^LU^|g+zmuKVlAeHZ`pi?CHlQ9Ic9!=4Vn}0EllXri6mHH5iLNuXhE9B6Ii9b=gnzZqd3|LJJV*WcnGw@|M{I2sH_{b9`)#T@1&^-1 z;p9+2l6r=*G3nXyh%6a0Y@OI1-DJ!!GL-?yTUVioWoa?9BA9Rdhm;0o%t#o0vvw^z z_DF-GCD<&V9Li*)R(kl&Mk@0_O3Ej^@4JffxUI?IM5k%CWCm1x?3U&bEB24vB{c)4B`iKtL&OV`3;-9(m<4t9L>aOM zpntAr3MaacQVCKdrQTxaVqJx~sr^9u>K>2FQl&0oR2f4{*!=PiQ@143qCLba(3x{! 
zio%IUiFB2m_(bCyAhfH-*Gd}>P3*D(qUUq?>L3GEniHP!s)Qheb-)yh-P{aMdwBRw zV*625yzN5pcIJpmNagMpi!)G+wLe6!Yk?`kcxEFqbJtr1Bx8RhUp{?VQ!7P>%KWFH z6f&@0EAv8G9qymdaCislW|bP>`N*FrscPT6eNp5J1NM-BMj#)) zb=UgKY70-Jlr}`L-lS5aNFK%c<3942)*mFBiLY)yy zVvnA`-IbBj?SxQD|BWs!|3ll(KiHZ6JBh|6{G(L{pYM%1e(af|=A(S%n)b$fc46GU zEwJ5XRuK(3WKrc39B281*@m6!SlM7ak4*atlO^pkd8qA}H7Eb=p#k^abn3Ce%x3g> zN`0Cg>?Qf4JxbSUnmr6`--8@lJ$r0hp~d=7V<&sEnVZ^fGG^Jv)7@TGjOy}8r5-{@ za3FN1wF$53TU1}_`s`@s=u zvJ!T$1g>Vx%;DLzJylchU96)UR-cju^V&_G9&+!vT+%pfD82%U5Q;<0CBhcH6NVMW zM>l}{rFy$JTylBZ#j2?V>@@tS>EW|;$u2udny0M~&Bx?pr!+I|C8L%|e+dP^knKJn z{E*gg;SDYv8f%~<{(U;6cXt}v2vT@kEre)TH>|3+1RodRW&C35q)|8AD8-qd!M|in)qrR%sY{L`imz zyP1C&VVeHln<6>dFCHtgp>^ZScL*;*2j3fsC;iM(a2(|+esR|`bk`da zrj9xvc;d})crt?kRLpuW zWl$$lLFf%AeBtp_WfhM*hqeY}PdEo5Vy6<;79O1$3u=gyh#iV@UMR|V?md!m(>DK2 ze%z+LD135d66(fr`5mX_2Ey}~lQP|=sbyp;sP*LgpZDTMg7xZFxwI!{E2L!({=V3t zq|`Xqb4O^&gCU(aWc>BW-!q?QlPb^0(4yjJ%f!LbyU@dUQHpLTijY0qi_$1p4@m$f zeEN#(Mv&mLbDQUb0V8th&Std^2XTUDLmhaQ zmZcq3w?vBD|7e|G?81_hPSWGzxsC4pU0RmnFuxVfOg^FD1~nFoxGPBx%qkehonPkP zaN3`Ye~L1bRm&RD*1Y~iaXGl7nq3rsp~-*0hw4#_TVW`;KD88W3X5Jz-`{|qs#bKV zPIwwa+@!rll-_W|DdjD*VS1`8JU-H+*ZM4JmmRRpF2h_rS7xwHaS4xphVBvQ%{mw_ z3n=9XoChof#LIWPG`YIfiklEJ*fwPXC39K|J&*DyB|z zR5$AeBRxxSOeDu&w~66?CJikufUJb{fb=mX;P* zQbpw;jmqVK0V7@+^A%?O0LtpC*a(2f1=rk%rl4EkC4d|`DB)%rmJ9I&#mz8)B#xjsD|J3crv zo(T=G7&tQcKooG5p`i%CS(}HreFs1_f`2VyVt6;Lx^Zg1n@RJW{)VBQ~sxbN<9xelR!({DOMax1`YfZSp90FZ#)xiB zFYo%59jQH;Z#T8s9h{8TYudgJz~j=N`B&9Y=kZe@M{owf0sKIM*JWu_eac@fRDC0V z`bO&LZtt9ccz~t?TjdD}Xq@LRe7! 
z*&D)o=_dM%V)wlyUS0P0;&ZF@&C&vq&_J&Koavd*Fa5DJJfScg0Mo=pJC z$OH_`)X@V$Vwe*IaIGPqzX&@yf@FyKM}LQU08OFzBQXSM-U~!z!}rrZKsJD8nEntV z05xq948t3!y$0(UQ-2H9F{OS7duRYnP594QLo5rNu?24sJZ1@fA$pnC+z||aWFfwW zKUI^>y~W?N^^E^}weu(t@t|rB)9R~R;4j0;=_dmKe1{mDdLCOmW0-(527e3As9#-9 z0RPLIWA>~W`b0GGp0>1mf^6#R%-H-k{nn}&(Z5X{%s0TTOV#q?)CdCnG8WF+@OJ*NV`;O0te_@?GAC`{#$^ zYnA*b{xvlPbZj2xQ~gpypT_wUwl{765yR)n;oov;`}Y_=bVE=7riX3n)A^O2d}H_= zAKm@W-~(dQs$?GFsDJ9)E{^=$OX^8}aLVy4!e^h%t$@b#IQp{`aOr_5cTGRqLRxgNrgZi- z;c)ucIyodX23YfJxiUt;vFR(EPcjG~y16oFca6WUja*HBJEJlJ0OX7213gbFHrm9N z$qKTFhW<6#Id&d*)cD{M#bvgzd9Wb~gnc-A(2ci^sA!YSkg+;`^loZf-BE8B>ilP# zGp<$RIvNu!Hzay7H(`bWk&`L0pMSp8z3H!KVf&$z147R>X6GZ7=Y=@D>Mp|c9&xpI zB-GWC#g!uK7HHMYP6>ms`b3h$Fb_gRA#)-RVwgg-Y<*}-$>eSyOTBL8Y};e#ul(^v zmpl4;&P);n?0^rG60!C&^s8er84`!VMN?ty2zSOBL$Df+yHNLIh&L*p-3D@UaG*Cj z4df5J>z9{&T&|N@f@Y*9Sh$h9-xC0T!ALtaP_OkJw4eav;zWttll?qCL0pzCh0ufG z6C0`IKh3%zk2_?_;Tg;7Qk_8(Q6(iS${Zsiwr$N9sM=lAcxI_>&9^v}{; zWTCNXh^(Y#N2V-X5}u#nd%?=|gw{+9S+$OcX`x2i0r{GTm{N-D1eRinOfdTV3PBvyRv>UC)*TpMay6R;}XXlvoKKZu$C`Qs(35)uVHHl9t zs;O%#h^O$gs-Lvb$Mdpl@cPo6MIL3RIG2exJ99@_SNX}?%Fpd5|J0iPu8hM2?9tH? 
z{au-;?iJR10}Dwfb+|0t-rG4zc9lZeNCqew4C$#{v{>#`N{slL^duX}R`i{uj?Es4 zc!oMl=X(aHXS3w2WR5=nD0zC23|}!NWZIZ(m_P(r(-d#n6x4SozNh60wt>?W zPLCTJ2x||AlO#jd9EC^1oUjCTN&aBxeT!st*e|U z7BI@Ik=gdUN9O)lI%~B7NZ++vD(!PVNeP1Uq(=0nP>QuFTe^Rdkg@p7HBz+;R$Z{8 zd&?7786=$dUilbUYg1f1a(vZ0#eoFYj&DDp9C+Q5um0r9f9m#R?PcU`KS|mo8Hi<~p+5824x9*nL8SQ&<5|qIL zZ3;v_Dt)k-VZjV?+zaW8`8A+UTbudMD^Iv5#+kOOcGmg?zr8FOsTR0qa;oczc4@XJ zeh%RyI}W0s1P>fHbScVg?g~;!XH2SUX`niHS}2T);^dAgX%twg9i?cg_ky(Em5uBd z7km{lR$^_K^gZ9|1x&9U2@-(Q=(f7bU}9s=E+evVYHGCP0)F{K!%dQ zGtc$go`PG+GbuI+y%&T$|MB3!bIX76L@^E%^`w5WCNwvk>WDh;2v$~8+lW*V)c+5F zT!T7TQv;GT`o3i;9{-TpJ8Y_Wf;L|uW_AUT;ZU9GD~by-#J7TFjq`it?aqeRH><&t zh<0hwpM4Ic_Qv=sM7Zb4p-w`GWOdT1vBhDsu`+!7FX_KbknDg*h7P5RIb|ta2os;d z5Jw-zto$u%e&5qP$=i?^q1C1#Ej8ai1lCIrsRMpnuaG3yz17vH<4zR}Eoue{pFl@mcZb-ZtVaTV3vDrO|(z1`fEE?tN2m8kowb z>L+Ge-&{j}KQEOi-cdf1A>tI0buzb%<9DvZ(kFEAi7HI_0gSzqOB5pEvM=M%)Jd|W2h@fz*K zvY6d>wKy!@wt>${oRhT5X@(2ah(S&EY@B-GW zym+cE7d8dWV~euxW+i4+ahWMIZsSw2CmmUV>Ra-wNN0;I?p}8w*CN11jjDy}qb$E^ zSu&=(q-K4YH|*s={DCTxDslS;WIZgg+N_ zQ<93#VgvZ5Kyya^$tY508xxEC^wdKQJP7wZ2OzU%`MA6{m;^z<-(lS;a&i1y+-)^x z{;GBCVwavyD}@p!_;y`DP8bLzY#_zw)=|lD{Oaxt zF&ln5;>?!_P+LSu0K=2(otJu&B-|%8g84M;?Unl&w_O@dEo#LLLZ7EN0s!8l$QtL0m;+43|iuZ=M9yg6tAwA;!t7YXmpP1($zsdsOZ`N#dbULSZ z#;a%gy9K#%sI|kT7jXnE|7hOC&A9Rh=16k7)J9EGPu{Ui&u>T|rUfwT;(wdnN|j^| zH?-Zbd2a4|FWg;;I2Qm#ULbylMFFos#ilT=-FYaO{BeZP0LYhj)i6 zZpc_Bx~8MQ0?XvhhFy0)qNCT(v;=~I6I5L}iMRX7n{K%0q6( zl|%I>ZUNo7YXtK}k9+M_Z2t%rcu3)hL5gR#qfcCc1PA_vWEZtK=u+tm5IXDg?azz;w+$ovR7~eDQ@xg#Go(}95oAGkz5-WI_bU&E7M1;E?8!~8BEWd{W_&4yP>rft8kPa zERD|(%kKyyS}M73zsJ72wMO39TijDovHGJS#jcWBHvl#HHW0VLw~AD|y~k%1(_b($ ze{a6L32B-szQ#t!Yd_G6-f`4hir@+?efO9KsX7?h*7YkmAF&Gy0W&_2W2uK0R`R3N zX+(&0nk%K!(#?2y7Y47*S%*Hv)+Wl2D@8{pqF6?e6 zLwo;{0N-S;*{BZbHi8flMWh^1M_e!nl(UsF>04NQnGfk#(tN#)_(KSB8o#8cc++J8 zAEIDX!S@!LR9zZ}ir^zxCAq_T#aTZaJK=&iay_;^3e7Z|-RY^Z%AbXm(QrE3Ds6_+ zaPml@H9u*^!1>00N7B@;#`8gJkSk3CA(|YJqb%sG{P=UC8JRm?HN#3$R$e_!H}}F-wB1;a&tme0?{HFMyme^ 
z(q%J$#zTKW@^RZG4fcAsc;i`2EU{2%DBGjUP~%%t+Q4fe*(9OuM20zRj@84OaInN_ z-ngI4TN=kqE4gLq7s)vtx1!s(%9Ykb@dZ>OUt@U_aZ-Ltol3W%Skuh#h=|CHLAjU` zdnCEE@fIMWbG^e+4*m7O&X*isXNw_d#WhKV@Ba-L=l^RCLtlFOc_7-+-mEjXehp%* zLb-i)%(jPlFhT_kY9!t7vYU;Iwb&(JkQ##{kr6>%1WOP2m(sJZ4SREqhBO;^zKvf8 zSS4s!ax^9gbj@|Vb_ICgukshcFZh^PAo-;E?ebbEb&{+BW<6vHvX?+h4eRXm#Ot+b~1xp+IIP}Ol$u;3Xuf3T5K(KGC z_WJPr2Kx29nS$9Q9~3}>nVgljGh3m zXUgn^PC6GLl6qGz9c={BS2+Wr8RkIX(8mK)-a}Ck(;6dzo_(-X_V9sp$A`$16}fwk z2@C3W{ADFQT)r*loh#lQ5r<8^KFQX%7uj4<_)HM=Q zY2Mg9XL~i`j$9t$pN=%I?g_C^=1aMBhf#Yurtr~|jBrwNDnarV+m&71Ws^5+CuSEW zE7xGMMbnc79dSVN^c|y|zZqZH%%*f_)@f<#)ii3eHS_>N!tYGSiawjCDIjbCAJ82b z-tJtFEo29`ayKf>1xW?CToYtW1@uc-g6F_H?pnKir7%YTWEG1Fk9E%X%L)+D4Gm>b zZ8d1)G1St=8j!;2wYi?k5!#&*$B}P4rmKbRueL?)$RTC|`>*oL_}+(Gn=dOHoJ}^$ z(GgtyrcpyV_H%uQTGw=dcmmd6a|_ie+k*rJ1#dgIo$Lx z+R}fw@j`JDT^=b1JhZ5$Aolw8V*cx8IOtZ9|`{FuiIJ4{C%25ID2_NUD-| zKVf=aDF(&Zk7tk<5wZhEM{r1~r&D`WYHLC%?5a zQdZ)|m+Kj$t0W|!0(Qrlp51iaJon~ZdPCzwx88sP697!oZ&P+0{&lrc{CWOZxU7G% zZ}D=U=Nf*G11VBAiXi!aezebSH$DwXVri~Sq!SV7l2|&r*KOt3EDIbx0%=}f!e|g< zC6URoCyeLfLts^>dQ(1I&=^3eU^_2I z$?G}JVWyFO&5~GvaFB$EJRMaVUwr`8(_hKxlFcfd&AP5Pe{JjJOQ0hw1`zyrpKc#3 z-jqfG>U(>pHyuV`er3bZyU9Spp9wF5TGxxPcNk=tmB7q`6j@YJY!6Y~NAgipifmjR zI;Op0!S>?SwQ}z~DGUcoG!@HLJ$@3KM-7T&x{;B?7aq4HxBXr%rmWhOJ_Q5+=E zwVRp-fC<4j$aD-ER{*Um&vZaHbs$yE4k4*5Gt%8MX5{gqZ@4K2!Q%>b{#nYrO1$7B+hsNmIrQ&wh;@0}|QYU(v2Py6>?n zqT=X|{%a_itZfwR*7q`;WF+DF zjhix2Ml_z4)$h{5=Z`(P^~nU#zf4WUQo67VIN_r6En-5O$4#UnWZEGrlLx@*c}gpT zd$G2~`x@%igsR^8iUQ2l_c-vA$2{;t@IX$;jhu}4r-KILqId=UsUfpJ_o>-KvqW1E z7lk$}(8!p|6twsbj+|u)GP?HMv+>sFJrI7tpj$fNm~sI%n$ z&0BbyG^H*@vL;^hMOM)4nM>)#36vs7zK6Wqt95p%UJkJiwWgW}TJvuS9cLS4x3Z9} z@!s5~w9K*{_CGzv>D%k37~>^OYA6WgYl@9@E?>8wu84`%W(#?jJ&}}=+lT~Qo?6^p zSuey9?OalSO1tmoTCYhKhuK+A>J1TPSo?+baH5A4nK2gh7XxMUD!88)C(L)S*VrJ( zE2MhugFnqIE>sx^Wx45eNOI(va?I6gqGH$CAN^}1=qP;$&VqXd)I~j*qS#R$?u*Jg z4oN-)-Ko}}7Ab_(EeB>;4j23-_HB{cw@Mw zU%7=Z56`!+gtDSct|%pJ2iSsdAnmFe%*)IL zWI2z7Y)D+JBAn`X|AnCHL21Olkq>r$cuVo8BrxX@GtB2qY^T}W$yNE%I%8!p(^1bQ 
zs9Lql%cF!^rK=|vLyseoy`0+C;!}RVGpR=hfjF|%PaF_NgV3t%SCzd{q;(Crryoki zdCcNFL6dNwzF`X|-7>SdixhrCHQ}~{Nh6f_do7|=62ie@`pVzv^IPs{uCuzR7@3=H zb>o$ATe`5>$LV|mdsraYuJxv1oC{v8s4ahb>z&#B;cF7e8hhLz(g6oUkx`YT0U)Tt z6_bd7G=?13y~Y>sG*W#{wpLpTK|b;O`R(b3CoyMXXA&a53Ty~Ew>dtP&Mal78pjuw zk>I7`;>Y)pxoB&X?{&CKD0iyKO)D|2#YaoRf?2iTZp_s*6uIlQ!9Y_-+m~9Kq;j4+ z4KkE&JJV%@p%qRjJYwOY`?5zmWA1?keqzQ8zHKgLj1! zW82S^u}N?2yL_2a%Mraq(wM*+9&w)yl_wtL3?+f9s3Vlln7Qr4aY`&cF>!G{%cPbL z=B?at(pv4hJ(pd;{X_Awi}8JA(eanN@CQv;a;t!NA8RZ+Mm|PhdL&YpAO^tD+9`JR z4&i=)6hr#4f&--F)$l;ml2z5oFNXcwbuRe-5H%tLK%`)8*I$Iqr!zRuj6M^(cepSF zPiLG`NxyZ_BQnu^6;bo6J*xW#ek;1`wQq4<-yI~|nf@LbR>|R$S)30uPLUgj zE;nqh(h(lwdOn>1y`~33h*YHOqFI*!hPYr`z15$6JKg44S!tZxIHg9GSUsfMrZ6ry8tHggV({v@lPwkLDLl zRJgrRvdud1lD`&o7C^FDeHM#qEt|=+31t0qIoUjZpUbebZP=TLqQl{$1YnlHtSvU* z?)JdnhGcMiEr|&4N|uh^5yh)=NTVA0GvARU42xyvrXjbCQ(S~z8zley?J7PLbW!*_ zv^&v1H5Bko6aD*Gj}a07s3T(q=dk|cE3KPW{jzH+prEBWgnlvM+qh08(U+z zWjsaT!P!`h){msQv-VfAK{)95IK>U&SP-D zAw!(H@SUe9*Q;Dlc;B9jTCj$y)c6w`WbErHBApTP?1s=`Wq~)%>~aC;r~*WVsxLz> zht11dvJ;O4N7tOE?AvpppOw6&=aToU6Q2>EJ>nZo9Y(|4U)0&0$s71kQZs&{=%}eg z3#8@goeTH)7zH|B6^{A7n>H4o48yQOwzuEPB)c`{Yj_A9hI^ zSRC0^Yi7BRR)VBMRyo%dx7~XFI)wGej;|w`@QdDLGTQF#xuO+r4Skc?n1g0seU1%Z z_=2mI^Xs3fFKxNU4m}9$&sl!{NSQj}l_gTvL@r7S)y*wHy-dy71c^$pB@#eS9Qn+V z9Zo)lUr7;0w2o54cG6Ex*7iu2Y0e!+WkhL;8G?a%e~rYuHupHu2x^;PrTGm-wW$d1 zyXgeF#rtCPJ6>#gi^}(OAvNbJbV|p9H&2Xaw+hs?Q4JTZNZaI&ju%|$Y}&$bO{Q1K)sG8p zzk}q_e$b~9W|_01XXmMnN%l_^=Dn4EuzFr~`sansZbt(v3uf7NO?NngL1|@pH>*ls z=paV)_u5kk9@|nw`};tVcOuJH$~g$8?RIHk_+(hwZz#BHX}3i;nZF~Ak83@G+k=&C zgu-a>FOQ@?#WxY3_o0v3#%Z;Aty-Dt@1s6+gZmJf7)(hg4983_7$lRpB=X+;yllf9 z1RZz{x}_{EVcrRU&~g7405(9$zlO%p8fAWxNGCzK#+8P?zx6cQ=0QGOmMnhaP<_O^ zLSoI9M*DnR$)nd|;F8LW=Z)XlZ~A_+1B!&extKhJh6*7X-)EL1<4`zdwwQctO`oXz zQHy2QohyrZB%7JG*K)r#><3t`Y;1lQA+uQfowo3SrX&H>m=_9^7CKE_pJfT7##=2O z912K^vC=iv&|mar@d@cM*H4zrgEv}C-lqq9GQpk$%*mJ0?U``*5uDD&be})@D}&`V 
z7ft%{OynxmEBfG8A#NKZ9Ok7ukK#34?4@8!@ETyom9_?c6!QDzQk;O+$=cXkMa^aO{`PG9%k!#AS5;B7UMqj71d0of|n=*e(9f6uh-XkrMLlazGp$e zd{3SXx>QfIToa)kGYuL#>8@)c#g4^`XM>R_o(P~2SVfH3Ayuyy&+fa?U^u-K2)>6^ zj+8@b4abncO=pTC?wF~f!AU4oBKcYI#nPu43mfC+v)2SOybS49W^8kw|vssR@yDKj(qy1URZ$! z#UDmanb=FM5-u0D>RS0c3r7&#n0;YLD*kWQ-n zD8==yvZwuGDHQlaed-`7RnjN1a+d~@J_zjsiZoBB>#Su@@Q%4xGT|471hJo(lovO* zafenha~aXJrJE0j=O;6j&C9Ce(Ly=Y!p!ebEmEU)EcwlpQSdbD0TVXpcX+ngn`-a5 z_16W8);>tn-3-U=bfagpF2p9?=gg9i2JF;rnta@*7*Ct&v;9T=p_%aw4a(v;vCbIH zTvfMgyxZ;E#~|vaidqV354hTPDYKD<+TmlARB@#$ULKv&n4bD$K9nlRvE?BIn_X_2 z@WS1a_JdKxP5%SrC!9C;ygP1kpp#7Lz?v2+qNnd)-67p+G+ObKK{a}8kFvOEnl_VJ z17|^~Z;~<-XtE-eS2Hm=Bo$V}ge{O|u@_7&*?<$|JG&(Z5P_FHA=S`}S+0swZU{4Z z6)aW&Dy=}yw?pLd1VDWyEX`)0uliI$5UuT!x+RVQC>}Dkr=gZX2T^jkkXz>xsm=FQ z-IhfVR`QgZ76Wz~MYL7?NCxiEQUV?)A#(4$`+Fh4Z0QC=s|_DIhJ? za^3@8zzBT4b|D1Na@i%F`>_#_K zcComBCQvKg*@PaF{J6TPx7>PmuB$b9_G9s-r@1Fgd_;9b+Un~r9@y+cfRWmMrWJ9Q z>FYLH5izZ_k%}Iw?0fzMH7=s>HCllu2mXwXV8G7=slURaB(KX7yNFEaKAr#IQ2yCDyPAJ6G}D&Ud_2O#?^_pg~AD zZ<55F%FVxsA#d=Ta{2`I52J2WKtNEJ$DV6-;Jqbr)=VC2?CXf%JC#&@ly1oliN9WT6C*$=&9m-XRsm&d(KS2_TIkRubt#>1jE*HPh)VO4X1{ zJKp0SnweaTk(iLo6d9OrtN>EOyhbGV1$WDcMF|3*w|U0juByXB^J&X%(>Dvaf3u3l zSJgWX<2b@)4Wm93JmR zwD{FYim+o-Hv1PpO*H7kyLaTwmAQ+qV@uf`J&|^mFM3Olas$!@SCp8C*7kj}CdN*} zAbL-8qy{Rn`f)5I(a~d*`=qGevEOAhBvNNahKdwxIhlrwuP3(*9G_CXf6!z(h;=ae z#h-6dS8Z$M7Z9BbedQcyz;#o2BoD)qBZ4Nh_GPcjxW4M4YjodKLRii96nidEJ|=B@ z-{k^xC#jo_^V>Je@6u)u$c7PS0+D%jSM;5y(_ARlD`bJ)TswkXK5h(L%o-8iDQ>tz zTq8T?M$w~#ZlQLjXHIrJ=h8%pyh#%+Y)|En0U`d=0e(AP#E`j!4lKct6sVtr1oH2G ztX1&P3P}l-_A2EUKZCN^k*n&#akUR?NKz7M$e*6mAP1%cQ9L zkBD(d*mXR{3;O%nbLg<}t{TO74U@z-&K?=gvOii@Yo_)<=!l~&#SPt8^bcpR4)%67|wi6AOerKH3e(7-)nxo3o_*s0zJ5jzguULS+ zF6&6Q6EgtsF+x4MWn2dLy{9RZa76#4?V60qR*}E;gg>b13pk=01x(8Z>r|Nzbrq@o z{`Q(3>GAL-etRRG^Tw*O+jMJr$t%vB1Pi=UUL4cA!34hclPN1eVeZ2|9@bTEB50{z zj|d5oQ(^W^OPn`vW>GK{@R0YfdzoVlL7pY}q!f0_;z1x3P7wIv{EI@pHbx!agUQWj-;GdLbw4782_v>*3C?=vs3wHYOLeZAX&jxX*`y?G~-KV 
z+&0y8)jSZs6D$cTpcW&jy%DzGC2?!Fj3KMy6>|R|(5W&~y1u1jW2KP-pY#SMdCyGpic6hbe;?X;^71>9l>OA!B8qN}%&DqV>6(akR` zTM=RdKGQ!*c2tQ{KPV2UlxugPXyN-#^_<=ImT*2vqQs`tLV`OlM&PWDodq)?|Wk1D$5iV^S-Y=<%uoTSc2&ie2$?L3emiY=3 z4Jh@As#Y%^WOs1o;7*~)#$pX%zA-JY^K3ehn(WRv#I2W|^rbVyn%vVY2D9mef6^=f zao(W=2J#%_V*kF%@q1{@!h-L>^>ra9_sgG#J>big3d?BE$ zLekwKh$^oBDHfD5%yEF%InSID^p1wR<3m_P#0i$;*ErDTg4F5h`?y})ss!ldy3HX? z#fOsZ*@~w~i@t%?fP)(x)|JRk>e)}@*ZlJRZiO9>Zxf?$x46tQE7O?*i(e`u0tbMy z`2n)>qoq%dHdI2t+>{f9>SwHw)6&CynWEeoBS;}V+m{X*CfMj2gN);gCB(6NKQv6a zXx$GFGDD~qftYNYR`aRIOlpc^ShT9wa2ErShKO31s@#_C9H5v`b)z*ZA-*`WnrC}l z*hyj6Q977t6W;75KJugyEsFF(TM_=#7Qy!WUpeFD|oE%(~Oges3Ny>DNs zl38VjEYdR<-l4^*jNkVLt%5^OHBAnp{zE`|(!g~$qGEI%j%zVJkaAn$-E-ZOxDOj) zo98wH(8vH{CE~0j5K%|R>VyrDz;iek`?&!u7t8kesCzZ_QLOY`3;v}fYkmn%!j)^w z+b#Z!WTOi=n+f6_djj`yqjn&fKgTfH?S+Fv`5i>u0Q>EJ=V^%gRufS`SJ?IrE$t(S zrkoxV8hj=6tLH!#Egdh zeZPh39w_B1`xRNn@uIIzvogh}8ckE=B23exk<4`-7G+pz9}B+8HZy%Y9tI!IGZ@Z; zfNmx_A2o3vS1R3#fb0irS_}4e?(r&b%ZcYH5&IX5=yB&G7YtbzVmo6f%Cn4u4Nwf$RSd0y|_4ZGkQnx!%0pshy4Wu&N_jh{e#nat8 zq49|0=+}8u>+#GIKCLf<#_t2`?d+b3uwF!{yL6Q=3KAYNwr4jkQ>kQW2*p0r%(>~E zvCqnmKstqZGNdy61m=mF(o%4>IDntUdr2vzOnjZaT@8HSV%T{N zo1NAwo)11o#sCY&jJ{Mz;*$OHU$`yl33XzBv{fcM6hzOF7pSuU3>Vs}FjVOmkm2GkCD;lj=%4Du zc}KR{C%$(D@MTeHV9`tEuqPfSA$*pn4ZdukEHnN#j-A+e}^R!1hF;V}p~Ql@-pdbsLQe8Nw_?3y}8*r-aH>-{UhtsSmE(gc_qLrt91I;!9Y>4E7#hf(KsF&4u z#7xS#TRoZ!$ZCS8S@kxk-3X3qa@C@c{n;j|WG#^Why?eun+4Mkn#XnqpV@|=`tRr> zrD-P+I=&h9!B62(1WO@Ik-A-ExI^Gs%DXnBl9KM>`jG=aDmgE0tZU+W_JvzP8qq(*>VgXc62E!pp5(0QoT3{1bdAgoQ|NaH>AcNS5& z`hBC!=X{S0BNa4kbp0m&XC#=3g27O& zBaAR~Qn8(aZeE)>;G%idI!%@esx|Lc*!pn0vC|i5H$i=y#OgxTpv-*(ZaqGbvM&DR zrX!c)^Zg5^QSMRlye@~%OQ_EqEEdj!!2O0FS%WBZFu82M6H1%O<_K>o*fe2@k@>`+yIGEe^Ln6d{Ie|%~2h^c2vB|Eb@}nwrq5H|7*9Vf-Bac z{=7$0cJ&)da~*pCz)30MaQEyNq&tMh7oqvQ4!*rU!xqhJ7Kunics{3FQeU})cqt2h z1PE9q+4e*IPfX0$Sfbw}TDk0Vq02uN^>y{$1VjbA``9m3&-pS5k$#-~wXgykGYQ-# zK`D*2R!mZiLtGPlQ)gf>_o+|?Y~Vjza301y5={V)nfL7p5i~4gR?b30(8-@szA&SD 
zHd)P4wJiFn6_Jas^pM6VJ3`z}(7r{cs{Slyj8Tqt8dbJQXaw(vMG|>%WZcf~F!6JCW^6T2mX{}LS)N|X zzc$?!A(tRW&<`kyeJ+1NDHzsuP~FRtnS^b42QwPoOBM%)DAcrGxF{Z-dnR7*h_)TS zDqLCa+b_q)dH-Zn7aaI;#AZv`xzk{$XHWPk;QXyRYmc=j#dzba9Z6YD{lwI@;=mzN z?S^ApI4^6bgmWEw`O1oyjvIu73U&5Wn!__~m5c3@Mi&ng{FRpe38Ly(7tttaMz1Bu zW1HZ?yTxbG#OJ0FWTre#FniD9z~M6V~dDdfTdLo~thN)8UNPxC&r`8# z?iAZdP9_ANA2w2&?+OttGwda8ANId2XtQLD zFcj{W5PfsrdTO7cuaG#iE)qU^EdPLN*Aq=EY)1reJ&ds9Jz(Z7!D49-$IkHYEH?g< zn*1aw@V%M>zBcYhhCUb`)%T+>Ej(ki#=C+s;b63JOqF!F6A5f6tI)teRue6(f&(OF z>;(6VDpGlyt0{!{Jhes2&qyfOB6=<`e2?;-hA?kG9A<0tkTs;$+s-PV$BtGY`|buA z=iOA;0yQcInILw*V_DG%mA}hhlWtxpXKNP-Tbj^beHa6Vs_fA#x9RGnU^g4Yn*F3 zr#qQ=tAU7(^z}BZvsPZ)4}~I*0=kFk1}E;xUajx*+IB7O!f?>YX!Lj9Z>TtZ+W zb;O9pb;?zXj`uiNgK!mE#`BsLgRA*=_Wqv|wvbD#QAL;Mj3z~&@yS6i~{E)AJYrI<5tl;8|f7gc|z-ppG z9+%*26@&;7h&Ob)&0}Xw^Tv_JkUS`|lY!bYn3`yZM6O16j7b9*yM>3N5jhTjXkjFW z>}M+8j=M*I7ZP>M*4jG>d}CZq4Bz}vE+A+N(A(HM8kk$ez6WVIE~&4m_tb=o6{{Dl zZ5dtcjbj-^+DtS{mgf?gG^Op)d9`(qJ7mMFlUn&~2I00Nj_g`ZZknVyb2fu4 z@Ne)HfC`l~`;t2k!`;N{ouH(6rV{Gb>v-`M2eZZ!+QE4H)pA$oc~y3g|)7l|3yI&yFigauBLQu zCU<13MMM!|s76Poj#GjCQ?I=eSE4VBY_IlW3`;4s(gk%fVFzwfGqvRB%u*Yn7E;jZ z(Jai}raS391z+xS;rL;U+K*M*Ty;aRst|sxNfJKy1lh++ve1qwo}`@wQbwB zZQHhOdpDbGHk-Wv;pBWd=Xs1!u;=RGFUHy(grs2FFoz`_evv6?D}5st8w@i1UgeXd z-SK`gI9NUjeOomML>TvtK#L_CQo-$RIO4!mi+T$gqtC)Mn!KHA6X*xBj)5QJq?1~x zx|ltS`}L`}*^ip5aQ;hX7W$&sJBaJ-BLg-}gUB9?rr!8&=e4rs-T6YJglD)Bp}9sUy4%-l~$tWB({`Dj`FEy|Qv?WA2y&_!lNF zp{cQneQn=;-wS81cLbej-lS7wi{Kb2h^RU)@TLKQryeWz4kao(Zg$>|4QHW~d9oLPL}~lAwwvz_*O(eDoq|D2RM#q*W;W7_JJ}MzP z+8jBagV#RCTMRm3FmChnh+WJk3u|nK{iCWP(3B>mV;E?EaNF3)wC6oq@?`+{6rDn3 zoY6az)k}huubqDU@~uT~?jxblWQc&Vm}=FpYn5wjhafhQ#cs&^7Zs%15sJP+jY$Id zvz>KyUtE`3my7rhi!g#6A_X!2EeUEw0GRpaP8`5W#n|6GNf6i?w#rApdIZ)JOKjrV zVn$(-$=rwJHYSBVm&L@!VM|cZsXC-g=F2@ipiCPO@1Etr*)A_A<7JH-h}Aes7DTdZ z;n5Teu{blj2bp1>J*`C^@#QMxt|7CnPG1YEu?L_c|LNpYA)Bqr3TV;NVCdY!I~lR| zF0<{`!JrydO`8N*`)+*VKSb}Val{m@%l<(?GW`LIo9Z|Zk_o}5hTaS?|D~;xB}!iVy_AMqDoM$<>7nLl 
z@{t0Q6Ve>rH5<9RfF0^~2>oY;?@Y)aSJ1u8gL$Qjk2U0{&~lv+^f<_zH-6CfuV=82 zJb=ksbIe-=lhBbxIO{qMTgJR9`@;=Jt@`?FgO5653KmsIlQfJR>fR|{1mgzAktY0f zmT2jd+rhn%`AEffH)`Gnv2V=p#EviPoAFZsc|p0smGvO>eON-uE>2QCE{*)CkW8{| z>4*^Cx&{fS^UTF08i~qk@HQLIV+p|QQT~-e!ZM&l3=uYB6{|9+$&5DMz(*=Y^m+BDwa1p->!!&vFN}5_- znsKM>W()&)`5$n|{yXKC?N>0bpy8-Osop(x3M_7F0ex~s#Z5}NosN+v!C`08Lkwozt7G~jx<4oMe?Kb;7R<#L$h?9$rm=+%11n=OTH@srdDZU!)6@O~D zGN@k?u`qs}#F8D#m(oOLsbyq9tn<+kUMlSsc`+?Ov2d)Qzmi*geWm?*zc%kZjeRhP zISUk8FafTi!6wGHi^WvN>x z63i1<(nSOz-0y1uvHq}`OuX2i)8)I}^JfQlj7O4^&E>b}-@*MeQg1qFkq<;&#t@e4 z`NhTvQpC9*y1e`g9*u@k5dJ-`-$XjO|KVJ{1Qg(`Z|c^>mNo;`>OP$diC{Sa zTC>Br)#OU54232v@`bu#_b)-*PwlvC%04+B%z@5s#a(+2_w-4 z=jXYbaqxL)C6X6Np+OIUN*GKm!FGo|t&8y2IjOi^T%Vt$9o%x^4Ij1BinVirZ0-s^ z31qOZtf#{JfgnaJ(!b5I{j4R*w*R3bC(6bH4Uo|6Yhn!iS+DZ}Yw!3E?1)0E@T`_j zqC9yQ?Qa;rlZ`51EJ-uNL7$WlB`U!4^(pC2Rjb@2ld{ou+81v?_BZ=cEodZRg~*32 zkxr6v*2)T@`!O=sp+_Kd7xqfoR2&1%PeA^lofg?p9PvqbcmMH>QxH#rsP&?7#~P|^ ze`C=ts#lGjC2J=IFlLMHxFR_IJ{2{XtSHOa{AI-eze@Sgu-wx>lfL+{a9$1DpylcX$~tXI6|yVDblD*R$qFK5uxfa^+cqU> ziU<7Nqo3vJ{nqUxx10aY_u)OR1>O=huMkpt)6Ja_#fQf z1%SqL#1Y4(G0X#UpB#Uvdugm}_1vaDw-#nI{TgbfGh2#9ekQQ#A&+nkYk^mTv@F6Y z%QjCc<*?rIzR7AAyqeUV6|k!+5ycqlvcv@S{GwQH?ZV&DR;9l%z`be(I}hJ3u%bTT z9i5A_s||6+V&NsI8rN`1Zhh9H1mv}nzzXZU1Y_z(k1bv%4Lm>F%8vqqSBe*xWPTYL z?maZ?y0@9L%{WcenvWlM>LI%s*RX`W)l1_MmCa1tp_LM`e6ogF11nZct9$GHZIBxC1 zZ-ff=6r!Iazz<;HVaw{Q@I<8GyZBFmi`hrczufB#Z#7RfmlL^bt{z(8c&APCByV^3I3)A01fEN11Dajn)SP zt+uzs)noKZH-V8O{pP_jQl?009n0eU7OUJ_aOEowTx_c8Z?dz;dwg3GtxFH%hKhkz zg_AA7s$Xgn7eNrjp6wQjYQQyNo%a!(u#s_?8h} z`g?-A{g?%YEciv5Py_=@B9IkDsl5RDR2+uqS=%~DWjN=ie_<1^O$h8G^qGkWU45bO zB|Tbi2j73htJ;2S^eV4!m*5K9%%r#|LWG4t91D{-*uNNIGT`27EV;zAvk)lHHL0kkufAuxVfu4Pf zmad7GNzdyK{kf|vDm!S=PkW7Rk1%IES_ZY1)(lh_^3Ci_y&0pA?A(wgBWh47{CzUi ziaE_El_+MNi-GzjtxR42dmXW2C(-^<`6PTk@)Im7v{vWaqu|tX>f>-?AL&6at8(Ie zWhsE=VJx2KX>sEhW>(i*$}KM(27FsIT}D9jvUq zyfv=-)=YxHM8XMM)wj9Y6(>5G$+cyj;Rm``0Uddz*RXgAZwR4(pF5)%eAJ%DRSPNU zxRh7ia&wlncAPFS>k}T&AOCoTpY{qFthf|}MR`KwK2|d3wx`}48R|2G0>Z$Ess)Xk 
zzAuw4v9TkX=JeQ;RiJS(#YFaf8vi&?D}&k81R&yj{k^C#Y^BRLHkcG79_G>GLHe&k zDb&@g$$1vL6uyVrJdfsRJ37xSbA2(nL}2lAbqWCjGLw*)l9>PqIj)Fe=!!_B=gK z+KUlwEd5te^8q^#tWo|p;T|ppl84-wj0_@X^Cyhx&4q^@_lNv@rJcd(ti>d5QhBWt zp43-D9_rm9HzRiXpK2jgTi{w>ob&rqfLmiwO|W*BEuLgWtRe>xW+go=Nrh~L+J zlKTq>Ubl$P0a&XlixP9k;8k9z*xbNd9gOYWN^MkSv1Ga{9^YE-NC@y?T&5`$M=P7Q zN@1g2-1@@_$6~6QuInz=uDZM$OERBFDobqRXvNq^_lKDvL6TBQMEo;ob1}X-^*V#g zI5|`9LOQ-)xVj?*Tmxd9q;EElX9xRho2!I+#X9U*BYSitmKwi2MHA@O_*{YJBT`p6 zku+2Rnwb@zShI+lk+}L}MW8%R{zLlx`RMxaHpelEux0oRF)hI23Spb1-d8SLNZqA_r-!(p02aX{2heBn9{tA(vxkz&83TW* ze39y$c(fFO&yB@NFc^Nn_#uypMiM39$dc_@1QG2P~B31IAd zD<;f?2&m00K!V{NuF2{P4KgRv4Ml(vceI6)+ARH?tRvlf-+F&oN{)Y=9$irmf0Tw{ zGgnN%e@e-{6}b(iy3;Zor-+*5G^58Eo?}e_6Lv)SLNe~btYt+y%C_>9o|yl2bQkrk z!iX$4Xp*ZoE7Wv9BK?nMS+ybR8GJ+3_Mtc6Q2kR)SM>GPH-@~Fo~+7J$fcp}j~(fP ze8}OW`lm-UEXs6>8#Yu^q(Jp*gwZ-1EPHttoiJ`7H1>1@{9i(}P)LKjm{*Ip`l5#Y z(vzw%#xYmmY-w@q4yz6tf9rjI*ts1qsR`doKR9Q6(sY|RcUTcbh5CSN2NW$B@!xUq zV`)CkPS-MhJlGP& z1tb+#Sj#HHlK0AQF8Ec&R3q_LIY0P#7IM8QtP zUk=$9@wb8jWDVGO>P~H^;4UA||5;XsxR94w9UrO&?C81ikEmt}e>y)c zbw5;WAodHWV=djgSU(v&PL0ty?YyYCVfHfZC`w&%EJorexwS;$bFAn4grBRE?$*n% z3=4Ap2~gd_7_b0cr=XN6CCJpdCYK)63gp9g=)J0(j>A|uvQc!GQS~Dszh*2$OmH~` zum5-@cH`V%RuK(1Bto1|ZLn@SkSd zo1k6+j8w9GbNgJa#|Ns09)J(-Zn#`9TW3VM&_xwkGl~G&{8362UkKF3N-O5xxD@)nbJ%KA1+do znZ22g3SU&XrP$X<1=Xmo&Kw}jQ6RlJ5i4Kb%(qJN&M2S5Lk3iVYiLjLfu*i`O#HWs zt=upKpcS9Cz|WZ3(`CRUVU~$V1z4gKfKh#cN|aSRWNp1CUQMb(X#l9^K2c5rhbUo9 zhHFA-4G-zKg$4{x zS_?xT4qt9lnlO4Smm+8Nf5jEGLF*?rwUF5h6`?R8CGKbGwF!ik%+V9>ZaQfNq^j|} zn?)7(_Z-y)e7&!pGwRqY|6ar7#@cm9w@z|IV3k6#(6}}`MVSMXxAoH zZ=K=`-N|A2=b^5)qxr9GeVw4dl7ToOM{v;R71IJ9t#0#yXg$2 zfY|n+|BeYO1+VFQn?=9X3XnWZdh?S1){${LlzE}&UO|(Ib8E zM>#5g)D*zWt-Tw$(+HMdp@f{4uTz>e?cukji$fHaDIz7d%x};-SQ_4Nr}6}j)EZ3X zDO?yM2dXa(oo4slf+LOjbLkLb7wzXNcxHHCd6O_;JI0R*b>yxbKEMWXX)o zG={L$MIqhutKs;)0U@t1AzR_t?I2H8oC+C-P{KbQZnp17JM`6dkRX)r`B!YAS3acl z#c#FTbI0^rAu?vZ?51FAXt$`lL;iri&+aL8=B@)0V0~!QapT)BJT|2AP|SG!my%|A 
zk-d19Fjw>}*GkJ56aV@Q|HXKrZoA_Ev>hJ^)-la2$iHsZ{i-B2yql|3z|Kw)0OF}2 znnDhwz~~<#HyX0Gnv@e$!1cD@u~IFoXB0%P<}*PXx1>LMU#R6icU}M=wUSR?PH=O7B;x3wMdZ64^F45*6&p!EP!x9~QRyFkYG;#<8Y=DX`fx9@v(J=R_3u#ZoEFTTd0bPm z=dcIRt81xb4BDSYX1%=-w6Ho9%BBhsn(wM#s{@Z3t{>5tBi2`6fb11FF!9^!_eHIz_IokxfIpym$OX{+Evn}i?ht*zLGf5MT z78YH5m0Wo#shoBEMJ^*MWqyOeYn3HcozbAztO%blu55z!AX)Sp?zdj&We-#xk^Wt# zFo=0(pDI;CW#5Om#)w#Gv|=?$?b(k{tldurpPSx0GhHfKm)Q-6aJ$aPfiqVQjBm`9^?`YK0 zLS_erft>~h8l20)((@i%%s(6K(oYi!v{OteWLfJ2bSs4kTfScZp5C6(D_%^!W+n5vq$hcX z8xgwWPT45)dMhO9mQ77Zz;Fr;7ox}T^1xw8KY`{vh|tSL#&FTVH5@66MDD@&S~La! z5G)P^?7Qh!u|J|XeZc)>5VVVLVNd))TzL(JYaLe_HvNflDaqaph14pSfjRTgUJ$o3 zOMy}me=%ki9OjC-L+SwRMW`c;o|u@=t*M|C<4-MrkUqk(;05u~svM#4>oNqggKZvE zF;8L|=1RD{z-?t((Ws`J6s-Ze)OuIxvJ+Z>h=Lh%x|sTi_##i|1ff#A=C0-dz3Kw@ zHe$MU=3$S+p||ql6aYj|E+&bXNX7-%q~VYAIr#JK>h}>H@^veFvmy410eAZOymD}b zumMaF{C<8Q1>f_`g(@qaz!Z-1o)Tx7fb#sgJE{27^-Up3NA&?@O~D6|dBDy;s#Jby zMXSWUMw1mHgA1BY`fL^k}u|7f<7$|YZDpRh3IuU|m3?clVL9D;op29g zsJ%IcFOfR~k%*2m$qM${rM~tyWLXU2T#|J@AU5!SY4(Y_cPnjR=KBO(jR% z>d`M@N^gy7F)UrznvF2DoDHi$Ww{pv;kjy^Oq;o28ZXD zWWs^fDeQ*w>dw5Z?CM%$E>Z9DWm80amSS5sHv#|ayfhqdCttO=p9d|Y1(Ibf`52jb z;k(&j?uaByNPLnmnDAxFUz!WgmFM?zB@Z#v4Aq+uCR6`99~WUatuSPLj5IsAfaz|) z`4ws3TMSCsQPy49ygn^ zQOyycnc&%!RiBm%!e_SqJ#L;e2bZ3QoUQ^npeoTJ>+E~`%T)5bD|zjE=#S!D@#3hE zDjP35d@u^!X>oq-*s>~g;5V6rTYAQ#$6tiWnVI_mc{@#PY_v67{oz9?0At=_vq2}( z(D0{+F3wMk7F7c{#(+Z+6Q*@j4w5oZ0@Kj5O_PeR2CEW;xv;D&K`r1^i@fiV=DJGA zC(6u&DohX|XWBR`6VBJ1kAc@4bKRJ>76Q*x1mVb-GoJo!7h+}^&81OFFPk&d)RxW1%;MGgy-!!9qC>ubN@kX_Qw?wqI$&UT|hjzHo znb<5otA2Gs)}-a|bRq2e@0d#-rYXg7KmJM=y8lwFffH4HoZ3D+B{G(2I@14W4ZU1j z^L|CnEy`5mAqYr(atZ{GuAY>Obu&mkT5kIcD1_#T+FYa2vr?2XWbVB~zD>B&@BjQi z<$+$!h@g<4I{?9q3-b<>IA8O{H;32N3ylzowOUKc{^{Js(&k7DO!fR@VIme2DlQG4 zkf3nuG5i1KGF@d^UVdZ&9|VxvL4to?_|9)X?ZaFs1WnX-^`1zUv+^@3%7}-?{^~2} z5aq0pOs7)ds68W^Ih#P|pC( zaqJAo1B8}rc|DI=gF^wjgOmV39_#`KXE^64j$d9v=fl6wiXwbWhjLxv)!%r<-47MY zr%)0f(nDuGE+`*3B$(PSkc%%c8JS#VS72c*z1>m0CUGF)<*AicwQ-0h+6GN^}5T`FE-4;#TiN 
z#%H%Cz-bdndMFjW0PV+iP6H+~Ef*R_Rg&Y|Q_WuR4nu+!o!?jzsp zAA{?*Ot`Wti`HH@vZ!E)Ft#EMm&8OffmZVk%7j%c#nJhIxgRf z^%IjKOS&-4ZIeUPLS(J-vMUH}%NQhj09vZJSj#R>=kh*gmt>knX-wy{R=$JCL5!zcW$m{#;n$eI{~UNuB~ zIwh1%=;zpE64@T37NZf)eAS+cPOb4YH>!ryCfkHxjXe(_J=+i_&{8q$Q zH_l!xA#$|;usksv(iVwc?50~=hCgy6D&E|(MI!(p%>2_fz2v)p4Q@6uH7kHy?5z0+U zJ-t~i1-byEKj2PkZmvDrHlK6@>|vm?DP6Q1y1chR{IE(P3z%bOXZB)eG-SYCCx7Jw ziI-sa?my__slv#4$DrQX5kTABkJEzQMAL~Lql5mQM}P(*|B0WBU~8(1j*O@5gsGg{ z*TK?dKdD#Knh`4d9&DE(N_VPM_n2}w;I6zO{F wGh_i*ZFbiMk|alRZ7Qbf;WEC zEa;UKxfo$qaB&{rVp2K(4K)e1)Awm2XmIV5{W_}w1U(&gb8DxKcV2tDC}UcdQXu;Y zc5P8@<%pFCToC40VkUm+fxQVO(zRr&*uvGptFo9N3JAjxCO@10rMi9yTM>MZumZCC zOtE|#Lgk98m(WW>*?|L{v4=E8?>cQQ=EjYqe5QukhMg&YPgkA5bo~)ywO$OU(OV5i6$G ztP4=8O-z}89a_Dj06v^MRNNes{JAj~Mc-ZhAoM)C`BdHQ81sktN`;I7$$P>ZGmJo# zb)#Bfr&pGPkx#nj^I@FyZ#+ppT3r69u0K74DQ3XMGOIObG}sxdO-|WeZ_5Y|7&HU7 zqRXaIG8Jc!q*;hLQvCTcG@5{2W}W)y#OlQkmsI(b(Hi+~)%i=IUq^JZ_+YIFFR6%* zXrohH{EL^`ao}mQH-&9V*9>2Nr`xU|zkss3XW1d~RCj8Ct%zrmA_l`uQ27@$KT-wp z_Vk={3X0L387j@(NHY9-aocFsof*;oJmR}`1>&!rlo{EMYRD`-R7w$LKHe82Qh2(g zPU*=S#WT)cgcRcITd6?$vQ=7`Ggsh5hfYf*5Ayp3t`4FkW+tfe z=7R~MsWy1A zYJ`D<3WpHxcn`OO7Qt{PUrE6f2>ovuFDi`2d>vx4tub*8zTi&}pW`N* zD5~?pY`qd5m3qs;88RVEgP20umNvX)h&o89j4YBexJJ)q+RdJYs1$JMyB0qXQ`T|# z%iLB9o7XEX&KQz?KUW-ehdVBq%2dNhRb$lXDA4nO!A91s{xdU|;&I4d4vURdW*s{G zHdFrP3L&_pkwg?I&#rqI+9}-QB?W}83NHG7YK-nXaJ`t)Bn6ld^79)iKjQA3`h~rZf$h_C)h8Zn zh+r)gU?Iaf?TWziKg!W0Fw8N=5 zI_jTk!d#ZV(s3NJ)uikV;*DKMn@Mm9f7C5JyVu)k4{Vy%BJ(>(KH<`ZR%2?RP9(S~ z#S3@ZTE|*9k9_G0o5xfW{OVO(hj_)n=5@ zrcXtIEbPR0#T-=5i{@DwqtX_5}<;*tEIG`dQ6@=W}66B^EHf*XP1{y(!Sz-88 zpV(-~$n(s+E%pTBQ;8&3l!9{x3P?4Qq_`E;zXYYZM(w|iT`R#0-rIs1QfK8Sq|eKNgi7b?bp@}6 z2*K`IVrL++=`rJMIGPhJpOFmbQEp+6)`7m1%{HJ_>L`zZpffLK(Te7XF& z!)2>L)-h7z+z}5bpCA(X82IOd^cIGykD1M@b2JT>K{*CRU)(gxrsp?Hk7E=`sq=g2 zh9ZJ6MZ#oHRCFqf(cd1_%Mb11D3#2eSlrBXyI#RwJC z#}sP4uqhnbqFLSkG8awx2XWW{f*W4b5M4RU$JN*EJ4vGfr|Ay>AS 
z`&v^W5DLHcfZ75{A;bmD50lB8PVe6xW$*jTrj5Ip&4;J<80jlrFBYTt{Kh>k=U`BGecgHV7?4DKu4DZC;}J1-q(GHmRUbs5I>&586S=Re*+I1${6rv&W7~aH9i9bk&b8Tef3A)ne{xE+BB-DWJ8*+o{#6tVWsZL1$FRf6F+qCDK@J8L)_%v z)vD_NJ!EB*?&HtGTN^tMA|cJy(Jk{%8GdhsS1^n~tl`Km5FNTtZL%nUM7w*Q zaOi#Mb-f~+o;g7I$>DY;?FFx)HSuI!XIoaCin7_GM-7@#n8V*V?_xV zd>K%K$vkzeOu=ao?i?lWUn4d76;X7I*Rk`#Lk1v5&}(PWSEYfO<^%zK}7zh2;{Y=S2%8xr^rvNC?|5lZlXv}9Vu}PhE4YIHhvGG6}May5VOT;dH zVQ@0UT*u#X2q_mjy`HuJr|geIfhmgd9cufe`Ek1V(<*i0`?Li@5Hu1T^#W-7mZIH1Qbtw==KqdM!6}sxNTQT^0Uv*R0e^dY0cWA>@YLsK*>m+~*R@`6P4>ER)^P>{ z6Uvze4U#GjR1WP3#$;d57#)PX(kvirDw~5{ome171wcl{#l;2%6O)21k7Et)h}s$s z3<~JAuLXcblAd@Twr_lf)=v;P4ww^o8()UXFBqO56oPg2*&fgV;B5K_@B-hJJ~FtS zYY5hZBJP(`$nA)%9BxVI5j)fnq41R7tVSWi{%)H{f%#xu_5X#cMt<7)iJ2o;Yr6mTN ziC;rdSrZIk$rxIy#ljv*rD>aMGJ<4e8+0>ImNXQ%Q)R zXMGCsIlc4slC#e7W2hJ}ryQ3&zhELC0+YiO2?bFQ!$h@)^(p&!+PxRw6mS9x-11ucHm$(P6406P`LS$nacpV$uI@+=!a1#?Q`?VcT@If zmcZCAOO7h8u@Jrw$}D=I1PVwf#Q*yzm+6_W+lU7nM|nEi@9~G`?B{Pf*E+}hr|Z^F z&>5hwV)2hJ@n-wvhC6We(!aixmLG*DT^G^4gNJ*+;&^d)c_CMv}3K znRT$f@j+;MS4XB70sYV^05W*^zF1(!2^Ibu-w=?#kqa0gAq4n??C)7rEh#OzGXg$1=)XmVR*l~sNcfk>u-Vi=eJPJ7lzeW@&zuiQu6>V(02AM zfI#c`S)FFsjOar1U=j#h8}X)Y4BRvg$$j4zN+X(0F3HzkC4{Ts9(`^L9E z%>3}m=rG#Zrz*&h$qRZA#KA3kP}RmA9mM_%x+fe&X0L=je+u;bA`H@Rn?)oScu>_O z9vkuMF!nthJ5Y7MZx!~B>DTJ?Jsy*hGHp!U>5(i$*znWr-1lEx{ukewBVU6ycVA5b zy**#_!#%&S`&1)r1ki7#lMsl#YqM*c4^@C)1dJ0dgh*XA*6r+s8@?o$@zW|fe}u;mD1o;3cK`y7_mBUNcVpR)kHU{T!`hD) z%g>AUAzl@z3or-3(^VDzZsRwUJOg;=x;s|AP>m;C7|#8}&5zV~CE#>!>P4-UxOiME z9yHt0T%+ARGyuCBJ~(cJkQd-jvgKQ?=}&Gcm;>22){l=PAb%d)9Fnm61fV(!k;VwO#p=ybY@5?DEsdMS>%|T z*RG8uH3k>sVuBuH+%hDw2m&I8tm?@M3P4?ldwacSdoRloP0ZAus-dBAAhXBwGI63FQj)g z1N22eQfuI-fg2<%*eUOWBN~%({2KrMJj*X2Xq@*+cvaTa7_=&3-XHB1cg=2*$rElss2@U~i8^E0d#ikSbT=E+l}Ywyt34;0ph5zK>M^FYUYe5U4QNq-lyrBY(Vx=BRv)$>Sf z{W0jzM;Iv1$}|2)B`CVKDG)Od(t-QaEFGlRswnM*i}TEO_GtG@^x~kCX`{!c5lix! 
zK+W;sJ-TUbJpAAq1-9QyqD6-zc?L#MdKdC=J>fUm_?Nt_)J=%AB51}O&TRpt$n=H< zB#^jyR&t^N7WJk&I&5A&j1QI*Iy0Ciht)-r{!9O0|CX&!D2I59+QX#^Fv@Ub|JD`r zLAT}F#e>+?P}QQ?e{+}7CLS&6gHYEog%JgsX}kL=aB*9bAT zhXO=#z^|O0Du)@8RuN7J9cL=;6WmdBHuPk;X}uK5HZY4~T=3OHvztU7TOAw+%tqgV zVZUm(kq`dL5^F&<+>yfULy$}mKc+O6{|?ZbDyl8wez#Y*K#TvTnT5f~Oq+J9jl4wl8PFxJ<*{s(Q=Ig(k{e2Y=VZ)^>y+&uMa!a2_NvJE>n`=*{ zRMWD2Kb%-xly#0%PKo0?!8eFl4px+NKjv2f%n=f z8g22fOwV_Km=DrGb;R^!iiAxY<=w!$>v99_P#TSRg<9gd0eOq|?lFeEs1Di`D3>Sy zFE_zi)t{Qm)j6gsXx6<`_BP{>c}$ss0a-HPwhF|VGeKuY0!F!sQYc5@4a}(XNu})& z*&J4_92dgcQPp9L0@h{s(4uZZRKeKx}hO(|K8l?$Qs=jEK zU31D13itUyvv_~AT9Jt)ZX+OJ`=K{VJe9)!<@VCyDmXOoI4}JZ>50-9VqLA(C54t6 zgP|=2zrSzg`gK2C;o=UNv?v|AScscZ}k4BG+%w&5udG(A494)jz8q!mqS024<5cUTmPs9~h*qvHq z2l&1Zg{@MeBpQ~MA2(d1*Qt&snl)2v*HJs8DU4IFp%#-{5%&mfm;75}r&()$59j1N zgetp)Nd>}sH3buy$m55EKr(y?$*fw< z+iGlH;mowke>Go>IHio(Q+FoR7T&B#yKhzoi4-{^Bw{h`Dm{>eejM%b3D~7wPhz(hS=&aVyp?P zCJ*k!6ZK;+S(4gpv4@$gdpG{cA-p3($lf&IN)nlEn7dB}!%=cM!fkpNCCODGZUsw( z7@7-bi7_|pdg)^ivGFo;W~+=&SWp7mA$B9vMZh%O&;51&bHtqueoRH~9|>KJpK9qB zLT_aP<%j-^U556!aR2eCQi%hzEP$kZw4`c%)&+;I;GTRm)kp zS-LyJd22oKfHAIa+}>)X9jDoK7G|aGox`+sxWOO17H7pBB6uRzrmJ>S-)^xY!HisO zp;OWx-G&vu;y?2=&S(gDvmp?NHR~6-QB15?f=X0*ua`r^ihWBZp45ZQI0Rp&y6`jA z-2sig==t~?gctDYn)($eWegcpSeqr6{j!zV9Wc~#DD>+I6 z@!dnMlBw$3CYwe(h9G^_sdrL0sZ{R{9V<*Re~5!Yyd^rRV=S!{ZU6m#4`QTfXp9Zr zp)%p-w#oP6D&-X7dt=S;mH9l?k7QK%j~r+FvRxEJ6K}&$0uzq}AdGQG;LEIoqqO%WyzG_>(F*q+ z4!avyX>A>ja?cxRW<&SgKnunhjGjOJgnhxd(z@^@>x@;*o4#cILW+|%V>)Dq zD8S#J%Hn)P+RRyq7}1-E3zWD|MZ_I&O{uXO|13N`Q$W1VlQuZ)F`U<8v9FN^8!?OZ z1gkcdg1?Ypzr!BmWY-b`obQZ0!MmR&-{FOKy-EHq2Un^A_inY}W!mSuN*DBqkD6-Z zOg1`OqQ=0P43TM_c}}DzhI9Q?k-aM)Y7oKVq!gMo6Xh!5NKPdr0tzK!^wJ3+i=s*v zLg?ZbMh}I<*EAa+9eEA4FcsQ0Fai>o7H@2h9>UHCfOrPPm}U z5rj-uVv2)D3yG9M=CVc@5g5VM3uJ;zwa3de+oa2Cj5ewy}1?f;%PX@k{aP|*4_-p3fdOOVYfu@Z8^t52ueoW5WUA7tDq&qB?`K?;5 zwZMVyPaOkJ5Dy`D37aer%E9?lLIgkGi25wDpUCZCD$FsN=?T}+AH`l)Gccui>`JQv zQUul_wTk{=LM;NG#;0G}ZSy<_ze2YpB<8EeA!19);lWmdBADN)YS&WCQqQUBGW6=^ 
zj)kocraYm23eycO(6-W&768fKx7$E>vvoWZ<;~bB29efR|BXtvw}9tz zQG{oNK}uhrN_*oi)f)K2%?ru-mJXx1Si(oJdG5R`%WGKnWsWa%PpH+tPX>?dv80|sr8HKHL=-`A7_GM2 znW7UnPxjEW(6Z@x8hJ~Mpqr+@BXrj`_9H#@9K;zNx>46mbDa>;)DN@yq^7_A0s2$W zm6XB)PB!zn5>X*_5V#|0$_Pb%{*Ig(hw#Ih_Bm9!6q)05K@p{zEE1@ zd*6Xntx)o}sv&zf85a{}o-ONNtWgn&!OR=eJm1@+x8GaJn3RTSAlcUYJ~MeTktt8= zjT!yMdv@JG11Zgo)L`HP9hH&pfC!smKUY&^lIv6~S4`n+{u{?gHKxip-8E5j^ZaQ0 z9s#)m98|c;=uXKL1ha^16vpF5q%N-EOM}Qnz*rREUz6RS*pz`6X!{>&zw5 zW!(}`^lg|1my^(!$R%`G)6sEsXmJN+x^gwrGC{srL4X*9hf~ zblJ`tbFE3_sve{!|NeVyeCd%o!Jsl%bYmurE5DV)$B>69oPOzi7*uEy~TZ-U?9&P0$&EoDM{`Plz;8;k}q#NQREl zPF!X_Xr7lsJfw>ULc!&6(A^4Dmxgx}qhOx>1l$bYv59O>}VkkYv1=nKR~KP32H0?;uGyY^37@+#3L zg+a+D3Dw=T+-=U3D){I~bz{Topyo^yW{f)V z_ek#Lv9XjoD8hiR%!p9K#GN&&qR2twvD5biiza`Hwxo5RfaxpF+{Pqq1PR-=&GPp7 zC;FEmw%c3%4)Ox$uAqmH5)o<9fQ{qQx ztz>0_hClYZe%)M5`pJfqTPD-w<7sE+`YTTAEjaMyX3o~gNjpomI+sH#*K2e)IREY#Ha#(WgO0B`VIgyKRi zV0$|Y-Fhf_WNI|Dph6>$Mc?Q4)TlMd3-<)C3QOg_|J*AlvMkDhjXZ@!aZ#s@KpNLynNCcEapa}wlw)p&Knl)=oG48JLR{|Fv*Vf~N)2z*vlC$B3yT?lt`oj_ zDl&jWv|ZZx+(KuREIq`-xeoRw-9APzgOyLTynUErc{JnKT&pP>tAChO956wm%7oHp z(|$b*RRu#vGM1Om+<^@$3Lgm>4OKiO&=B&dS9VKKF0OdhwPO!}y>D`u@N+ils0+Jy z%C$yXH{_gmHCF;NWf^1Vx9sHMQhHNgY-Y#wPMf3_cizC7Te(cppVEZk8YIfm*a^TK z>VKr0YA8=j$R{ofOdO!YU}rGPJrou1SQiIggmNg}uPECE8k8<7piH|`g^ zFVC0OqG)=E9OP=*5Ps0{_T|Qv2%0Eyiiei=pET>ynvG4q5|mG2|F=fV!u)d$h3R+D zUC*{qh7Eh8t?EE;y`l6|9K5}gojW&&cF|}lLni>&pf+V)?IR&qS*lVTb7&RgdP3Xt zk91-tJa<8H3Zx7_Y9?Ql_NMXs1`E7=i(QN_njb+L)9FxTda#-5#QWU{fX6e zdzU*5N=?MMyO2F8KY3ckHF}PwkAAB9`G|E{Ac{ncvDzIXCDeOa;&P}Zkt4%8eZ=2D zVVPaIm{v>~Rqj8i9G~81XLe#fT(ujffo%SJVsyEN(wEZU=z%F73-X`~6__g1n>+Xh zOs4oO*DTu6yy7wG;^1#(mvtW3=>oaunSDx+f}f8iWFF`ELsBPF~)Os8D$G-)ugJ$My2STF7#LyVln4&AE@fR zP1q3XH`i?z^(-whcNt_F>6b0Rpd8JnwaBMNujSToTrkN=?_R%N{oFOk;b}n&AyWI|Rxugk=T-4C^qdDYv?05?Zq7;adnrkBlSe=sayHlxjZh zxR#xhgIc1tSnm{)($1n%nW(;&zU0Y$KKT`H-D4FJo$GW8w2veUx1@)bJtPK(24Hk^ zixabYD!G$oPpTkY&9PFqB1cF9l$bJivaEQ?@3bREsasFQq}ShEYHnF`Vf=~pv`v9X9^~88-%K6aBd9LqdpHz{ zLx@PXSll*4XCKqF&ZtQg^*}qhFK0$d{E- 
zdK6%{Ekdn57^gKcUjR{SYp)M0JqM=Hvmyy6m4Jq`}I{h z6p+!Os$U2lJK?wjQybv+i4f$~-B{tPbMor`Cf|~Ywmh4wIem#-27k@CZF3MVc06Gi zsXAyzJ4zT_DcaPaslIou{D<1M`;m{-QCzSzwuq0KFaXI?ye0OfY%9z`U5YQ48E1if zu03v_+VfGo`A#Cz$y!PY|E*r4U$y$GJB3}90AhML+jaz6Tg`oASfJv93uA?Ho_}JX zSCXkrK|wO@V{O}!ERb){^V#i5)FDUOuT1`c{BMTq}0a~bn4g)y)rNtuu^FTaZ3h?g8?ppn6 zK`FrNZ({Bu=r;-g2L(ZiXxm32aHc2Q!4Mr-f>cIp5$8~+A%qmTmVNi8#N`?706^?h?5#+f*=FTXpp668(Iu;dmoPT)qSrr=wWJS|U zM7pG?$zxQ}ocUKzEna^SDdZtXv^kNISANMEZfq59I?Rax%Z2ZjtZ0K9Ob3x7;Jt9= z?-`gg;e}Z6!J>b5m zEq^dP68Q>HA5Nafz|=g}Sd6?75)mL#VZIL7E{R{OR9)z&lpQ%ap+N;OQ~um+*`kJK z8u;>{>qsg)4)&Sr>UJ|U>F7yKack};>kgSm4bhJ#YM78?9;E+ZJ|LIIuiUAxIntAd z8|M|+xK6T(tas7)%QF6xR&Vd&-EJx(;%aYISdM#2IJPX@iO5~N!))5hUCpi!v)VjO zzt9;)G|!ZKe5*HD2Ii(uD|QXxiwywA;PoT`oJ%HPl2 z8NUdbZi22ZAVXR1@o4n!@*{hJe2-+O!#;Ly=?GRoKD{f0|GnNiPyc*yQ^?kOA5qU7 zFXdAEND2L#^3u#4ALJ9KhXqgVAUc8b(A^*Rl7hd2sp{m8FD9{OB&*cNH~noP=wN+a zPqDNd3lbsxFyK}se18vl;x7dAsL(i{kc>@N$o&Nsj-?f{5fqW=jsf=%Zt_+$C~n+O zW>%z-FY<$Ix~;8Fl`Rizk5SX0XcY(U2u*|ZV#5yVW8KpMn$y4-2yuxSA!@lqqh2o8 zGst!#-ULhK?jZQa!!XMukhIY)xOH|~n@ige!E3{JVTCc2Ti9z`F$(&gKThT-6z-ayPf=p|A~D-&Sl8hYxZz)wG9Fv`&X8+ey-x6Nd{ym2-LvLS3T;n$% z!6vBZXr5xV-TxfC(;)3VS26Z=<)-+G9cvEEo~(l%tuRQT@#+$$)Lnk#?H-2D_cN%D zrQ(kuBUtIM=V8j~`%L3qE;^(=_|sZ*`H_cLk=74OZw=2W6AmTbgu0trdc=ei2W#iskq&hQOgp5(z zd9cNPm#!I=#W<|Ap(G%Gy{Ei4K82=znJt&`U-5%LLZ_h>O($al*`r)UZH}{iCt;dW z_QB>f1v^<#wh0?y$K6!HBmFx^Unun(&4Ti`IY$Cv+RP9AcsNRQj9 z%e?(j%Y>p;-y&$C7dnoNLTm36_(aFBiOaOQ$b=*n1gFw|yXS2kpj!L&41Yt1PYC{k z!!+U7M-u~yrFTwe^2X`f%R@Ab2hF4UU4xv=RxO&iBFbzpL((s|v%~unt^XQH!{W`e z9+~Erb6-)^zqRdPsW4Km1hGMNP%R%0Ll-Fhgh=AIR%NeBG=+ohKc75c?w-EbI?1-c zZ$M`hQ$iXdWX=U;ydQwHiYg4|=M&Vhm2c{W$m`v;Giradn zc2V=Sjcxw{(8U&oF}hVZRQaK zfy;QYFPIfu$H#y(lXW5AjNV?bKImw080H*|c826F-7-ywlZc~PNS}qOt24yh=zlQSWgu(^5ImNjecuI&K&n9H^34 zZxENyDj=VU6+H0fA*1dds{avnDL74(Kl^UFa>r<#+hE;ij4G1L@acu<9MpoG7)6Qp zAN8+4_d(OfX z)&ZU4&Gipk&k<;uUU+)m=ZAR>2mg@TaRn^X@h=6_9S+mP;iVn1^qQ&&)g^<-3o+F? 
z8p2_;o^IDpxJMsJbF=@h^QYsY`C$RrhN9rxy88PPbp8r}f>nIjboNv}x)AKrw1kDd)c(&< zAriAp)vx#|Bh}$&d(jE1AcBZEWKt;+^^M+YGutT1mfe4vW|wvm<#ZX>PK>IU1V)jd z0?r>J-7*&nX6`aV`ws}wB*l(Ls7<~avn#+zz#V>QmJta91K44_QF+ad$<7f<^H1wd zXtvpZTR9t-?;@?1KJ@r39vG*^YW+B4?Gp@|;Qo%vpGiL)wu;8cx;tGuZ;-gEG-oSX zhYX9wyhfo&AUgVsP{m-m>HEW6)OLaCV6;erQH;Xk=bWxW{yhLFBoPGDYfXhll@} zwPTy-GI1UfthzF7^I(;T#9Pgi9nd3}(@9jQu|YCQ@+}zs%@G*Qm;MlE#C8W1cF0GI z0tk=xUy2!T8y{O;IB$g3t}|9Xm((41?6JpO2w}#NcYt_m#?|O{fw%|}b2wO$c<{A- z)ON*MGe$XG|9j9pzKx9JusarwgBio0jFROo0*g>l9t@kGg zcaQ6l(Jt!rXMGOJgJMWYHUS#UF@_+gyJ^^TT=cC*ti8lHNKK^Xo@`EYJsZ4ul>kgEVSGR?!K&r4v<)+G8fcB}RGYN52D(A}EzQCl zOynAWxx0f0#jVG}8S>HFb2#CW0n@YhtCK)z2>Ofh_a}opG|@IFpN2y`|9!eu#}J+r zTF=|`HjU+Sx~)P4*};H^FUI(&^4Vadpoj=c5N&bs2y5w%vgMx-i7#Hb6-^3(v38_9T?a zGV_A1mBPneTHIurX?j>`{7Rk`wZ}=4?NXjN!6uRj`Xl=M{hHNj`&L#W!I6_Dy!uqn zRY`R5xGmI{G5n}W=;&}S;o3XyX83RAJi^Pgg2q&DvCEcHvFN*KVU?EiQ}_{oq6<%n zpLzU|=|XYw1}rM;!>pIS#vx2C*Vthx>?nUL*shp?1;v-7wMLk@6wKs|b{HyO;APr; zo(eF~ZMWt_6f=f>4wZYNdY4@f5vK?5UrY4B0QItu}x@lsltQ8!e}B1KZbw2=G~ z;)C*-*!Q|4lfxBqznI#EXpm%{t z>v3%MC{mq{mFw#X;rj*Mn>Pn{@F8auyTh*w5%LX>K-}Tmc0FaREXZdk5z6uG(F;$j z>_r7`WX1|&>_`|ZYI99_GaKs&rnFtsL9@IqJ9D1_X*pdxo?hDX^ z1FmOXLZeClvxG|_NNv?iEUxLT&sHsb5*JJ3DO4dt#J=H8L`f#~w%`UuG~qG_)RpgK zy4+Bweub&Tbp0}&RG0eUHiQ#Kx-FM|xZ)(zkEt#B0=RBWpu3(j#(fc?a>I+)>LKJc z6BZCd70@Ielunb=c^|~fXd;*&c?<(vR4m+v>${+fxp_aPw?0SIyVoK=Gt9{mdq#`A zroo(i%0`pAtiO<}#yBolZWlfG&Nu{F*?E+G_%)SN=ti8^!Pj8J6SP5Zb`fQP3E7D7 z0$Bd6ViLaHz6T~o%DpUHaT9s?sar-(zLL085(L{>U;=C*L zx%hAPp&<-)ABAZ8+1ovYR_=uaf;|l*f=t|99Cgl2r3<_pDeY>sW6v1+kv)kk{*;H0 z5Qm_RS(^0Qec-6sL*rmnq#|nsFd^J>aXxL-_rn0+OT38l%H%h_Kr?YtOvX6bEt3( z>Ky>lG9I#T0Y^vg9j9rVv}T%#fFaGr3mjf_dMl0Q&mGY-m%;Nl9cuM(ty*#2Nbd+M zSmkuto)m@+qGF)i8^mygR}3xr*&axhOXbrRr_`FV2&KMo#nvfg$SOo9RQl!xq>@5C z<_xnqu=Ebh+=P#at} zYuxB@^9~+2xqe=W?Ex$-AMSc}EI2CHzvL@`NThmtppw`aXF zr7U06a4M@}l+e3TRz*p?eG`gJZ(?`<zJ8fG~0F6>k|Ilm)|on#M*mPjN=f6%bahs2;LJ&Nq%+WhQrxzip0%n6BtjH57Shvu}?pG{K*cfVo( zj3rXmkS;s*y%^~n&-1wk6Ha8^HhYm9VKOjjFFlZdFXPj}>!5iowX>6#(TLoYl=^1x 
zwO+QHj4nuqAZ)~a7!G9_?Qmp-29CyGV6^RRaCC3FiJ#&7?S+3Ij>LLkG4jp|f0lk~ zfO7&7_nx>Gp#|#A`{{F2g?;S3xAAOI`T{GONIq_EdM%mqXLj}4o1d?3`f)Zelw^-f)htoHyThiTxV%9>nEV$d%T)aAQ@*$h`d(A7 z!uUSbWxK^x(ZQ@1lNw;P&;JlScwWV)fVV9uM0^iVogJtuA}75xV-A6O4Li=?Bt9&+ z>{DSQm!ha=X&VEvKb)fAZ0!aevfh&d^pgC`z2YX|5wGBU=+DLQtYe%dj=|bTj`;Hz z6DwRiBbnVAxiXM6JSFeiIUM_OSO`R3edrY$Tn8Uhob><^5&Q3{myqzr11Cl)MrRP*-1e>E{UX@o?IJ@y8#@+|>)o&zGHnr@OG32XY|7k_` zaP#jRRF=g>OUOtP^kf%t8r_t4pt%d`i}Q|{gL(QaPdNsYsJlh+$;mQZVidMEo!eb0 zwvg!-A?o>No$VE-<}ys-Doy@s0m z=)^(6%1n1$gfcQo(-0G!aTye!ayAv<)2EIoGQlrkS8KS);a0JfaSB_cu1Se{V28LY=!m!f;^U?;d?Q& z#R&cLSMo&p7>TY@bAghH+w^8=G=8#c4UV^(x(uy3rVFK$l}5eH6>lE}vL>$lJ=IEp zYue=m8Rf1?f>%X@62>YYYP@;KA7n*;p&%_0*kk&*xQAOwJ z(b@n^H%2yqFQFyu@FwB++Jxp)kGy@8)+a)9^z|}74oibF#@x;NY~~Ol4hn%C$*;!M zjHW4tKrc8G*0~tN$Gp-H{Uo@D1+IeaFVutk7AM!Ws2r^~JQKpsRkYuO{b6WK>g%|6 zQRzqn`V7-pMc5&Na3!;|#jIfxi9VH`Eqhe)GQ5+2Of!P1$43XZUR?$7m>5N(SBTSQp20B6lfd+1-vYYr zT6aS2TZ^NcHGefNG|Y$ua$DPd_O3+l3X83)JDC%>rV?Q}WHfh}(9>b9p4OCRn|M{x z)fPJR*$zJz1e~rA*sM^(q(sBKUnC(G7nNjua>}JlLTm=mvN4a5iB!>{8tuS$LjCT& zw~jI6r%5V|GCEQ98ny-+ui6`*$pzV*?<^&r@o=CYO~(TNT>NpohAdX%K~u!pgbv7@ z+C529D_p)PSvHE!XMt^>pgVe7ad$ef_p=B$nXDH_@WOgcP=<&~&0t%^YyzNrUYi&7|O?Yq)cfc+0r01@XwfWXMjFz#Vre z56Nl)Y99NBG|&lB%E`}iGx-=(I0Gs0K1$;LDD4URCbU`NDYn$sGJ}FrLf1rkXm4lU zs!uE0i|-}%VwLT|p0?8V?E&to=a~&ZF<_TzRqJzq7VC1rI2}y?nnK7jicD>J*GF4m z%L%jklI#LoILLVaI7n>ey*oSHlh&0xX+6IQQQ3L6*l|5lmv_^cZIG=?ZBg=ZnfrTs zSbHVTS_RSoRYVSG)+>0r4wRgyijbU|_+?y{p7QFI9(Y8GYzb1Wx0q$}X((#kG)L|; z*4t6!Eg+ejMN2ZDwFko9izvN!M>p5I_PD&}^^0**)6yRlE8*?)8a#}ZM)9j7hHow{ zx@7a_$}v56zBQ80XNJy`9O2zWU>{8=cGHzma!Xdo+r46xTZ6wTl68JJ1%p{LPlgQiDA!L>GgF4kh>8Bo4Ty;9gSKSiQ1xZCxUM0H-&v=aK=3DHiKszXWxyB(n;mC zu<87s|Cm|MpnP5DtPuf}05>R3H`GSG$Y-?^&J$*La;e{7G)-~34GK6xxvloBdGex_ zC3-Dz@3DeT>ibr+7NxtrjU-O=1+o4# z(!#v`em9tx$5@rTYFc@5m>G`iU}~u#{~A-oz%&$pwXK>VuPEz>@r51Oqm--B=Ex!f z7->%??=6c29kzG$ahd-s5eP+|@GSgD_BuMyW)J5<-&|DIRFV8g{psto-RcI}o|5V# zbbh8%7XKeb_*Vb+UZ2V1b3kFB8U5<{Eo% 
z>Hmu*y)?~TaPYzNNj+%=kPgm!1{qjp5YeuzJ_`w;m?T|{a$IIeUE-vCK9QBk$_fx<)XeKvsTE{$!2?fx5LK~cw zyS!RsA)B=?;xuOJERMPoN4`Pn^%hEg#Uzl9qPn7@S+LxZ!XR(egxgLoiWK4~3>G>z z3bnNgHDIo~DZX-ar)9I?WDighCM3zDAudCr_oj4rV@=$R9Hs$DM~dr&)a_etYOs@R zb`Usc+Q)4SA8SJWXc=JXzq1|UzT)l;s9o#G%id7zk{&Kw)t&H-U;05;4YhB7K0yz4 zC;f2s@hBI3uJI~;aja-&MvHRmYrSG)Qva@@!>UP>j^1GEeM>mnb9!Z0M^IN9SF4@Y zl>=fBR1V$C%*6MypL8iy-ovFZTB!0L>cT3LMkBX3Nkz!Rp)pWosnk)LcEZO(_W0(- z4lE=@C5-@gtVP}j1vh$G;XKCQ^`}(Ye&{7JOwP2xz2Om4684GzJhc^Qap*6M6OuTY zn@DEdDwt!Up{h6UZq4hO!{qb$jc$B5_Cmj9iEMweH52Q}DiTw0Ua)Y&Wer*NfG@D$ ziRoJ`uUX_yC}pmiBvfu&^~VByS93TZ9d770#l=Dj9?oWVJ3Cyw+)@09DNkZ13m?+! z3ldr^cq0?9#81j-O{R>+JcKv%=N})qCVlU7sG9tFFNUNi)^_aKe1D2{zJVo;e-y7J zA|b*z^xzI>>L06*dQmatcivh}N1udeAsvWMk~Wvd$vKJ+?OpM!Bo8M##8jZ(A&UnS z$f2AXW8ar8@mE_p(2oTODV*kn84us_#VQqje)!24lN3%@dy+S9#P~B^ix_3#4z;(d?p20;InAZXP8}6#1LAmvEkhZH&ucFVAER_Z zgJd@4Vv0i!h5|FLe6Bx1QSz=z7T#j{s=@|Y3)x1PIScqGjUwRz@}{XVF`DjFPqv#- z{te`oeWQiE)|e+V4)d@)rui)*-=V~s5S78LjZteU1*L;k#+ww3m2A{##9{+iK*~O zm)oZ<9{HY0=l#S~y0u@>!CntGYDhDa=)$0G)G^?zue5DmA>XI1l9*~ow(w4i)|$VV z2wN=EX2(!RCl)B}qrhG;z8;NaO<4X2_nMaH{M3TTA!>LCW!+b>;}{ zh&M6#BN8+5&uw4#vIe_m^s@LsGD#Hk+!zC|ta1-8rm@DD)h|;8>0yyhHE$41C50rN zAIrOV@6b-PWz8|NIfdsg4Cd&aKPiywDM&0la}!%nNWC$An62cpBQrIGUsXO)P-fBc zP^bO;)n@BK5^7?5WcujxrC`KuY2#(RZsu$7jKSm1ZQ4S*7f znQm~S8-HtO3Dy$*EQJbhM`p7hRlpKL>Ov}d!Rie0r`|ZKV`_a0-M6>TTxG37rbu|> znHAUMpx)>@PCHbkrag3rU#ibFol&>BW{N783B`}@?6M-L6uBDRWKM{x<`n!Ksqf{w zzugQx&WSQy>Bptb9u+rkjAhi*nktQ`fjQ;QkF`nC_PQR8w+pE^E-x$VqUNOAgq5HY>nBP|s zigJG(XM@W`J8Mr}YyU7lIhyO<{pqBUMSjDdjOEmsjc0^E;U&^Ac;Hrm^oN)3U4f9& z4gz%0A@(FTK8sF3^6QFFKQ#)RBL?s$%ulFOuryAF5skhJHoBEECzVx%cU)FNbjl!& zV}6*LpR~`CO|Li%sR`;gmLk=pl7y%G8i|3r)Y{Htt!q@TlEvJdCIanRF479sQ*Iuq z(!??o%2`u4nrqsP+N2o#7#p!xZ&Zf9cV=}}WkEjGYUsC=@1?$Arw{MKOS>_{E#@9E z_6Cl;k@hyI=|>ESuuV92v)y%8+_OYEnrJksMv=KQYEOA-P}?Er1@}$37U0qo!>08` zN@)yn>dOnZN}Kt5Q7u+c?ke`iAa<_Bpb+?fa#L=a03L(%Nk50@y?ka9QjkoO3Mp)~ zff;iu_VdE$H*MNd#>lG-omz<<%ERW`Nt7jsC}H7aQJuP5@^%UYi>-0%s@qT%|DtUt zz3e#GIUH-%?7QqQar}L+gF` 
zaX-?J;gVWs)gmv{p9mkdVVUq0GZs*=Ftv^dWV}2Wo6v5v#fPwsx^c@^1fve!%BJ9A z@-ZsigDrBz7}5+YIMAKixem31zORX34L$mA)+FN}#-3HP7C=99AX1%#}d%X>d?Ojkn&cf!WSkNk%e2z*h{3 z6B_%o@p0aIb%B#jqX0J7YLfNvQC31hT0c|{a9sZ^OMBbAo|SaB$=bxPN)XrwiOqDf zGzb0$F^WGDt2VEg6HkJ@SL5jbW`Gx|d~#Znt3(FmEs2zz@c#wdH6+T~2p!0BQ2B+- zB#O=gRmj8=Vuw8oJdmT|piMK2I03l#sR0PrWYHOVAeIjCX5;Hz0Y z#YFqAop1!MN5zBD{?b>qO)szS1+JXlFFXfn@|}jH!OT z|9ImMcZzu`;oLn3sa`Ebddc<=2Dr1s-T{`7ng^IjlnpYPXz2xQU#fkPMY4<>M_-7Q z#@$XE?K;8+{4j1|V}^FjUu!y}3E9V+l2R$1p*{X+Ku+Prl4@E_k($j}YMoFkucQ~$ zBPWwO4j|?>P9A+6cCs}PHUheRkhETry(y+4M1)mpqF3Px8) zhEmxaZJ{37%d(zPMp@Kb6Pma?ha?+s=Pzcqa5i4q?JCFmYnM~c9Vn8qjKv(#(#bT=Gah|ipt$@5)Lrb-o|7VZ!zDNeFZGl_eH%2v=!+k%wNUg6pSaIUF;^tS}}WE;X@e8;q?@ubArO|(bql6`XXN@R9F-?;z!*+TOv zK6%Xnk*V9`85_eeZlV~W%LDwmMSMsiS@DEb$o<-o7R0pUBe1C=-J1(4 zWw2He=y+SnotD(vr*w@i(j-?beZZU1sQ~Z(&Z9EH;!ewe^ZnCYTZcje>|Ur0vBzP& zhg2*d|Krp$?%ljLj>YYxy~HbCGwu75K7~RBV_y;&gXVdQm)$_7K(ZxHmtV6C2pM{t z+eBFYZb60k^8RwHkskz$ck$4g+O{J1`!fUhAH*{s=SiA0#AC&m$1Jf5UJ|4Ci7^BZAWzH1#)4crbXM5)HFuN*TOX2=JJ^Ioeu zl+3w|^>aLZu=UuVp7t35g!~#Qlr&>G(4i)28jaKhb@~scAp0C%*w2tcKnylme191y z=&XMZi+qh81@h(c)15*LS2F2hkYjrnHoZuhv;qWFaA7cdNHf=Xn)1>}g5o9vAywf! 
z7KHwQjZ4%Z;_v3nYgl|P+B6@kLH5eWwxZa3h4LRc_Yal&2rOcWxtHe{EQ3-j$jMAR z?&@6ENv80evyC8lNXAS`*yjJL64HS1!Dd0q-5j!}`i#9K^wNN^S!AwYV|Lf~n*iOi zA%%)*5gyL+R>nFufzNIo)o;5YN;!2F1Y5fj_zzIn_vGXh*XSXv?&Zwa?@(OLT1_kL zMhD@)n{Yw$r>g6wP$b!~LFQvMC=uit)p-3oPe>CpC;>n<%0||v+6ZMKKo{faaSlZ4 z7>=($*WIO(5&@79YGpN0>Yx?;M#=xp;6aESgo+{Gb2qo`LMRPCqNo)Ui#F@2dK;7eQs#u%Kb4qx6^Da3aW zikX|_&je;QfO!udh!0V^iZ@n)?1^BwkV@ZJ=gS<;Qb!|{?p2I`{g>DG+UxiUD;TMx z<<&b>r$wm~H+8crItT3*KTzp!<5l5J5o;_pNK&ce&4Z^u*=+Uc+o1)x@K7yI4LkDy z4H$JxO&Zy7qCuSnAK-xg8wJ4d{q8{Do4Dd=9h95vt6jPO9`x>u0LRJzg!AL7JAGm# zU4d%ed-Rq1IsHg2EiZNpYyPsiv1U?A(%6EuyD$?hT84QC&JwcmZZqw;MVyoF5Chw{ zo49pLK#=@BiVJsPA7pmn(uGsLDxeDOX7}3Imz6~UV=<$y_d=?nWfmt8)QZz=IW|9n zIZFw3;XCPSRkg@`Yg2?Oi*ye_;c6OdR)X2qY05)1=F0LmF&`D8X|x)%@D&*T2< zhd)&;s4VdgT27*Ksxc|02-4?LI?)3JKG{xXo)wwiZO1;s{>E<{5kC-2jw?=}wVc7@ zJIbs03N^(TxJ0;s4j$+ z4jvsjuVhhq}F{KqbM1wAYAK z5=#+#FEyd!_awQJjWTldbOW}hHgP3Ep%>c)ZNJ1MVq(B?mk8DtZ3g zla5Uzdao;2_yqxLjgkt}pc0ubgJq;=ijY-2%qJ8xd3R@mp|8f-8qSn`WdZ24O>X-w zO)bQMtA`aIHE?j8skyBj(Ldz$yN-Z2C` zgnbd$+H(mf)i8ctBdWM5KnfmTnG*m>eTvL<9WL>9BQi%w15K0+m_W_p#NS*qRf5FF zZ`Fbvvvyca3UiP@v&{CC8@mx<0xC*cv;y3(ENZpl+gBGk2`Wc#^k1Q&&xw!eOo*~J zDmu~lIuyE;)MHTGoFaz#qe&lW9i;5-e%6A-@>ONQK|j&IhJj`ITQW8+T&m*wNSCY< z8DJd#GUWY&shMVxCA#X{1Tlz~iko&m>&L~p6w2f>O5?gm&?(GA*9Jc-4hq^nX$rYW zl9uF8_n;QpnG6Q!5!x;zb-D>MOkCjDeT|kpX2kIW;CJTtkk)r!Nm@9?*~k*LySL_S zkp5A)oCGrNRQqFOi38`nwJ2zzI}@FOI-OLn3Hq`z5{1>WWW4=kwo+*me<9Ki)|o`j1l#E6-RZM1jl>Z;yf_N=CIGj=F}irnqHM z2$rxDJe{1F=o8?vLIQqy>-~Mf*Gt}~c`)=Mf-A9I8~ll=QUxpo@{VoO%J36Uo*Cbd zTD^wL4~*KEXw}&z->S(gCFMh4KPy^Jzw|Mj7xoHj+MpJjYB=`j)?G+BK*@DP_=xJ} z(TWUae9lJ~w+9Q82m}D)U2vggz`=SsX(;(f;~R<2pwvqqpVR5+&%841ZKg5qdQJ)E zkkdZbYZUg9`y%m@)$L0BH7HFed-JS=O`8Mem6s=%IJM}p%~*R)F5UQIJ~q-P-ywGD zOPw%$)a;_NL(a;BB?5s>E4?&l(=77SAQfX_r}dQ6O90+rlT>W3twd4nFdWl5i1OS` z(nXN0<&yUp0esTtK{sjjHAqFv2|NfZ-$mgGL{`0+CZ3wDjU=l2ImQm*Q=qp-VEs?v zt@AHg2M2vizH-G_bg>E)6JirEyZhP899p9!JP0et1MO${m2W(CcbV22fENM8a7LM? 
zYC|nws02ZfHI14=uq};w2p#!?=FwYsw%Sy;!_ClwHAcQeDY<_9A+joO4>Fep=4}hV5i6%#=fQ-;KJ1g+WCl1Qcu^RWsALDBGsLFCD7ab_ID9@S&=>3$ zzJ6&K;U0Rg>9Vm(3`6z1pQ6KMCbB^rD{{{@7bqo?o8bJ_?rrbp1#YD69DSMS!pY*3 z#U73<9RVlmsqy|hqzYa^%-MS<#L=!0#7K*=TZx4ndUpr<9>b$OvZ6GT`3g;h_S9dt zFvp=UR8=#BMac-}7rgCHk_7;OdGMRE`+~92;^rX>{|fYqR;$xNHb5ap@`CtT8~A!<0RNNY4?Xtl>aM%AH|LGH znH&HuvW?&+ay?5AH1=nsJ@%^&y7@B=e4tP$wI`&H!B{TiGiG%)xUN@gge%_bcz(uniG&1Ku8OQoC=8gHk(k@!DWuKJMyi#=s zzxEQ{sUpa#ZaWe^pk~_)F*QwWc3@fjOBYiA^@v ztmE#*@ivDGiUA^F z7-cN#MI2_mxDEtjJp~sdAqTKsGQ!?TCrwX-WyUYkxtZP-yPn03>cZT*K$2Z4vy}ec ziTe=8z8zI#-AAN=NUI6naR`~YDeak6+4Zo)=aEu?Y0?uL;m5>0x!q5kbIKq}dl!aX zn|%*ph=^e;2~w%%^i0N@ODFuN4jwZgMy$in(NaW9dCMhV-f9D)D z#b*`nO~kfgx4`yhRG$IFk%OG1ohcbtc30sSm18mj(K#JD@P-p3_O9Ix*#h&X>aRtZ zDjw<}b3%X>-KIgHK7hh7uUr>?3e0}q15M0CbO7~wchhzp_56*MLL)K!olZdKHKy7f zCfnyz2tk$UaKEg0s*%Ppw}T&t;b18GjDPnES(&Y2;i9Gm5|>*QVK7ryHTy3T=--@f z@bDGL7(|o&Y|#mG|E>mw$HGoLreX3b`*HXJZOU3?*Y+e_ z1z7}Kk>M10b3hN^9=i6|Nr>T#4YjI$1F;tO7>r(0UsF=)8Bt;FF9ekjOP&{BW2s09 zpBz}U&#g(QORIx9vA6j!JlDdW)fJJs69Up3LAqF^rgftBD_%^^sOWz8MUuAvaTBs0 zbfOmQs zxXR2~k*e%QOlC>Wp`Z;{c6km=JLPAd>D#(thsR?Yn(2O*I<^@Ed67qq)rZi9hHf-} zW4io=hzcuo8)l@W`x7GC`(ydj+$(G|(O*1Fdamp;Asm(&GMu*^X$}Hm{+zr}tdEtw zsPqB6DzhCk>vq&XEY7%EA6|@UJTqD8B34MtyX2KD)fBd`_*?@A;ccJXOaN;0lYD}V zk$42!Inqn}PO1QD|03ASjZcHe!O7$*Yy636+MX-4+?(l(ick~znAkH(ww_Jkqo`rg=q7sS|BxiA zz__-cBR1*7jp0692e8jA#+MSzF@JidnV^{KNQO0@sizni+}VgMUy9I#*(j_nL#RdM z>)Ao?yii7h$?_#h{%Gr_a`D!o9^9Wf&F)}40jX;>^om9Ja-fDBp%0^X}397a&xy$SN+QX+KqyHT2Wzbw*`W~D8 z^nPQq`aMN?21fX^`ABx>O@$UpnQy#fGUfY~?zJHKP7Ysu`PfSmbC`YrfvV{!5V zNO6d#y85klndNihYe~}4_bMdf?A<<**&J8cRoEEFXc^ji3Tb&!*DE{H?a)0FcxFdE z-IO7C(o$$@+&#|!aelA5^z3^nuvqJKL=?QGWWQ~?)?AAzQbCB!880nFgXZEqtDisW zAu_$X!tRtcX89MltoEj^^mXXAb#5X0Rc9|EXjn1IN8xui>w930D$p1>^GNqwR$MHEM!JFaljJpq z>EQ%=P&WZR62>wxIUT)i1OMx!k7+$956LVf`hX>$Vm3zD09wCkVhkN$=?VZDXAe?6 zg-aK>ZN`NmSr$7|f2<{zDSJRtP_c8z>VDF2UU2p+dK~)w@2Sn&5So7XgtH~q7d4gp zfdGDAQ3F*F2elE;27TLc9T#RqLO(K7=mtmoO2z51%W@Z4w3v&-RP&jq>JP1_nOGPE 
zJ9TW?pUF8CNYa+^AYAQ?Q4z;S98=b=etyNqoI1L52M>!pM`h}3fm}Icn!@!Gbf+@PFl^HL-Lv_$c0i5Y_ec(-4x#nlxrKoi09sUeYt7V+0(~ zZr#=D&&Y1tE|MEDr)t*oGaqsVGh3b$I{UHh=5c-yq?c8T=R)b*#eeLgRD->^rJkWhEK$F-bX zLdvq}jU?di-zRoCISR|mZ=76kE1T@EBVXJz1U{Xb=|PfZ6u5mAL=1>frbYu=}(2^y#D(@0_r{1KttVMF}luc-T!A6 z(Luc3PpQ^ndv*WWOqZzkRAg6u-^5|&tnRX0ts4Zm^ajtP^DS}nP3H~s$%)JJQ~%Vu zwnRn>4{}{B61Q-+Mrw54k$|IUmBRr(m4sTFWDq%zS8_C`oy6%qJ(D;cuDnJddjd0n z1b(m4Glvh{vAHzoK7we9&*ZdB&ITmDm@R7M=a#L`cpo2zY{OQ+v-$7e`1~c|*0dBB zVt_>uR`ABX?3BE}b?Uk?F*Vw4X6F4P_SV%?hrmAV6D-U#mots!$zijqaM~i`PhDSq z@a|^Eu80m_3i`51kKxyuRJ~JSoTGLD_ymmAWTMf!un`VwD@vmf?lb{ZPh*3be3Z)ypWL*^*AlwpZ*9r-=p9S*|nOp!JUl|Gu z{Kyy8+reu@(D2$=09{JGkWFwHpby~dWb)mF$72LgRE5A5T!I^wh=EYh|BH+(J_nN4 z<}uR%rm^N`J1}4Qehz2D+!es&PJj^g)YzZdG+}b~Th!F?%RJQ9u2rSPit=j61Ct&= z;3w&&0A2B7qK-*?BN__|tfz727PFxVnntbspU|8$^AAa!_1oogu!VkXNUZ-C>2|Z; zJHcC}0`~E|OGS<5s0ChP(J+7?G~sN($ab*Ma_YLb?CpqpH|Q&NPiOjZWx`5OxPEbV zCR?bAIA2dA-olU%Rf^Ivb`&Gek*OzKkz7S7k@h!@Pn8Ik`IK-pzdUKub9(W--qq3| z+07t~*v*B@4NrnihT)T8XhKwZdbdC-eoor?P?k=RGzISJ@arq>Ce3CH{8wW+2^L~E z9n>aW`8WwSt;J-XY=bBi)_UR?9ZcXEhmcI}Pp?=ni@4Lv6 zO$$gPwkq#_8-zB3g@IB#O*WP%UDUfW&^SuxUS5|nh8VwO-b?N*u<~Q?cq|Q1n%M#I z+OcK@fPmC5kV%_CTG*=nnXc%J@No2o5-#A1NN`1AVMU8wFs~m8`OLqxc?%JQ`jM@? 
z{`Po3E@tior6Q-MEJ{?p)a{~$S>_8c!XTpE8kFxVm&EjSJ|!DDh|nN9u-Zn6G7g4^ zenCxzeK9iw>9ePV zs<(|~g0#ZENzK)1+#de_)#&^aJ1yA~73W@AZV1st(1cLx?UJ4qkvS?e>UI`Y=SW*t!Ij4XJawDikoMtCrK}-QaKttpcgPi!u zZB(Lz^@yNh6*^-b5<3HXbociWZ4g;6B9FDt(kpi}aE#06Snna9*DU0d0Z>aDL+wch zylugf167v*&4b+Y?^ymyZ-;K=MYsv_?Rc&2+OyhEmg1-WSH%^C0!ISsikdOhT2l-> zcE6+rZ1y}{%ZnvRg~lC}qfO(^cVivXvw-x`cRn@$1TuH`A-l{b%d~pUodRS4qy=8p zndRL~7MqwMT_ipye%fhbb_vX=>6!yuCeUC0+j6T+WZlS1lL(a^EiGrPq!Adz%VwXb~e?E~DzceJ9e)1Z+Oyf6mJ{CV0ifk${@t*(Yp zlnlT?>#Wthe0NP#%wx5Ge3^PL!*Ao$U?C>N4Oa719CF)>A7F6%kaS)TRMVS?Yy;Ww z2|&3mm5rXSe2DEer8(RQMZ_-)c_Sz^i#i=UaRO-G-?W%Mf)118Pj3!n!^)o1^1ytz zxy$m*INyEX(@kJC?cly@9PA?d(nZAnAJmUJLrO+Z)?#8Nb;vbuA7)X;K}I zil#G$PtdFZtc3sAC1%#fK#<&!5(5EU!2^{X#B{-ee1hoKRnU_26n$UE+Kuh|)~qMS z98Z}p;UF)iB1@-AwE##jrak?sg1c2c_|s-*DE$oId-%q>{`%jtg{WQ36*Gff7_8rZ zEi^)R;0^7yKW-~c0?sSB?hMw1M+9kkiouk#8lmQs-dWZTWNmiL0Y25^b2U>h44P@I zQl^Ne(`nLV0a@giCb0Xv9MJp=k;$K)qvF)61K{|e+yg$7ut25~9i*AeE!Iol0v%M< z-r(l6WxOXgXmVTsD{Jy`wB$QxT9{60zhy~&-4taOlZH3Z-SGD%TH1Pn^LW@}G{Ee@ zZLX{d6pmvcYWNylfKThQdGMkeb@NGwx>#&)^JuHwMwTVVA~nNvl4*MRU$pVB*v#}I zRr56g#-4N%#GhJCTM0auq-nk~-m?b~C_Z*^VD4_C=IHw|khkH#Oe@peiidY`i4yfJ z)hp(2d`R;WbSB2o@4pU^#Ogpb)^ZK3VWv{@+Y0QSCxBo{XUxeOq&##riO631 z-96iaAsx6l>D6?JqcWo$lEQDtB}*Rj$cwrk&-%tpf;k%%9+xTI(3c^M@L1q~i41Sx zAWSCMuWzgZ`4@y1u|H6P70I?>QH)A_6CIVq@K6X+bgi{(71i*^`t20#l8`87u@gJ5 z05u2Tt1=S{)N(xuzbGGHx*F{!0$;bYbU{lA%RJM1t;hjmdDEE4KX&ZWh%1r&_vVzl z>H`HNpu)b1AqGEhRSJj4!)a>RJfTpT?B6;kK8)#H;61B;H7nu( zGAp|F-O^%)lwm4&mDw`a$@qJgM}FNy(E`1Z{*oxj<&GFVXH=t#A=?B%ZV#0h@mk(NkrqN6#3LJ^(mTa&;Z4}S=?K3`SPG7c3;KOtIk7dPh6UjTV9ie;xrf0jrlyyYR*XpJtNivcQpBx13{CH#d}^#4`ni9IAcK zwoiQIwoN0p)w`MIp9f~_>8gWN;(l1Q+7G`x!&o=xw#i#2DTbKFPcP2q#*FKBQZ4&; zX&f)A=|l@yrmwgU1;&OD|2ZI#+ysZik%M{b@P6h|za}2s8UyHTR-(%Sm*&YQj)=Ex z!hSWQSe!-2T)gD)qkLb|(@OQM?2oj9!@Ru}?inW~YLr^zj=0g#zCBmXE(9?fDG)IQ z+jZ{x``p9a(0N1CERflgq>WhQzm*J1OM)@Ua8E>R1+YRZNw*Dc0ePdFiFYHiA^;ZI z3aN>lwxLVkC}$}vtrkkrZ*?*MqJZ*K7-A^jOXJ%n__pvuFEcd|9+P6*2x 
z^)$7Qb*L^`2eMiG_u*s}Af%PqUVa&+!WoRv%pb`74bupqFzFCkud2M3;p%fjM9!+4{?Mu;zH(%)cAv-xGLe87o7_`+nV4-|gd6)LtR zhSkSrle5Y)SI}r0r!@lnhMnH$SON7?>_5m)Hf6EU$*Yv%X`9G3b}I62?K06iPZ;+ez5fTR=qPj(9jNav z=J4%NPL=dYa$&%n=L?W65|v^FmH_GDJcC$LqpxQ^a+yY2xux&qAy}*R2%A+xVUsl; z?Yy*`;vFx8_JT5)vW+JQa5tCJVIZFZwj?1|-+Umgk3FPG8lU5&XTK17(K!=-9+@DS zVXIm_xHyz4UR!Y31CBv+%-i>#d4EQ*X}EM)1Q$+3{Z~6E1~H*?N61M$>Qu(C>%Uq+ zJZ$&;hSryX2Q?G4WX92N_zOCKDI6#mY+Ba)8sR{Qz6{W#3DZGiT!cJ3ICSo@Q{A0{97D<(rnb#qZYfmXQFiwFw1Y zl}Eruui+)g3_W_MkV(9~8M89##NzC$SBWhaGyAiQLd2b^>$6X~vQaF8YNf=`yUEKv z1f)5t)s`cQGg+`QIs(6m_J^GwLKRGw4LH$4t5-foM227+?xezl70T?PJLfv4+) z8Dp{wJbimg2};{vYWmo#f&hW)h>YJ^1PrkUt#Cghzs5|%^e*W0zQH*|iE7b&M=2^c zQ5$D-y~%;HN09AJQ5>ga7e|*;J429o^!Q-dc@voE}UQ8wRwMJ)v9{ zEGdz|B-9o08omyFUX(Rk|1{?D_>&ZIEg_r!s<{xsdlQD;JdY6u#USK<16hb1j2ZsrPy5{x;2s2y0sw{+$#q%+~OwPUQ{0U89X~ z1mWo38kh?{`lD5w)+Zp-Rc8w!L_ot$hE;+TA+}oen5sf%8N+Cd2DMrC%QZQd+qISF z1ato(R)`5+qRc~hE=XgmQ8Ks*vL0?uisA$pYAjZzdD{cJh?2+%n`c~q&&7P~CfFWE z=}>B_dw);PBAK#nk7W(>D6J$Kx;^cMRfUI=#(-o~OD#{a-fevAB0uxxnVkCgN{8xO zde#pbS2>mFq))O1zYiY}psneB{wE=SPmv!cgSqZ9e~_nptk8nw1$*dLEcuHiBFBi8 z?K@aC1ByfxAd>8k{5*LagKz!)LK*s*sT^4yyp~^0**b&)l@*^7-#oVyJO^!qTCY+3 zx;GU-aoNsDqVD%rXOfr@&SW(XVxprIFK~_fgsY96sa)&nMwB5o(iOcK?IxVo&nlM- z1f&$GAuhgd%4v?be*t@HG6|xticrYd}GvkUT+3 zGGepMvXO;RfxgI4fI7~+>7^-RPk<%y*PwNG$m>hYh^z+Ua)hi_bli6WtnJ*jHJ8vK zk~`-1xP02lD6IA8pEfq$2ZK(R`?qj_P!O<;L#>`NfTH{SI|O!wU_<6kU5__R#UINR zqN>g)ey@GE8LNL@8?%+t0$?G6g*B<*a)$fZTkk(McZ+$Cdh*Vd(gp|n8? 
zuRsLr^wB2Hl*e;uL#o?w8XI`yIlDD0vJBBk6Cow8w{Zu3hSTl##!S*I2CdMVk<}hb z*2sw@djjR#b+M{~KQ3!t=*0URpM1uXFQ#q@rV!yN$eOK>s3`&Ud2XvVy;gT=}%~<_1 zkRzZpEt(ioJ5!Tjd;o55o~XyC_LI=Rt?gM&r6q}Jqu&vsWo7&dTMjyR=V}D!A)=-M zjyD+CU}0o_|ET3Idy_l6bz#_%G-o|%!h)ZUAU+BtCP0@U^2!-2Xtg9dwJUlVM=q*+ zg>uw0)+pnC$Xt3}o&@DTfG`L0KJs0A24@2OIh4B1VQfBS5W|Cy>i%&{K6C|G23Yr| z7B<6#pDB`wXd;8Q^K1YQ8p%`xX~pox#|TFN?$GGCgSy?=}U;?A6$&Dq@1d8h@5AvtPK#%QS% z5(9<<8XIJoYq3>Me5svr^6m9YYQTtGuC2(_-@ABpj-J~XJvzz*xBRRvkpE->gKOf( zSv;>fh8t@U!*jag*BYTwN)Sl%HAgNnz@vr=7R0$>61beUvBvnz;>eGwGhl+qtZr=# z=gKH09{CuwHtRAyc&mep+&1fEu0!3E=bTtzIg#SA>nwD*#iP(XpeNGkkALK5}*$jGaS} zX3@58)3$Bfwr#VLm9}lW(sowbwr$(C^H08t8*$>@#%bKnirA~M+iQ;a<~NwtOMfPf z{1n9WxZhsZ-xmJm;a0o^gk}&%D{xJ7CX(Z^*ia#y#Gk+9@lkBbT+&B8n{3?#V8weK zxnkZ-ZG=phc^xQcDM(n5x%N6hFux592t%1v(!ijo_m*2IP0F!`CCpjkyR;;^>&{W{dNTNTEfLRsA-SHGBoaN|3!zQQgoG$Jy$M zR$0M6E3VYi$XLZR(zc7?UAFut;K$>EV2Vl+)8&?L z&fn;x?3ibNfBa64*toD-+{1yp4g|S8T(AcFefn2Z#a2IrMsVr{raei`85!aS)kkji zaAHe+QK$NF`Y3lpj~!wFtRPLqHq0XpgZVjv%l2@BD!Fe|76SPOORuyR(}1xh%ixd% z1bSM^_L4pnRC=hke^pFhYg#I68Ui;v zZ=%})%N%Mc6gT7oJ*6_%n;WbJnQKdla9CbgU?fUMmcuLp5h148uNMT{JnIRt>}Pej zk5*sUB2WJz`K+)1WZgMv`u#et*1Ovxoh%F{@;yt0%+gNp*|H)=jc|WKcHJ1B)t{j= z43yYjQbe_<=sdtX{zeuKaxXfg$a9=ee-8l5Zx_rTp0n#0+m6^AOkNb< z7T2+B>YeOk_zh%Dh|shqgv4wN+*)138Pr1R8CM9TXzfwlW(~wT1=Rh^d?!tmK!FjM zh@?=lNJz77Iq?VZGi%o@`Ca|@Us5sqrs2;#_U2eq*wvO84Uj$I%{hcm$or*bLEzd~|fiw0{9A!zG-{844mK;D8*%^1ziOR)Xx^DxOoF?D{#e;eU(wF!9oJxeU8)e-8UV%8t5KlGG5fX<2G ztTBr0`r^eX!r4JVZ8k57)6d+AGo>2i=@xq)={k?Ot&C9GCQLgm&^stAU%bv@A0sde5gP{w(|^AHw;!0Dn}hj(^uR8_gP?d}?2tHIRuUriStC4RK~y9}LA;p>WPjF&WITVO7N1Q78?` zIZ_b`RpL;{Df)#?>m;WBzn}ONFJ8ZQes*Sk%(-7(4PSk8yaBn$C#`kKOJPO&LZJUd z#?XmL%D{5dQ@=bA|LG7S28idVB4UoT=W+r*^RVLiVK-2;dMafRLzm zK_ehB>~Kk z4+(b;k-rT@JBLu$vLt8?jg`x9YimSJAr50B-S=y5t+)A1M@Ji12FPpUMB^REf)p4_ zM^`6Nuv!RBy0;VTm!0^)xTjQ{X-7s+N=r(2sDN0e4@$eet`-vX4SHBW(4UFO zS0H}~B2FkK@wTK6kR)k{v%GxH@?0oyNo+qR%14K%#uvKI3ujsqetLpHhHi2STRHZ-lWvqV(N7 
z{NMtpwgBH|z*md@e6jN2zM~*%Pyj9nbkg$S7{6%#i;ell$q2-Iszy?2@w%vVW`Xpw zu=M`dz4y+M$A(ua4Un2PK|sRJ5%;oWzw7+Y4%sr7juYSsshDSfUaRoQKhKd7Z37Yg z&7Z4{SjmCYiE$4srimS9Ulb_z`MWexuBq4NhKL{mScnkl$^AKj0(LEnumM|y;|~lf zT7<9(5GNK2Bn0u6&;q6{5yA;9hzA`o9LyHtr=K{iGT0uJCZFY#^rdJg<~G)G7(}vv zrnJm=;5Pw`vNrXCMr5J}W}F0ZBoLlUQ3{9Tp)@JU`%j~%|It5hcH#+>z0y|$m$Eb43U1gZsOTOX(l^l-NTB=koAyPXn+ab`6X6DxJDHT#V(xfm zmy^_j&7T=cFhB?>8S%rMAk)HHhgX3R9zw#8G^lhaK$N>2*K|9esXXw-GHdbR$2a@s z^I5sZf(zsrVT?uy7z+cIbYUXFTueh47%C(I0|?>$79N?1K2+pT8VyLa4-(kUf{wm@ zz5#|Mpa~Y^1jcI*8H>vjB{3g2_v(=o4gmVSerectC6FBe2&Q|*eS=iLv9G|4pj>o8 zy7@6y8Wt=tO#jE$(M&Xod=D5rdsqi>=s@#GKHx9 z!t3AkzMUQVHvaCB$2g*#*_o$)wC?`)L%Y@2J*c-}oWlU7{6bzo7180Q8~EvhJKqXU zxGG*1?8Wn!CHlM^+ZkP%7yf{hnV0}gYM4-3;FzEt1C5Gm5bjmh_ZL8Wjni9TzMEwS zM1lq63!cgFenMPV;Ou9p;Jb2a3AWaz%V=p^y%1D}Bm@1;z|_Fa zWJn6%k7AsY8wXC43{tCqr}ng&RAwbk|TZ<#b@S9c&o)u&KNN}icy^noq z)d8uubH;}r;YG~*?I^%m0nIvXoLEj4br1l5qdGhuXeliCJ%BBew#&5FQrm;H$W#wYNyyY~ar`VV<9EjS+e*6*+sPW1j}gK6%fAubmXm@cl+2wa9+$c4J%b8;22^ zb$O?kq?#>MGU<3a(kn6AsqSt;rU4#5tiQo^Y;{RYfYjs@xa_RU%EO9c)&0nQK%8*L zwL|+hdgdgaC)Au1EmCquPor-2;%@NRKb0GnYG2x#Sk@I5-KsFNj?>*aPuQwcJT+C7 zGf=sWbAq$G#711Gc0E?-T%fyejmzZwf<^B;>XumI8X$bJYPN6)!cPFzU0c0`iGv|R4y*#~1^ zI)zbA)+K}Z}zy~Pe^rL(z)+0C5QRxLYBo|hX3Xy7bg@m7i8CnaWD2; zW8?bE%8^l6d=Gk#q2)`#J~dNk^<9t389FH0IC_zp6Q2HN;E)|UOg`9bd0I{XaK$v z!C!I&8J4x1K^@DxlUoyLvmB4px#`YEj-J<=r;JHIs_sMl3f+EDbNy7eXD7CM^1H!B zv-e0fda@4BR5L8x_DZH=FOiFcS03Voyv>OjbE2=u+t+v!kN`m`QaqBEI(PYK=khIX z6S@XY{1~81u(o!^F!tQc_hCsH zWPlr`{R;H#7pfE48Y;SR4NRl&(|Ux}v7n5S&6Midve{d1)N4W+b%jxmxno(FGStmaxk_b$YS%t;5=npI=Z~G>qo*$O5%4DLu;*AyViw3ysYGbo z-N&IGmbm>cZfrR8?7cXQW7V2=(vah>O(rYLm~j@z%V+u~fL?l3e?q_I2%x!Eix+d9 zpQxOh)P&8RpfOZ$m1netU(dcG7- zB^Xl|JfcdT!!@d%g}Nm`wfYy(Uzb#%Z~~DIsnFtJqFApKyAp}`2%W{Ndz7>kBK~li z4@Z!Y)D2VTWbqc^!%YevoFmM!VJR*#u{yJ;jpxkF)uymY`jn(V>sEYn~g?=@f zbd0Z2#G_;9S&N>*nWqk>r>yEpYa z{O#*pxRciD-vEx25_Z-Fvqz9`yR81!mc`$d-(C{FXt+;H8y+zY^KrecZe-3cVs#>F zoPxu@7Ql0WN=9JxFje$8<9h=n?&4;&PTIo|_$H@VdUfx{A7i@wrHK?u(H%YYEKWlK 
z{5-uT;al7ln%hX=ge?AOEPpF@BB2GiQ{)#7ID9vS4ZN%?nDVXUp#6heXkHJ?2qUlB z6(=s(^ht}z%068jDGC4^h1%%UMQK{Vfvf)f`siz(K#h?-Z_-zBDXU{8eWFn#eu|tB ze=~e=5g98ky9FJ}7HeZ3jh>rJiDHvgl|!42W9WiH#0jqEMuQhCvm0xl)?EC_G~Bv_ z6g(|B(;@EPYjSU(g|%D^exE&;@Vaw07CdCW%uGj7vG1R!TK)Al-H`XlIm-zvxKr(? z84Ff0BXvmyA+{=oYo4xRldDCECk!Q=?AW#HTh&kOf3=K0T_letvK+alPwBF|W&0^} zQIE$EM4umJnsd@*7C>UvP}e$E!j{rB%p1>=V|K9UuC~}C@!i`&)V+A+6C{Y18v(r_ z(Vtb1)qUFtLBJzCYmUN@XEXGl!{dJ-isS^CS!t*%Os-K}G7}HQQnK&bx5Q{S`Q?Xz zE+sCDe&bOIRyRL}_{B=I>x~CU8JLRm9CYI670eWzu2hOiUP*-AQq6xmJU;}TE|8|_ z&A|=fN11dfrL{xo{(e!nUCj{K)FTSxODM;7zzjH`i zoMsXA58141=JonJlUbQ0<1P^aPr4=e0gc#O*lvjx_ts969C>18`cg3+o(my_?(6vm zTZV^RvfyUU?V7K`_qD5tHYamroZ(&uy{RYLLi=oB_T8hhKPJH#siC^;YfQ~BcfBX% zoDD;MJs_U}FZrAb0>>U5Iw7$VqK^&yA(rMxE9zC?yaFsQE9cRMRxRPBG7AbeBhmIO zE>%YBxgASRBi)5(PSVjc2&=;9mKJB@Bd4GA#+`?Q^cW4=xBwp4^l3IEyTq za3bKZz6vm(ZY)%^Y@D{p>uU#C(2R6;szYK$!eVdx`5K!i`S<`RVTsr#gu}Axj%gHE zya;(IGn zg`%EEUd7GKhBkMc+|osO142}ey09(Wbq{bi^>h~}KMDf$tXQ}kVzfM5?i?+;{|9pi z5XH+^+{`pPppTV4dM51%sFh5&yKm5<-K=JS=d@?b^)4nz$9#ySYX{w;eXa0$8Z`9cGLc!Ub zPn;9vJtBQm#X7}f|DUG&Bj9}kKMpWL>bj-=6pvF%wy?G1rte)ruk(x5?N`yjzRCVs zpo3eEb2j?9=lex%9C}Vx1>f9DONPd1>*TSWG=ql@|7B6Igl5ThaGNMg#UTKB5Z)Ts zYw7Yb-nR^Cetm0+T&gr)XWf=Vn1NDagLm5N=tfv^#)>@Isb$v!X}ck2jhr-O_`7bvwWH0#?->T2oAn!;1VY1h+hd!AA^5I5&)mZ2C>-{uA zP~w2F*?+5B*`l`BfOF+T!XjHTryYICOep1b7-U!xUqyT0++yQZAv)43y^L%Q-1#sz zojcj8%mI$|02{fKZhOz9tJ7&K`pv3}$UTC=WZHtJy+fnY*0}54XIH59^L;$IdP98~ zpVR|m`yP^;Cr7{OQz}zwuQ1#iKgXfM=k$d8YHaYplA0huoPLB2hYc>177rGc6<5aI zcgta|a+p{_CZ(&Lt-uuv9wj7v(f0HWJDF(k$s-(t-!FNMODF5;VTTQ;QCIURp64Ns zW*t6kyX8ndf!QC;w}ps-g_ZF}*5N?3LI;CxFMQyrvBByxgl1=^Y8$PHIq(-%h^m(G zs$t5u?TZPlVm=x4@~S@?k0Qdh5+Aa^4uj&ag_T75FZ25<0o?oS?bEM$Q5Cg+W8Ukv zXiu@dP`tb_FbbJ0V`<6{1FQrbYF-`Jj%xxhTuEAM1(I`Blnz=Xy;RDrggwj@AsWX& z{|=spUKzp1GP@kWho)>sK3M&k&1%;(;@c|c7oNr2?GEgYi+Or()WYZMCeTLCeqXsY z+r(m2RwI$Q*_Xj}o_eb>IJ6lZLK+UuoXJqIU_BIRT4+7KQYgqhk@unY>sRv)WV{Hw zXvND~5W$fY$;2>HVkKno4JeX3;U`JY#pXtUtOZ6$&65}@RvSa1yCadJmkN)U|XE(Ll?F;6x 
z_z5!OLg))F)r{QOj3(-VdKkEi{}&(AhxVsNK#v$kqP)55ZO7(W*E~5Cf-_AIzH9Mwfx{vI^eBN4%OEjcrq0rBZCL`T& z@qcmt6uqi74QC!hTH8o+I~bxJeXm^w&C_6%X}h!CQ+wIS6lWqCebwB7_Prdnx=wKkF}JI>BXwDLHfSK&B#s3wDdxi z<Qxl4+82~#qA!e-ceNwxF@Zf*YP25DI5;xP@ff8&*W8>-CqScgz89ks^2 zgz(^pJiMw3p10s&UP#$7A?gze`8%RR>^x_ol0tM#R_^>%{~FF%b=iXj9AucR`hq}K z_m2O~<;npi(K-+iXIph4Hef8$l7l~6VYFQQu@%{wIfvT5ikT0(wB2fkqsi-jESsP7 z%Nwe3ob-b7l$2K{4I3bzKAK1)j%*43%s_x}a8u3u+ZI+&(39vmoON5v^gtr7!;MGV z?r({mc-QD#_ZuJoz{koZ>9BZ>quWLoBTY?>1Fkz zqTN_0g|vk(J*xJuyaV1m_hQj3;l6)D)a9?asZ!XwvCHq!FU(A-k(%XM>|{xaDCtit*eD z;QEt+Xfsiffw45@Aq7}c~%_P zeE3>r_|2EqrbwW(r|##kSjQsy<%}L@pkSAhL$O~3{T*erpkBM)T?bPfqw31rqi%Ak4jdBBqZ%>RwxGqW;tasLm(|346XcUP^Y>lTg$Y9gW}gzar=DOqh0}qa(bfeZQ}e8HmOn0_uOvR+skahUhA5SeFM+h=c?ad5I8XCt{9H2yr>CH z_+S$Q{SypON=j;ABJ@Z3Lb$T32yPhc3GIL2=YaeAC&p*u6huk)fj~s)uZfa`sHBkk z`eB=a{svza5b2^OfC8D51So(e01FEtFm+@14Z-I_1%a?{T?yj>dF|Qf8=pd%F$41q zED=StlYWKe-_VK#ze~fI_SV0^<HxGy@?HH+5ghv7!+0DkzF9u%xnE++|Jg%WCA+9N?BpL31se}N6?883V zvjEUu`#TneGJ}2=KsbZ*uC0G70Ohv^h+KoOuj%ROX-((g6;!z)oK(GV^a3okw0J=D zuJEdW>+B&D;LqGmvWd*=+(hVwe!$*bd4RxWRgs2}pC+X@ZU{^0t5TRK)A8Qzjq4mUPU-4M<eUyYDP%1yzpkOMHTq2Ddv=!$wO!bp9zeein$-Tu?% zXhU)XGbukiSx7k8Y zNk506s0BZX8NvBLZa`=FKu1HYybKmMqb-F z#+@e_H-r|Ute*vK9vR(^peZ%Ff>MkhHUJ?&uwaPzgo6kO7Lt>e7nAW+5NF!3bsBA zcRfW%d;+9EU;s4N4}2CjgOG)!KtK=x32AdTEmYGh3Dq|OQy)B_2lS$##0a!D1N^nh z{P+O0Hsl}qE!+jvvg(gS7$ARuco2RM2nGYPQTdVZxdP%i7mR@Iqj-hvH>UX%u1k;d zN1`iT_*Z#X{%Jpi7+wAV=?Jp6^i8A%@hljD+gExI*-x|Z8T@O#`QM$}wIEcg{^_n1 zP$}rQFjLMrA()BzyZsRSV82>8nH6zpM?;t!NjyXdl34%}iv2kEl4#@9o#4I9? 
zM?*!iHHptda9NXk%#dZXe>r?~^JmPE`qppSVMeH?)}JhrDw3>TqAIdY!8=UKF+(Jc zfNevhk=bv>uezldB1qXzL3C%Q?}oQ%0?3$8!)AxRG2U-8_(dL_%%5doJ1iqf-26jB zaOb`Sp&uG(43L(g0{@h!N|~~M{}eeDMBu9K?3Zn5M1jTAuM$>`~5~f%|JSv5mIaH*Nr;=fKr5ae?O=7`;+AZ z8ee`M{Ju73mV-m1QTqnQ2fwZH-hh4ht_*(mE#GHRpI=wP=#%{S4dYpXppIZz#0}G% zSv`>kxP}0XVdG$#4QwOiFB3T*m(6Q-X^3Piq9xMbeG;qCw z!>m5Bb)hUDKzk=M=K`8u4kU2<)_vXQWy2j~M{Z(u;uU$!+NdmNovywvBsm){ZaEoO zvaAdqb=#O(;D5C-=w%Trzn;ekZuOwb^@BYAnh_U6kV20T%YPEY3Fw`nPv@`#%o+JR z<@%7h3$&BwhNtZeWF64@7-h6~$460lbCT3@G7ba$b~@r~RIBn!AA@v+`Y!SX9VI$m zqj1s|e$2*1tvEPQ53;uP_Nzk{GTLRW7gng_s-QM4dj{gcRD!aWOMW%hx1^VnY2ey) zJLzZ+t$KY}1u>fb0oUGh?WL8ZeTYz)yR{U3z1peWQEWQ7h>Sj&#qQauf~+hPxu>01 z2py-nDQZ8%2}9tIk#iZz95^4u0v1zE>eQ0}pb`t56>=^i;ssZzY`QG-uN_d6& zG$1e#-mv*fM~EzEeef9{Y8vMOy6**T|}tW1*x z*C4hW#Yn*TmQ}DaMzPYCfkm?nBpJE2V$OAhFkH^tyKcru2!a#Gg zy^FpIZ-X@s!u-DQV2cv%>4e9keNAW*tGOoPQU7(2?dAJ0(A;ahsRU?4Z(WBy^W#@D z6|xa*+d|7q4Ti+f_5-4ocE6mb%AK6Tn)R?J7XsIo>n7^TTpD%&->eb_@JV8{scH4d1#0M+`xyV0oK?riQY8_~1u}hel#q%!nQ*Lxvmf74?6KbXSW4XEQ%ZYB z&3x?@DS@D2I|bSpIreJBQEW7wm?Q|AlqZg7s9BfMLyDAbbZd)jy zwXWcA7jToCtVhD7w~KNAM9O2I1f!S0I1RdNIccv~j@$dyzD!Ioe;DgNaQIHG&U78) z{V`8S>NE2cOGNZbaj_MRuHN9;)9YBh!!c?vRH=4x5Pi^nn5KAhQ>tW%ypmbUI3S_Tf>`CRn{{!~=8bDr%p4z}V=LhN*toScnQWoEooNg< z<6LMH3nY6B$jRgG7V=cKPq_r`j_yf#FPK*D(7-8Jr3+{ebT;x>Vh0t}=G z>2IK7mtWRlH!^^iCow{(?}ZNN@nEG1q1^mNL|E`h=J*fbD<9b@d(CaPZ_xGBxwD(okQn8^0O`}@4R%rYP>W+o> zUf64~<0>TsnSC5Ut|aHdjdv(vh@((IJJ&|A`JLbYcfg2OAX$^*ftjnD_mo(^%;#2V}n{ zsm6%Z%9K+#)16o}v=GQcPfSE&aI@?u)?{JmhB2}PDR-d7zMP=2kO>cw!}j+9Y_;g= zCC)=bwXW$;lpJI+vUK6*n^h^`+n`u(RwLBdq2vy_-844y*6UwK5;%6M(kRm7BzwR0 zrL7_VO6`5P43==?F8#U+Xw6VmXH?mJ8~zc|SAS$4#A`A)T+MZECvRDqzMjTyQ<7(m z5ae)0YdO4%;|(n;PYddwFt#8Ofu5iT1w$r}Jm->&R8TTmHX^Hnw>nxuFB+&2Casm%*EN zo%|SQ;$uWtN>BW-kEfA4JXO_h^8HtTC|ZXJ@r%Xwm3l;;OuIoIr)|ZDx=jQ!CC-*? 
zUL*IwRE@AB)N{@~i0#@~$Wv{vZn5*`C>Fv~9)K>b$;K0e!pSMIDeu0yT@49IpyQgYO) z!^;b-g7^d36ESr=`q+!`BQ%#JGo0xo*yeaH%YrUBh@8l;Lir|zFCIK!cGNfEH-W!+ zJf$~lO0&DKS~IG0&}V{+Bza8jtDy9XI!_oCLRxRBS4zvkpU0bfew|B{;dhj=IB7?D zV{ID-8*BBLYG-#(r@zpTNXs*)VdP)Ce(_Y%3+7;+3$sh07J#Nov8HE9;bAe0{7X%pOoj z#Eub`%F4KtRkiruB~EP4rmWP|>qZausJPot^(OOc{52z;Zh#VDuCpaA*=P~@Oes)Eu zC3ZKP{Em5dd@|cMtehiGK2pjSr9t4wF|)an-_|EdtM`82ch_drF4y>?TOxrfH2(@@Q z`JfwfJaCtL0@9_g-tvlmc~CqH?Qh`kR~2lyAbu`st$-cd)`hVk8u>d)0hhM7sOd-T?4}~0s;pcW*RCAArwd$QeL#j?Gc@!WaP@kJf*w?^ku4J!NDF-x4UKkhz za?YI)VWB;o{XToS*#r;+5bsrhJ-iNS0%OM+k@C`eO-ohSVYZrBNK;t_u zebEVQe-aDSC)prAuf(i*Uz!MAcn9iC+(LqB1|-m&wmB$EdSal*P5|=<=)Aik)~0sC zr@Tcm`;&ZVgc!PB^w&*raa;Jpc@6Vx^!_=k1Y;cG- zM|OH=l}!&reNvLa6<=YsA%35-Fd!*|2oKr*Pi z9)s*b1-|OV?|h-4fXi)AdBh4<|0f#p0_4@zz5UN0pjG53$lNq>x0 zHshBxiC9y()9$SLy+`gbh!T7s&9sZ!$W=*qNLBfbzp@}g5~Z-e%Iw_r{+JJCGoz1+ zKa4PV3bs&Crpg$%dCR!=mTE?yfHiahn|aX zc*Q{QMym*nFwW!U%aweb<+E;nRmX|T8Ryfu=a96a>5n?U7l`d_Yz4MC@ngqCBs@eB ziZNHw%iebQN|~ANnQ1r|QqUo}b)eVZN#ht`RS4Ij?{$nri(a?B2G4&HU*t zjq}GUrN3C5@?ZG3C3{c7``5@_zibZ^s#|3MFTdMRNw$O0M|e8(^pwLV zHQPhIK2vtX-||eIeVZg!IbRQ-+cW& zfMigf+HNO!tH^cb8iM8YW2nKpp0nbois;Fij_4oQt(+#!Nsmz39b=x)8GD!tcHhK` z=-I8Lp0p4I4e$jaZB^R*8Hsa92K(yax9ATn%Idd`(gU)outvouI_qr}?REeeJ+1RH~pBvhW; zl((U0Nv3VbB2TpxaN-nr?|HG0Ire)6I)7dHD-4i0k@+Thl?^CZu~^{UhAx9y$Em4s z!ZGOI22?)V8|<=VU-Tom_6?Zmw_&5egg_I)QUlU)26IK*l15LZSP-;g0$vFAAn?#;%?sYWz5VsXFg3KquaL{a!bviF+)7Upd# z4P;A$d}bjx4jWvQ3L$8#U#|6dh$SgXBQ{moQY>W{M-$1M@%wE;Oc<99SiJd(Bq+b(gv@cvX z7mbpKoC4OC1?ef4S)$Z13Zw4imbn};fRo9%96J<0#U;t!MOVscM!k0i0~DXc)Q9$? 
z0FGJXjt>&gA&!K3GS!)$Oa3wz4_EW|;Z1Y8UppCkLklg-<~nWsI)j1pO}i|?8*Tep z267T;u?vHC0^XG_ILSsv|9I~ZMSL>+oTs*427UfcLtYFN6EV^Am4&R|!h3};Y-B56 zcN3B?y?qE(LL{ZBf_~Z33x0H|W&NdN-B?Er)?@jJV&!n?oR*AtiUr$=3mx@^@HdWk z0*9h#1m-K1+YR0j(AN?6N)vs}*AhrMM&dLD8wI~g=N4+QY54q7&^r{QlC(Ys^itR& z=9K5jC%!EWV63g(da`iZO?h|d_iiI^#)8Ctdc171-g>!3U@ymMXk&PVc!0S-x`B9H zH(~m*_+dP~?@KwKwd$~1YjM}FH9!Z9`a_`RWBl zIJds~*2!+ZzD%Pwyg#E9blAqJ=WOb?%c5yJ&@jJrvj}r7H9|M;+;nND2A8*Ik|f>J-oeSxiTg*zGc@eU ze;1K37cUZRs3dv_93HEc$NM#Os(h9@Y2Q=l(fH+yl{9*wKS;mc7Ywl{&3`tTU8xrH zx~r;xVu%N9_j8c_6{(80ZOV&g@$)y?NZ3X$1G0%e_Rg{S^iA7Oa0m+5AD``3{hZ|F z(^)wuzvs+b2XEhiu+d;}qLXD2^+-zKX0$HfG4&CHW|N@7LWauj^KK9arQ9e#A*W0S z;@GJ0qTcVd&B@f#$YYU;A%j>IP*{kcWP4~{9{c)QqInn76>BO_lXeZx^43VK`&k{$0%+rd#aV^SQw-(9q6^{iFbD!QMO|7DKh7^l35!ds;cZ zBNR{&@OT!xDpO)U8yfBp6YlYKb!QZKb!BeDYc6Y8@stDX=zpu;OXGpH58M$M{hy-{>JymJ2Jj z20!H05lZhFwbPmU3^m`A|UtA%=b3T^1f?2vAKb5%;ci%b^u_w{s0t@-*6*d4|^)okr>{vS8kPC2%Bg0j@>0sm$f_h;n-9U1lfl_^SL3AMvgjpk_`kBPR{!8Z4S@sC! zzEn)G$}omj!Uv_S4fmSzi=fRcoyEy7HZUvy6UPOzA^pm7?`h5FDo5Bfl{{`k+2kSu z-zLZ!l^u)8k5eVTf&R6wNUDuT8a+Jiaq*EFs6d)zA7j~9)tpTa?ch3f>}O^!=mxnL zz*Ea6W#KS7`tItEO`Cczow()r-BU4BacSgo?W%Lh5>X|jDmhYa)M_^#3vSqI?Q6?* zHN3wI>7yV4YK}$M2htNY5B>d{@H&z3+T{V%Ngm?z_R7)iRl zC>X+3T6)NtbppujP69RquY0oJ1;@VMVphPaEv+Qrct^sdi6=@9({rK8_Vx0YQWxMSJO%zlJAP2paNS#EkcSMZOG?zqK^uw#=I0%w6wmN20eFVKzeMP4%9=nOK zLlqc!A{}}@o=nP>FZ0OHai^0M?B;3~yZ@EabUe5SP7QUErl1553ea~k210ZsQKV5}ThGs9h1A!kt%X-g9CT~pyXOwc zzU34b_R+?Sjkv2_6?^xD_wb|P1=)!=5E59?7}}i~$9$NR^LQ~YC4gOPU7P%k^u*AW zBN|3g1Hj&tmg`C>SJw!dIA^S~#PBB`N`chAr~TWk7%ZPGhI)7Y(=#4;I=*#95VqK7 zG3a}I)EUz(%ax6s|A>6Q1`C*G(;8xP!wfvXSB4vO`bdBSJ zw;>fe63GFq!DDVx6GD-8He)^X`fE^ODhnC@IuT0%kF)6)KiyHx<>53%X&L0DdCp-Q z%)ZmDI;xK$wgH7m+92_+xg0 zTP2M#l%MaVsKW2(&3az9#`&=W&ZwG)rAn4O*v<_%q||Fj3~r`|CCO*T1vG1^K~(59 z3*1$i?K{#+{j1_P6*VAbf8v3uY|>luZ%@)2C$#z33X`?SS^Xn(B9tplH5q3ylSHll1kj_ zBQpwjA&N-DLnHt`QY=>R^v6iFZAgxU-P56wkC{YI zoZ+K}pUzU?*xmw8mz<%!_C?A{4*CkBf-ee%fs-VkHxOvhS@m+IIitKxi?;-PHqHW6 
zm`QRr`&l0C5oZm_pC|IvrB&t(HxP|$Ufq0rF=6YjpQ}zptzZ#~i9PoVYuo#!U^7wh z{8g|5dlUf`EB(7BhFpJbe`@rRy364`$aIqa1K0$+@YyXo^F zHOHqDf*lKO5=Jour0S*Zhk6*jVFdyJL*hZ=bY}V-cW5&S8o@Odqhke)$K6tNemJ1e z(il9m<8s5W@yC^R>cTI%Q2Yx|C-)+l_`GrsD;%8*1w4(MSwJt?1@By zgBWi39rV4X5K(Pks7GSuLS#c}Z_1e6M82Fb;Hj0$299gShKW!ZL;M^EQmPIg#W0-TDXRvp0fJdP|Tw z*;)atlC4&6+ExjR^~yX7_-VXfd!&@6Q_CT5o`FCqmM;s76>Jt$f#FQR1 z2NOpfln6=k=sble9r zjDxryR2QSZp?Y>3i{F2hkY*4qccYORm#?B!| zv~XLuW!tuGW0!5)wr$(CUA4=$ZQHheck*)bZsRm=^Y5>$%zSfxV_3*YxuA=+?}NCD z4c8_)Oy#z2J-{QR9gsVoJmm8Dm0s|UB;T?Hy*ko{p(s2tI|WTkWj$&t8+v3^f3%+G zf}F+@VXfs!^1cmvWlX2`4#*{)kI&yJfh43grxqH<5w-|PhlPXGD@{jH3gt$hL-3bv zcrOVW{}vQ&FmRVd^p4$zeU>KSNG%gI;M;XTDkWndRM$15rUP|UL}r_Vz$aa&;*pK| z97JK?4IQA;2FK;bE`!oF>K39w?zsCQ_@*#}heaQI3Eo8Wd~!)b_3W#nosgZ(P{9HLE(8qk18F3CctYms(+~H zSiqI>?f~5z)>-oiuy5VYK_d;I$IFaB=4Y?T>T%b-RVJWe;q3Tq5s~S4R4cEhMZ<^& zIxR;Zp?0`<{F6VgtMMAx9KZ_qV$pY#`EDhzlluTD72vE@KpZSF-28<{kZ4DuL-VYz z1;O&KKZz(mXx{!xFSg;j-6c+KYUhqjzyhz|{Op|Z@L<4nVr7wZ&S{n`nLy5w!C2>D zVVM?nv7Sc6G4Xnta%+`N2kkW*9JFg^wy@5ksLget%)lw)DgJ|m7j$#uF**~+Zx%TN zI@;hS>}??@R$ZGz!Yjoe_a>kF_-Vhak-<_`Ef`6oo-Qe>+ni*QK74mw?cvHfi+ep-dPxGLc#Y7ym`5=q9P|La7Ek@tS7@ovq2Z?oRLW|pMX4&|fYoPW z{=1Kn(V!;{KdW`1hQpmtbbghV)SI#GU}q0TuyI_o=GqXcB373o_fJCtNGI-U@-A(9 z1y(RTCu~?7FS(c6*>+)hIXQOYijr!-o&vf$*D;zYhPE?n3R1UI1;8m5JfC-OsxW~n zx>WCjrSVchROqhRW=zzQ?>K%M!r2yS6mdE4cE7-;gwXf_yICjUJKBN;OA2Lgc;?&3MKv{ktRbKavnK!F0sk`=yNBM*zZ$bszO04%?hU2Z}*tRe1YE8-qzm9l4Ku#U6*pHF52i+Y$QX@`$FbX7@MykOuNu6NY6 zQAoGc$$`yH-5#wMp2dg{Om)ABWELC{#6qm_XvV#hH4~aAY&lEQ(V~?`_oZgx*cBwj&$xyVFjB()HXHO&cic+-ln$|rp-`)&|0s3^w`%FgcQO3FLQql$Gd)VU$AYYP z4!xzNgcKjQ*X(u{$Y~x>wq5U1>*S7huqvd?yzg=I_i>I^Q|0w#DLnDGyK2TY47)dW zdK}avAg+t>{c`CweHX(oasN|*IwR*g5C#!BEm z@>Wi1Q`FlTk4+N>z{qQ+xILmX*O55*QG~eOAY}FVjm+X*v0-@GE%EL+MlD0cB+Xb2 z;RLPq`l-T5=XBGIt|PlXEK0c*ksu9JH>b9Ae7t4bS>JkUCu~Fg&n)kf6q(XfsDjP6 zkkG^6-A|D-`A4DsHQH5bJWDkqI?Bhn$n@DdzITKJNuE!wZdWPY|HMAVjY=GmYY27V z&Vhot@gy~x6WUK`m(0t6^-f5)m-`jM<`Kx-Z=%s5SbBtV(ju!q;x)pv&|`m4143DC 
zcZSGg^hz8imK`zNr%`yrNEeoxz|s@Vud=1=bv@4JW1umIIIUi>TCP`^!f@c|sB2z$ zRix~UWAMYG)d~Cc(&oa_KeZ+|7oGvq>s9qQ$$!pk>r z!n>JNIkua>H?y$iFT-V?*-aKp?dA5`A3yK4ftd;4^Ffm$lPtJ)wzs3R?k$8Dk~I^^ zgeovDkks2xBy#LizFl0d7gzpZ2cAA|i`b%=&{oF7Ffj%Ab6$K|K-9pna6>n|bA>eb zlz#`)7U_I5ob6=2oBh^)92IH_q4jP(?(C(NVs^hh| zuHsbFC8|x}r%S+yt}>K8h4B?jSM^SZ4~#0JmMr{NgmUyodSX(9mI~$Jp4#itJwl{@ zZI7ysf31;$d@Wq{sp0Dn6-Otq^4zQkq69sI-&kCRr&6gtgvNnQn7mn7GJA%A>N^Pnl37q9 z=q@t@!s2{e+LL)<5EqtG&hr|m(jwChSu8{Yg|Q0^(`P`|#us6}cD9e%%fW2#*bS@E z9(V}1^VBO4OCPfD2su$te(VxLTNb5k(jG=!U#{4Y4}?6CWqJ@OJ(UC((P5ZU_5_ld zIR@pG!Q}V)I%>nj&$*c3gfp+p0#LfBCDdwW~(9)m6?j9kX`j0Qwz>7 zrZ@1NecFpbovJd9?xvMm2J}U#23afcjE*#Na$34&?-q~QeV>no*$i|HtRexzK<5HocJS0oDPQ* z7kot@0$6-oDsg8sB` z?bNi85IW)36u1d* zj>C_+5WbL83pL7{{%xb!GjB3*Az9eI;L{-vHrXZYe4x&lpy z$imJ`pD`aP#{WXx%&^R&+tFzrWaL09qkA02aBA`>w295e@m07@_xG0D1vnJtR$o<;>#_+U*z2?9k` zg?#B1N|9|^3eQCDa?Eu_NA`e!dO}QGG3U}}Ez}?&qy9qgB8Pus%)YCNrLxl=ABR0c z0Rv87&Kd~BW_QGXnKJ&e#AnTLS{5LPa`!i>0_{eIpOD%OW5`cc)&Dz9LXhAoM4Bc2 z@)yt+i9PGTwSPGOOZ$hBos;!Hbsr`IW;SNF|GfS;zKeyOmE-@6?^5yLP`>_^RTxWb zW{(uI8)GuPl4?kDF&?KgI$;=0pQnM4NLAJx(@ds8q-qGZ4naz!oiwhw`cw^n%bD+V z_1We3Th%t3@qYPi&-?T^Ne>T6iDnBu1H(dqM8O1ssRc{V%qXBxCx(KG6Cyq`rV)Ur z!V>{PVBqA)eqLkLK?*P23kTsv0aXS@kVK);2mO%$W5*s3@`tjrdK5q?9w0HF1I(Tq zK0N>>`Pl;>0Z=^O6$Y9pgXl1nNXQ_5FM!y8v;aw`aMlB923V@#H9jq&z^zd2vJb!r zWh3fL>}3Fu0tk#DG@~EDsSy8Y2oOmaXb;R#!QIXO3{E}B)juYsrvz2sJff-oFBe=8 zmX?B#cX%=X-!n~(O$t*247!a26(CrAmT(G zeZDtb?-g|%%sb-5E`T!`p!+>IMr9zd!lXb@Keqf^IQ>YWAP^>TdM1J&7*8Q(L6P{p z2XuyT2&iuIA2Y5$VEukE{e@%P&Hs^LX1TEPccB=z(X zDxair{&c=ikV_%F;IaOT(1spxCI<1(#m61%?@)FN6na5yCDfNw6e#NHxAcc@rPl|K z0Ms9Nj}zcY%*f=t$SqmOjrhzqJP|s8PZ+-sH-tP0-TsE)$q<&L$Xmc)urLrE=oj;? 
zbvRNUjDdp%6p7c_Ufrz&8*wYb|3Z#E_-J+x-!VQuNZ|CG!`Yls80d$`A8=d4WI6&x z90%!_-|Qw$15BA`2j>`8^2^T9Z(7vj@b2_i?1$c?g#foTm^0xhX5bfTFag&7hhPLq zOu)!5@oVw7F@>!k7#@fT0u)1ASK`KODAaf3F21^;3R0|hE5EJbfSytOAA1T&oD3KW ziklF@@C5y7&ULOHxFPkW%=yWsKJHGmZW~6RYL~N?#ra2 z5~$&3 z{Z|#z_v66^C(fMjx5c6zEC9&201I_ZR=6tstF*T-C%HO*5pCLeI+gVaeM$&PlnIiITm>iis2-8=Ixh6-pdD=LD6fT1fA-2pAQT%k+R@yq ztt>BaBcu%5CG2<&8ZVzkcBXA!&0NA4(Mbq`{vuCFm8M3cuh7jm+p1z8!>MFW^QM%&Z#%p3~tT@|YnPR1CK?ktcBc2uBcv7Wxf?vX* zMwq`J5#@-RWUKvf#cB-xM(SlJRh~y6!7M$fWrYQvxK)ifp|#JN<#->>zm5TCoY>gl~Nlr^6Bt%Ml zj-_9@+Jo;?31U5f>58&b%G00(68E;K=_z)LAt~tIo*E_kYJ_n&f zzxnnp>A1q^xi;%evX*8r^^~!dHG67L=&Uy|E}APH9NbPjHRCn~qy4rajw9bZuOWHdu;AyYOL1ukiOYoHXB}P7b4#F&{OA&%MTJZW$-7&&1`2(M@D_ zSlLvmtEK@|gq#$v0a$?+F^wKi<5>$%PwCP^cPL~~lZ{-%SszdHKKlzX;fKf2c z8!b|`gZ>p+$J9)g62JT#TpolL}kLmlmT%)UJZ8HYB z1(fG%H9`Fp=kuGh)F|g;P2n~!w0E1fr6i}_a^CNOVn#KctkqRE{1$9h4P1g7iAwJe zS)08(gK4#NH)g_zqtwaGGwIGxxT15;Gl&^^bT#y^s-jFh8Yl9D)W#{cWf{J_2^E?P=sQJ15XORqSoVhQ%PZRoL+k?1M9of zSOfXbDCdwxy43y^dg{c#gSpN~$f!pZqAWtJ<71}t6h5&l5~ESW$mx0+yp#h(DWWD2 zJl97Rtr7CDO~#L{1Dp?!92XBK!7e*z+M@PUtDIb!(bDdbki=Se-^vj>lKNDwt+7RT zqnM#kNR_{pgO?u?4o#cgK!kc@xMPX+qfwp2UeuSQ?;MrETRS_?PM4A0DAw+j-^Y&kj;zx$M%$U5n`8??3g#qFN5`>j%&-TTuJzQ+L@zvU z;a{#~A(TqCFAtB}93Z(Zto0-V&odJC{-g3*sodo&&l%XXdQ=_!f83~|i$G;-eH&gM6pka{=iMSBM&=!}vf!d>61X@MO$K7&zlGnraieo7 z)ND=lE<1irh7353x3u#|MCH?%4g2r39BMQQu`!K#Hs_eBXL6W)=wiyGMR+%)@+h~cuY6&1zYC##NfE1TAhzH9twTxc*s@}$HxFd~E&`T^nbzfF|dy2HQT-BHH zTSlas2!rH`M4YttE>G1CP0>p9%3p*XclDNx4?ZERoR|#PTW6%!tbH9a6yw5{1IUpq zDhfE8d{mm>U!zr0YBf$wK*0~#Ovz~jBUOq!pronS=R;b85 zKG|oO*Rx#0OyTXK>>6fMFmf4JpL!P_hMHN|IbsF8*2-r5rTj*wWO0@Bd=>UXTOj|X0 z@R>8wzuD-GWA3?a*xJorsHRXD4B~c8(0g6Tq`QB~c0o_ahu5QCB!qN_lfEymU|2t` zNHft~d~UKc?bc34fpu1mk&%cKNh=qbprSMR@Z6nkcYbIc)toiN@18#C(mKxL4&JHj zZMQUZ*kRJ9L{WR7SkXeJYRB}P-HXUBsqM)V4TwvHEY#^#OJ z`fDCc^4pO`?Ky-qG#vu*6vM$EUy+iWtk5D;gAtFXXfh__`f!!i%h6ykF~9OlrWRRa zO2pucWc#u%OwwahicGSsZ;gX=mc@7OXola!qrjj5J zs`c9V;az;HU!Q11hSMuAI#)fYI>YP3rFPKlb|w%YdBB*ZhP!Jk`*hZG>ZjX*SkU!` 
z9mS|6;3s0coV9gq*iE&J?}#L2W;n?7%HrP4qYfxaSt0vD-p04iK+9cDO7ju{v9{^C zMGcGHS)(PNXyeu_nnU_ASXpseb6M7YF;TvC>Xu;*$&9p#jSq@TDz|~h7=tsd@an9r z)4&s-5jU;Cz~$>FReO7h2R^d8khU6uoUv`SDZT2vl}N|m{v%*gKI0AgzGHo-o=&33 zBm8~@EW1JKJm@65qG_kFNfjq|+p{v*>48KeuZ2na?p)63@v|&=_&?sPZ<-x`{CC zc==M5yg^M}k4zKhJIe>_7jBP&Q^q4!TSiFywE5KkSJp~yr~Y$aTwM~fpu9jv_+Y7} z`zIWYIDX`&U?vMQ|K6u--7;RF^9`~tWq@>$sP?@B>yN%kTb3hmXxAG-GO6{|Rn5w3 zWPh%Zo7bhbc^S6NsUzmeg9ee|?}QN2y9g+nFd8muR^(J$pRG;ZXZ-7$kHwqoa%j;U@ZIhvJ-ne-OWTDpw+1@LW$ zeyH1-qIs^K+=$#G@{OQp=dKJml}v6Rhpp&eN;YTZTg zZkLd($85{@DHfOK=tE4)ko{eLn-pxrqEZ)RSJ-X!15ayF6XtXeVjScweQJSC>+#(v z*QB0X^={=~d&IXHS>_E{eBL$P;|y1cYtNd|Nwt9)!JouxpDw@)y2aa}$RVSsoD)=G zxm&*kZYN`peB$0Q&K_lyx^Xo(r?dUIZiQ^pGJ)46vH{h{AJo_-0tda_Bmn~&%tXE< z$Sp&U12LhCM)aFdG&*mevOV*)y1swH=n6}f7g}Z}$z7_OM!R})LCGZ#(nFQ$kE7`% zYY>rlvs*|0*&Wb_6iz-Qk+fB6xWEjD*;o%Bx2H+)T}v{ zzjaBStF_MG_)g}2ga0KBV`cnrX&4*F|4PFcIsQM|{(q-oOdKq1|K~JJ*+Lz86$_1u zuD(#w0Wb|11yT}e08TTW)SN*$K3F7xOM;9ferm{A5C{T-%1}Vw3zh;^08QaeaSI_P zP^tm~RiQe+Iq+fZ0H}~4FqIopGiCGhk?+x%>{M&_G53}Ggv=L^#0_sEpj!ciSYdpH z&VTBX}wh z&rpUwoN7M=&G0u*Uj$$e16aKw2?Htu;q?G~iUA26$>25v5}~lZ6wfen%8+;nB9OGw zV5;zf@`ABfd;t7Vj38+ZoPwngCHlea&I<-U`{c!O@hAHch3uo#=z0Ya5(EW@7mXl(73Ag@%l9nxb=m zPf3R?2IMVZj`NOjL-L_WZu~gj1%pRdaRBA$3jQMQlW|JFe#8f6LF8E@GJsVBc0u+@ z*7_G(RHwv@NH3>5@EshIl!M$6Ccwm^KNx%og(nHjGxQZZa`4?ajiylpEhF9qYs0AV z@Ueo}xNr@K1w5u>iapFf3qT8FGx15Mz70Ld3!?zLGvxg-D8#6RIruf(8+m?6jS`j? 
zhe7^LSiSF$SEq=4zrAxX5cmtmnh+s59B-a2NAZY8WssYPa0oR*f!7$P;wVNW%s`-U zJ+{&U$7Y1rVmc?>R(P)^QQp1^Fh1{^4ZTXm4i#X+q<>iK($7JA`1jDPKmd+{&o|&I zzj^XULwrLWYe2+9kOXLLjT=CmD#Q?CrH#w_>RJE?RgDK;)$$ma!Wd-u4uv4W=-xk0 z5;BvxRFfM(vUudh;2eTNVdFS0{)?rtQYG)1JQ|;l@tO-DsqkOsA0MW-pBJ*%Rw8%| zW~I%gycNrj{~O{n-}jYt!9V$O_b0_+#oyQ{?sj}-29sk$Yt%1c6t`2VDRDUBt8E2_ z$=`rk=L`aebV#hgK_Aho$-I zh0~eiD15sn+sSK&7U=2&PtMHcc~ggtUDWKkN?Ir`=aX1ZPGL#9CYu%&lS0cBONYU> z-9rEz+lQN*8MotVr{n2mWRTM7!iP+PPQ`DrAW8S=g)U|-!DzcAiwXVldNirM(_j`?=?>s-Se%&+fGkdL}!1U<*2M2*AQ5C?vi$-Ue_jowZDRQJoN+h zX74j9S2QqMRbVTVyE&yQY%sQMwg(SHbAvdxG_L%$!u!UkFcXRab;HMj~ z9%jkYMWyRywl8C&6jqr^h&j9S7v z_eZkX1FesiIiM3gg(aLx^MYxJXx9{Ujg!x<>N5}NzM|I#niHlQ^(bpXO$MsDAWxfUvF2C z6|#nXufdb@JWm-_*~~d~8Y?R3O7mQ4+>xy2{pVcsZA8VlYl4<;~P2Z^VS4iN|rAXQyNx z$mDQQxK>u1H7zFiFzy$PKMx<2{iuspJeLF}vpR9C#HD75kxm~U;C@ce2C2xAo!K`> zIycfDh{yMb1lGPLgW9f7#uHiWiVqdh(&K>|MKUShTMakSfhuv0WjzarEaz_5WyOx^ zw_MpsZl}eZOy>_bw{Y8Ju*e-a~Vul-S5tdLrs%Pk<^J&pX7S$HgBc~l}%OC^nPyoo(BYzcjdq9 zJDbmt$M4i9?_3UFEi|i?h^;s__~m>*-!ES91E>6r^cy!Cj@JSkf4W<>%B(saGmR_P zciMu7-RMG)b&y>gwOk$a;7ceE-GVX6!Xj=ft>f(SsB^+Do^@p%b(la{8BvU@3fWs|Iorz%ZH1m0r0{=?Bu?s;>f}h`SD&h-;hATQ zlMO|JtaPb_+c}ADAZNE=FMW5KT^%I4xxUjr!L+zj>%P#2>9gk#XGT`#GGUUhITG9j zT`aoYW{P=$5|8BBqnuk=LKlpc3-hk#xstLj-hOu0?aHB7t4!^ZrajbI^Lin;cMlyt z2B1v+KJT^zcGJu@yEJ>YOoVmrN9dS2MZZZZ%G*Eu`!jJ`awP1wO4z0ly_S=@0tT&g-QyfGP{PfsVX- zok}s;HW@rZ;GP#SR=&0t$5Zaazd2C7{>`bCoGHV@1&k3{}YYK^`>koz;&o?U5!pzVqw?(>Be33kZL#yr?ENS7Ap9smMc&r`0j1uRAj&s~e)eD>)xd*OeBXnV?nBoCY9xiP4&)Uq;i) z5+`bPaJGnHHbPJl575%;jXjhIm={s-+#V*AG3~o>b%-_F57R-g5Rd5NS6~h z$mPMqNF@gd6eax=>KU`o3gv>0<{&`FAM+c*!D7Mn5iBSvJqZbAcp%6N9n@(#5S4-S zAB55SaGc2lZDk4Y2*R>U(Se)=Xu`=4#rwvw8Ned!q3zrdz`_S!VVH3OD9%C1;|&ll zqelV_k5FW?JK6EBuLO&i;uDgKl?#on{r~`T44|Hh*oy$$(;zYUV+a8tXQQu2fC)RO z_dz-b7|8>`fvWqn`Iof6csTue?W!orXvYZFUfUYn#tr+8iYjdnDX9g?FR?W&fb~~J z@~L4l)Zv8yfsmt%c?h?0bant#2B6E{UCsY12Apv(mjO?m zL_@X^`mGV**dBnG1!xHAk7Od;4f&a9YGStV3-T|aC|aO+MZ`NM9KxSFJsa)4;BIYV 
zW@h3(*w_&y0UDer0&t{#VR)duwo~F>f#NZ_K!W`9fKeq+j9=LOUyDa}7g(J4@hgTN zU;8?KIhfdIg14{I>0W&7fI*}=5TboE5rjVlj07RP0gk5TSGlw5+roNcn_Gel;r~d< z5=Q@&!Zg9+L^mQz7Zp~Rlz%8@7V%G|2EZX0P(^00g70+)3C#^ zMQ!7OZ|@z4TYxAyLIB@$6!Bjow^IIIfdGiNfFJKaa9^8+VUz&kEP>*99uFIHw<@cn zh4y{tdA3YRy#>9Y0?Jr|J7kOx6GRT`(d_;p3QzLLI0-yhg}?mRuVVDL@Gv7FH(-%J zWwm}_gWGc$x4*bQV(F~n2DFxegPr}1e){HtaG4-P({sYXlX4M$AAYKeqg+OR{veS- zCnP4M{Y0oMvcU@BSv)icmug!ZYhTDk^VC47(TEcOFg17~h_4*OKO7C_%Y)~GU4W^g zksbWojBo)jI6eU2#D#e+N+pa6Y0ARgg(R5aK_Z2@rc6GhpM1_(_4W&LAk zF-_4rvx6n^z3@?Y=|0Owe&A4>&x$4S!~8)TT+NRs(?KD!1BkIM=llIV`J)FL>|1kd zw~mFykyu9p6Q7()j1MOb2u3j`@%~MXc>k!mrN#Fk{AHtbLjnLIK@jL`j1VRd*Pi{w zQr)M=-Psy1dd6~OgxFabBS7;kZfVEL9;SN2!e5^1R71od!vStqTxNWm=ML+&D@qXz z`}i$mBIqz{cGTX;Z0~2}XG>=DJOe`=ZX%y~Z?A7a+q)JhFv7dZv#w^LQm@kGZ*p3A za(>Z~qqVTjA6rX$zlo|EV>a)84A+-if512@J~*Y#x;5C%syu|xT|LjfA9F!m_JMmF zQtyT}*TKG2Kebbnsg^VH?v_OhPAB`o<%!h)o|eE#bAhuQR$%iOZBU5WBoQNo1LiPS za29HXugDu|JyPiwnTYQ;n04}SmN2p@0D&H&WS|IcMD%yGowscFVfv-*-2C$y<(l6V zwPvv^=9GA=7|GtFNp@Pc7&NwkdEciM`Y{NetT-5lfKw8NDh|sbn)4+=mM`0#X)bA= zUTYxg(I^*bmC4cX#bq)DB!;hqwS1(!sLFS-TN^DdDI2#ccI6%+XNWWnnv0W*!^%28 z1f%4Evqjn2v?)2~dt^}JZl%8F-|6AgDzeY0bA>CqWAg1;v;Ff^Ze)j-=H>8AdB09L zgV~Q_+KP!VfV*$)HH(n`g+%PU;xx*!?(i(9$!2CijN!YEbZdPSaNH2QDpe)N2OBf^ zi0fQRa!u=_EfsY29?o;y(!^>Zdi#~!U+_Kfx{-8ki)F`hO4~28M~T;xf&%D)Jgi7J$! 
zLvss#mq%#Nm@oc2Q=0Ae!J(?;0oIKd&94q_?_0mxAL77FC5;-o{fH4L`J+RJBT}KB zV*xwX+;harH8(5I)c&iIzQ+*i_A4e@LnheaM+{9tCPPM$Bd02s@FVc@YUP-aV47|h$Qn-g@o7u!63tw1Q}P| zA*ak3Wv-8fQOnvbwk)EL40KkV0W3=~mlG*dls*ExIsCB{6raE2k_$HxIlXg)X)Vh^`tAkP#Bt8t~&ItYtjD0Lx z!#n~8Hlh3!57{E?&b)pd^P0m4 zi!`d*M$YDK%OZ)h+)s0FPvhMZPTHDvy2`m3B~jA zoW{xIC4{=J9<{wuCK`XF${WD_=) zaKs#-YC_zAt(XZhR%gUd0EL&nSf4gB$CJ9Sk*dc|@!Wmb1|V#~l%9wxEgvP7>dj6>LdIMhQq(syms%MvknXf zRfm|CR(dK8*3_AS=U1Uap%GwR4aLE{`*R>j3)x1+xEOspj;jwhdTp*J% zW${8g_R7OKoLlV%HF)7h>(^Ilk9?XZ`{7E{xFr1GnLS7o} znLD3ucMsGV_cwMDTX!Eu487Xof=w%XhZ}*?xrRcE8DDNPjSH2%M}GlpXmjo^;=0>J zc*MCTKlC&QdAB25*Z2r&kMLjNH_)>Dv!fsgxaeB^b8CriydWZ=-7f{l(=b?TnW93T zM=2N-u#Ceh` zXkV##CXrDhdij3V7)6^?Qt|mL8b<0MgkDy^+BQjUV_T$E8q^}q8x~<%mWa%6l^8?C@yh}P^f>!RMsCyw?wud?qw;lfm3Ky#E1lCj8Mkf0N4~mS`tEL7QF1Jr zW+gx6MoO0T%6uP~{^qJy%T%`$Dil2@UN|Qzc|3epR+|O);FC7lZw2A2Qh+Xz@U%x8 z=(PueoDD*^2wKl*7`bGud^PE%62d4Ow}Czn9sOPgtlDWF?y>Q^`)@i^FztM zP|b?Ool~@wgP@+-+<050;Nb5PibQ&GR~) zDO6ZNn0C??Wep0t<+A_HM*3spMY?v(6Ip?X9#QC?(PJjq7}fNl*h*@Q z;kr>5Lg!Iu`^hN+%&xIKNNTY$<1?im??vePRZZx%%CEH1quUT(4e~q-F1~Z&?O0ys zAwwDz9Q_2CI~G4E{j+H+%#(zxecyUo`+9m>FoiO>xb(4rmJgrEB6V){Bx8yvaWE_U znBkDLCC|3&fEl7tGuES26dyrbsdpgAomr;tA3|cuLXc)1!on5;P-Df}h)Yr~{Kav? 
zVOf&VhMsH@$0Szer0(gVNu*~D*Ra(Yk@IjQrsQ4Qt36d|f>`Xpt9Yhks@`k9BWg-= z_MKFI2vO9G^%MG0WH$SJI}pc*FuT+hnhe$Z^XOC)GewgK|1tVtUyr$wDLC0+5^o_0 zdF<&`B$vV4u--mgfBr@3vj|8`eV0U*Zwnz0R2hL=6xRC_H)lt{HvB!JgUyz(n1%;a zMYnY5E-Xuu38^`_1=#?uYp|BoU+|m6O}TbJ(N23>g)Vt3)foPh^hRMt?Dl0@M)YOq zN->Ljh|-+5E9Gzfwt~5?y87`nv&&;8(}V1$0vuybBVX-OgGQB=?`|&MmWixcrdfTb ztABkxN45%lDnz%{@)WTGhrB}uyHddG15_=;-z!QLayKhkp3P4 z;EzQvGePh$%(ROKU6drY?`kSyZR7^H#hXbk&Az1W)OFhgCkxG-OAKi zQJnJAr|^0;{Zm0#l5zm>9=`l2{s}KxlY^p2(F>K;O-=Nw-d3@037tW)Ggycg?OBu_ z+CY4MRtytZXiev@z8n9EmfD@>{2qQ`+7H@kkT78dcziSMi|qCJ_;|GnXw_XaC3a4m zGXkJ!zlxYge2mx1PWyjCaM|2t_0hsoq^i|))fbp-_TE3{9I1my^Ynp>s}ig?2My0S zP@cj}d>RV2@Q*@fIr6bI`vhBaY*!P7Ml=pE(wb~-K`B>2v@oh^GtbuB`fwC%9Vr4* z93+%Yd~7W;XIag?esyjSOil2^niEz0SQ zEHQlPT!yLVEA5nrpC8uKjZ^WiCGyQxtAF0JV6@__2*IuwQ+%%aSKMqzb0ZtKC^Rq; z<+8gH3d>kBFSjaHKf2i=a8(VQNi19XZ`0_SXJXa-ToFKG@FJ1J*i+nL?mVLzCXJm2Hk`Szn zo%6z~2wZifr)1$`BS+T+E4_c6a#|lb#5Nyj3{O$GK8KaY65XmoKOe@d2cHq`xmM{F z4+ssmC<-D^$|f(cyCG|F)G z8f0f8MdZR#KqYJCLAK3}4v`)E1(@hrjI!GlrO`P(xbuxIH(}RuM#IgITIO(VX%5Mg zo5Y&G@00L~9gW6PJCNB^{SRU15GxE5HR)^Hwr%6Rwr$(CZQHhO+qP|6^Zl8{%wm#R zc52n>MW38h)gynVu+#0kAfb|5JG#c}54^4$5ck@&jyL$cxIG()Nj%qjwnxvty*R_K z=E1`oNk~QfiBv5apj)bw3n~+p?kdfd(Ag*bkLT?Yd2zPaCS|TcFv+T!zcziqxmed< zA}39Z6G|9cG`GBEwQs!L3>zMn;su!Qchb_uFKY z4Zbnw)6J=JHjcCg70>-X^G$|w=DBuu@R2sNa>vPard8}6f2Lz?$H#d{y5YSx-v?B0 z*6lu|am7j^p!1**Nno*}>?fsiAnu9b0g2;li&6~=3>&(;&zwc3TM3#fHruhhjid+G|~K`1jj-&K4LkF5o_G_j=%F(1iu{-L9n@2tj5G^cZ9 zZ^Y?t?N~QM#3SwD#jgbW1Ko=h z{~cKR|HF!!KTXt8ZeQ78fPs*Z7R6zK7XJej(arz}F)z>W?BFggl1ipIyNSC=MG(Nw zy>I>fM=SdJ`NYFv$flmNs&N?!D2~mJG zJ}`uK2C8RlsJG9TSBT&a@JFEbss}*;Is~>Cq5+&NMJT6V4h#uF4}l_p1q#Z}mt^*j z0RM*w0%+;Z;ja$J((mUV*~1*X0CEna64Z~Ts*gasEV8{jFf=hai1f$}#!Y;5C*IdH zk`CbbPYK(L-3*%=0Kpn8-bm3Hh+?BlAVd`a%Zs5O#2B)f6?{DlnC0IcVCH3_%5|1PQK%~U$_7?;0Nx~tKKPq8NzwrqB>B{--C+0f&#!gU`;;) z0tN(I2$rBf0=k+113%L_{dn}J9*6*d13G!x9w*9QjpL_Wrk@geQ&_F^-s%+i`D-68 zaG*{$?_cGNAN|%c-nI1!jKi;*Fhc+KB-(RU&(mG5M!;VJWhB*vq(r3^1Jyrp_~~+U 
z_!_u$$V0e~u}?Q11xQdn0C;G0|CBV~ejr>$iTaH8o*dld7Us#jM+tR-E06}aNBv(v z@GUUwJFwT!zXFCfpsb9)+MbLk;3l&5$pugn>Ic75dBjQl3{VIleL#O4Kms=K%Zls-+0$ zt%VZxtE5>#M-}U`%2Ls*Pd)!~#$M__=TV z93?u|H>m(`{;~YY$^5M$n46hgJ?#BQ5GsWIoJkd(*qwkg|KjJOHlw6=i_eHe9bSIa zEBQ&&Mgt0-?W0&9L<6yz;wPHf)0#x%>Y9HVyL@{9)i*xb|HQ|2rjfDfConSu80*{q zH!t`NZ)E=B_XNsN6*+M(KDqSQXY>@8L$bcSI}K$GsDE?}1dx;Mk4Hf>ukqgj#Jxcf zd<*9Icdi~FJ){2>p&Yt@0Q3-;q0LS3V@IA&7NF_eZlMnW-CyGx{t%2I>Q8_Nz%+tC z9KE0B1All1NI&&6NF8u`(GLOJU*i zw6J^x;YK?JJIms&6I=fV&I@k*f)+UcX~Iw^Z|99?%d0Usr~Q_Mr4IkXW0p7l_*E1{IlLEz zf;j;Q(l`EDGJ#+HN&Izmf} z(h;2b!DasqpT5&Jhw0jr`O*HkJ^K~^Fk|9Y)*_-*-X z|5-wjvxGOLeBtlTpKxvg<vSAV`tV>j6X3{`ZK#^x)<&Y5Y&{GaqwJ zBOA-R7k7C7u&+PSgOOantau6pcK!nAtMUH3Uo2qwIj%%b?E?o8J!wq!v7K3UD1lSQ{+$ZU+E+l)XX+cF@WYvXWO=(k-E>BKw= zlR z&ESoEr=%YwpEp2`%;wexf-)u_sc;I6g}Bm_quqXJ{Ci$STu_ITPU6wMt^bI`Fh;S!ABfkxjH(6MyMq_%cozQRCv#5yIB4=V!>u8ki|QAHF+Fw7S?H z5Bw>)RwLj!R4n(N13}Z~Go^ahA{Ao8s4)S-XM&+Z7_XY8yxUx-y+{*6w15C9dJx5vD+~z^bm%olT%?SAYvl1(j19>C}^byT$3FI^?0~6Af1e= zoM-}B<*Mhv#?E-uZnfmC<+@zRnnQ@o6Xx7M*0DfY&bz0_yOQ84^g#fLh^{q~3N!GF z4U#Zw34w+01l|ZY#s=kwFnN3J$M9C8pn-@sjK-=;?y2r|3Kn4ii7xR26sIa=aG!c= zYy(v`%tlTsm(7}HzwotkwfA7<^eYJL)fQMGcCoUNPKvV_Q>S@;FnLpMses6$=>ZmL zJY2pxs;aq?iXvLH6TuKGsBtPc^(^TP)VK&4x4)j9qicLWG(}q9A;*t5j=WvR++!By z{mafq+FY0L=ibdMoBne5gbm6s(hq-7e3-lsG3E67TZ`(Tp1mZIDn;NeRM`@eK`z^G zoeqx1KZt{Q!M*6N>UQlaR06DbeLBwd8rvF$ zXU=;}egL|O*bT`BwIvaKNcTR0DSut+17EV1h21pC%IjydaGbxPF?wh2E^QIDxjWdW zT9&BrBXwm#mc!0RV3lnb1*+YMcctAuWaK0k}433q!C=E1@y zt--%nIhf;0%BC%QK}i)X^2K3o#f#&7K{0Sd8yI99V?qsUB!8$pu&@FZ7BhBB-nUQr zqVa8y59l(<6cjTQw)Bnr!h9&V4(yo%Ub&4z_m*=NS^qEq3b(emN;Ag}ctO&$#Hon8 zem<;y%vM?QS*eIMG#9e$c65lv=vGOX0mj0`M5>qoD#XJlvWsupQXR`>hh=xt;$=`F-)@a_)KNtHG_Bilbcij#^NM7)rDcRoAsWXu(hK@%+l-;8oywZ&_;kk5bm zu?Z>vLdMuG|yJC z?2z#vT1kGjW^v}UG`o}B(x30HS3!0(DK>o%L1iSGpWc_b7iRRk^bT*a;X@f7Xvzlk zO+#Q2cPx2=@iZPC_=$9>5hkVBD0y$b_$$r88`>Y5H^x+0goFt7YjoS?m>nep8j{dV ze=K{$at8z?71R_V=79lAUA?$(Rcfk`j>y5S 
zb%Fo9wx3qn%-n18-EV%)^@BwF71zy^CIQcwCcNnZKuF_NHLWN{<%Y!5X#QxJ7!x=mbsximwrq@yK*sWXa2N#9tx(KDrH#lxFc_UH#s#URD5(u zpRxaZ^a3>44>o~!H3O-Kb!Ijdff#eK6VVdgZd;YJ=a|W58A2ywk zoE!Jep3uWGincb^tw5q1&yXBK{8W%9W)32=waS;(SE@{dI&@ivt&^3ZC(d`QYZTN0 zB$#Xn2?E&8>fpdEWQ{wdOdP)*z4^#dXX*Bs0x`3k{!eXGyCPr8qFfU>>Xz+G|qw zFTiz8oSWSjqisFt(SpKi|FqR3&qEO4U@b%{k->a0(vljo|4cR1lDxGIZ&4 ze1=R#v}UaS;bTq)c#rr8r0TlB2U3j551EBiDavNapn^}ZFnXU|zH#2X;&GHy-|XA5 z_pjv=%ceB)I66L@krltB`h3VfK_o$JgPK~1Fe{3Luu;SrpC>AhQGNY%`Q^AGbO5H0RJ78V>72-*|sz6 zOkD_zEs!-Y+Uhj@$Q_guFy~)i6Y}yp- zJ=T50xLV#`VcPwR6$kdW)aX~8M9r|c02k{4w^jC{yY=9b)qj(jGu;I|SN58&KfRN7 zV?y9oAdJ$Zs{%6yql98V^|@%vNw2?#HGI*Kt+y!!{lQgoTRd_-CAxOqjmMKE?sjQ= zbup7~c9b~X49_e`$Gl12*>fn|bqJQgNlAHE%_LnGeI@}QL=HD8a`!?!LudL$M9CNF2}hYS$2eYc^DxOv3^fIbg1%tl|4 z$@Fx@>D^)evDpDk3DwA(_>=5Z|HnmkWpDU?O;pmoJG>RfJ1yfKx-HQdv?v!9Ea%y! zBYZmb{ljzw!PBvTsp@0?eQ)w^s@oldR}^7-sw25aSBhdP{nT7YQW61XlI{#-P&MmdT3LiG-ld=55$|k3%&~7r&GRG2iKXc-^$+Jphqf;mU6+@~NNvQ3NF|kMW1Wti+ zqfnz%C$^HnSezEqUesKV{0BdcWgCgw$H9kvuj*1RsmxVxn=bdg zsqaqScQe|6+D z`YhgIHj4GVAEsVz7;#BDKFXbxPbv4F8qy0Pi$y4FP;ll2V-!j#mrgx>%}@KurPDJg z_VXloLkk&eKWL>h%pTHb*C;L;m*S^$x75S(@F(Xbx5y#TH^j~vCjU*&QLb&M zDi#{{ev)s-u&6v_$>yY5Rin^sgz)Di1$_IDf`lrT%VCwduptC{jJ(kKF$wn4t$u7+ zv1rqPMZ(`VRC2w~9k0?KKoCW@_>)rhJQNsR_-%EPHyH2gqrt!XpCxj9vH=xJs9Ze@ z!$Ap0T6ir~Vp@2EyP5xFCPcF^So!)j2k+ianf_}JX=w)ukhElKG(4rj9wuO+AW-fS zkhrDU-SyhRK2E6lx_+hYlTzW@LKtnYeRMH$@J!sDKavimv|D_Ow@GrIXF0DkK~Ux) zfn1rm+w(w*US~9V067!0hywW>vhlsXi~kj(A*E`_FmB)u_wU`OpG{**ei-u^cj}Gn z#Wqvs_eZ6eYyYLAa47X8Zevu^Qo!jX?9FtR$32V6_Z4HIBOSuQKr?SFU``~mvIB9M z(5qJKa}o2Y4uiu0l%EXAfJjfw^T5>J$t{1&kIvchnBs4Jtf1j!LE(9WxVm&N*6)D0 z_Fz3>U&()M$`Rdzb(iD6v0!zNB3B9Q}kl&EVT zMYnVFrv}%?M9L#D49uoEm;*Z0!Lh%l7ba)VgkZ1yLug~yy6}=a79Lw)8fYpO^wV<@ z1n7qL8p^ZFW3@V@JTZ->jvbs2v-63A;N)WB=@`M0WsH|w;gL6tnGorWP(BS6uSpsr z-R-EG*(DLgBLvNv245ljdi&6q#AW%ahF0o^==sua&uMDM)T{*cV6u(ZC4ta*d)GZO z!t+>IxyEkaewg2Jl!K9YSq6c(N}1sYE>eplL#}r9b-`Uo)W*+)Aj?5M zYFZDU)-NfO%Ec|M2Z6OF;YkT|1Ay?6MW0}RbFr}eIGq2+p(B-RNG~a7`*f-*HGaS> 
zaa9=~*fh1mc?;2|F<9h&-xHmqW>}`23t(1-RAbHDmd4p|Nh{vl0>plravBc+l@}*U-!P3^x0%^z%;0+KgI{TgbwJ&NV}f*Gg+ME8a;Zye z(t$1YWY-5jzAY)ze!abe3T7C84+DnIgf#-P*TqtM9x9djYcvyBa@m&@-No@yy92I! zlE^<*oK}K^gJrlxzcsEThhP6Wmv)DIGsAPef-tPdlE{X-a5)?bFsD)mVfu>Mdf+c~ zCt}Pgge#adZcC0IT?<6HHAypg$hH&Vb%Ag5bYO?WkzpwJQ7}46RJQp|)XS)RsZFaw z5VV$kpH<6a9M*BQx&0>OuCSbg)aCC}&ThF#{#}f@2Eg(4ULWtAPD3xR8b@d2(Tjy! z%mBI_b)1WP<-OG7)1$&vYz#+dy<+1?8wct^Aa}Wz%$S5u$A+0+X<=P6tuR6k!Vi1F zWzw`26>p!y>!(=*zFa4bB}8%@YC%_{xaF|5h%LtPuaVk3NzC-6u;}7>pzM4I#?Qa0 zN>n?swE5C;Iw58^eX{VT+@pu9iddSEq0oD~(f3A8p$FzQm$O63CWvz)p=>a16;3_~ z*ktU%WsXM(;K$v(*yfV6Exr&)V)v6&Vho?VG0D^jbs=*$+}<6o=2w32R(qS!p^Idl zdGM_#7J9p|pT$TAjD7`i2@g$VBj|jAGritO@jW6HWL!-N1+CC2+W^*oYRUOX9yN&XWnasu@|V!|9V z6Bf#o%3!|RAE6IDCL1i{{6-Yt(aEu?s(H>Y(i%07QC$Q#(ke!Ew(!RXPXm>!E-Vd=JR)9 zPb*U?hvaiE@qz;(|%E)%E>y9%|IOBBu~P+ZYQCp{78Ju_53c_h>Md zUqDXE8`x9FJ}u8z9(G*dq=}QvpE5N??rnUt5hg#J0QdcY!m9cv@<|L1-#@x+6D0fH zFyk{e{HBGbOJSr)hmY=8=0i@p?Zx!9A3N?C@WV9hTL2!SGz0Be+Gr8X!{URS`6x38 zi~D(Hm?7WA9%fT=qT&p+iaO0evB7+=bHkhmxv*X(k7T~mOX0wD9#dZLmas{@p2zaS zC9IDfEs9Ou_npw7$VI{B!r=lc*7r3LPm*lJApG-%%Gz77mUeVyV!PV$&u*wNhwvyY z9dtGgh216L4U#|Q;$>&bMk+;NzciCU(pLI5TLOY?UCS?Fz!DP*YmcE|1~m$844R5a_ro6B#{RZxkJ5khv zxhL7$nx$V{MK%M!C?*4Qd-32vg-uDC*byqdQ_$i}EFTjS$iO^ZrQniXx2g#TuWk+( z^0fJCVK=HQ3?KZ+=^prryo0~xK3?C*7(y8>^0ru6KlMh=Nea8~@f`aXYEiraHQ?_# zc)0;vY);o?XTPtCB0!Qu0V^f3;J1{F4SN1ve7eb=?M?9m{bsef(c9BiygPxk1ddpc zU4l5u`B}oykYUdP@G$r(1?-A;TWvb!f(#(+-(}5-j)aaYD*Y`JfR3B9o{b|8o58Hx zADn@5N(x+sfvaxelgM9nPN}EwSC*J01h`W72G(v{kJRYf2aml2_}oqhE`~%mMmAR( zMZ>j^F1vtjt`g1l>4(Rx&M~egPvfTNr7E?0U2+gWhp~}XYnB!>bH?q={+7#8$VE{R zqU=&2NNHX2eSNUg&tZyP3WC;=*MFsWXb6WeeNcqasMmKyP>V?QbGTHJh2ckI#{O5M z8+m=ZM*Z+^Jg5H+XTo>1PUu0ru%~k)gV0yY=2U}uQs#`$fIG;|7p$2Iq@;|;`7!Nr zic1-@8_Gq$ipLwQ+*C_+c49VX1i3C6o-Lit4tfJqsPvHN-mPh}g4J$YgjeHe6e`je3mKZP2HB?Q~9p zyodandqu}DkOR~h-K@gY+fw1ikBM@z?w$CX8c1Y4XmG+I8!&y1pC)d`W@E3H)D{5N zHEKtq%Q?Oldl#^ADMy=9ZT^iux^f+^4B9HTN8Ho+R{a!A6EPsgV=GfnzqkY0S8jnw zgl#fAl6v%?XLue 
zQCpAJfVe;21ITkDuERi12(S($TG@@BlX~{A*_EhvBW#-KQJcCw1=SA5t>>9~Q!jZs zw1-nWs=FOkX$WR5rw*WS>72 z$D4FO;D=;a=3I5iVQ%cBznSJ;RR*RJ9Fc=;Ksr;ME65%F#^`oX7B9xXTU=5S_0KfU6(7<(K;k%{&Ug{Mp&-3@zsUc8*q$fs!eTHg6ssa1=z!pDC--d=H!DXm4f#e?0MQ@!#Y=(OP)dO0xJZwo!&QSirq~Kyoa@v|74bPvP#n8jWcZBzBHTRdU0diQu?(I( z^OVp@al*6a1l4h} z_$C6BCq2%!D|L0AauFw1nN=k?Ty`pYdJYRvQ1Hgp!*+%Fbc;Ylk*f2NYs&KNp^I05@2SD;M@ElFIN z=j*1Q(d#BBt}3DqxEuyP3=2xQw7_>Jw5!2PSCV3wn6TqbL&gAC4@}}vCZN>0oJkipMJ-*N zIK{ySF+>bvP+-9v9J53jLCBvlv~$4~smh|K1!s~Q?u1%wMrh+s`dS+3=X|8;SE@Mb zC{^C7>$vo!1qfM~yo`lz^Ij4yox`CS1&yOK zahU3u_ukf8mWk>fCK=v|g^qpf7#~-m<6S*@PB z(4o*`o43Pzl$#l>gvVS0rMr0_5A^AXf@@8K%5{i1d(&@pdbQ9uC#mE$(Nv5%rj#fb}%M5w^v ztGijcwB!UTKs5M|d{x3G6S(I!Aqf5HOzQ#G>2_U_u-+j9W3Sh+=z+AA&MEzrsiFF; zekBB}2>-(&+8!OE!QvOt(eQv5TqzKpVkGoEoXHU13d0h7a8ORbPEo7Zc3_8-f+1`F zhN73uMYed9$PpI_1j?NML{f+=td2Z9y#K7tsHU7OO&E_OI$DG@EW$A@k!?$saS^5bbp*pbLzt=*GV0SQm3Y3prf~Gdb z{Z@5o4<1-5cgDge>MeTl`yQr8S3JCx`v`2Hef#G9$7|SWh->5t3|I+zU~nRI0zarS zh98vIQ~>L2t*kC}|B$b>_Pk4etM`U!RN6qJz4hK3e=vABx8UKR6Q8hC7Kq8iCnG`LI7-^dnc==nu$hnQFVnT?=Oy_|%fN8^vE!G&JolSS5)O zkm&Drz-fXq?eA^+E#h%?yAThvG|hJHLsjv`BE>^{IQdX`cqE)tcTeyil`{tJ5?#le zYNQ+0+w&9AKfb-U&ouR(;PN>ik$dVSc*+=Pol`e^&s>q?SS-7h?>t=epw1|SmfnZb@{f4vCW8L=>-={j6{+1MX8D`j zA1ie?zRwtWC4^-`B<#{*I9NiurfIIa)QyZEh7lEqWo!Qgcvmrx2>yOJ3&42t)#8Z( z&-9ZI2KjkL!g1QBx{9|BTrs4lL^Do0m?e6Z4Ut2($i1mx*(;(G`sp+~9X7)0ofOg{ z-Zj>nL-cHf%m$}C+$*c{CG1`ad5?5Gxx6I>jAv9bF%@>E!nh|za@-)*@_B5vg&POG z9%)cshlEP0Xgqa;JR$`FzH+WpebxlkgJ0kZPk zO#%yxCAkki749PxwVo0E4wPT7hu9$pQ6~~Zm^ND|0pZoMnwyG^c+g3IwB(3m3VyhR z1K-l`%wrrN>kBsKN|+hfp_m!C44w2dg{o0@UFB;@EtfWDe89vBRYili>CtgRB8_`Q%+3hsIZ|sJK-lsrQw*cZtH2m<$4-Q32O3bNc` zPewJ*0QL?1c6K+oih1}Ykyo?AcP!3@rHJ^fu$~vXSk7x=%(M|oipNy zzRMWow$hV;TU?Z64=Dkz+{9#3h~9+WHm7?r=ikWI{t=zxlL}Z0csfM^DKj5~_vUO7 z?~(b>c#`4ZLdw;2byzG3YUVSY>VvCMv*uaAEUznO5=oy98s|F0%)jH=4=Sf9-+XIY zkNd4}K{x$&ODsEZXd7NB5m6$_b~>GFBcGX4%n`gKs`>#DUf=R@9QmjdS!=B4PLsyd9b8o^R(7uo 
zpjjB%OFh7p=k$9W;3dWBr)X#4vkhxJ1KU8j*&Gl{sa*esI($C7)HOaJIYxv!CpYF!n|NwDk(P1hsoea#o3=@mca0U z9Cr=(J~>U<2DtBF>h>OR+&EQ<|I$1;>x?{**Wfao!d`@XsS81YtaredLPf|+rJ?)6 zju;*i8R)e$onJ`}ylKpS7Q(A)`vABY_o+6H;E`MLQ99*zmEGw$ZoS_W<%Xu_Ig?7| zhirk{>S3J9%jz^pnpi$=3!ds9`;3-mII(HVDQ6ex8VbCAn@A~V-j-|hK*q2WD-R5I z$ZnMon(0=RsPhjmWiXRgP( zRt7( z*yt5+>52o9YZ-3Wl;6Ch>s~9%tko=^0;3K{rv3v^|0zcdru`%~h?aEBh=&Bu zSSdX26c&}pdlr?51`2R7Gj&^f<0V8xdW;(3CAjhN+2NQMf@-GYukM9wVL=Bv0HHR` zMY5kvnqheXU*^vRx5jss$r+9TPDol&i@0 zP@`sg00~UettK0l%7XpPuFo<*3o|9Df!oXlXQ={Oxp3pO%k~NtXYIdB6V=3~Zzfqj zIO)p6xr!FitD*mB?mz1LLi@6R(X^SV$c^q57$E_=dW7HE@jnq3VI_9JdQWr!3r z084F8G&;B6tT2MRonQL)B4MKmlCh?YmYOH<@~|**wFrp0TyljF_y?wYRn0h{?G0xg zAI<_|yY69_|4&M_TBWbrm#b*+IMt@X(JiB?NcfDZnW~ttwA79pfVuGW3~!Oe#DjL? zp6yXoX6_Ip+HMAuQ+NK2;|m%M8^WRS6#ehmj?+ik3Mh)#%R+3ur;>L2TY)vqifDE0 ze$@&;MIwBcYm|rY90zxzVJ%m-e1zt;5{&S-K2D)`YoJQght~HM4e$o~mUfUf`!2t> zRVcva%w2Uz?z=*`PvqO@1T!(#2px*F!!>gQeeF}HYXlIl!Ri}H!x6%g<4QH@n6MG4 zd{{JiF&{?t3nAo|=2cv%kS@e{%Cm)oH)!Cr@LqGImhTE#uOMGEDo04lpU3Y*Ik5_+ zD(E3xv~PT}@-3EGjyc3L=ckHv1nkgxh#2Hietc`}AIIliSET9S?sV;s8N=s2#m4K@ zTNdnQ?X^W<=lHv1yEaPltKqrx&z(dfa?ABD0_8k+v402*`5}36oi!|jcGWfYT}mt9 z?Ldj>l2&%zjF7kMmE}WZ?>h`Ii1k1hI#r~Ae=b+YG_r<@<#Q#)!RT2RXvd}jWow-Q zgvBwwko#O@3fG|nS8LGbB@j1I(ROZN?@>V<1)U4{Ygj)5m0ld~6=qin<~ozp%J%RH z)n1MBqILh7t7sApEoyIT(OZf)>RE^wZj+V6oR57BoP#5uvrmc7vrRB}$OEh=op8sH zp7Ui@!VGbSJWK{U8+pVqFWsv+?#@0a@%EVug1oZzb`ccaljY54lk7uf>%7citE!!> z9E9fkW9v8I-SmjuJ)7ZVsoBc$uGd@jhL0_MAoiO{DZbA~a~X73Dkg@mnx$5#n{0fs z`plQC_sQdLVQaI#V57a4=jZFgCQnC5k&=x*FQS+Q?VVOI*plKEUC5!Fy>MffbFmR! 
z4j%B#`!n?cze=-Rz35Ac=l5d5E^%PrSBi}KE>xf%&BRR#j;JR9Rt3lEnG6xH_xV$r zTHmiCG&8)lr_Hpp^wi}ePlwlY)y7SN@$h}XuKEt~$pa3K^|cv0BVh~iut%u3Nk-VA?-ueKg>O$PTH}x{5RBdEEzN~0X67K<_H`Law2JbJbe=U zWQDLjEyXmqnM{<>9%pB&BOcRifuDH)nSd^`#jN+EkP1tnQC;S)bjbJ6>f4y;4Wr5# zsFu>!2D6SAZBup)x%CGB?WmqbpNRM3vD0V}R68;B0^WC?(ivfHFREgVP2Wlc_Px`d zp_haEZS){Z262j`+yOGNND677E%J{suChZnF2W0{e4G?d8+YDPvc0nICjw_Q?`{Ih zEKX&9w&L8!7<|gYzhBQ6)6$h6%~6d(TSII4HeXy!%9boab}S@)=-fNmraq!D0+XOB z(B{xBi;(C`7>kRhfn;O-At`(^$I6W|tXkPWk)1e?ea7bJB2PfqT%xU+8%~>y+P$s0 zYDZC2WWPN#Y#p-eE8iuNUBUaAHR&LI#MnSLShUf!QCrKa0$V)~UORHFT$)RI5WYGm zsJ_(XxI_aL99{0m-$sjf&$40k)lNV`K3MTeoe>Q=jNS}uxC(kmMG4&SK++zzoTM)K zp=*u_MUNDwFN=K<(35hP0+-9~oRN;LBHYHUfhjhV$k=E?e9iV@(pkCR7ZLvNSgiO# z9rU=e1&C?K2npy6)_K=$7OFEw!< z9RC2?9y7{Sn$yBO>-E#^xkoM!a6SCoEa15fLzRy#a~PBQ320kvKW+HWcX8Gk2oDEn zJ3|QHx^-mq=pBKpz+0q1Yno%28|$LVf_YE)ocA(I>;PL2jAfec!(X~F z`YT1uTe@Ws&fT@+UtY0vBaE%QF11Ien2i?i<`zn**k~!&6|;RiQcZ%?nA-TsLRLdS zh#griUNy`m3QnIEUFFa&z+2i%206RA!9Ve^pmC#t?IGWnI?h5WL_|;@U5$=}SIzeLww=!W(*8r-cvVz}KnVAkCx0pS$BQ$>uAOca94-hnQsL(H(@j<+bpg=qK`|W>-(aNotHaXew|E_T#-=6442Mu3>*~Q*mFr+W)sZm>vncP28zvszYy-H#?T=9b0NGYpF5 z;gN?5;W|Im!Vo#ntkqPf^Cv$dyZ9QV&Dl7khr_s^hhgWuB>5F@=F})rtau?VwbcqI zoS8+%;gu&)0ms#A)QDM1oMyc4ayV08WEB&zfP793&ZCJqlsX?40?XAZHsR?P{f zDBERsgJE&flTC`@eU>S$P%~~#nG98?KrvG#vdsxiLq4pet1SAZXTikcXp4R=T@TbK zBwdauZHT&sZud#!a1{&NCMQ>oJnA9RzV0W*;enO3BVXPZ8rr^APxw7iB`_)*^M|P{ zj#!cmDbjbxA4KUrye$PE#d)y085KvVFKjC>{#0b^B6%pfD1q*DD=u8(@d-u+M$uSD z?iHc7!H{ZB?MvRpOtpxRBO!Oee3p6LAksl$UT~sEk4Mh}iPlcEzc=pe*RX3++bxN3 zhK`U|{`VDZ5k!tJt|fA%Tieai^V!_?zZZ;#gwo=y$$_RAQ(LgLWAlq~gw_kK^HEha ztoP3vfgV;YVS+Kc-?qG^^UiDJh>)730RoJ#RjyXf-SH^|c95nbuav^0j`#mB0AE0$ zzxA^%FP7i?gZg!MDkG`Ni055o;H>GgFs=wTL7C_`yQD4|KkKU{;=;y;4pKG763H3Y zjFH63Em?^9`o>uC>k<$~D3qo$#KNOX3!4DTHk+`tHtPt}sl>|c%3`Ow82KQ~`&_%^ zh_xPyhSiq=t%T(i>hk#C@w6FSO`N|7aD*|LzZdWzSbh@fC<#*AJPD&UBlu8WJqH0K zAFqkCHvk1uGKIbxP^v=;S?ZsMQ3+DVTQB+T8DiAF3?!Lw+`)dqsQ$jK{4<2jbC3hg zREm1ECUF-P6nsd&|HAk}ldI=5J(sOI+GcL!`d$;_UfI{S8qfrigrF?&372gtXFmI4 
zB~i5Kf2-hMoB%;-`qp$tPWAP#9%2;lsQ+q}%MV*@1uCM@< zA;q4kmY?fAxm_7kC-b77@7hUcFB8s}2OXXY2&P+U8nVs#>wM!7X&xqb7uc^? zfIc_j-VC>P3w*r)Q7i|ku#i|Mv?beB0h=#chz5)`0nl=Z1;I9=TZR*F@J+(tPViHu`s>3N?s_d{EZ4KMoPAvHK)-gPBrmD(L_%xW=?e_Py z%_KIrY^CZG^TEs4Ka#@^x<7epc62xKZRDi%MW(F{O^iMLWNuGQg~Mteefi~Wzj(Mh zSf!4m@H;}Eh}P0$^|9_f9d{U43i50Ay56)l_(fh(0p*;JSYI7h!{_!I4u4DyfkjGp z-EMLIKL9a6&c7RUkjPH;?4aKMLX=R=X8T&o<8S43^4r+iD0U;-kVssX#NWG=Wr5hS zD>~J*EI50#<~-KwtrH^}g(wF?QH(>}ox`D)X+Tasi&#`dEMK{|!iVDe5ZG$9#;|&| zteEx;76%pZW@F5!^Ig=x08Mw%mVvm;&tO z48yLDvoFpZ(l_G2t=?)l=-hw@Tl1Ea5vFnp--J#`X#rf#2APr_OfR}g0($dX&a5t? zDGKNnb3O3Wi4gkWW)+4~3JkR_x$vG2dtsH-X;LxJ%?tS;QTiNou{}N9NWGs-Kd@*o z&}Wx$Ge(5Xa0pH!CFQ`W69Ugs+PmDGYtQKYTOP?sm2805V7^FH#3QW9rvNFbJbfyS zek4vwTwA{!>3g3hhsc)063bE81@H{Ie>nb>gAAP56NoBq1*&`NDY0(*%WHDY$~X@L z9m^K~vXop24CFe;Hd1|K6|C1_8@yRKVL58&q7rF{3Hlh9q=M5b15CgsgnPuL{DWX> zuQA5l%28tXrW!2ql;@W5Y9O8z+z3sg(}bazOcuLMfA=E1f!rWF^(%+K;EME;->L`F z7D`Tb(kej-PNgsvSwA=(Z&Gk|Hq z9}#?UGv?};;1+}s1N_rbolQ*4Ph6nfI8|m<=&EM3IA*iaJ{}@nCq0_yE}5&P6$Ohf zYGz(E6v4ESfQA;hT3@xNU%&WQ+R7Jxj4-kNV_I)1NGCQRLW6O`TL%Tna!ro7Nf@2f z*AvMXT4*i6Stn3bnWjPNV1WL`8RO<-xRDB$HB>&=8fSz zBT|F%TYfU-o7ucOnmQX@UfH)eCwGR`A?gxjf|7kl?WOf?$>FnG1W(8OB{hgV22Z}3 zZCi?V%MuxLBsiG19RprKA^pw|f?8#=E@&*(qOIDD8XSg^1(6vwX_A>|du{MYB`qt^L`Ya3c7rsP61Ut{Ej zvcUKiQ~_}**uiy{wS`+KATMca@$VH@{Ec%0780M@nGbMX6%@1dz0m$a11t8#E-iBV z_Fk9UIj#AWSSEu0AFu%lU-zQZ@6mwb8FzW1MJ@rRuM4d>T*>t7N9uJ4@9Fd3zMAW- zUxh63k}CRb=|68FXm&K2IPNYCwLJRcwTua$r`AjQa1+Er2L18BL&@A?Oec;ra7pJ_ZJ>PSeXMI-J_BID~k{7XOT zn(y`#Slj9~lASziw0vApHtG zE>}j|Db<~f*HH+~)x-*KWe974 z>Sf)s-YqhDGTF1Qo%+=2NiOcyTuG4zb+hR0X(2s8@45Y3o~5A*RP zaA;1i5bzIf=BYHjvU4G+bMm0{YKc>o8ui5bJUAL`JxUaLS+k5C1*NjqThF9)Ow9Sw zlr*_MB58vttCl)#mc|-E^0ds#A{;t$t969ZL;zNe8GFk%8Gc~&(7oUQIh_Z7o9ze>ca7(&;tstFXJh)0gF!b`lbeS7&OmjGG)ESlS3mu4%0FB`(!vM8k{7XqL$P{U8Xk{B9GC-)`;RETO-v{18 z#~nB%hZLfWbP`MH7hB){QsiMRk$l?<-4b6s^p_CkTTQ~%#U0P+C>u8-mY@*|q>*iG z(U5()7-3A+U5@dv(fWFDvj<1Dl7)$X#a(SBN3-O{1pp%Yb*xfC;_?(~Xa8c3GZ)Q86BhJz5gBzh2 
zn=YeZD*VxiWKSPAb5hMqmkfZW+{;feJarFi*N6t9eJ>+poF}~Cybwh2hZECF_qw_O zB~6P8sY}V)B0Yx>$Zm&>5_IdO%_RaFrMAf|RD-SX03RKQi-U#8X~vvz4H!`DFN@<@ z1JU>sOe8{^fGvF2JMl-Mn7WOak14vlmaSeJDxQEcba2S8#HmOwMJ_v%m+dxXxJ@%r zpJR;*zxyjJQ|=XjxFfI6Vh0W-`vQ^#W~oAUa_}@6<}fS@!NF8zs>mm_sb!&y&O!Iddiaq6%L z^}Vj2Y;uo3c~)xmcA;giSh8!K?wiz$!M2A$j|VTKkz_OlLs|*JC4B$jF*`$?vGJoe zJtkl5S2V~RzZ&@y`h8&`U2nHC*5XP3fqR_xQ)<%!BpoOYi4`EaRfGn_x;IN zR5wx;D8#6S=N!1}p{U`mqYCOr@Ll(j`vad1o76m-H-I(M1-pw5ph9n@A?MZZ8O~6i zq^HZD2GS7?+KHpe;G+mDnRhK%Q!b<@;n8|KE$A>nv1|XkYldf6)_B$m*SR(C0A`R@ zOXmAp8zymM5m4-sY%vRUX}qvErd+cZb*$8QKM&LVi9_rVvUGc!vd|2f<_)4T)sSQa zYXc3?#ym;XEX<^t8;|f8JRO?qWFhGia%bEYFT4w%koL^YS7qRnPpA1@{9rn04z zLWh_iXiXq{W{ipvLHrCRJR}KHBU%H~ZY?>P)=?~8J%{`fpgm^k;J8Q1fbUPbCCD_e znUJ%$oe&+SF3a~aapx^V3nq?Xe1Gn|N@LZ7&iT#cc~5DOn*7v?}4-p@`iXd zK{gHToW$ND#e6Cc;2w+S<`dL&-~eoSnF07q7O1s>UO^S?q&#|5Ux}HYClFx8$q32d zJYay#zz6nBn&eDx2ZIK7DsdM=!!JKt93}VGsM%rTUj*gD^@~30_EzIz zlrLM@-}tqH8qWAWJ(4>!tWu_n#$8FmSc(9HATP#dV}k%Ju7d2H93ew*&Z_D8xb2vp zRu<#vd7k=5f>D-Hd)!jiMQqf#yC6N|CwRpo^u45(4<1GA08yngj z$Ue^+&URfD9+MFBj2~{y+jAx>45?}M65NYSd2l2!ti}T^?}hw+N-{IknW|rXak#vW zO^8V-uB$0LrzrEj^NWhh#GhOPoix1JiH0uS4W22G=)-nR@;fdAg5-rLV`e^E(#~&~ z&nm_w4{uR`9REW8YfH%+bO~&L8uQS4(+`<3p`09!(qac%WvH^qGkN_-66epcRZmBI zuMp2Ii$ItCh|BxZilWU$Eli;d^tVBzOIy(fn|n6W45P6dP-e>n=yS$yqvy^2r21go z+t)(!r+oB4@Vo4al&(AU=k6hx@u8BH_X65v@+Lp+F*%Vikb6wPtUubT4@+JW0%Qq) zi}PoEY=CYSuJRk5Nt)0|7e{!dZE$NDIcxh?!6RWFEtN}hW_VvLv>e!Y+QaYDT`<`t zCaw$*M351d5)B!hAxG-F7WCl+m#2)lcsFBg!K+WNnOyU9=kg7ENVFuaumYWj%_C{K z0ynPPlr~w2?-p^Os3Dua28c-uI%Q>o%fZjmwu;u6*d7-+Y6O+s4)5OP*%5J zi{gqw=p;4?sfLJreYo22^hMXi`duSy4%Z2wI%<{3RtnEi#<4ceIR0-N?N~y~3Ogo{zLC;UvBOfwjRj0BhH8`h}jCzB(&3mTqr<-sz!4iaot+)C? 
zB^w?ho{#WAztqh;pLy}Wd4Y;(A)bPKR}k@b2mZOI7&fun=%O2E;7N&6tL6Hw>B9XS zf>Z(!J(EQ*Me(eUngJmLLul9Nfz0_CSX4g)=0m(wm`D`_-de)4;ob*Xt_A=tygPIY z2~!PhGcPfB^COVN2q1OGjUEsajYmDp{is(x<*F-;(-<`6Q5-ZbBmME(F@*?(!OwWu z3rkV`YX|>`ZNocee7f9PE1HJxB{+ltNR;RMIYaPiz-t=qfS`5-u`nW_;LaD`;T4ab zW$?iGE>Q+gj|-309^X9)X}K1fz~MU-uKzdEmdJLVZDzSJvXOj0^E{!8+(aD#3oSM4;ctZ5(AiDiyTXdt2vQ z*X-GgX3YuGEydNH8A)dd6Lck1=du}N{5(Y?E!g$E(;nOKE}#@0MgAgP z;c#%RZ5cDTg#2?QT(C)w=1$N-RG`G*Hst1UhTJF+7gDJY~5N^>(z_Iyv)4aA>BknRC?BkqD&KY^fj8Yn{Yh zGQ!?TCrwX-X8j|Uq_y=JL|SIfoS1_vV%lo7>nvv=z&&f{!)4ZrWYkYz??q{d{zUn7 ztrY*Z9HnKRCOt8!y;q42+DtMp=H{&XRm*Kh_aP~~O%2C?=Z~;?YXG(%((XFKMmy#< z4B_P~r!S|WITu6l*%iyg}l>z5`lQ2_nr9aBBgqM@f_4LE{&mw zXz_3TLQXZ$$e)kRR=OtMK!FuJzftI~A6!{_3}_mOTsfMHdj>YBkQX`kQU;T6pml_? znn2o7HR`;Dzq+=!lbk|+0VF#NIb;|^Kp0rtL1yOaS){!%`3N<;R~0orzaItn3=}_z zJH6pO^R5NJqRMY_059qFGLY2rx>oLG!xefD6k-+l^bQrT**bLq8aj4t$?Os?z-{=x z<^%X?zA0TJC}XUkA=(-hpB`fNWH&B&rD(FQih=dSYsv;p%b~;wpDv4lu{JAc4Z<`lh%fq}zn`0-4ebE|EGz2K1)*uI36}umRLQXV zuWu@I(D_!XS$WbRAo5Y$e3ihSiRvF_>fAF2$!Nk@jNuZd(yP=qI0KC;yxEKg4EfKzoPo$Q2r8ah_4(iyy{;p*UU-;anBYUNMR21sZ!Hae2EDOaTy~kICkm-q^p7y?HRlQe>Man^`B5; zU#^|dii@+uohU1KU|HQb?_i!jP5jmGpnY!an(7V$y*h1=1;hGchgLLs^1hlp+5p4@ zCVD-!EL9x{J!iGr2`Mk~IzU&4PBgl{ddG>PSXuX>ohg!UXjW2}4gtM|gICiOvpTQx z78nZ#VUqj&gY+{hUAO;*tl;uq&oHCO1ICv9>@hVQ81TtD8_j)TOUps`WBd(?JQ{j( zK9Q10bd<8?PS`qOt$4ab7Su(PXFzL==201K2!tUWNZ+*~{S`L50NHVvHaI|xU72~9 zHROHN_-cRcfhU@v7%RTh1hR~RU%RUI9COkoPWmslT@dgS5{=>@T_@7*@*TEDa=C&P zOI)A|J$v-VfKy}aOi%_w*l5|$Ov2-j7gw+c65HQMyC)G-U1r;k1E}QuI^{35PW)xBJjn7@B_LeU!XtaM z(=Xzwoxa-~;fi@L+OX@m)7?UbY&8iD$m&DBEG}R&nbPIipw27wZ7x=vt>3p7e|skt z74#`msxHyn-|u#2aOnG#wP}zJO3`Laq!kvA@%Jz7gM!Tm_OT*|E^0OLBYCkAJr~sU z2=$-X@L2)Kmned!uOMiF`5RYND7Vp!tP*8rnxG4?;x%TED1-=vfzb~ByfXH5F1G}q zAUj6+;{^!Fo(Vf;ss)ib!han`)UWSB_FjRY8nF-*Kwqpm(8UgDb$l=GpD-f22B3`=pDw zM73hcavljIT6J~zLof?aR3MeK+F=krvrbSb`35+^wxC?oM=}AQa$`hL3x2I5m_Ws% zSI~r)1=^ECWJq$Kc{IzSJ6f`1BNnGbsl{7+lFp`R-{z6cMyH+hHxf8|W=xk;hB5C~ 
z4uSxtiZV>=Y^fitXT{5-9_=3$prWM+=7gX9w8HnXOu%F$w@egn7D{*E34oPn2hsoM zqFv5QlaMr1$;+M0Z{ZAKD8<2E-BVDkoVnQu2#)^; z?;pof8ZI~7{6JscChE3zD%68mA$?4I3`0Q%ep(Lp+WT zHsQ^LLZdf_njeR@IK{D%>z2J7q#bQG=+{>Q+h}JWbcz{C#~_IA)=ssI;Qv{Mei#gm zpKmaoT{4c=bvrF#QJs7l9`U^Zq4ePV(C{hcVib(f=G^seYZj?oE*KL9G%n0$=8Kp< zDI9~i)slE!#_^Se+-19{BXkT9k*n!F-&g4Q_~i0|nZwz3I{K3oq6pW>k$)}%h^C8) z35nI)K(?QOpmZto&dXNpN4N-Xc03rC>Q%{AcIai4df6kM>nWuYYQV)GP=;VmoycIg zxmdlp<8N2*u*l1llU8TsHyur{CuFBg-J4aVya2$sC76!%OsVTcct6BhI~t9vx!=@f z%tb_m&8IcD0ao3eQHVfV>EP)>B_(--6=(g`v$|wq9oPqe0jR|MNW=(uW<0Huak7>F3$HXUW{&lmgEnZ z;w*XS&vZ`;hc&|_bx=8uDq!^(B~rT*@h(}Mewg|sv!R`zx-{J`WNU3>C#3vG;Pz_} z80(43G7ogg83M&%tJekQ8F#|?>wyJy_Z=SW3jKqq*npJ@v0W@cn7Mp}2Te{;wZ>p; zu=67i=~YGo&{^`mUn%*9G;nf}w;JyUZfeo%Z;ivutiGkioF$UJ-f-x{F;Ro`2XWcG z&(jm{6UK`#75}Fv9rG*w&uma&9ToH<5I&Y=vg(pW4*dW*^{+)h#T7krv>8)iTzRdC z=cyF??!qt8JJ5TY8Xdhs+lj;=4`^Sc0TgK8w;J~UAi}^+{GT?+B|Jk$+rb*0g&mzyFnHzNjtP2>c>vU{DmDu&oR3HJ|e= z3~GSa?hde$K!Y{}#&f%BX(n3+Z@c?Laagj9Z0~y50KNzrR!8={vWr_7#wmX_0Zqr; z>rLJNv|%g=Vlpph6pu@w@p)h1gF^dHrT8P z*6zsVkgz0*uIm#~LCBqet3R!J2HN9F*Gx}tSyP;pctDLt~gy~EV00%B|XaPGFLkxvd+Iab# zkpEns_q7C73YfrZ7}MNn;Wa-kPjG+2&EZ$NdIBC7}67+|Mz zgpAU8ui5%P{AwE}o$<0porVMswSefiw2sOU!@2z=nAl6KO=OU%YPONwHlqr<7@PA2 zzO;%|BFb(QH$6PnPx>rl7lE$g;maVIjFZ7R8DNG|X3 zXwx`e(ayYaB>oo@VzX5r_ca;ra~q7xdMs>i&1%?caM)26|0#&qvxO=54Y{czqywPCqur?VvRThJXg z*e>se%9chf!3vou7)ssU(^A9jYIxN)bOKS&A5h#j94>Gf(k1KUlwc8MTp{8uR3H(Y zkx^R6>kwXJs&KEu0wQt#Vg)6^b);7BP26kB;mO~*SbT({w6M}w(WstT3;F2gP(Z0$ z<)_6puK2EmNW00I%{pulQc)ZNw)w>|oTQa zuN8_7fuxbE>p&P7zt|ELBT5*UeX**KEa2tSBxgJb1LqYSPQ|92yQQG_R4jqzO(hN7 zj*Eli1;Ae%o=Bu(d&{=Av2g0Z9p1L&EmpqnFM6@v%NPP65^_Gp57(axUJo^nFSPo{lQMVlDy8P9sF{C!1`FpM_8p%3n(%smmMZF=gBMxslM-QNM8%7%Y%0%t&tZ*dsz zil`pE^OPc!bAueIM=5{)brT%~UT^lC`&!_+N#$KfQx7JZ!MTw-2uD}Ha2Ek+Ce~^A zag>Y8fBeTCy+lYB3DQ?}l6$R}`}!-vJrI&aZMQpgfJT9+Y>XS>pTUriGZpE)&55`1I4S3gtWU!sdDt4CJy(drgkw^=lP-iQoW(vy#`tS zdb`4SNU=Fqd(cuJvOExAP5fJW8RLjWwDxZ>k>of9w$3)FRO?>r?rsYLG@q1l>)K~vCcG6X6n`1je3QxvrO)X2T+PiR>F9eED$pRhI{?T~ 
z-oARvvjScD&b)N$)y*>EAs7`y>~R~bl~A=1(W%V;QIa^&Wl$~*vfI8qU8fp(zN|YbOxROu^ zsW1Q#1Ba033{7rYYAa+Z@f;WOT{OnhvFlHOEfdK;9-H0WSoLM_{JH<}L@}yA)Q!KI}fY!0KyODyse* z=R}Fg1fLH{nfy@*(*nK!f*NQ60<~()S>mVqRfxYy%=r?X7zE+$j4t@)^diF=WdS^b zmhNg`0NYwWH_Wxh^q-bbx$Ucu-dgjpCmRIPw@gdgn&#VwRk$$t)ijX5uPCKf#U6Id zl-42h9m_rf8JMNk?v%<);uO{;Yb9(oDFJq~;I&3+SlcIoM*mmAzyRvwu%^aGx6^~E<)ld5_n)ndw?9RoqOOvZ+Z^)~?KTQc#*Jrwi?i{^4_p@RP5H$Lj z4*E$ODb*EAM7bk?CP`wuc2irdoz^VV*hA*%1289jkkh8116TO0~y!0z;!>PU59 z0zpXsgaBSHc~{~n)}O5hSFniyj zNZkCWlxmgFo`1V8)^5SyWT7<737T2VQ6~9csaM0!B3uvCuNRuq^e}*E`f}ucVH(Yf zIDgm^YIQrl?DI^>5IFs@@a-=taTWMe%!b;`4TxTX>hSrmslbQyI?#LmR{b4r%`lJ= zZPuP=bayXlo09jZ7)k*B*Yj_+7z5&Gn{yrqm~|=NBcJ1EWUvjJa|x#dTQ|^1AM>cU68omwPm`)KPzP%eajn~+o68m+eC zzy{rAM4>t;Gh_O@ajvwii+Fm|#ef8WUN2yGpZ904EdQ#f^7q0?#WxKDtc}WC(?>|5 zyc7E|uFYxxu|~d6r%5`JQ)84iAc5eK1XH~QE+)12GgjC6e2U6NA8QPwHVWC7 z?v5043;>T$`i(GkkqIWoG9iQzHIo~ZY{d!%=epS?IO{+XjI6(2D=C3VsZv%ImGSgr z?a-;f5$^yhZ7PM^)tz^d#2LRj)X_Hv1I-90;T^uP(Q%RZCmLHyq=SK-XQqa zPeXkDX*E_?Ah%9|G~_#jHo3V=(Pd(AIO+XH_#R4p_yk)7)wU54+sZP*$A$qBFF zTYOA)z`zehNzzcL*wDByu+~r+lC60`t`_eHBjOl}3BRqBny`0AaxLwK4Q6+#G}A&&h^YJC2@;5v>rTE^i?R&hdtuRO@;B>A+Y>%Dd0 z!#_EWSyXRxGIY-_egdE;wU&OMCMkVRBu8rB66pE zdnD=zEQ=<6sm^#KDmD;cv#LE)AO-80f~c6+2;x=R=M zH1YEb?1)uwDTkJp?~{Lpy$nX*~x!hr&yKBNW0H2RX_!k_Hn*OvT`FO z{(Jk7B7gnw%hhvoR~f*~iqTbI&P1b?fxT;0QDq4nT|oyMSEKREx|@QV?X~0GVBF_H zv=G2b3JA*#le;4&oWUx7c?f=B>)51(Zl)%HH+XPmdG=OxSda6zva>MP{V9bh6UD!I zI?HiSn|XUCXL7&!@G)Gk!amgb%eyYBbLW_YiFRAoC#AJRx$~7!kNJ)?5Ur+t15>X> zteqd`R00n)(&Sc6Re$pJoL*`t_WBBB`zJX*l-)h>1Cg(3YA#qB#g&y$$x{Ln*gbMn z1{Zya5QxXK|1gwQut2M0ULlRo(s_|FZ&OQbN<8{jjnZRdzebZD}w&IhL z0$C=xPd@D;jV$W1d23z-*H^5eb2xtnfX!kzR*b#&=_befCq$c&zMs2hcNy9F4MiPb zg1ZSua7lR|oFnuQA3m-u`0^v~6LXUZyBpC3!0IlHo9dGeQ8lM5|K;4B{5R8TF`-djfqd+wWjjmqKr+9iNb$z0q#d7kgj4O?bsm%Sf_)3@?;rDFi2>f#K{SshpzIb5r%^2B2?3Y{E#>dBf*cW; z@Bu^9T3m7_9_)l{C#8Mxw9K6mhGxf&(}UL{>@1Nfrm|uTR+m0exPrkqg;F~r7~cti zxj-cg{zNHJWgy2PQU!pYP`zehP(k-*MDXYXbptiAT{Rg0X+x06K})hXexrr@#Pa^XQM;0qrbNH4w%52U7iv_mR|x 
zYJ(V050lBDoMon%MO_}PRb2FsnfQhIu7z+6ThCL}h3N-xwlxA7lr%k!LnW(|^u@7B zJ~t@vK0P?^(^b`rTpC55|F6#)wF*Zmn-noB2OOq?h^8;;K~=G9c)H)N%yWFQAqbZ; zRajld&#-|s@4fAFVEe0Q9%Pdm??^S@KOt;yPV|Hd@?eiJwMDPZ@k$Q?CnES11KS10}DPhJ{WVRPJFj;Wm%gHDH0m?k3BBT%Ku5JwJnik(% zTrX+B8Rnji6I3R?OC4hFlxr^2CEQF6eQ3 z10a_?E%}^HIr@W#k_Q7i@pX=(HN~`Dj|Q!sDQQ>A68!hFt8T$BLF<~_su%K(ty%2p zOst9nJJt)WuTdp2n^nV8(S(!;NXhwP;UiJe4LbMcfyXg?FeuqKe2k?ivf?cRF?qP3 z>WFBd1vYx9P5=?`FNg{Du0<3I_O*JVc+vX{CU5G7f<2+kXyEP!XJoCF#xDoy8(WU* z*_4Bx8rJV+I4xpYjlKtjmEWm$mw6E)1TaZ zccchvRINph;yGcgbiT_tQ;h0|fL(mlI&5{8h=P~*Z+Uz3NQ;<0OJd&xn(H^g8*H(` z6*B9DUOdSt`R6rO+yhuqca>_79WVL0cngS9=&zGSF5fP6QJ(}nd?Xv_Px(@d(9(C` z1El6k8R-(hrhGDf0!FHiG%H(dmqJY?7kx0+l>L0{(0|jm{3l|XQC@kFEvhU|h1OoG zOS4ARq@#+L@V+7i$?X7pdIefh_vNwAF>}0|i%Uhkx#<4rwrK#QgX=c|5gXjBkI5PH zo2Svi0F;LdO(uSq6C$5`I!39jO zhxhMK3$^EuAmj_wy|?AZWvr(iLMl0!4EV#8u$q@Z0XH=EoHH=&ZdJ?UdPE7l%6hgT`RvEym{k5e$sE z2}*09*orllY2gY5D_J5Zz&j8cGSgc?SD!rJGJ(C)Sf$QVnApc7O3-y>M@`t`9*T$% zmQVHNKPAGaW1T_vL}F!z+DY+6=ZV>^ccR}$=lmjj&iYvbwtVJDp`t0@io;J}l8El? zGXA%n53CoQ@5XV8jh5IuMo=83FWj~?F^mgXkaqq=)?3TE7x4bMwhO54X6pZ82-ts* zdiVAow*G`EClCB&h!d?-q4jEgMFriaUXDuQ0L{N~h9tchCrFpl;H@gojS`$U5)8EE z%<4d6Ko|z>E2>pdY+(mOAVfNa z`=(^&1GOlBqz3M$4)ieeTcq!Iw?HC@zoVA_6U>6okD^itH<`RtaJ5B>J@&X^8}DUe z$~gM{|FuDjtP2p_23GQ;0itE26Fd!{5XzO)_+t3gSebdMZYBX~PHXXib2hm}%-0}? 
zUaqWur-k5Qe20za6NH<+v`m))R(`DRDWp|_{}Xk=9-I)kW*x9fKK9bQcSxq``XQLY z1u5EE+QVy8U}3<8y|vbbaa_BGIlTEgt8O08ftOFl99YRqm`+d)j0q~%sTANSjy3GU z*5dm4C~{~L7_$<)Rj@j4WR5EZqjShiZ5d1?Zzy@vi*AG0j6|q4s-SY(LF!Q(qYZKM z68Orf=ufT03VHXUyS{g2`SSe79vioO6PD)pUlI2|!sX@q)>9C-|1*BxZ%{+;gB^RN zesC~UI@lHKPOmF>;ln2G#O_4>fVZuc1Q6F`$Ggw9L3(5~%T$JY`L~TNkyY^N#bm;zU|bw;Yw+HDU2=Zwk zN0JT%!jn-Mv+5eiT&hjx=Xw0%Gdlzn6Zv%b?aaGrv=4abw<1n`@JrubHwUF~j?-F1 zNYChIv%q%Mjpo+Gw?L>+Kyr>F2c?OHMm|q%LE&$C1d}flLew;6Z~RGc6ix_@&~h#J z5U&XRB0{2&K;9A8Qr8LTn&Ex0xnO^BMGiH};&}TFNS16fI4MxLB-Bhg4p1s4e z-I_k9kN}}4Uk+9HwYy<=67Cd>Zc=ic%N1?htUghYMjeuc3rC%)E(;%AtKvl1U9R#S z`J9TCAnzy2UU?jLEHouN1pMxtvioaHO>Do@DL!e>pz~YeYa73Y#QF)Se^x!KZ8U+Y zDlZ8mbUlB+Tlc!8C|unmlX@`O)G#}h`N5~-<(5fR_Tlw-l8n;JLfw8w2scjMj{#ky zGlcX_w|z2-rNr47gR<%*rX0E=j(^bS=plDowr(F5W8kyYk!p<4M5WH$Mh-sh0BJy$ zzcT;&WmysEOYB7)01^fFzIcIa?agBo1#WBm7Tii#%qH9Nz-71kRGceKZ+K2hQNZ)x z*+H+s{>+$^;o9_Q|Gkf55M;`5>nY!U@`z;O2#}A5q{D$2Z#k>=`KkSHXlicX-lzWQ7(FLxf)O!F0OyFk7ZjkTVe9M9QBe_~e}E8a3L|#*vAW zqHH_SMN=%S#+8h_C1W>u*d)kb)iOype%jAuVA|aI5893X!X?1}?a`@~ZxO5KY? 
z1=Bb1}kcw40ZMNuOnK~WR>XPp0*&NCWX^Li6v}q=uh?HI1XOf za5dxT==b@*Lh2U}@=_wbG!u`BR9Zm?AFYnvq15tx4z%By z+Qi97Nes^E{eBnTw~#37b$C`IDOhV1b1uA6Nq{@L9CWi$9byEZ01amoD`=D!H}2 z{-EJb_y8kw`Ij~=+7@UL@}U6oC0#FZKS*oI&JOZbf*P`MDr#r&TtC;(WOU+Rp?M!y z;mcDX)#`@4G}k4R32`X_{RB@sMDt%ARnpChZ<#|xL@`!HEMh&R8vB50n}^g`SAU>S zQ5nx?+_4PT-QkZ0CKoHFDvC~%FwYFFt_OpUQ?R#c;teg#JB!-V>mqLct5KmYGoji^ zGBaB4n90RO>0~ns$M9$5S-?_39weE`a1{6){2%ItquCs(6MA zAr4~7EkgX;t=vR08J=O00pANC^tuGczvW1M!d2H~VOe=~hONdnz{BFywi3dAIJM(y zhPdlH=?{tMy0^EV^XemnQ9{_s&lLsyknD>xt&WN6vO$VfpREM6mb2S0qgUbi{2Zu| zkAo1%xs1kv(6`uSefw`Ds>I3MX+dRQc5PKE{!3Qk?R6D`d{iI>!gaP}&4AI6z~ja56L;moFn=C#{0O-HTE3~P@_s?frB=iZ1lScGX%?g+ za?d%=g0zsyp8PL38hewgJS0(6iY;pQZht7yRyet5U4!-5QPYzAu^GlqCa}0~^~I!0 zOZXm@prfKmXJYG_)UN^0ZHQ2w1A}mM0%6Z(<(Lky4|Uc{|ppt#HsW_wMInWY533M3Vv%WO2O`lchHD)fkq#V1}$>gt%C}hpHeyh%ZJq zl)^jUMuEiEHZIaA16U=}`(xzhQHa0mj_9`cZ(=?SX>{EM-|r3YpVkc&`{|980NSJodgq*29pQB{^iy!4 zis3NcSdxobsCC*r0jZw4hETb_E5NgtT!c4dP$Q&XF&5|6ZapUl_eM4m9Z-V@l|Tak z9K!xQ?EldEXFTM(8JU#(;>^0e<|>B{H|0H;mzX3<7^@F`3a@L2tE%&tpSO9^*|K=6 z+taYfe?3|^bmZr9Tcq0b-Hwx!48dCw(t{r#eTT{we+K#z)SA+hpRH`3NHPYHOCi(w z$E+fItW60=qv9H|jW_YFyJ6q-tT8@~{q+Z95s&i=*0|pG-830Z=U#4Q?jDH`(xdy= zr-XwQSuWfe^L5oAgvuJydFavwr~}HC@(V~9U0IK12UKrNRkC=xa;o?FcrmP%WP{zO zwx*$Y$t7*y$(Ldy>uuchh(d1fM4a1 z5dm?&a&S=d#~Cz(`ZX1X;8teVQ|Dy z4==eP&bc}-$u61Uau?ht^{$`<7`*FYyt5lm@A|t5!B1$`NGeDOj_+>D1mA@@qKh#G z3R>x`GvL%fF-P`*^cKF_1(^m0?7m0{Qb9SkmNFaqAv_%-tBhYH6ldK(sDds_rk-S} zA6kzQZp=if`%`OqagTgc<4jQSlA~$GqWeCY9tX>USB9;nsiZb81{qL%EARDvl35)g z+b2``>soR%v29fBJR>gsTm<*tw5!$`Db-*wjrbQ})m1=FiWC@5 zWJs3@B%46Nh>ObIWS0CaP3$j${VMM;F&4x?p*6I@?Z#3d!s0ARh1MX32i2ir?dBLo zYw0_JH43Y%KiNCKSM^k|YKo9%kdDN3SJiqn%Duizv!*kX?~yrbJ%l7-Fmjj82O$1& z!>YFzU5w(6oE}$CLG5~0pn&Wvv^hff2aERK|I=Dn`mR~3a&c9{6uQ)}T>%SuN>*!0 zbyxvf#!7Shgw^mj4ow=U1n`ynI_;k9mS3|1c6p%fIiQ-)WkR6SN;mZi-bn7voe~L8 zus@oS?5@y=L!z>k z;a3s=f6E+qf@eILQrha1bkU~b#|kd>NDr~oCnNF0KmgprywsXeej8TV*MvQ@zUU|O z51H%SEl%ud(_@s}VEY?D(-#7Uf=@Pf)CMwojeLOlZ@P{@9*ql=;q)i>To8A9nnX1$s-&(TDzuEEE>=>m 
zH=%?&N3W8eIvG{*=b{ZhJ3A|D7@~doamCBinvb{0;GD9y<-AY4|AY6X7WoG@HhD6s zHw*(O4aPC*{6%<%O%bQ>+1Ka2KiG1pB zIe0?j#N)M%q-8B%WzB*GB+rfm>15YB+#!P{6|sWvFf4+UBZqVzLo_1^iUI(W8_-Ea zi#Ug8`~=@OBhe`js{NMo2;}>`^L%?5RdCId7i8EWw|BJwN%It1*A;vTKGXo1XEai* z)u{4Nz+Ig2;GnFA_zG_vU7J5jty;w1X9^z_d_il{ongM@4Vk^C9U{6t0NdDz{?pp( z%`c|X%$YN;glm6!A$|uyWx<3>>LKcO*Ul|8ma5j81*0?tH+GV~cSz;h`x#GvU@$U( zMMu0l@On^useI`!J+2d76b{NJP`_&jkwC=upT4e|tMC+EXe97aQMVU+uo*0p5F|T} zCejX&&(r*d9!y=ZB!Y*2-#Q(DYvHKYLW`%amuwC{~pI z)H9W!L3{^bE}$#6rULFxzOxw<%=3IP$9VF|8d^$+D$$ALAg^I5osVwGUV`BQ# zh%wF@MY!T^Vpa_2Wy5j-ww}a2~9+R>yl*m`1xIE8a=CynemdQy^x;7A$|g&i?<$N=JK*k|#(AEYazMT2(Z zX)=TJ&cvu*U|IEEZjI;`GFhd}Be;Cvx*{Jd8J5J252}8GMVL4g)4?5av(28Y+-a|L}g=d zWMv9IJ_>Vma%Ev{3V7PJx@AyY-MTG|I|K=C4Z+>r-QC?9hbFi?gx~~scMk!AySux) zy9W8lyU)4j?0xUwue!Qd&qv1^b3C)ES2a0_k}AEh8OQ`E0kU_dXJ%mH0m#X#fb5Lz z+38h)7B05NjsO-0CMGrna&l2eps}+R$X?9Y8OQ_RbhZR2nmT`^enc=aaU+leB!Tuo z#}B6&z{CR}4|FzG^Kbw%1E`GuStx;=oas%Boj%+^dkZUjAoWLzD9FLX(aOTo`ELzY zdiuYW{$`6X0A!3!Z9r~LHdX**dozFxgFFL30p#{!v;t6p>;Wb~OJiGe0LUDm2Gjzm ztBR|r0wh%w)sSl)!(-NfsNt=8Q_1QeWaQ@g6#fj z0HCsTc6Q)lWOQ?LW3X^>a%KQIS}-`+{)11=(#i?o26D6kd|VxYw!nXgaj`f1pwrnB z_^$$gHw7SPWeT)+0{%^s0R78p_d(@H(udvo{{Z_S!ufAa+kb@voPfZ8Yh!8b^iQsw zl9C+2&e+P{8E9{8Z~77FZ0zjf1Tgx?_VEUqQT_`c5FqN}==isX{C~R~|4*C$hAsm7 zxHCOlFCSyK|J*TSdlx6q|HaL}-?k~p-pR_z+38;ufdF$WTi{>xPJj2z%Kjglys(0_ zgt)32z1)ZJ?CIq}AMDsOIJ-Ol6aBZIu$UYVfQyL(z|74CVEPcLxV@Pu$jIeRTm!i~U#hhY8_-Y!*Og z022WC6##TMwPgI;?jN=MH8cM;f6(CLb(V0e!5@fgcwHFDGMHAi&wt1?c1T zza9U+LSW_um|2-Re+d0!SP=fHE^TiP0&xG!{DI`ZUH>Hks(%KR`eRDXK=!sC05hOD z0;2-R`9ly?|NqR{fAo@Yv9(n&wgXcAC#C-xXKZI>>+x@q{}G`H{EL-J0pw_BZ2KQR zD<=sncc7V)m9wekzli;pUE10BL;J$^7Pi2TP5H;9{&$>gKXm(X_E`Nrx&ZW?tpDNq zP^77iJoS~>;o_dIU@YsOg3%+Bgh{3*Z8kB zD}d3#_G2>t8vY%S(%*UfM`}#}gYdr&s(;q1&W<1(pr)1C$07Z{LgbB|9j)ATnLc#F z{9*ri{r5Nhe``SgKNA06*&-q!cQ1N&4h{f4D-$b#nUj+Rz|73T;qz~~rvExK{?X_M zmH)l|JFoyC&>d)surv=cdiuJ)tbPQm z``xJxkpPU)kBp$qLJ~k3uZMu;mewPWod_8=fdU>^;MDFCkS9GUN|_aOj9ZU3wfM}< 
zdgHAvq_z^6EPlGueVA-R#*=C%>8X+)YSALt=+2Mm#+yhZ4v0<9J(2nOEYCHTPIu?> zxcU0?P{+=I!$#cVv33B>!-bG*9samprAvNHezPXD-G4UN1%=QtT#2a8Zkdz?oS}Rd zq^QQcKYQW2U>B)BGb+$i0FdNhgUrxJgucrodWSe9C|;--db^d9 z3+{SV9+1)`VFdHbvWjNHD#d1$^e36hC&vyDfP5kC&*xycUeno5!*(<%zt`mO1l(&< zXF&K1*sX!LI7=x9bGGR(v2RwxokTERWeLCW`M2z91+O+ML*n`SS7iyuw@6jmk^D*s zt#P8e%ayqGROIR}f*x!+Vu_rB1n7Q@Fagxu$=L;}q-!F)`do4cfP(OU@FT=nnjNfa zf1xpF@)C1omT|jhn|d>_yQ!g%C?%80{m>3hR044-M;_%|gyYvTGf*21Lv zxbz)({6Ot;u-*!LSnM*CQk;(r%5RU(t#=+%6Wks2Q)|#wK7OW}ycnYo#N8Dv#)#skQ`8|hx@9mg#nI12!c}$#}c#XYz1Ha%Sc!6Lr^>p}K9=3A7 zGi672rPqm`sJR{UN7Sc8zFOzmtho~h$W4r^2w~X;=k-Q6AI#1V!*HCV0Xj1K%Pto8 z&-iM^N2>E30v1%V6QtW+17i-`go`VKeo~Izc?@eN$dLYYTCZL;ZothJ;^-M35oXz+E*kuEZ*tX#;kn5BHmh?qeF&-l~qo)+C@4%{OsO^DEHp#;N4 z>tXMdNy;&1LfMu@8a>A>m(O`p31P&F-6!={Xf+L=Ym)o1c0?vZ1pMU!{KURy0Y|Mn z#x#8m0z#9xSQX1c+a9q?wQ)qNK{1)W?1+Sg8I05~f}!#D2WS(Tx8E#G9pZEHG1{&? z*Zb?IJ@o~4BD@`}F^}W#p54K)30+|kuFEM~SXuvZhgk((v}KF&iTdQe(`QvLc>5};%~St>nQqVHyQQio=q z7Y}>S#y_lc^v!%ewc&{JP+DP%j{Uw$Zq!CzoQMwi?$fDoi-;B597{`a@#(x^60RXj zl#W(yZWwBBr5~TP3GF*=QZITZiI0JTdYM z+u4LN!00Dvp*s_EyEH)3v`2&jM)op6HTPvJ+&9*PKb847f8HddZ8^Q8+O=ZSfyPrd$)=LFpTk9{q{Tkg&UNF-SS z@z41uy$7gt)9n*LCo#0$WK#!&;F1t( zeM&S<9$t}?q>5X|G-nA^L)7tWbX-#N>Sc#UL|-+iNzfDJbVDyZcP7z_PBBDjrl7X=!V1{VAi^1$h=!V?28Tl( zbg=I9zw4N9SXnVt>m#jl3)SEOK5Pq z`_SntfaPRk>b^dGtuL6YK&4|4&C&<^Ha@eQ$Z@!HD>xu&PyGvTX5J}=J1lTf{@1l? zytroX*5dHtM(k>^cc9Tyj$OvNSn6VTuN^A{-I|hu-I#L}G1(TrJ5B?eXIZggNnxv% zG`bPj7%Bd!dXFo>lw9@v%}>iaU@+;r3t8M{Z-n{}!k4E$UJoey=3dK=w)&{hnj%chmi$Pe)4+W192R28TY6FJ=TP(QpIbR+7)R!@Z%ur7kKx;?Cr;wzV)kWRq zrn7S$&GEB`#rMv-;68B_%e6k7yY-gO+@{XY>TRWBWg? 
zTO_h}naohV{DXt|yT>djyhD1TS^HV&E8<1^{WvPDXsW#d3cp^X#K9#64hm^U6=!}H=fyw&ew5f&;2AnJY<__T*}y)*#>myu-XTtAHdKL z$GJw*Wi*^q3SY-ibiSou!mB@X`aE0u-_74Ec7^1(@)rI0+4$MHXA@w6*bbQmy&c5r z?n(~Ts^wYE12QCn_4q9DbTsu%c+FLG5&hP0T8Jw+$~?J0QaRzX41~Erq3Bg1rgIZfIIy=?pKcMnBHpk^(YMx5nLr$+ss-bSTR{iYs!0e(M z2f=|f42ZarB>K^nLQ%SB6=;-8zEAPhxA1E32m_H^8T*kPn3^?Gk3+j45?=RY_N$kl z8VGZNE0~;)&&C+m2nLVK#8OUhCtdWef^9%ZaJ*(RPlOw2qAgp8{CP<{;wTzoa$gaq zPzTRxWu#SkA=g3}4~J<=wi-$*op(C7&212m%G{)0VOeI|dyj5VDk}tg2L`~JKX(&R ztg2x@gbXDx1NN#0ZW$cQRQ}P8DGbnX5wkzNigO|d(cdxNb3g>xUkZ^FUrnYqVPB!1 zK)9!>;A5&K#`OQGWsH?s7kG_B)QJEAw<%6+gOtyKu$1)+&pms#Wf%GSl7&SI+35|e zTGKgUz3F?wGGRS(d1vRcu+@{T^I?5S5HiH0IX3&_0URay~FFlK3RnQiach!4SP9jT8q@0 zg3#uP{pXBfH;g`Ws`g(4$Xo;PkqzI1=3wO>k;@_ zD|dpgOj6Y-gmnz73;0rF%}|-uI~Q%mD+nPXx!jZ~X)9d1E6}7cm%hkJYm9^6yYI^_*W( zjZh($m6R5&WK@Zilepgtbk#+*K|wT$f-y+<#7u#oSb0vz^RdPbi|9E$Z^k`T8q^AL z8iB1d4&pi~dE9dm?opB#EThLfl&}il$_MZBST||D>NadkD1U#EYv{j{&XoSNRIh| z&xG|Pg;d&U46bVDJb&JnFF6H$hElZ}&CMx}LRjZXKn<}R_y;5}?fi=DVDYS3Z~P#h zduXK^sxMBg3r6)dY=djUIH4NSz78HrnVFCQ67yh-2SO5g5A7B7J(X+xpcLruTdcnY zI7j(5Ya}w3iU+MP-0HS0f?6_@P@Y@&ZX=h&82X*0T8*pNzf38iN%X5LHOlle#ag!z z&CGWREt9Vi#-S~Aw?AX64X+yN2-SQV}j!AxbLi22Jj{jfB3IEk1^*6%uw*V$aB z598*vc3=2Zpor^SUVt&z(57**%hM}d28DTiZ6a!cX$v5iPs2=wE$?wH3M{8r;`*%uv5Yi5XUh3?RvhkK_g~5CC!|>Xn$7eQLM9cM z@V+9xZ1wE5?$1^;FvkG2t`nv?G$Y^JEsHn?lP<#-B#C*f_xvAv)yCZPMKi-;1VwYn~MlFd%Kg`DMXLzVxBiaaRe+h4@|TLp}}-_B~CM-bv!dnBxALwyHDP(@$!C| zPt)2Sq{9t3iN?du)Y~!!VtwY4_X{GCP1c>d3|!d5fLfkn@kce*D5KZ)KzO})g{qOM zlSncD?qouSj?}lyLJDe=2&n$elNB;9cxn>1`{a{By~(o^Fe5Y80=6^GtRrt+_hat4 zsL1lCmJYuWj3o={rC05xgSGb}RNrqVqfxx+`3JJr&VgSkO&4s<8WxC+Ln(w1(cAEFYi``?Z_|? 
z#PI;8UT?%?kK-*Gl;%UK=QX#E?$cEw4f<*gBksVW7aiq;JIIjUTvS&HA04#vVD;7} zU+Ata#3Ki5FsMCUF@2+U^`VrOVehxQ;3 z*P~fl(^rtlQve=f*NO_-fizWJoQUH}x1KGf!Loek*~)eO!i+lqpV$8s>7lM%Ac|@#N{u zU5@!0^kng52cUAM@LP`lwp(69sAhtV;Pc5%GP1G%IM&G_1d*q70}>C(d$XM?xR`E) zL4N)F!U*Q}#`L{nXi$imjoul|7b81~+h2$0S5}Q7}7o>yd%}Nebb7=hjn%^^~4*C)}-y)!WcQ9Odbl|#~bu# znq*ItP<_qrN!sDTks59}V=F*~?n2q~4YHR!5?fBNB_4C8ZTHSfVLt=5W^uXI4xOPN z^TqIImt^n+&mDh+ja%?1tjVCq2~1>Xo}mvT@N#8rdrUAt*254WwVEJ%59jPKpLo_( zn~XXxCjwzzSkF>&9B?WF*G)xbGFFKL_{fhYNc_CgaWr{t+Y-b5pFfilr;Qp=-py6> z4a-PAM2K3x=q6eWYq>Uhi0|{biwnOYKYg}`3$o$6FZY>-aJIhd%@Ej{!|-$W`B4=H zKBF@82n|1lAR*EnEbB695(WvM;`8P-!0vO?Nk>F2wIG{Ad(D4GE06Zvr7~(Q%OH%^ zt0(i_%xaOx8Y_aW=IY%zZz($xe8XYXafm%M6qfkb@b-Ia&TVtS+!9d3uGCGYz05z1 zkkqlO$%?psj#4bO6$46WA1B6dWxKR8-~o(=^7?E?JWIOUZEGg{CmZ^KseF18k-Pyl z)yrfk`tK9W?wxOPYYIyw5_|~gTvgpE;_EThi;8b{QE&T;I4si1XK*$d+8N13tVB1cpI~O%a zahRqj&Q=W7_2qZ$MxcUjD>)59GX`65g;IxuN@U`o} zjDb)i+$?k5)vNtudXB4i8%wOd8fMxi1f?gId@^6r(1Mm6v_2^DCmN9}7FISpk`it$ z!R)a#_bF2?SDj;l(iZ+mbd*8z$YJ zP77>Yp#>g9-3Uxfy;B=;jTC;(z%EcsH&o!B2WyjD17$LGP(EtDzr+r7_+N4GKs`6# z{3gY=+BDifaZ!x^UgIJh1xfF<nvk9PB6Hm0=80n#y#qVgfsq<% z0n%Wdr82#w5Lk;Dd#J>9k>XzzC*FN`-0^}OzuaDy3#OTHe>7`wx_EuO4A6I1?Y~EN zXTw9qp#qxla3nL|HEBp#Ds%{Mn=OfU{fHCZ%b>ehr`MdUz!1JGJcOyvhOA7>rT$gH+!x;_0DbPyz zsB8%;M{h;fiss?V(06%kzm}_IpD$>{8JkK4cG%0Gt4-M#27@7$d)kaSE;)!P+L*-$ z^nyI2P4B8##vu=50O8QT@W^3{_G3eg8ocYb={yc6oD~tivGPM|Lz!22H5EW@^`-5FA7!LvNHUxRV8n=2Wz{_1kU+8 zG-Bnb#6z0?%AAQg#FF%9#6E2p2^J?T26H&cy8LKtd@kDA(fya}mT0 z`v*qb((|tiSb1V#LH^VYP@P6WiKX4mX^;s95Q&sW*jK(vB+-lnKrYc7{qZkH^Ifc& zaJo+vY*StP-&E42@H8aNK z_k}?NE$wTssNTLs6C$NQm8!jvr?O*+E*t)Ero6)=#E+dGBUQF?+2GD3*;r zq`p7Uct6cupT?8ANdOfV1g5>9eb+?%XJHyV5hnv`d&pR%l+~Z;wb#sJJvVZ%A{e2Y z{Z>Nd-k{pN_hCQvU1-`69)*+@IP$zKcjD)1tUJ9#?6W2h9)Bm8B|PP0>>Vq{Kx&f= zHDI82@G#hwcOZ2^*jtQJpxitAGu}lp=15anwDOI`RYB0C<reis%Xv~5^ z@W}rPem{0qijp#i+UtyWws52XguJx|V`Yq2{6_HT$E1+7u1r=TtqL-ksr>Kt=bc~AFi?aG|385Szv>j*n*8OS>fMcQ%n z3w0NF&i$2FQ3`{V3S5YNSYp0t8HLydMC0RD8q3XBI5M1Sv%QC7@}tTm`P*>i1?9I$ 
z{2yHz8^6guF*cPl9qbWHG?jvCVkU3UWH7o*M%tCrVl?}$zVwwPx&&lyKnPx18}i~Q zS4k=Lzb;s%3{-#`34|Wc$MQMI({m;<2}cB}RiEBWN}qX1nc&>UZ^ZYDH;@B18+VjP zUOnJn$T8g4`-i(Mpfop82y5VR3Fx+c-%RaZ24%MP)GY^ zeg)!QV4dlwfg`N7RZ`7>hAX#pH05{o#xxS6nAH^(@@DIOr+C6)J3%};h5k(D2=n$u zx=4W+eSRX_NQ3?-Fo8`i!g3LIZDws(0T=zIXf6jnHlPA@BBF&iaw5%xT(~zYnMhCK z!cJa$SWz3G(XS~~kbXe6uuNyk^LJ!?J^!D78z4QenQ!l>xDI!Qzg)MO9V zBL@pfDKIhQ3R9)T4$RMpfsx-Yg=jUT*q)o~6?SctRr$Aw&Uv%90sQ>~x+In;LqM$e z)L+3K-lYt{;}Y3m_X1!i=weI4pmd%OMkYpElZtQ2Dnb(vg1ZBj{D*P2}9D zblx|80U=@!lk5h_j7@vp4_0)IhU%%63H24H1E zQuq5Tc=mHf>$u?T(SS~%q6&f4wY&9XLk#Y(={t1=#4^&@pKy$+MiiC3fPU9XZKo81 z>DDL7ZHD4Zx){ySXqs~4{I1DEI?AQOcxcx5j^debg5e`DKk85`a+dmHfaTi`y!8yj z6XUvgz7jjvl@%-Ib3yX<_@+y_VM%yAF14mHa_HxQMKGU!iqyxT5BwM>n>v(>+`N0M zTFxhLxJXRa!d1%%Wu;=R0+la>hT|2|SGe7-X(z4>PjrMCj^rAx+3pt?#CD>de~mEQ z)hJHmGT{_|@fvWibJZ6ih%=mR`3;|B^mDAFfhC~1!vQ&PocB3e~k2HtVi7%ixnqPiZNCA~Il)0+`~brFJ_p`=v7 zE0!ZxAN)&G;#8#EH5efclM$+cnc-y=|~EGL$HGkA;?rj(H!& zFjQr%f&li9Jm1q3))OYTl3xo$D$K?MOvW7R$k`MQdF-}{kANTvJN09Pfn0HDR zIinJ!w)v)tiVXv&@B!+V00&St-Mw=q-95)oBBU6NCB)dCn||OfLCaD*DF-zMeWu$U z9ww}3NE&^y1dHcO(af~Msd}wLF~8*T z0nKBH6uqlm>*iX0r7Xk$<9VMxdKctq=GSE-LcJa@DfvC!Zrzr8!Mg{q!Q6nlTZM%- z*iReW`!yK&=m8CzF4d`HpmGhZsv!DWqIQpInC^*GL>Og>1{(ZZ}4ZkX|j?b zMOS|+IAjSpg9*@N!m??a^jZ}TU4_Ez5>?GhmPlj=Zl-n|YFUv3h>NXRFy*;NG2hZ# zq~w=PzivF6kUqx1g$_IhN5@n82ZImYyLwF9!GR}|>^hDv?^SUL&-KcG5dgG|i9AF~ z%ro@1gZGImK$5KYo3Kdw5OWs`dXJe2$#g3p(t)DNydv)yWwGMysR{!%Sr`ZJ(rgSY zAXPD|JqiMer2H9`j{EsjvAa;nnwaDO67PaS@uT(_za4^ijkkH^Eln=Z^ZaA|!}^^h zuDY2WCe;lOj?5q1kA0LxmeXiSv>M;BY?ZvE%kH0Lz79wPT{m|$6BU7(^<8)wmh@rO z_~a+HE(CDTt{D6ClYbVsv+t2BvW4o%$@&I!I3z~z5huWIJU|78>4tu2kj$1{;Y9s* zq3Vjj5aVb!LG5+=$20!dFH*Q4U}f)rvXu2*eZ=4dxY~R}lV(gSPW8O9D#K-suS9i& zV5s1?kU6S#FlW0=*u8bePL?ni_M+T;R55Mqm%yQc#C)?O@y=Gs9L*gS6-$ce5XZhN zK1-|kMtwXL1e zJ68c_eqG_~m-?E#>ET3N;MltQK`8yiLL`*lMR9>#pbjps#! 
zy;Z{PIirAcZ^33n#|`B-+58+g&6Q2Sz)QiPq$1Jw&=VExUyPSMk#hT_L^xsfvQFnr z)vxi$mO++aHZ$wisp7^*MeSmU5q^Q*$qtzoo=f*N1Csl}yxqh_8AnyT!mC+R@6At2 z?(0{@aUf%UM;##PX-z==O~E;53NPh;I1>_4h-ZeHs+E?qt%?>spm|VwGJVn>FOIz!=L6=l*yh z^ar|na{YZk=fdTuoirU%46dLt#6K>LsA$QSxyFy(uEydjKz&q}YoxTYjL-Qh{>N{` zBal>0y%d~~ZMJs=)jL-Q`sk!tkIHu0&M6*LevB7ED*vzFI7__G!^yFtqX@}Kn{X)N zGkiL-F3ZeD6q6B2Tf3#G-@92=86se!x}}~XVH|5}gaPRfexvo}tI$X89aX|8m5Ao< zvhqP`H*>ekX>epibZ(l>OJ?^~=~E1ofdE);9ewxylmQM~0&&5GQTv4MY`ue3c=Y2z z7n$v>DxE=Btj=yr^-kWiNV4)c&6eC{OWQk!6vk!mtd~_#hlj2MV+E<-Y_lISiyQr@~>obUzsf`)Oat7MgJ6SjVtFOZm=(6%yUp<`3;LTj5;cVqM*l<=c|-|pIcMhX#Iv1&OpWb zrpMS#8%?pssoei3nvu#VfsqbJek5sIb*6P`<8GB}mm&cF&{_jgy4jInp5I-Kzr-Lt zP~*IN8ZTH_)#Cf1XDdfhISY79uSM3@B4N)_Sn*PeXt}+O=^kWjIkDt|#jJXm>!D5y zVo=iCW)F+^`$?9w$@6Qw)8qY9qSA3gG9Ro#t}K)GO9b3QqqpRdaKDK-%8IZo_$0Tk z)CfAlIN?r`>Gd}ojQ7HZwu4qPxY9uq8)xq@mnHC_i<37ncX)vUALK&PZ9zo+ZoUbN z;fSkRO0k3;(+;@UcRk`J{`gacfCxsjGJf|SxbE1u(}s?XKO;#U^-JB}T4kb)Ot7EZ zC*fs+g0kT@Q--JIE>1%#LFa@$==V!EO-$(~)iU!-h?mfcj%P?Y;zau40X*|ozN!U0 z!xM{!BD8m@`mxC;#!3j^7@2dK^Q|yE!ZQ;4!jspBCdPcxYXR2f3BQ;ITwP5LHs2s9 zllB>Zv4=)P!@O6%1eV~~SOYz1u^f@BD#Va9BN&vNlK1tC?Wji=A9jdJ(n1UYdsF(e zCMbJW+Uf8;;^lj>U+CrsxBOllWx-=bk4JRvHAIo7te?fm zL-k*MeVg>Z5-^0?UpBau?n!Nl)};vGGg7O$m{Au~Bzjkd{(=o>!fbj*-@wt#3JA1f zMItNMw~k?X%Qso%X00f$t>Ts<>lC0hy7w*LUa?CnvP^HM=D;e#wuzW;1$v;x46*n3 zGh|$8XOWliS#hEN95qrKBhVQa^CyqK4d&xMo0r#qP1sMFEK-6lk}35KqukuEOpYt! 
z6iU;|b8g9DCz20{Smj+nX`r06`K4bR=?1FdGS;Gw?8c|~Viz1XuFyLyq-t4N4c0=J z*bM&j4K0k_Q;aBW)Ft54wr$(CZQHhO+qP|YpSEq=wyineBs0k*|HWL^Z6#He+V6VS zCT@5nTQmcqIm3k&y??^fSVvwCnB8K`W@(n2#HrFzGXT)t<0Lh5{7Mij z@T2zZQ(}mTDu*Lvc9_rnZ%5Rjy?-kdZ0K|?g?QDGLRjES;IiV zZ-CJFN?!twXmA$@nG;jB$G_Fr4<0H?gB-wD!2dEZS0gSZcdg!d!0wm2Y^+5`ap5jB zD_*_Gs=%6}-f6?Fin2{E?_)AfKCLf5>>`{VYz+^0UT z8Rq;GMx^eE@{z@$*&68y2@x0`7vkX`vWR+?Ulj`ZL%U!CPwp=FG)}xFndSWe#5bAY1kB)LBemW&hDgyj( zsA(Pb-FK5NVhk(;!G`48LTJuU{fJ1(%tmhCUsN_tJ4zO^jp~E(3B4AJh$3dY)R&8> z`NX50q0L@14g2t(AwW0thN=tlC47B1C<`5EtWte=(z&_3jI#UyKEDw>WEWG0A?E&j z2bi+59Ra~&c z@0G!j2>}PO0uhgE`oBbTNYLOnahudal@;j8?wM*op)rsO{n1qU7{}>P71KmbCH&e( zVx-TW6^-X0a`B?n7)TuDd~XD+be{7{Yc^0RTAt9~RRPr=&Qo`(YQ~T@@`l%k8Ra*n zbvsecVSRJs8VP!HPSBth34V>}20e@hQ@mMc8=>r|qpBZs?hb}5;~-HhJ89%oWFjkO zNu#OW{1sywQSPy3j=26qNncf_#|5EWX{6P!>0;-cHA)m%+0@9-xq#ZnIUrQLPa-5H z2VKKcwuNETgRQ^9xb@3=l~^9c&nc}FiqBbM*v_f|pLAWDd@X}tE_8R7oyI>y#x*h8?qk)a9_yz990d0*DJsud}(2 zwGREXSFCNWj_ZatKBPe2#L}4S(LB9IrG720Y9?Dy3~_jN0+jW!GNyWoWwI1@s_hhU z&7hAdr`Ec+*a0Ims`~MN8<^tZ`mU`ZhzrU%f(+{^^(JNe_1 z&&n?(Bd7v{Us73IP%S$8rK&zXyn}o7rl&r%w8T=34sbvid4c#RpOlDSQb^`|wu}@0 zDRlQ=Ay)a%$?xJR&7E9FU~orxM{FZ7)?}X9pLzi8fLK{ozl|RttY<97K0hHZDHb>2 z&2O7O`7O?_t*1r?4*x)4>wh2+luMI-bMt4o%+}%<*dB;$sOq+s7x7xaY@ly;JTj<=Mpb%cpNA%>G+p@mXi@j|BgQXG0y6^K&vm zMKuX@65(N2;4rI%~tdK*+3FTGA1Hg zUPwRK4FEAv`0uU(FmV0jfH6Nxa>ClVaD9Ux0DU8q5c)ny6~JTC(AH+Jp&JnQa6SjB zcHA66I6Ue>ZJcDs>b1}VHa39r-c0T#jo~{q{w&>WYZ|?nod?lB*nbbZlu0OM#m`Sd z@(;SMgL)eo;JfQl_ywc+XU7$kq_nd3{UlE8(t*d2txZ81|I|DA{6Xj9#Pq^%>^b+( ziGtc2ej&AB%Ky|W_@kaKpnuwi?B5;L1lNas75-+%#Nh!ztq*?}cYm81h0=F$1^UzZ zBgZ}AH*#OMp#F=s=a-}8w`w^PtB;kRnzFu$@y#cQ9Q~gJd{*Bz9(XbJ^ZNcw`|)GW z|F8@Bjk8uzsP9BWLc>DykH|>_>w5`E!s;6tnK^#M`%eNg{zyK>jQiF7PEq{^hHnlT+H+78ZQI){S zXPunF*m0DlI8mCms+BwTCMIfypN`78l7)llDNPC}q(3eV@hrhZj!5Xrh0W>ZSaq68c|aAJVG*?g33Y3nCA&c`}a}hEtq_aCZSOP^WR;<4_$wxQ%hsknFZlChPLbuvHSe_t z)&!Ioax%g-6|n)@BrHR{W+2-5-{u=|1T$z?Z)#OO>a7oAyHi99^*s3kEEDBY1RO7$ zP{TN|t_WvygQT2YrMcHN_*Iu@I0)@si?N7l5N9VN@gDC0)(&H 
zwx~0qzz|KbCv4ajMZ{2=HuM3&97<fYQ^P%npa4%|UhNm_*k3zLMMAHL#`&_G~N(EmhdN1%!aCw^~cM(eg! zDCitwa(P!Y`$=%0jETK!8bT!B94>Y7e&zDeBedOF)XO$DXNYl&kbRXPw_7gp`rk$*<=GOjukNy*qIbL#wlef5U>LlqXL~b;JP?}DPs&zY& zfmWX(tw17GMZ8fYuq)k@Eh`j}alNj1wH!t1BS2%x2_Eei+2|*m%|PTMmV`}s95nmlk<+#F)z%^%+@kX4P}`1e8Q*;^}=i+knl^9FMfq2Vda? z#BX-px7ou`2(jQ6ogsJ-!(8ni&maIPw>h?}@}UOiZ2MXO4o82RU(Sw^#CUY6l!R7` zgiO)jb{ebbw4OT7JrBy@yQ|6v1IL!|l;@F={iW7ZYF2Bfq!UX<&{VbE*rOw(H!m!^(DZKXn!wLC0H(NvZ^uuq#XMD|H+l#!>=`ivt>Z(-EZFZU)+lTg4` zxiVhmzPIQLl#L4teOsE+{Nzdmb=`mT>+`+Y(B}H7){I5V2Q7pzb<|V3ci=fcd)wi% zDq4|B3QdX*aol6Dub(E<$f1L>Jzpu(QsR)#?zY~xx168ldk0>*L%Lqu_`S#0y89+8 z=((wVj+iqkX#4MK5Z3?)U;g`+#sIRXbJ&8e-C{^FuvEH2A2h4|5P~^N38{EJ&c95%FQFTsXBIHoCy5ao^U4Z+4WiW!n5E8Fc;Jr z)fir9a>n-l7gpq$z7Y{R{7W99viO<7ePl+F4SdDFl zI{~sewO8HoqK6xQyslXeH4h-cU+PM5A~FTQI+l2PF=i-5z5=mvhM#YqvZoR}vAT#Z zN^L`71l`V)Qq&llI`TfbQqUmD3@1c(WW-wrbB&4I-pLkV+27swQe5zBliwt;@WU|z zx^HMZZ!yG7K8PrpH5R&(t>hdr16O>MZ0(e&(*V@TK1;h9X0YB6G;%Evi{2^W)J=UWD0zW!20 zA%+%r&EZgOToxgxCB%b8Zj~~CJ32|LOf#o1BW{FiYHH)iHY?BaSc4-8qf$MGMJ^Kg zw9N~Ulp}9`w3BB)n~7zPHcil{)!y>QHN_R{eWwNe>3chqVK}n^){+CPQ)=#aeG?_(K0LD%)NDLPijJF8&}9MlHsvz zX=+((Q$br!m=(KS-9YTDu#_J!AYw$f*e2i1^ux3@IOKnh7qR;5LyfL&(?Cq&@cK-j ziP;@$N$Jlda{DwiR}U5s>iIj8b|&0WRSTKev!YR5%){Jv;fTY2ma69{v-=hC75m(3sjl1a7IjB)W3E3TGF-u|^yi%Igoz3* z$oQ}bN3n7>MR6OU$X?e31Aw=vZtlj8Qt+$(OJEJ{>xcvW+Z_l;(hAq}WOqP4Jzp8Z zTpSG8`i@+*SF1etp6Uop)@pgM7(^0usU-_tx>!TIFO7XUn-xG3zji$!`x_T14XQA( zsU`zRpr4*$zTo4QDrbC!H)5C2hEp<2SnZ7*k1vtXAVt*8$=FCrseJ=_I_`m7hMw$S*aY5z z;;x2d;N@xrkJgu!z#TD<-z0~TJDE*7RHYij23N?_*+Otsyk&~1=hB5GRUYJ` z$m!n^c%lf^7@bQiryFgTpD_DKigdKs7ogl5I+%TnFAp+Q&_Nd8?%p^u_ZmAreQCzbMWA4Wd}~gDfGFbB>BU=16O9T}dBej6%6&uJl2B=5 z$6JQ?o9?vPYC}Vub8+>eTk~8le8@xzCnIr)B*Q#b7J!3rh=Jr5M-7^wOos;MhAn(l zIPwb!71S7zJ2JNoqx>IKDEr;{CcA=+j zyjqFA=NiuFV_)f{5iKFN(So4phK>>)wC1V4vWr(Ck(Zo6zdnpca^dHNpbqiPR2^$b7kh=fb{r4!L88$*m*8lA_M%%Bti*bz_piFjikcd` zY%-53xzk z!84zWSNp*91kxy(x9jm@| zWu1)0_8BCk$3wJj-6~eANrB=!unft*(HBRTJrVQ#rkpO8>u>DFAkei>C=W;^JAgv} 
z9&{~B)2^=z;c82dz_vuxHa7D1z+H}Hs^fqp$P6;s0h{Qy#*Q+;I#FuA~vl!Ke$ciZ}!z#peL-^g(0!E zAmww0xcpf(g`iv-J>rVy)Gg%hFGQ8&Y^zqq?lQ5yB&^AAhoeM|y7v*H5=<1yn#OO;jyXI1U-9*YB2qtk$`s3nP_nwhPN9ob=0Vf zr&*8+Te`*@v$zHf2dc9hZ}+D|QH}`{5G#NgkLP8={b%r%o$8z0{7&9R*RsMfz*L19 zHhY*+l)XGjfRa>dWFMAi`9z}fhJ8v9MZiW)sLtQ~z}tf=FsNgePkY#=7l{NN8-Ms_);MO3kZ6CL4X2l3 z)omAu<7(@Q$nrG*opG|!`oFo#0x^3Ml`&&S`tPK*had-{JDF_K-ZvLQ z#095K4fA_sD(ky8S>3R&nud+ua+6G>5q{+%LksCWSft+Goacd)>j`zlqb_}Sz>^W` zUrqA*Y-)S+c@QWmZ4nQ3MVRDB+)qYBhk8HQ$FmmRD)50$?l-UR8K6l@+s}-k_a9-q zkWrvSj}hARhDNixT8CwUCfyfN>f8odw<(T|B2NYQhaXbGHoyf)%4euq=c$I3aO^)& zhT9PgD&d%f?#6&2y0RVbrs#Zp7@pyraXks#8D>Gqy=QC`V{cf{Hk@05!0US`sa zKrrZWZJkDW>n}oxt#y&vx?l4<8s$B%pWn0Joj;I81HMO8fTnW#g%`u!yL+yQ>!fb?|>p8{@f9VaOHB>@pWn%z+?M zax@wHb3sgjIxFM$xZ$+#shPp5y^|*;zTc3UEWVS4n{0tyR7iFCZZF{))vMYxEmMF z1)Y3%KgYz|@WYTBh<$b^;APoiE%DGwZ-uFwbS`Bvf&}I66`SvF&?{cKBoqVn%&JqP zKR8`68HrGg@;p=$2pl8sPTH=fQxG=uLA~bP_eC^$oGId&SOsMbg&PO9t}fQiQ1BvD z^C0VQjxUddInO=Q5!k`*AUgD6cuo{6+JCgJ(k za&!~OXHE`LiK?4gMvh?%n>Y=~Y+Wz|ldnT9w$e8S74LUsuPk1}4Agx9stX;73Nn?{ zOfL+9K2BHKzgfVfLN9t)0+o+DAF`mQo1wF&I)?0qJ6QPZ6jzD!+)fZ(wI0JOG^_f` z)l4S!Y`ssfL^Jl5J#)g;r5b5XF4%{Y&AL9rsqe&) zwPnxUhc030A{T#P6pvu-uw)wp^~MC}n9!rMstQ8c&MLz2WJ7apumjikNkL!Wc%*AL zuf^>>xXX?%d%DOS@9YRikRQMgmm&xApt6&Rj0|=lgO=3VB`qTH3a1+eQYD42KFGQs zS@wpyr!k|ed~C)dC^pk9$2+G^qL zV+!z{>EV$U9Ig<@iYHV%$ta9`yi2^nWofpIiAW%31p4nd9Av8@=|1Fj`RT*kU=PDJ zXu);Apq^(!$}E0lokvTL+!Q*u*axGpDqZu5N(L2j5|-NzPxD%2OMJ)E6yLrfCMtU) z&WzOp3~fpj%tU)lzpkh#ApBjSt&qdA9EzZD?2{}jnd`{S%-gb#dWp?Z+MfDs7Z|5E z7O%RjfggSC_v)6=ZK4u?)Wzh%ZD9S7_S2}|7hylf&BRsS0(nr zp_51PiYt75dZG_Y?Qj&zP-i-EdrDToSp_bcopM=_iW9Q)3=H__e0RC=&rOE-y%=~} zd8MpLkDI$&3c4H(*I3R(UJuz+5;ovat&MO9<4D$IXsS?FT|Fu46^~F7hWB!+Q;eA0 zVil(SkzaZts0voY+r63uvvx!Lmn;S^BRS~d-*KHpClEShXrT0L30ocjmM;*o{OWSz zN(8(85*rGvk45lShyjHCH{J2WtuaNm}e1qPv+XSxG8NZWabf4-V5@R(x5Hm*G7BIn*@NkiizulS-*ot5kd>za^Z zy*0R8&A}h26$B;6BDxU}6r=2=e|CV|Zyyp&VS(h%1Ng=`ZD7mWxCXJKG+SkkbNm+% zHt|LLJg)#)LALq6%Cf!`8a5>N!6%?z%z3bZ!Eg6MSfPt-;BdZ~F`tl#4`lDLTI%WS 
z7vW#*j1K#a^h5f{0W}wM?7F}3?K&XyR$yaB7FP}8=We&?DQe}N*{cm0bSc#v+w*{l zL2W0I2{4J?QsL_$oqMqOH^d$1jwcY-Eg(Jw?H+jQv`)A6rsl?>Z+_K!WHz z$0f}_n{b#1IcW*4_^Y_97>kYPg!s9wx?sj;ec0^qYro|EqlFY5A>S?4XgLfpjZbr$ zNuJ&yg-ju6_W*va2TbJgua->Pr?)TsQl%cO?a+}#}E7nq8uBfKzL!4>CLc|~Lc6?BBgTm{Xiv*mLv{9yXwa!`^X zX|g~-4iWl~FHGDhUlm=pX85FWLrVfqdxgMr3kTR?xOe6c8Ccv?xQER(tfa5Z44XJ+ zS7cm~dF(7;6ki+3xGpFm`r%rPq%&R7*@tj9i(&Z6sXQ#+DqD;w5+rRq+ZoA#H)y0lNPcfsPlVB}Ow|4n`Vo z187c0j%jlcQ*7a7biW1^$D$$k509M(`(rD^p;5?RG9)9icJy8+Mckz0G_mC%k);!; z@MmRV`nLmNoJ=xEXOiQ6r|aIx$nxN4_Xyz@2docY@bi}ypA<7Wij;B8x{&YH@gcZ? zYfcqKz-E&qx9E@Lq_3YXl)PRj)DRE}(J%dV8^>{J#SW8PXc;y_ou2_pxXORkNcvXT zRO;(|j*C_%xc9gOvh_HC%MFF5{!m#Z-b#aDHE>0=3em3%~Z$cv`?gE{Ox&uUo$G>=PZW<-%x7YBH;rmKqrEKBX z15}@SHC=ozyK7tt&;u{9Sf13-zHXwEd>dE8V+lMq=kjXp*4K5h{w1ux7;b6|gS>Av zFyFu4tr-sp<2&2m)FeHW4#J8mK}eX@wrX^)eA|Oh%*0}LT3&r2;ZW`UQUZX=G}dqG zp@nagljp^04GNZ53su#UnuDX+>?N9~IRdiJ7j|nT5{^d}QlXexgpqxcR)L+5;2!!N z_Z(Myi>fyTJj`?6)&T;n7d}0zw{fVG)_q)+CWGFxx)2|Id4<6T-4!hn9o`>dkp%#9 zsf9lo!cv$L*r_F$j-n>Fu!uLilMSPKh$eszHLPYhwfN+nB}F*68a(10@I;h#Bc&)R+mt<^$;R zsynjN=IIa&Vh>nT3~0=91v02o9c?>2WRkLhTP!$#^&f?ij*w+!CuZ^!S)n-0rYhB3y>~%*qQcMb z=HCXcY3bR&ST2N^Z2OdsulBfv)JKrUO52;ymsakx5@?p>&>`(_AWCn^ zNu$LTU&dF#-PU{EG5HE$KnYKpirr&M2K*sEq@W~yx(Xzkh#rp-?k^RRGIm1@Uuz*y zKEMYXV*{mpMl03_I#5+s`OFa)lO4}mPC>?%zfH|4o}dU>p8bWs^SSSB*6QS+Icaj_ z2TdHFDnQ3C(ZnkQ3qkBR`$X{PzLp-t6`RRss0=u(;Bj68MTLIU_ir$1u@TnqwXn1@_nGG7xZ~qdh^0o zzrL9+-fK-%B)tJ)Pp;#HVfk3Splj4v!KXbMyNkUtGPmjMEl=I)-6*~k+<1wK@?_&) zOA!dED){&v-e;UK`K;`Y8sCo<&<>}bNf*dxV54PU)3JBM8tEQh$EuAy>b+!VusKee zb!whKO^=XIjMx<<+4Kx9VAHBX{`~t{tbC%;Sy0(scSmRB$mt>^<5yPKIwP_f909RqW9b`eRPuow3AedeNKR`*2it5<|R_)2$>pd=L_j>@EVV z3x50Xell(@)JP=1=aWXl?tBe96PLSYg;nD;z<6tg=o+j5-^G2C5x%I!bwvuP&u{wc zliNdL^ zqt`u22mj%%ZdFOi$ZRREb!0SI_(Nr^Y>BtKZN=@ZBU$B=NncN& z)p2*pVdS>jrmBPfY_3k-1V9v z(1RgMm>MD1+PlTo<}?%WHzL#)tt3G1N| zeW-?drSx|iFaU(K=-t1GA8UP`^aAP~$s@7lq>;h5toXDO(h($=m=ElAb-Ljz5@Jtu-7O9rm3gqorLLATShE zpjBPT{XTDjwt85uXkZk)V#wibCvB=$mGrR#Y#e@B%V5f46E3W)N@~s0fNM7-bAakV 
z#``=pVThakN7=Z3fYsK&)dd3xSoir0uZ>)wwhNprT=+btnfq3<-Iw1ARzT+wn}`YI z+;j#WmKMgVb^z7sXD!;+xXZ{pn3=B^UH(7=_RIIqhzqgD`H?DP?L^5kR+G;OKf#J} z)u_BRq1^EPS=f^<0qp|~3jnO$EF-JMNp*9v7*E2p19dk|?vC%X#wJY_bW3n<@n4`q z+<5Y-qfyA1*NoRUiP~jMMtdjdp@Dq-Cp1m#yxvSyResV4>&Casp&vHR@zq6(lA2G+ z?TG6`TdR`WQP63G#>NQjiJ!tpouFMkQGdIq&PH#_0!zuMEX%7fMhHb_KM2G|bsdK> zQ{a9=4z7a&VIE@H1+7RxLqE&R0QsBjWVt9Yy3o@=7Thtm)4vRET6q49yoi()l4Y!95(GWVte0be z#uDT;lwg|uD-~6fVfkF`%gHOy3gzJB>FZKN`5N4AkhZc?#<2EANFPB@FGO^WLORG) z^zcyl+5kn~o0kN$Z-u-$)tMtfS8!~Q+QSO!o~5~Z5uJl9BuGxjXR+g_53Jy0tF2KR z@1vLe&HD9>aGjHaejIKdklmbBymHf?)7T+ia%e01`lhYIMn{+G9LH2Y5#CiFjZMYnUq?!<4B7T zj>J3iN;ZPbRTP~r?5VfeG(_dH-C8X(k~o4MC@8!x!( zkXju#U77;&2M5+2_;$;>3tR=P+Wi~Cgs{K3%xK@fKCDs$pfWRGSO@trw%|*M4qpbckzgAkmP5$pA2r2s)5CId#n4A{s+!v zvGD>@9hhv-`roSLk=2q?+Vl89QOwLw!$MJs+2%u5 zNJ?PIGU{y8GCX95rt_G-Qa!8=Zw*T}QFo6Wk6|u@m1`$Pd+ZB3uJ~mGMeOdba@~qc z1vH@0ELa^HoHx)5Tu<%oBs{Qb zUsp4Ptrj;$&23C|BD1txeBJy7d|NG=tSR1oV7eFprp5utgu1iU2!qWA*?@}5lz}G0o!Fbt%(Xn+yi{a(T z7eMX>(`ClqxZoFzfBl#%-m1yjk4`!H!^^5fTU=8?CMkPu&0gsHZ(PU>K-7+bMm1d1z>)4q@o=omW|+>)4ibI6Bemsy*$oRGfE7ZC~FLH z)asfj_pwbj&#&Ut!zBUca-%LwV1>JVe!~;(!Zbi?lw6XBv;lsgAo-cCl#p#BzJkq4 z93U;T@RVAyu_4L3)!Z2(v*!u?V4OzHo-=>1A7<4Br$3%c6T@^@DqRyRZ$8$%#WKLZ ztTWEJA?ma((!*B1oAx1>#*ACuj;qHDf9_oL%nLa!QTtth7KuT2!6vWLWd>6b8!3nq zGgRFco+?UHHx-c%J)SrFoz(Ay7*Gdm$#JT#Jc~Ml zqCU9i<=RM&9p`b`cVl$@>f*A-wnJMz?2!{aB@U4I( z&$c$=f4e19njiKao%PqJjbdhtS|DT?^m;%;`)>IM0!En+`ZZrQ8{5ymt$Rp1aKJZo zUjCH>i`y}|)}zROmA(Yl3U>z^E0CU3Rk`00|5{u4 zJ&>)wk$!m0$1Z2gR$Bp=L&7Ikz~B_T#Y1K4xuVMXGTy|TUX6*Sj`~_7%s>QpCJ)c9 zOxT<`*crAlSC*c_Lrq`>Mp{tF&q$~s&WzhI*iRb7N^K442yDXjY81}=yD?Sgto0M8 z-KGrW5%l*&{`ENe6RG$&!>Ych=7Vl?-4--&^(9^74>aIXr|lwC`TSlY5>e_B}FobK;TG{eQM`M35klXmMDQYqTV``}Y%HbFcwup=YYh>Pg21Qm<9ii z^}&ajZ*1r?jEhdQOwb#sAGK8-?I0gR2#y1VJXH@`M|iuue=a>8=ma#Lf*Z9(X)R4q z=(+cLjWpTBWkk$k*b~l~RT*6lj83t2H{7>-ut*KFkdcilw@G(5-Mwg$3d%3 z=#!6R0fci{2uB*P@43lx*F}0sf!hNkb zNaO+!O&wQpJ}6gCd~VWI)rOFCJ(Qd#Hn#wnFE#ej%s%Vjp7Ls|VJkUN9UN3`)ig?| 
zsk(?%W?^QOm-!c;)w53Cx}+y`DIGrD3T7o5G^x%AS~ittZg@8Nro_12^@_IylS7YI zksa~zN*g0BQv9zJOm^4h2+3_M`R^M{Tm9;&a^$t8a*wL=m$do6_&N=+KH43MLx(Rj>q^Q}D=L$3{X&Ke zpf*iL2eW^5=aWEui8xj&(ZNQmbij7fqBHU5hb2OMG(WEq@ZL zGPShk@|0hu(Kv+bg9c{pYY6V{$({ncu2$`5Jy~GA&ScMmFYt48w)UKfOAqxDxsIV3 zJ}se{GG?P72y|jYch7J^ErSt6J;ZlX$aInuC*xPqUK~kw+A)FUQzyA$-w+5td_mb> zz~&jhhc;qGs<)9x`=P>pNAL|sO+AK0ERai9VMjdZs(ZlUI=VtPHIVAZ@&qQ2=ZTf= zl0xJ$nMw)&pYYmXh6&Z*=rj_|CZ7HHt$w0#2X-XL!*1f}!J zU94;DMpF^0vb+MgzW%+y!3|>Y(G%xjDQMuE!jf%ypka&hFcV}`r6Kxr;|5*p7ab}t z9u2Tr%)A*HpFw%KZ@{{+Y8J0u1f5AJxnwGIx6eG-ovp5-Eo~2c)ItMRkt+bi`uu~Q zlBqQGU$w0h4k%RFoSut|h28=)+m1dV!AfKtU$$SEu$%Hl1wl8v(%>-wyx9RZw4dRl%ktr=CBEB&MFq@{{x(HemhZd9VW zXKeQsuQl5{O8Bv@$;FU)JlZt2aR@ciV$F9kF&~!G!5Nka)*tOKqbqc9gj?ZB7Ys^g zS#Z06o<7Lkj~`N)qik2|lR+~}t!@(Y)tc3#(rOFgnk&OAo&yM~43Z{GD#=f9jYNW% zz4`cGpZ|3G{sczU?R%w{72>QZFLgao&ebTsLFGAgW93NfoshwlH@)R<%*6N}y0S^+ zoS{shM(u-x#kmO};P8ZlSxOZ?RGK~gMG|#pzALFjMNDp8j=7xT`Jq-Ilp(d+S`rgB z6wRv6mMdnX-`p9-vQ`GKoxV4H7(p$5zA7)7bs9eY9%srxndb{p7a2>!JCgc0{GupO zyr}oJIml$Hzr3LF6UWSbpl&`?b01d`QKxq=xpAX_03)7<^@`9XPVwgPOkvT(3#l<1 zw>N2zTBIUs9xRyIAJ2oiNUUFs^W?x?Y;wFKnpH6k>XJhZrvX3Mbb0BH$o9ZDaM_HXNh?%Qi3LX^ zrbYU)w+^zuI8UG|3Y?PDe>!N&+{xL6Ttnx*dYH%ep=tw)CS~3vxghQKD8RV3!qhVQ zBj^_i)7ZGki|2#sV-5vqg4iDRoCMSxMGT7-#*f-7YdCDSFO4CnKH)(0mhrF*=fl*c ztr>5Wr6a9DrhrpQGc(9Q@1%abo%uRg&NsUDKtkJ(%l61E0_)QG+euim- z?K|T*{$#wBvR5H^N&o0G=RD^hR; zw*#qN0P$|Hde=A~UxWE}?~c$`tGieXzgU2E<+>9xMjYpovXC|;glb-y81R|2_n~)E z37Y3%3ryrRwX$`%6wG=6x+7QZqgjdL^%n%k{tDrV!`rh2?}A0x#+Rq?{SSu|KiZb| zu<9_y5YIGJx+SVHrxOYdR0Rv`lr_o6SgPa?1DJ^G5ofU>tWlN9v|E~^_hn-Ha-K|f z9T&ywt3J1d!06Dt2LNPp=l$SZ;nxShZ~c5f+l0`|=EftaUa*F9HijL={NZQUQ3{l) zxrFYDg$_Vi%^?0zE+C<4tty?aRQC9N*Pn5OC^scHQZURZNxn>N9Hfa#f`vjk> zny%~)O1KJ#_vWux1|2YkNuLT&a;4&}^Q%44d}c;M)#<{6rc3EN>%cR^wzNxwRT|pc zfyvM4cCQ9+*0k78a`Mn*upEbEP%)vmhKIBwtZq5U#}i{i<|bB#eRPN2)kX5ZoWyZS~hr7-zI-Et&INjgr2D={mKg zv3#=@-hu`%85N1FOE7;O&(>DWr?&P7!V>8vj8i7N&`K6K+A^72E+q1@sv;UecyTe8 zif$)+%CtsxJO`+oEH69R~8m6UHD1L8eof 
zVScD)Uw0>3SFBVD@{B+f!$A1d#h42PVd>IRUJqQj{y7IFgNlX}m~dYb>dA@xW}V*x z8H3d+aE5%vOMzKc?zGt`+GZDsFM zZyMvdwmX-iq$+}+?>*L^it%|Ukr1!hCd)Qh(51uyu{$?FS}G2&^1Nc7Mv`WnmItxb58$@v2$h@hKZugvHVKnPLO>`olIVwExFEQ6`L&Ftyb#R8@aJo+bUz%|IV$PW9dF~v)Sp7pC4UURo<4h z8S@+0J0Q%&Af}xB$Q(q4>DkqcxX9c@Yy#pc6LVvWDZw%j7Uu>w7Z8ReW~Ro$#6(DL zAlMw*Ya3CRoWN}WICJNK?uIAx^D=N^Vsso#0UQfxmv5F#fEXTtMgenSRCsR!e`c+K?8&kp`yBbPzJta>svxRN zs6rVS|2x2b5C>2$F3oS+FZ`ZFe>bpqck8@@(l|G^KNJ9kOm6NiXJ&@>j*f-@42zsg zTb!8-TiSn1c~Kpq$o(_33sCo-t{_^#e}tnWtH;T5=Fa=M(r z`eIq~RQ$nwj%R&cbE>My4nzCID<Oqo+;QAu`Bv5k`#}FtKIre{A#V55G3PP+App1l0&Yi7Vf?3licry1ZQVIlo9^@FYV+ql5<0A zDswZS((iwy>4mw4vEj{wKhNKgFz$Psx)dk3IJWTBU%e(rq-Hy?EDB9_?989&b)SQz z`H=h#^k$|eFhBbVzQoHNho=AK>(h0^ySEGAc{)bM-_R?iq%pPj|71-=U+|Idlf(Wj z-jl!jZh$aEhEYRELM(pqXTFcftgLO!jSQ{949twdF}S#}y!sjapaL^uVfRO#D=d-o zzxbs98RoY(IsL-`u64700ZQHC9Q@s%m?IAIzJ8(qg6o-miQwqMR{y&>0$|wiXZ_*_ zq(Ju@y!EZc0DyT9%~iC3W&Uy_QZM-z#Ql-;Q%{lG{BVB%CFjES?}NC2ZvJ8${)o_Q z|7Vq)KDN1q_;wd%=;{?Z4(=C?LA7w4wV z?+l$d3!nOPN@R6$Yj#*O`(9>WBLDq(zjN>WD+Ac*6Mym-g@)Yd@Lp(eY6Lh>???xf zvANEpmzl2S`y*d{*Z)re-}F)u@Td33YXKmT&pnS}=Q%K&PQK1m!dX16-fU2blN1yE zTa6U0FH8kz#OYgk1RW=F3&(h`Z72}!59V2r69dv#Yp)bw+W@A?Q2O#EW^J@`oD*5t zUt3UDeTPn>IIw`j>b24jvqAdzNM}@OT#@ZFgS46v)7s7k6mWSS$)GO&=8W<*41Oug z)F9`%rG;0ly@0bM4ep}VG{WC#+?_57QuE=If}{dp;) z-pUydk7#h&Y9jU+e^-ardMy!DU;w>{P9a%XMe#vDjQwDu03S{gi0hxys|&*rgg#tvu5xr z&Sn0&x%$pEYU+b7dRA|48Fk8Z=NTXfF8Gk)gCxGVz4!V+U*a1+rUY%yLMB;U0bZLsYLTIhqx}*By}|p)-IHx+A&Um7N9MT7*iZV%to)*y5^sjy>le@Jx6VRqiJi@UINny zGbQKXa&p9R&9>(U+!^sf&j%uxI0V0Q@WamHR*`KoX82nttB;_C%To8!`ZaM#C`y#5 z;6xNgUI`4%OV<^Y*HwBx)FQKu5>NCqfkfO6d|}VJt77mMJ7D!j_y7LnF5PYJA-fJr>vz3N@hLDB`$8Y}bF{7;IPaRmpx*IL{f3A#Y(YQ<+zI!NhODY@NeC zqVrf^iT(uy0LR=&*8b=z4%9a%naLq)wM>e=N*aUJHPM|23fHIYH8s)O}u_tFiZ57WEa z{Hsf&ORJSV!pG7cPrE6LBvNI)H~{vehaq>UQjX~7i_V_&W%;f)8Ak=#eJGScC5_GTt30MXqW1S|%1+{^6QAAH z^d4v%RE9(ta}wiA^?ax=CQA)Km|z4ap$d#?Sid1NoDAL;M3QlodBexpa0Jq(0@Qzp zzBTta1RTlqK3rF?-g;-WlvR3?&9y}A_6ZJJ1>$=Uvq6`NstFnKdQ^Nd#ovZ 
zycXn$3B~e&6;dl;Bw@R(g3*G4^^+z~YB>DtlPu>c}dZIRRi6kExYrNp@%O^)3bLuGC(WxC*7hEn(jwL)ZgNxDL*)JJ#BY_D?a=p)j;y(B z#>0_!SLIiu8V;c>)A?qhJONg909l7R|Cfm)QD3O^`V$FtwO~N8g)_hrr=+Q5faH zo=bu-L}cGfuio~K2Xd`_vCopU8#TWsDGGG~J?1!{-s`lt7Nb5-734{~ts2FLFxAtF z-i#hBh+Mt%$71rU^^nhTMdE|={2P@y!eM&+h?%^%2>E87{1ol&T(u;^fX<64_ZNvr zLKW^JH_Pl@HQNRAd<^TAL4@8hrn_TOk8TJXgCVWsMLvH?0*mc>wk?lB!nLS2=EP5e z98(;?*0kgKa#?)eYAF3>$}de~#i$rv(XNxbfrte`BB?b~L^D{;Cv*7D7W#rP<#N}?Snf}etundjn+5rzn z>@qqbCl9QmqdE^)P0u|0j_9QU&N+y_38g=hc7r%qG<9$WXz0ta@k{Jt2bFgbg z(?$%bPo%k$KkABbG?tb8%Eu`DY8T!#DftW<)t?U7wMTNg@&M$YpxYuxFzuJJW;+XarBa&#R!m=)!uA{%k_?8KH>SC>GMf5~sH1 zcra|RW8TPYxL!?g6O->{5Edy~7Jgjx?uAr{WQNJ%X<&ucj*fgHCMrKDrCV5Kg zvnbQD7#?${y_|8;;iae|b;>h?{5kfZ%)W;|*Cvdi&dkHlECNFlP&3pIHN z#kF}yM8wZ0m^--Nf>;#_7mBxlFq&*{78W|(?!NlE%_e=D+PlmE>}sE8u%eF*l)BVt z8QF-RjeuQM4$4RWHxtDkR3j;Dyq^6F(k(ys;a?#8KB7wB-jw#Q^b1GDv<2s`WlFW{ z4JZzjjB>PoYV0@>R7p8_Qge)~r_9ty9faaXK3EwTTir_=_>U=bnP)}lOkpx{5rYKv zou>OnnyU&we(@9j)MFv-rw$UKPVrNl&F8F;m?-n=TohD85&Ry97zD~h`k^28&gj6r zqAbD$-w%{?`dV_urtG81eP_s}VWhum5$Rntw7|B}m1?_V2@oi4&|HgAvv@OxK5{dr zDTiuufF!@=>UV;Ldx0S0?0cr%x5JX~iMU_iTL5zd09N!#3w^Qgr#LXsa{@@{>C>CY z3s}rk+7M-z%Qi4D@QBL+xUAygFWpJn=)H5b-jFHBpvs*am?g{n!8QJ zHS@wVVnZ^9`lw^~0&=G4Xsqt$?iTjap1?48be8x=SrM)-B`$%{nVePTWX0S+H|RWv zuub7=ce#($pb7m*c|VpNvBJ^@KNMl+u&Tw1+U)eF2K$cXT%+&oNnq0z1N6Ni0CM4! zg>OH`6_1g--;dy|oqy9zfJ5s0t$P^aGv`L&KQzOE5TF;_zeL`wfN0#U4x5jRt=Blq z$k<~kHuElymIJ_lC0rltAct8;Dscdc7K~CeZ#`R)&_lB(8nA^Yoy4qdCoJT3{Qu3? z&p3kO_i?-cRT61+vP!&G(uRB3m{L4ruXD1qEH|eZi{cna2d@kZ71!F1o!PGyfNA|< zd=Ax_{7Hd z(bfzPO=qrnBQ~!ZCPU6~p(LHZyhz%%FT0F&#-ze9k5y|FC7<#4 zRXro&)qYgHg-HK$ISZ*@sALvtW1*uG7GC4M47p365OYbO`^xTd!F$Ceg`+$`J+u(R(4 zuXQuilBBaFJ?E*Mnvp?0{X79CE%qt~a*`8Zq_h#lb#$dSnmfJhL|^lk2|2Oo0xhkb zp{|@6v;VYHMw&TVA}ktm5W-Oxpr3Mtb$%DM+bqKAgR8<8lym{$&HjrSb9}t*4`j?) 
zww3W40cB0b*8ZY4Ud#Jsp(!S!Wl*GK*~$h6WEvxFa0{jr0uJeId=nSYGA5K5&hII&u=L^*>~b0^nQ*=o<1z{vX$okZU?uIP5m<96P5NVW7r4&vp;>`&!(*s$tBU`UIhtCqJHsu$C#{OnN6x%{|@+OUU z;(Qimk5vxEI*Fpr6n8{u$)OH>@-~@tXecB7z`^z)6!=VcMGo#Of%F;sCB@6@aOjX2iKY1uoNHqZm3i$)!)KC6iZF) z1v`zl2DfDr?tQ+D+OR~#s;L<^|AtK2<~_j=%E^BP7XmG9_FFd{T{CwTv|!y+j}G*i z$0+}O^KRv2e5(=0Pb%0z_?mi~yEt#P23>O;X(VbA6;5 zwxkzf1S-8xsE|uNtAK<5#9I?H&2Yu>`M%SeOg%oi*E@Maln!5Ux075+;4rTuNZsg) z%rVBm>iLG)HfNcvL4<&Tf;xVa##Fl19yGCh+=;i$8vi3yC0xpd?{Ou2%R7 ziPO@BB^!ZXr0p%7{u5VUu!L*0;Q_ayjY?Dp-`fs&R-8T^zn*E3O~5p>s2y0)*fog@ z+Vf3!`#@Y#*4P`4!gMXX%(!kn6uVzQHVSaD z3x|%_X`Fqj(7>~zFR=ltlA(q1lwBR!?I2PE#+lm5pQUwuRh1@Eza(%guo1>FtU@v` zkhJTyOH|-zxN@Sb{^$$uZZU=yH*8VqmsYQ7Zetp|P$9T~sH>*2w(k9KM-hm=mhs46 z{k$9KMqi49O`nzOEQaSr?0|Tlm@>TU6Lq(!Lw7_rF=&L5}fz8K2zq7S-lU37fp_ z0Qe{%f1Vd|Dc62^ATRDYdmP_;Yjc&7T4gB(3_Qw`YSsA(J^4_Zi1;t7ej|JE?J&j6 zr#&g7yyxtVpZ3g~_-1D~HetsQEA_R>gC=9X|m z&vg_rJNaA3PJ+kGv^@VyvkfcE6XbhcRn`M2>1?}q&eZqsL{01LT>rDEd-{d1GW)V9XNG)RHvnu*RTsE zk7Tx2OG@DSuF(QAVjfBx*!UkBLESc%bOH^(yo9Vya*em&i09Ml0DL<0t;2(lZj-3A zRXfXn)g^e*=hA=^cA10}`ZOnzp_{8C{w*i!a1+`p4Dv02L;dFzvse6FMf-)mT*slg z;7yPehC2H?pqooEbzzawSZy8&)5$V?AN@ZXw^M%`dW&u`cysVkXg<$D7g?^u_=={@ zQL!o)P1MP%z6qxMp-d2z{X?k@O3PZTLpdSU#lscq4*#s)ORnD(7StS5viq-pT5x2O zw>N(W=@d{j&2t-_QGhS9W)|Z4Va!cKmPN{zAmS%DTV{?5!7;VJ`p!w~^vA|XsD7yKE%=76^8zdZvm6leo728i|JnZwP`+i< z1X8@XwEj-5&MyLee;w(l74Cb2j+arCUUat5Z(px?sESInPx7)H9@o)`s(YG9Bcbhb z_L#cE}bPGzkv^6pD|9<1qe?2p-OESNCK zT`{)I$%|h`aPYb>kBo$MevFpv8;0}#4@78Pi1=F0Y0=~yHVw9|cy|JyY95P~--9AGk|X?j!!=*O}yH0MjtS z0ll$ef|ul5tJp<$aN-U z1^%B8Um#`Fli@iKQ|$-qZl6e0``EnbzxsLa?B|HH%SKC5TE)rfx=>06HSqwRN>tjc z5Ty+|{EGgqx1Rj%ZM(g^d&olSJ`Gd|sFO{R3V_+srZE}QbJu`UWJFe-u4@gaIZi`Dr@lus6z6EuMJ^j$VsR z{dowr&t!oLl3V1xC}P)DaYS&qSTl;OQ}x7`Snjp@S&glN!YCfN#^!YZo?-+MUGYGP zSLZGtIc${6R5aSnFu|0C=~*L5)g$p0P@o&9D>2O}ItWYslX^R=p<{rmUKK-{&s_6} zQI;hqTEMsU`ldt_sHm=2hZM&>z1rtjz&rv7U{TRqLbqYr}ONIh()+!N&T@&Jb{CrmoZ=F|+_`n*`W2xAxFI>#X!uzAw z`bd4eX*>8c-04la-gm#6??w7Ts@eo7TIm^c%AdZUX&~H|pQi6(afi@7y);yFU>(75 
zL8L&IbyPQNNbU*>%3qd5>fNn!^vZAe`c$=OKHB91#gUY&1RJOlESqFQ3RWxvmMe6v zaKk)3d%4*D#bvI`>*CB?0z={4Y-}!xEgzU*Xa~uSML!|z{bgHlRdc(hMbkt0OVJ4; zwwQ%;FARqhQ>8g__(Q3S_fTJc!kE^H!Uq$??5hpx2UIJBi=&d9!jUI9gj{*ILN#MJ zg%7EV`1p$YZLMKcnF~MfGK-33FVlHDj zAiio*OK(e8s&C~{y7yUi6f^Q;OH)Hw_utR2qsT^rVna!xq$EY_-r<^Nxlc-dp#y6b zB2l|`eqXLkbTqqb=w!rnL39pKZshJVGtKQyBS7sz*q%_4?%DUV^TTb91L*PrNl`i@ zMOFX8boqdA-W56OIBIF>lAacG)+SRoe#afpZxd_E1t!z$*ye4Bqpzps(J3Q%%R+sc zUmf1PJc(~I@Nm zfYL%|Ro!T9S8a{8qDP?NS)SU1lK$qkGqzfG(!CNxxiGWA4H^XMByqtIpX}A*flf0& zz^|=<1zP?i^`p5m+;)pS0<|9tlS8vk$nn-%!uQO+R7^0xG&sqa`*vp3lO!E3evVS2Y#Oal9A^+S=A?D6lL1#6eLZHVd+}+bn-P{-W2Q}!P_Zo=y*;(lMYdil8RX>SK^k z(9Goz*WZp+>ICt!L|E$A0&i{QPhz^2q!=sv_wRtCXQBN@vAXH(8~CpX=UfnUOYm$U zADW5yU3(wsfb!4(blP?sGNYftnBG4~eEaG8MxPae_Vvt|YIF#aCKe2M(RNhzsdW*_ z+$vl3SJIcY<#=b~?n1m|dbq}>A6Dpoe>VQ?Qtie&EQdN1OX^sshIf!Ec>9Q61~p!> zIfxp{igfK34GwL7>A;K!LsZVt2m~JBmQP&5{@4k61?=u$#^~m66V}#5S`BS@PV_9d z@ZsiZ>fJ@(=kz5H%lo%b-oF6&=!sPtj2>HqVp~_#9NL%doIxU4CpEV&f_*&TGN!`u z{X==EX|KD@M0UNb-DI6+a?I}2SjksdW$gOmJRRXcph(qBs?3loPOF(Dpv(Bt%Dn2& zHts1}qC|GZaU4(?s}E=?T(G8DZBco%)*ywebWxYBit6GiYnjB30 zIs`swq9ztLBQU*wG-2iuK0nFA^Aqb}lUyD@4;*86+@6&xEJ_{WkcMY7^RTCb_yFEA zcOxiT6|_zFQUGu2=qux1J_a~QQx*u7KafA?~88{5KU$oJq2-RK9AMAADtcObHm*)eGO&TG#4HWhu+h(R$vBvS)r+Xh= zV?8cRqCxqG_1WoTF>-`I#Kx1}ur&%8mQhx*O$33NzRgj-TjK?Pg{Qi`mXz5t@Ef`} zyvC`&&X0C`r(gUq-vRe@-o|{vqJM=t03I?u7P2?pn9(0qH3}#DvO!J6#3k*otuU@o zsp1Vm!EGOVb!ILymxAP3<9nCrM_fhE{nNhGmA{2aD_$0d&|A!{e5K!CzXf6AdqF`B z&G`4qBNacDxc|Y$1rr6hlr1@5przsm-pas8WLhs?!8t@4r#*f(jmB2|W;ASi#AN?9 z=xmZrzYSEgV|k)p=$3K|wO~glYkDnXdhTi^lg9*X6R*Om3W4-@LEJ-e2^h=~En6eHC z!zS8^Ew_ExRqW<+n2SsTobM0c)qW}_3jmUdF(_a|+W@&NzRV`l=LdiF2AYe4tFc;# z;7m2EdSRouvRe)J*Ve=63-|!J2Z5MqjrqqPOn>PpDuK-bmH*S-M_C~`VckU^$~i>P zkoe&0LF#b-p8Fc=Ml5rh4fO(*@=SQoA0>)79o^K9`{|+1 zLQQ#@L}E4B#)Yk5TdU8)Fp{4yT3L_>)Md_OeK>q|=Q-s{`kf~trlYc2S%@A!^)r;Q z*5GLz1(iA7+ngwoe{+?0;$%6`oxRn^oVX`GiP$2Rk2gn7>jhIF6E?_cp-(x#zkY+v zERPWRj1?!xJ_X!oIc?w{as<&k5S*(8crzkLsOaqgtomG-r* 
z3$J~V=g%u@66-nMWcoGTNE|vdJ=p9~DW-*zAdNpr&TP04gkVT7jhRlCNG;-F*V+{v zEzEv_$8it&ojw8+esLOAv?lkp@QNL>>N0u$5E>Jxz1Hz{w25rn2abBS!tZWk^Oxbe zF;1nq`o&O=2ILT#?yqoUlDDJcBpc3SHJI~hdmhA@uSnPi8FdgNxce9oJ4v)CB3-J8si-UZRlDXDfeeKRR7j1%)ONfJFO!_YuLyYlP1 zy-c-!V{Ry1#+{}GZ0!5z*d!L#YZxiUMjx0#EoiA=oIE0lU)Jm2PnA$sDC`T?>?-I8 zh?Z1E+LLQ-hC645`URXV4hLjvD)V9ADSiFq9*d8ctz&Zb1jX z``{J>7pJ58-OI0u&TlQaikCSfK!srgtPbg;jb~=-y3r<5V71B_UyF50i1yxq1HDB5 zjX)D;D~X1t{zTD_&>JM9;qOUr!;cK8G~kTtV>I|_Te-MgvF?bvsOIt1>y@`#IT~b; zB)3Ln{5HBdL zP&R`3nA+p(PH|XtJaUqZIlHEc2am3Kv+%>jH$JI2RWZNg;V#w9nYaOz*ap=D#nUp} z0*uc(nCIwPB{8k1`$JZ(AI-UFUcS>n?-hw&Ky*jBwmy*Bv2t1^j~Szme}GawVVhWhRGV9KkoO~PoCof1jS`cGFADpl?3 zPD|Av(hJCS6RDj=HjIKp1!3v?zyOE)@}8bN9wr%pu*PoKuU!?hDx48v)?3kk(Z(I7 zWdg%abBGDt5>W`eWB1`{7aWJiWN&Pd;WM5o39pq&ReH6&>04lnNel}Z<&4U-@SpkT zMhC;kBmZ8+wm0#Y4^!+`CpmB)yZTcAm3^uA7Wa2~!jYtk)4QZZG8_tLRJd%WH3V0$sQ#;AZ1Ork%qi$`($Q zV%(J=6X_*f@ka|f^jC$}X1^n*Ko2MYl-V?CL6t-VZwQQgE7)7KBo}@2{VG{m6g5+)6shc{`!rO;eIMe4_v< zFEqvxDmL*UT@ra?DoPxdLPx!;mOy|XEGl)NBE6_(GGCE) zmJf9vX6uCWdqD6K(~ht*Lm)SrA}fj&%@O;~lI$mJZZB@%{Z#^|aTJfmb4hMMmNQLc z<+@4Dy=H4vR&z>nr+y@itq&;#<&Lemth{$QLHizqd)%m>#dw3CD`#PI0r%uE)%zJ3 z*Y_K7w&Dte)b8QWoAAO!J2P+MATRgNzDf=ru5|aVOwz3x88E%{jJ5jL%&fv=W+_d{ z`Yn>SQnUgfv9@ThM1qA3=)o(VUU?)jtra+=)|*SPGi!z-EsFLYPuWf}jt8N5{_(~D zZ-QuCZVC36ttu266Z{9WL8c)^2vA z%yo!c)fqdOg~B?+H-~)G;vJ|B`GEyWfNa2a16jL8{s$OP!GgmdWft=H1W$U)af8t6 zLfm3uEb99W?uvU#!4m6%%oG_(QnQ0GSBN@}hu*ADJ!&VYGdMA6OtLdz<9??Sjb=wy zc39PWyo`(+TcmTdSQ5BHbd0Y>S#}4EDVvdr^e2$MYaI$p(QQ@TSjTjxd+wq+KFD#) zyRMbvczD-GyL%M$YksP7;nQ%@K1N>+J-9(O{ZXtBj1X(cc^PRHyFv?{+bK0(HCPq| zHR2If;*v8a0in5RrLjkzYTc>VzKQwGtp0Mcs_FiA=^6rO9F+DFlMeYMY)&ja8TI9- zVYnO`{h=U!1Brg81GA;e5^tRehYG@r$ltzl(ZcY9T4o!>+yUE)vbxLfFGIaogJo8L z?6Bl4NuB4!G$;(uD_H-M)u;7t+E1+hOW*RO?t>qCFr(oD4X!P!?~G!zc*N_gljwA5 z0cB~$W({|k>c0NuJhy#BN3Hkfk^UutJ_CS+>vdG-;M{m+(PM7qO{Y+1-t|z-#8;fo zD`9OjRGafd0b`tWqnCDV1VJ;H61;LyC^I9xgDv7^VMR z3bGWr;(T1JcegAr6SF9UAhc7NGutWj6v`wH0(WkVC^{v|H>X~RlkFx+Z>L*PnDtTi 
z83Ioja!!~}m0j4BDtD6JY^UA-5Dn6fG}VFyZyViCCq5#cu0Uz|6tP;mZtl zx_^I7;X;S77xIe@N0-uK1O;p9hefbKS{b0L6|D(#{B(p)AxxLF|B4tPIg(N*;DkP= zCJeCeIG(uJbd(>-@O-=JP=XJ`_>aH7Fojp!JJ5+zmrJHhKReRmSgNG^@@15JwHDd! zaB7p4l*uSIW3;(904dJFv2$FnWWT#$awsq6Ef8Bff6oxA<7qMjb78=TuaSJpZ>YQP z5%lm7qYXg|;d0Fm8<#?8yp)}!A3xc^AhzERfCdS$tXuZA#r$u{tM-tT_c8_%z4_1V_y6YX(VAE)VFTk+s;LSxst{%%$ZQ1K(rK=$NqInQ_g4O1g!X;%i`o1GRP0dd7(}u)hYOL z5c~ z;l6K;7)g9gRZ{<+SQ6~XfB4xsG;MK`eB?CkZPqhfEnJ7aOQQ8LhMMM#kvKTm-IU_C zf1~sw@8ZTaekkRO@LHvgp?<7LFQ<^s0V5o7^*P=i-;5=87Q5!=GDf>oz)9uChgp> zl~7v|xr*wXrbVl(1c_~n#zhF5mwR}cP|IE)TpCTFH@!jk-A#Rh;pE{dt$G)ae+NVn zoBv?1QtA64K1Z*oeT{H3U&a%JGiYj`)WU0YHhGAeWwy3({c07Q0j2k;{pLjpj~~KZ zZ7D~+T+kw7i@;YJ#9&L%=JP7`Lbb>l`vg8vX2fbvA_=zQX=IlgSCt|fN0G$&G)X4n zqA$bb(a(NYEvpvkV5648vpY-u0AEt*4!2IJ^+|BdsZ2nYT31##&eU|O0SH@2G ztLw>Wl|F7{yll@-n=8jl>vz6cZY$jj1f^8C5dHP*cxt5$ZN`Bw9uc70<;2_h9|5+i)O#;Auw@ z3)!o>hCu<`mtuIX{rGx)l-8{Sa7^(I5_tbU$duh=!^0xpF_|n`FM5Jx0_MM=nwx7; z>EW9}W7N@YbjI&9`3*5-(cCFVuP+~>3q0t|seUZ?uR%UFc+^OuK?$SnqaMdJ?_L+R znDaGbta5RZ$uC~&yb6*-wHxg#2jYX2&Ps)Q_U2lFX#5}{0dH;Nq+<$kybu^R6q#qC za|A}NDrPe}FwI{Wc53*VGWnFwMqL9fcX_Gv4qc$SoVqLtgb>z-LVJ{PB5zxkHJX3& z#uT>AU+S57r-?`bnk4zNa_BR$B&WIsfqS>OybIr%TioX@1_7~RqSi1uEvDQI8}^P? 
zKN4ZUW%`DAI!U@Ys=%b9*9(GVM}W^lsxadB$=ZZXz4lFN;A;<|U3r8u1u`M$23~j2 zIK*&B{Dzkr^K`H6Zt8$rPGm*n6+D$yWa3KOw@Gvn6nI^$5Ug;DJF|VuVd`o$RZ?ExbT(5I2yatn> zpY@LLzxhtx{xfC29?s{x77YY%w@@tbN>VkB<=L;oVa0C^2&w2<97_!cF9^2}PEzW= z+Z385g?75CXfQI|1JsI5zqguyN9>2R>(XlFen_I~6pe?sT3tkzxwsiYSfJ=w~GNDB4+M?$N1Sg<>9^Xt1WR6Ot)RlFYR8Jo5x?$(0M1 zw_7W3VY1XUjVn5gUB#cu<%A!D=p6jwj%VO=l291LUh`2m_(O9^C;rJ~c+ER7(9snP zejN)Ef%z+_`*S|sZCz@TESS#K=l@jE+laB#k_T7)l~nL45#{TN-#Xq;#C2oioT|uW zD#T3V;``)m*GqH1si;J2SAopAi7tYPxNCl_bSW9CMEJ2|85}oVxi|3YiI1zpzKqC1 z5z_Yf;C)+<;T*)0)lvHr7R<(nGk?9>$8;YJc_DB|X_={ML-FG_RK|3qfkvATrfuf~ zK^#rhmZ!*R%-m2>PkTTXiM!)A5HZezSahmOwSwGNVr^C_uBiDYhmD|_oIV*Qm!uj^ zIkVOIa3+V3htJ*`oB@X1n(h4o84OzD6)Qy!(!`YS{QkAp91iO7JaQ5uPpQlKz>_A2 z@s3^Njohijkt`MUlN*y-WUgA6Bi_r=wxRZgL1*GwhYja$o07Dn*51t6TGQhN6 z=ckpt6$C1Jb?w03O|?3@!cyT=pp!>V7)G8hjlB2d@4d!_%Yz5ttjR+9W= zaTLa{drwE8{(N&#z1fqRg;>*XEzg}YwTJf zEjHQQc2Eu^MciT=28oO1xzWfffbnogE#&6T?nuTJ)^CilO@#@+-Pmzn#mcwOD6P_J z6RD(D8yfoAmzAHgN0fmgSgKD*1s5qIj0vz#!?0p{zzf<_GN$)*O@$bk`gGug`I9Nr zg&5jjhuj%}lR(^Bq-b?K3K|w(D0>f=OR@<|4|9H}r>*i0$R=cR%Z?HAL#U1HN>J>lk!KOlJll=CKLtf4OoEZ2B`s_n&+^{{(ibjt}07Ftwav^SsUl)oPGY z|G&loWWlNohS8wQ%M*ChbqFf1^`ewY%f=ihdC0Er>u|FD*PMwJ$|Nh`n%a&;ZceWo z_n%|69=&4dc{AI~aeI5oe_FGr8`eC5EXfH$B8r##A6HJO>jx~M(ATIi74@z-$hVTU zx@F7l5(5=JS3wKUF{U`V#Y}PWmOkyESPP;I+8d#<`O38blBr=s-#uE!3t#Sh{0Iim z(3=)$Zz+y4VTl9X{*LLWA64>H2S(a>roNOZ9)8@|3<*z(5xK*~4GG64JvAhkN+R^u zo?&uJt=dPBhb}eBT$^FdIuj$4^~XLOTJtIUNEmL2o>07;m(zX>y!+auB^;1cQi)1; z;bh|F(-;8OgRfX!XzqE>V5z~>f@t`i0n1%l%=Jcy{vk_U(cMBE)RYr=AT(*$aLgX^ zEXKOK{4nH{j6f6C4nK2wkVH?bmS#K1o!F8YHV;tPei5i@sVkZO!On9_t=Epfl*Pg! 
zPjNSG0MzUdCBAe?C#7E`KJ9>exV~r5U+p&>*Bd5ubDQcxqS{nPJ!wujUz2x~?QS3= zz0;Ydn3%4?jw!M)1zeOH-~%+!kp6unt9`qKNQJ75S#L-VR`Q98PmKffvv{iRQ+9(YRMwZ_swsahYne0_X99<@D9^Aw zHS=%}&FiStUV)&()>rFo-8n=P#!E8yJ=(}`IZCgu|0<3w%@q%m8rr0US~1rs>UH-; z^Io6B)ORVl${dx3Uxoe!fXfaRk5WbVop%7F+X#IsgAJicPu@^C&m_rw2?S+v$A|kH zNgISBq1n8~(eW9h_tc(;^?}8IR0Djr1!T%He2zMhHV9!Ql9+f~&>mNgBO}k(GKEukW5gJk9cn5T- zKdvQ3tgomwO8i73!l^y)Ak>>O_W8gm^J;eUk~=a&`X*jAsniha5x3S2DgjcAkJkQz zu)LgjvQ!r)CQqeWl%L0Jo5l3b`GobQ>reE}9(vQ{Da4%GhX26DP`w}BNNn$YQtVXG zc9DfnoJUM#d>KYi3*%cyR@Bk?$Ykk#8v3qKC2Ep)V1dI*!AZ4;j=ZBFRqZrbHeqE= zzCt7h(Q*(-4|TVl